maxiaoshan преди 1 година
родител
ревизия
cd4e02427f
променени са 3 файла, в които са добавени 51 реда и са изтрити 1 реда
  1. 41 0
      src/spiderutil/sortmapval.go
  2. 9 0
      src/spiderutil/sysconfig.go
  3. 1 1
      src/spiderutil/upload.go

+ 41 - 0
src/spiderutil/sortmapval.go

@@ -0,0 +1,41 @@
+package spiderutil
+
+import "sort"
+
+// StringValSorter holds the entries of a string map as two parallel slices:
+// Keys[i] is the key that belongs to Vals[i]. Its Len, Less and Swap methods
+// let sort.Sort order the pairs by value.
+type StringValSorter struct {
+	Keys []string
+	Vals []string
+}
+
+// MapStringValueSort copies the entries of m into a new StringValSorter and
+// returns it with the pairs sorted in ascending order of value.
+func MapStringValueSort(m map[string]string) *StringValSorter {
+	sorted := NewStringValSorter(m)
+	sort.Sort(sorted)
+	return sorted
+}
+
+// NewStringValSorter copies every key/value pair of m into a new, unsorted
+// StringValSorter. The pairs are appended in map iteration order.
+func NewStringValSorter(m map[string]string) *StringValSorter {
+	size := len(m)
+	keys := make([]string, 0, size)
+	vals := make([]string, 0, size)
+	for key, val := range m {
+		keys = append(keys, key)
+		vals = append(vals, val)
+	}
+	return &StringValSorter{Keys: keys, Vals: vals}
+}
+
+// Sort reorders the sorter in place by passing it to sort.Sort.
+func (vs *StringValSorter) Sort() {
+	sort.Sort(vs)
+}
+
+// Len returns the number of values held by the sorter.
+func (vs *StringValSorter) Len() int {
+	return len(vs.Vals)
+}
+// Less reports whether the entry at index i must sort before the entry at
+// index j. Entries are ordered by value; entries with equal values are
+// ordered by key, so the sorted result does not depend on map iteration
+// order or on sort.Sort not being stable.
+func (vs *StringValSorter) Less(i, j int) bool {
+	if vs.Vals[i] != vs.Vals[j] {
+		return vs.Vals[i] < vs.Vals[j]
+	}
+	// Equal values: fall back to the key for a deterministic order.
+	return vs.Keys[i] < vs.Keys[j]
+}
+// Swap exchanges the entries at indexes i and j in both Vals and Keys, so
+// each key stays paired with its value.
+func (vs *StringValSorter) Swap(i, j int) {
+	vs.Vals[i], vs.Vals[j] = vs.Vals[j], vs.Vals[i]
+	vs.Keys[i], vs.Keys[j] = vs.Keys[j], vs.Keys[i]
+}

+ 9 - 0
src/spiderutil/sysconfig.go

@@ -38,6 +38,7 @@ type config struct {
 	LogLevel                  int                        `json:"logLevel"`       //日志基本1debug 2info 3warn
 	DayNum                    int                        `json:"daynum"`
 	Modal                     int                        `json:"Modal"`             //1列表页三级页分开采集,0原始采完列表采三级页(7000,7700)
+	PageTurnInfo              pageTurnInfo               `json:"pageturninfo"`      //翻页相关配置
 	IsHistoryEvent            bool                       `json:"ishistoryevent"`    //只有7000为true
 	SiteType                  map[string][]string        `json:"sitetype"`          //网站类型
 	SiteColl                  string                     `json:"sitecoll"`          //网站表名
@@ -129,4 +130,12 @@ type dbInfo struct {
 	Password string `json:"password"`
 }
 
+// pageTurnInfo groups the page-turning settings; the config struct carries it
+// in its PageTurnInfo field under the JSON key "pageturninfo".
+type pageTurnInfo struct {
+	RepeatPageTimesLimit  int `json:"repeatpagetimeslimit"`  // upper limit on consecutive pages judged duplicate during unlimited page turning
+	TurnPageMaxLimit      int `json:"turnpagemaxlimit"`      // upper limit on consecutive page turns
+	NextPageMaxLimit      int `json:"nextpagemaxlimit"`      // upper limit on further page turns after the page-turn limit is reached
+	ListParallelTaskLimit int `json:"listparalleltasklimit"` // upper limit on the number of parallel list-page crawler tasks
+	ListThreadsNum        int `json:"listthreadsnum"`        // list-page concurrency count
+}
+
 var Config config

+ 1 - 1
src/spiderutil/upload.go

@@ -57,7 +57,7 @@ func Upload(code, fileName, url string, bt []byte) (string, string, string) {
 //返回下载链接,文件名称,文件大小,文件类型,文件fid
 func UploadFile(code, fileName, url string, bt []byte) (string, string, string, string, string) {
 	if bt == nil || len(bt) < 1024*3 {
-		logger.Error("下载文件出错!", code, " upload file "+fileName, url)
+		logger.Error("下载文件出错!", code, len(bt), " upload file "+fileName, url)
 		return "", "", "", "", ""
 	}
 	bs := bytes.NewReader(bt)