maxiaoshan 4 лет назад
Сommit
ff58febbf5

+ 68 - 0
src/config.json

@@ -0,0 +1,68 @@
+{
+    "webport": "7100",
+    "mongodb_spider": "192.168.3.207:27092",
+    "mongodb_editor": "192.168.3.207:27092",
+    "spider_dbsize": 10,
+    "editor_dbsize": 2,
+    "msgname": "爬虫监控中心_7100",
+    "msgserveraddr": "spdata.jianyu360.com:801",
+    "msgserveraddrfile": "spdata.jianyu360.com:802",
+    "editoraddr": "http://127.0.0.1:6011/spider/infos",
+    "working": 0,
+    "chansize": 5,
+    "uploadevent": 7100,
+    "logLevel": 1,
+    "isdelay":false,
+    "daynum": 6,
+    "modal": 1,
+    "ishistoryevent": false,
+    "tesseractadd": "http://test.qmx.top:1688",
+    "testdir": "res/spider_a_zgzbtbggfwpt_zgysgg2.lua",
+    "redistype": "0",
+    "redisservers": "title_repeat_judgement=192.168.3.207:1679,title_repeat_fulljudgement=192.168.3.207:1679,title_repeat_listpagehref=192.168.3.207:1679",
+    "word":{
+    	"keyword":"(抽签|中标|招标|成交|合同|中标候选人|资格预审|拟建|邀请|询价|比选|议价|竞价|磋商|采购|招投标|答疑|变更公告|更正公告|竞争性谈判|竞谈|意见征询|澄清|单一来源|流标|废标|验收公告|中止|终止|违规|处罚|征集公告|开标结果|评审结果|监理|招租|租赁|评判结果|项目|遴选|补遗|竞标|征求意见|标段|定点结果|项目评审公示|采购项目违规|采购活动中违规|项目行政处罚|采购行政处罚|项目审批公示)",
+    	"notkeyword":"(招聘|拍卖|出租|出让|使用权|资产)"
+    },
+    "oss":{
+    	"ossEndpoint":"oss-cn-beijing-internal.aliyuncs.com",
+		"ossAccessKeyId":"LTAI4G5x9aoZx8dDamQ7vfZi",  
+		"ossAccessKeySecret":"Bk98FsbPYXcJe72n1bG3Ssf73acuNh",
+		"ossBucketName":"jy-datafile"
+    },
+    "redishosts": [],
+    "fileServer": "http://test.qmx.top:9333",
+    "jsvmurl": "http://127.0.0.1:8080/jsvm",
+    "luadisablelib": {
+        "baselib": {
+            "print": true
+        },
+        "oslib": {
+            "clock": true,
+            "difftime": true,
+            "execute": true,
+            "exit": true,
+            "date": false,
+            "getenv": true,
+            "remove": true,
+            "rename": true,
+            "setenv": true,
+            "setlocale": true,
+            "time": false,
+            "tmpname": true
+        },
+        "iolib": {
+            "close": false,
+            "flush": false,
+            "lines": true,
+            "input": true,
+            "output": true,
+            "open": true,
+            "popen": true,
+            "read": true,
+            "type": false,
+            "tmpfile": true,
+            "write": true
+        }
+    }
+}

+ 375 - 0
src/logs/spider.log

@@ -0,0 +1,375 @@
+2021/05/21 10:03:11 main.go:71: debug  7100
+2021/05/21 10:03:12 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 10:03:12 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 10:03:12 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 10:03:12 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:00 main.go:71: debug  7100
+2021/05/21 10:04:02 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 10:04:02 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 10:04:02 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:02 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:04:55 main.go:71: debug  7100
+2021/05/21 10:04:56 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 10:04:56 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 10:04:56 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 10:04:56 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:05:04 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 政府采购计划备案表.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210223064058/4127608957433325400.pdf
+2021/05/21 10:05:08 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 政府采购计划备案表.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210223064058/4127608957433325400.pdf
+2021/05/21 10:05:50 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 政府采购计划备案表.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210223064058/4127608957433325400.pdf
+2021/05/21 10:05:55 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 采购文件.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210225100335/-6345194532830281714.pdf
+2021/05/21 10:05:56 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 采购文件.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210225100335/-6345194532830281714.pdf
+2021/05/21 10:05:57 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 采购文件.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210225100335/-6345194532830281714.pdf
+2021/05/21 10:43:32 main.go:71: debug  7100
+2021/05/21 10:43:33 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 10:43:33 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 10:43:33 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 10:43:33 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:43:50 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 政府采购计划备案表.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210223064058/4127608957433325400.pdf
+2021/05/21 10:43:58 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 政府采购计划备案表.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210223064058/4127608957433325400.pdf
+2021/05/21 10:43:59 upload.go:60: error  下载文件出错! a_zgzfcgw_cggg_gkzb  upload file 政府采购计划备案表.pdf http://www.gdgpo.gov.cn/attachment/202102/587408718056347/20210223064058/4127608957433325400.pdf
+2021/05/21 10:46:23 main.go:71: debug  7100
+2021/05/21 10:46:24 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 10:46:24 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 10:46:24 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 10:46:24 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 10:46:38 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:39 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 3
+2021/05/21 10:46:49 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:50 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:52 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621353600 $lt:1621440000] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 1
+2021/05/21 10:46:53 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:54 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:55 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621353600 $lt:1621440000] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:57 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621267200 $lt:1621353600] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:58 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621180800 $lt:1621267200] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:46:59 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621094400 $lt:1621180800] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 10:47:00 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621008000 $lt:1621094400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:46:24 main.go:71: debug  7100
+2021/05/21 13:46:25 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 13:46:25 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 13:46:25 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 13:46:25 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 13:46:38 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:46:40 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 3
+2021/05/21 13:46:49 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:46:50 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:13 main.go:71: debug  7100
+2021/05/21 13:47:14 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 13:47:14 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 13:47:14 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 13:47:14 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 6
+2021/05/21 13:47:24 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:25 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 3
+2021/05/21 13:47:31 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:32 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:33 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621353600 $lt:1621440000] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 1
+2021/05/21 13:47:37 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:38 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:39 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621353600 $lt:1621440000] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:40 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621267200 $lt:1621353600] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:41 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621180800 $lt:1621267200] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:42 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621094400 $lt:1621180800] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:47:43 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621008000 $lt:1621094400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:44 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 2
+2021/05/21 13:49:46 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:47 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:48 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621353600 $lt:1621440000] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:49 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621267200 $lt:1621353600] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:50 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621180800 $lt:1621267200] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:51 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621094400 $lt:1621180800] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 13:49:52 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621008000 $lt:1621094400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 14:19:23 main.go:71: debug  7100
+2021/05/21 14:19:27 main.go:71: debug  7100
+2021/05/21 14:19:28 spider.go:75: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2021/05/21 14:19:28 handler.go:133: info  高性能模式:LUA加载完成
+2021/05/21 14:19:28 handler.go:139: info  总共加载脚本数: 1
+2021/05/21 14:19:28 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 14:19:29 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621440000 $lt:1621526400] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0
+2021/05/21 14:19:30 spider.go:111: debug  code: a_zgzfcgw_cggg_gkzb query: map[comeintime:map[$gte:1621353600 $lt:1621440000] spidercode:a_zgzfcgw_cggg_gkzb state:0] 当前查询数据量: 0

BIN
src/luaspecialcode.exe


+ 101 - 0
src/main.go

@@ -0,0 +1,101 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"luaspecialcode/spider"
+	mgo "mongodb"
+	"os"
+	qu "qfw/util"
+	"regexp"
+
+	//"qfw/util/redis"
+	"runtime"
+	. "spiderutil"
+	"time"
+
+	"github.com/donnie4w/go-logger/logger"
+	"github.com/go-xweb/xweb"
+)
+
+var timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}") // matches a yyyy-mm-dd shaped digit group; clearLogs uses it to find the date inside a log file name
+
+func init() { // reads the configuration, then sets up MongoDB, Redis, the message clients, OSS, logging and finally the Lua spiders, in that order
+	qu.ReadConfig(&Config)
+	// MongoDB handles: MgoE is bound to the "editor" database, MgoS to the "spider" database
+	spider.MgoE = &mgo.MongodbSim{
+		MongodbAddr: Config.Mongodb_spider, // NOTE(review): the editor handle uses the spider address although config.json also defines mongodb_editor — confirm this is intended
+		Size:        Config.Editor_dbsize,
+		DbName:      "editor",
+	}
+	spider.MgoE.InitPool()
+	spider.MgoS = &mgo.MongodbSim{
+		MongodbAddr: Config.Mongodb_spider,
+		Size:        Config.Spider_dbsize,
+		DbName:      "spider",
+	}
+	spider.MgoS.InitPool()
+	// initialize Redis
+	InitRedis(Config.Redisservers)
+	// start the message-service clients (the second one is registered under Msgname+"file")
+	spider.InitMsgClient(Config.Msgserveraddr, Config.Msgname)
+	spider.InitMsgClientFile(Config.MsgserveraddrFile, Config.Msgname+"file")
+
+	// initialize the network storage (OSS) service from the "oss" section of the configuration
+	OssInit(
+		qu.ObjToString(Config.OssInfo["ossEndpoint"]),
+		qu.ObjToString(Config.OssInfo["ossAccessKeyId"]),
+		qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
+		qu.ObjToString(Config.OssInfo["ossBucketName"]),
+	)
+	//logger.SetConsole(false)
+	logger.SetRollingDaily("./logs", "spider.log") // log files go under ./logs; clearLogs later prunes that directory
+	// load the spiders
+	spider.InitLuaCode()
+}
+
+// main starts the background workers as goroutines, logs the configured web port and then hands control to xweb.Run on that port.
+func main() {
+	// periodic log cleanup
+	go clearLogs()
+	// initialize the spider service
+	go spider.InitSpider()
+	// clear counters
+	go spider.GcCount()
+	// memory statistics
+	go heapprint()
+	// query list-page records and collect their third-level (detail) pages
+	go spider.DetailData()
+	// scheduled task (this is now done by the editor when it creates a task)
+	//go spider.TimeTask()
+	logger.Debug(Config.Webport)
+	xweb.Run(":" + Config.Webport)
+}
+
+// heapprint writes one line of heap statistics to standard output and then
+// schedules itself to run again one minute later via time.AfterFunc.
+//
+// The four figures are the HeapSys, HeapAlloc, HeapIdle and HeapReleased
+// fields of runtime.MemStats, each divided by 1024*1024 (integer division).
+func heapprint() {
+	const mib = 1024 * 1024
+
+	var stats runtime.MemStats
+	runtime.ReadMemStats(&stats)
+
+	sys := stats.HeapSys / mib
+	alloc := stats.HeapAlloc / mib
+	idle := stats.HeapIdle / mib
+	released := stats.HeapReleased / mib
+	fmt.Printf("申请内存:%dM,分配内存:%dM,未使用内存:%dM,回收内存:%dM\n", sys, alloc, idle, released)
+
+	// Re-arm the timer; each run queues exactly one follow-up run.
+	time.AfterFunc(time.Minute, heapprint)
+}
+
+// clearLogs removes stale log files from ./logs and then schedules itself to
+// run again 24 hours later via time.AfterFunc.
+//
+// A file is removed when its name contains a yyyy-mm-dd date (found with
+// timeReg) that parses, in the local time zone, to a moment more than 30 days
+// before now. Files with no date in their name, or with a date-shaped string
+// that is not a real calendar date, are kept. Failures to list the directory
+// or to remove a file are printed and otherwise ignored, so the cleanup stays
+// best-effort.
+func clearLogs() {
+	fmt.Println("=======clearLogs========")
+	cutoff := time.Now().AddDate(0, 0, -30).Unix()
+	entries, err := ioutil.ReadDir("./logs")
+	if err != nil {
+		// entries is nil here, so the loop below is a no-op.
+		fmt.Println("clearLogs:", err)
+	}
+	for _, entry := range entries {
+		name := entry.Name()
+		stamp := timeReg.FindString(name)
+		if stamp == "" {
+			continue
+		}
+		logTime, err := time.ParseInLocation("2006-01-02", stamp, time.Local)
+		if err != nil {
+			// The pattern also matches impossible dates such as 2021-13-45.
+			// Without this check the zero time.Time returned on failure
+			// would look older than the cutoff and the file would be deleted.
+			continue
+		}
+		if logTime.Unix() >= cutoff {
+			continue
+		}
+		if err := os.Remove("./logs/" + name); err != nil {
+			fmt.Println("clearLogs:", err)
+		}
+	}
+	time.AfterFunc(24*time.Hour, clearLogs)
+}

+ 1 - 0
src/readme.txt

@@ -0,0 +1 @@
+针对增量爬虫采集三级页过程中,部分爬虫采集速度慢,做多线程采集处理

+ 751 - 0
src/res/util/comm.lua

@@ -0,0 +1,751 @@
+--[[
+企明星爬虫系统,公共文件
+Author:a7
+Date:2016/4/7
+]]
+local json=require "json"
+common={}
+
+-- Evaluate a JSON-like string as a Lua expression.
+-- The text is rewritten by common.clearJson, compiled as "return <text>" and
+-- run; the chunk's result is returned, or nil when the text does not compile.
+-- NOTE(review): this executes whatever code the string contains; confirm that
+-- callers only pass trusted data.
+function common.eval(script)
+	local chunk = loadstring("return " .. common.clearJson(script))
+	if chunk ~= nil then
+		return chunk()
+	end
+	return nil
+end
+
+-- Print the dump() rendering of obj.
+function printf(obj)
+	local rendered = dump(obj)
+	print(rendered)
+end
+
+-- Render obj as Lua-literal-style text.
+-- Tables are expanded recursively, one entry per line, indented with tabs.
+-- A table whose keys are exactly 1..count is written as a list without keys;
+-- any other table is written as ["key"] = value pairs. Strings are wrapped in
+-- double quotes with tab, CR, LF, quote and backslash escaped. A non-table
+-- argument is passed through wrapVal (a number comes back unchanged).
+-- NOTE(review): no cycle detection is visible here, so a self-referencing
+-- table presumably recurses without end.
+function dump(obj)  
+    local getIndent, quoteStr, wrapKey, wrapVal, isArray, dumpObj  
+    -- One tab per nesting level.
+    getIndent = function(level)  
+        return string.rep("\t", level)  
+    end  
+    -- Quote a string; characters missing from the table are left as they are.
+    quoteStr = function(str)  
+        str = string.gsub(str, "[%c\\\"]", {  
+            ["\t"] = "\\t",  
+            ["\r"] = "\\r",  
+            ["\n"] = "\\n",  
+            ["\""] = "\\\"",  
+            ["\\"] = "\\\\",  
+        })  
+        return '"' .. str .. '"'  
+    end  
+    -- Format a table key as [key].
+    wrapKey = function(val)  
+        if type(val) == "number" then  
+            return "[" .. val .. "]"  
+        elseif type(val) == "string" then  
+            return "[" .. quoteStr(val) .. "]"  
+        else  
+            return "[" .. tostring(val) .. "]"  
+        end  
+    end  
+    -- Format a value; nested tables go back through dumpObj.
+    wrapVal = function(val, level)  
+        if type(val) == "table" then  
+            return dumpObj(val, level)  
+        elseif type(val) == "number" then  
+            return val  
+        elseif type(val) == "string" then  
+            return quoteStr(val)  
+        else  
+            return tostring(val)  
+        end  
+    end  
+    -- This second `local` shadows the isArray declared above; dumpObj, defined
+    -- after it, refers to this one.
+    -- Returns true and the entry count when every index 1..count is present.
+    local isArray = function(arr)  
+        local count = 0   
+        for k, v in pairs(arr) do  
+            count = count + 1   
+        end   
+        for i = 1, count do  
+            if arr[i] == nil then  
+                return false  
+            end   
+        end   
+        return true, count  
+    end  
+    -- Render one table (or pass a non-table to wrapVal) at the given depth.
+    dumpObj = function(obj, level)  
+        if type(obj) ~= "table" then  
+            return wrapVal(obj)  
+        end  
+        level = level + 1  
+        local tokens = {}  
+        tokens[#tokens + 1] = "{"  
+        local ret, count = isArray(obj)  
+        if ret then  
+            for i = 1, count do  
+                tokens[#tokens + 1] = getIndent(level) .. wrapVal(obj[i], level) .. ","  
+            end  
+        else  
+            for k, v in pairs(obj) do  
+                tokens[#tokens + 1] = getIndent(level) .. wrapKey(k) .. " = " .. wrapVal(v, level) .. ","  
+            end  
+        end  
+        tokens[#tokens + 1] = getIndent(level - 1) .. "}"  
+        return table.concat(tokens, "\n")  
+    end  
+    return dumpObj(obj, 0)  
+end  
+
+-- Rewrite JSON text towards Lua table syntax:
+-- "[" and "]" become "{" and "}", and every "key": becomes key=.
+function common.clearJson(json)
+	-- Both bracket kinds are swapped in a single pass.
+	local braces = { ["["] = "{", ["]"] = "}" }
+	local converted = string.gsub(json, "[%[%]]", braces)
+	-- Drop the quotes around keys and turn the colon into "=".
+	converted = string.gsub(converted, "\"([^\"]*)\":", "%1=")
+	return converted
+end
+-- Replace a fixed set of HTML entities in c with their characters.
+-- The rules run in the listed order. Note that &quot; is mapped to a single
+-- quote while &#34; is mapped to a double quote.
+function common.replaceEscString(c)
+	local rules = {
+		{ "&lt;", "<" },
+		{ "&gt;", ">" },
+		{ "&quot;", "'" },
+		{ "&amp;", "&" },
+		{ "&#34;", "\"" },
+	}
+	for _, rule in ipairs(rules) do
+		c = string.gsub(c, rule[1], rule[2])
+	end
+	return c
+end
+
+-- Current date and time formatted as "YYYY-MM-DD HH:MM:SS".
+function common.nowDate()
+	local now = os.time()
+	return os.date("%Y-%m-%d %H:%M:%S", now)
+end
+-- English month names and abbreviations mapped to two-digit month numbers.
+monthmap = {
+	["Jan"]="01", ["Feb"]="02", ["Mar"]="03", ["Apr"]="04",
+	["May"]="05", ["June"]="06", ["Jun"]="06", ["July"]="07",
+	["Jul"]="07", ["Aug"]="08", ["Sept"]="09", ["Sep"]="09",
+	["Oct"]="10", ["Nov"]="11", ["Dec"]="12",
+}
+
+-- Convert a space-separated timestamp into "<field6>-<MM>-<field3> <field4>",
+-- where MM is monthmap[<field2>].
+-- Presumably meant for strings like "Thu Apr 07 10:00:00 CST 2016"; confirm
+-- against callers. Raises when a field or the month name is missing.
+function common.timeStrByCST(strtime)
+	local fields = common.split(strtime, " ")
+	local year, month, day, clock = fields[6], monthmap[fields[2]], fields[3], fields[4]
+	return year .. "-" .. month .. "-" .. day .. " " .. clock
+end
+
+
+-- Parse the digit groups of datestr into "YYYY-MM-DD HH:MM:SS".
+--   datestr:  text containing the numbers of a date, with any separators.
+--   datetype: which groups to expect:
+--     "MMdd"            month, day; the current year and 00:00:00 are used
+--     "yyyyMMdd"        year, month, day; the current time of day is appended
+--     "yyyyMMddHHmm"    year .. minute; seconds are "00"
+--     "yyyyMMddHHmmss"  year .. second
+-- Returns "0" when datestr is nil, when fewer groups are present than the
+-- format needs, or when datetype is not one of the above.
+-- Month, day, hour, minute and second are zero-padded to two digits so the
+-- result keeps the fixed layout that common.strToTimestamp reads.
+function common.parseDate(datestr,datetype)
+	if datestr == nil then
+		return "0"
+	end
+	local parts, count = {}, 0
+	for digits in string.gmatch(datestr, "(%d+)") do
+		count = count + 1
+		parts[count] = digits
+	end
+	-- Every supported format needs at least two numbers.
+	if count < 2 then
+		return "0"
+	end
+	local pad = common.padDigital
+	if datetype == "MMdd" then
+		return tostring(os.date("%Y", os.time())) .. "-" .. pad(parts[1]) .. "-" .. pad(parts[2]) .. " 00:00:00"
+	end
+	if count < 3 then
+		return "0"
+	end
+	local day = parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])
+	if datetype == "yyyyMMdd" then
+		return day .. os.date(" %H:%M:%S", os.time())
+	elseif datetype == "yyyyMMddHHmm" and count >= 5 then
+		return day .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":00"
+	elseif datetype == "yyyyMMddHHmmss" and count >= 6 then
+		return day .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":" .. pad(parts[6])
+	end
+	return "0"
+end
+
+-- Left-pad src with one "0" when it is shorter than two characters.
+function common.padDigital(src)
+	if string.len(src) >= 2 then
+		return src
+	end
+	return "0" .. src
+end
+--local datestr="2016年05月12日22:05:04"
+--print(parseDate(datestr,"yyyyMMddHHmm"))
+--print(parseDate("4月5日","MMdd"))
+
+-- Convert a "yyyy-MM-dd HH:mm:ss" string to a timestamp via os.time.
+-- Fields are read from fixed character positions. Strings shorter than 19
+-- characters return 0.
+function common.strToTimestamp(str)
+	if string.len(str) < 19 then
+		return 0
+	end
+	-- Numeric value of the characters first..last of str.
+	local function field(first, last)
+		return tonumber(string.sub(str, first, last))
+	end
+	return os.time({
+		year = field(1, 4),
+		month = field(6, 7),
+		day = field(9, 10),
+		hour = field(12, 13),
+		min = field(15, 16),
+		sec = field(18, 19),
+	})
+end
+
+-- Remove every CR, LF, space, tab and "|" character from s, anywhere in the
+-- string (not only at the ends). Returns "" for nil.
+-- NOTE(review): "|" sits inside the character class and is therefore removed
+-- too; it may have been intended as alternation. Left unchanged because
+-- callers may rely on it.
+function common.trim(s)
+	if s == nil then
+		return ""
+	end
+	-- The parentheses keep only the string; without them gsub's replacement
+	-- count leaks out as a second return value and ends up as an extra
+	-- argument when the call is the last argument of another call.
+	return (string.gsub(s, "[\r|\n| |\t]+", ""))
+end
+
+-- Split str on delimiter and return the pieces as a list.
+-- delimiter is used as a Lua pattern, not as plain text. It is also appended
+-- literally to str before matching, so a delimiter that contains pattern
+-- escapes leaves the unmatched part of that literal on the last piece.
+-- Returns an empty list when str is nil or "" or delimiter is nil.
+function common.split(str, delimiter)
+	local pieces = {}
+	if str == nil or str == '' or delimiter == nil then
+		return pieces
+	end
+	local padded = str .. delimiter
+	for piece in string.gmatch(padded, "(.-)" .. delimiter) do
+		pieces[#pieces + 1] = piece
+	end
+	return pieces
+end
+
+-- First result of string.match(con, reg), or "" when there is no match.
+function common.regTab(con,reg)
+	return string.match(con, reg) or ""
+end
+
+-- Check that every field named in tab1 is present and non-empty in tab2.
+-- Returns ok, report: ok is false when any field is nil or ""; report is the
+-- concatenation of "<field>:值空," for each such field.
+function common.checkData(tab1,tab2)
+	local missing = {}
+	for _, field in pairs(tab1) do
+		local value = tab2[field]
+		if value == nil or value == "" then
+			missing[#missing + 1] = field .. ":值空" .. ","
+		end
+	end
+	return #missing == 0, table.concat(missing)
+end
+
+-- Decode %XX escapes in s back into the bytes they stand for.
+-- Returns "" for nil.
+function common.decodeURI(s)
+	if s == nil then
+		return ""
+	end
+	local function fromHex(h)
+		return string.char(tonumber(h, 16))
+	end
+	local decoded = string.gsub(s, '%%(%x%x)', fromHex)
+	return decoded
+end
+
+-- Percent-encode s: every byte other than letters, digits, ".", "-" and space
+-- becomes %XX, and spaces become "+". Returns "" for nil.
+function common.encodeURI(s)
+	if s == nil then
+		return ""
+	end
+	local escaped = string.gsub(s, "([^%w%.%- ])", function(c)
+		return string.format("%%%02X", string.byte(c))
+	end)
+	-- Parentheses keep only the string; gsub's replacement count used to leak
+	-- out as a second return value.
+	return (string.gsub(escaped, " ", "+"))
+end
+
+
+-- Resolve href against the page address channel and return an absolute URL.
+--   channel: URL of the page the link was found on; it has to start with
+--            http:// or https://, since the host and path are taken from the
+--            text after that prefix.
+--   href:    the link as written: absolute, "/rooted", "./relative",
+--            "../parent-relative" or a bare relative name.
+-- An href that already starts with "http" (any case) is returned unchanged.
+function common.gethref(channel,href)
+	local prehttp = "http://"
+	if string.lower(string.sub(channel,1,5)) == "https" then
+		prehttp = "https://"
+	end
+	if string.lower(string.sub(href,1,4)) == "http" then
+		return href
+	end
+
+	-- Host and path segments of the page URL, scheme removed.
+	channel = channel:match("https?://(.*)$")
+	local channelpath = common.split(channel,"/")
+	-- This used to be an accidental global; it is local now so that a caller's
+	-- own global `tmp` is not overwritten.
+	local tmp
+
+	local pre = string.sub(href,1,1)
+	if pre ~= "." and pre ~= "/" then
+		-- A bare relative name is treated like "./name".
+		href = "./"..href
+	end
+	pre = string.sub(href,1,2)
+	if pre == ".." then
+		-- Splitting on the pattern "%./" gives one piece per "./" in href; the
+		-- last piece is the remaining path plus a stray "%" left over from the
+		-- delimiter text that split appends.
+		local infopath = common.split(href,"%./")
+		for i=1,table.getn(infopath) do
+			-- Never drop the host segment itself.
+			if table.getn(channelpath)==1 then
+				break
+			end
+			-- NOTE(review): position -1 is relied on to mean "last element";
+			-- that depends on the Lua runtime in use, confirm before changing.
+			table.remove(channelpath,-1)
+		end
+		tmp = ""
+		for i=1,table.getn(channelpath) do
+			tmp = tmp..channelpath[i].."/"
+		end
+		local infourl = infopath[table.getn(infopath)]
+		-- Cut the stray trailing "%" described above.
+		href = prehttp..tmp..string.sub(infourl,0,string.len(infourl)-1)
+	elseif pre == "./" then
+		-- Same directory: drop the page's last segment and append the rest.
+		table.remove(channelpath,-1)
+		tmp = prehttp
+		for i=1,table.getn(channelpath) do
+			tmp = tmp..channelpath[i].."/"
+		end
+		href = tmp..string.sub(href,3)
+	elseif string.sub(href,0,1) == "/" then
+		-- Rooted path: host plus href.
+		href = prehttp..channelpath[1]..href
+	else
+		href = prehttp..channelpath[1].."/"..href
+	end
+	return href
+end
+
+-- Split str on delimiter (used as a Lua pattern) and return the pieces.
+-- Returns nil when str is nil or "" or delimiter is nil.
+function common.splitf(str, delimiter)
+	if str == nil or str == '' or delimiter == nil then
+		return nil
+	end
+	local pieces = {}
+	local padded = str .. delimiter
+	for piece in string.gmatch(padded, "(.-)" .. delimiter) do
+		pieces[#pieces + 1] = piece
+	end
+	return pieces
+end
+
+
+-- Check content against a list of "selector==expected" rules, one per line.
+-- For each rule, the selector is looked up with findMap and compared with the
+-- expected text. Returns -1 when any rule does not match, otherwise 0
+-- (including when update is nil or "").
+function common.checkUpdate(content,update)
+	if update == "" or update == nil then
+		return 0
+	end
+	local mismatch = false
+	for _, rule in pairs(common.splitf(update, "\n")) do
+		local parts = common.splitf(rule, "==")
+		-- splitf returns nil for an empty line (for example after a trailing
+		-- newline); such lines are skipped instead of raising in table.getn.
+		if parts ~= nil and table.getn(parts) > 1 then
+			local found = findMap({ ["tmp"] = parts[1] }, content)["tmp"]
+			if found ~= parts[2] then
+				mismatch = true
+			end
+		end
+	end
+	if mismatch then
+		return -1
+	end
+	return 0
+end
+
+-- Collect the link text of every anchor in content whose href ends in one of
+-- the attachment extensions below.
+-- The result is keyed by the anchor's position in findListHtml("a", content),
+-- so it can have gaps. href is not used.
+function common.getEnclosureTitle(href,content)
+	local extensions = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
+	local fileTitles = {}
+	local linkList = findListHtml("a", content)
+	for k in pairs(linkList) do
+		local selector = "a:eq("..tostring(k-1)..")"
+		-- Look the href up once per anchor rather than once per extension.
+		local link = findOneText(selector..":attr(href)", content)
+		for _, ext in ipairs(extensions) do
+			if string.find(link, "%."..ext.."$") ~= nil then
+				fileTitles[k] = findOneText(selector, content)
+				break
+			end
+		end
+	end
+	return fileTitles
+end
+
+-- Collect attachment links from content.
+-- An anchor counts as an attachment when its entry from
+-- findListHtml("a", content) ends in one of the extensions below. Its href is
+-- made absolute against href when it does not contain "http", and backslashes
+-- are turned into "/". The result is keyed by anchor position.
+function common.getEnclosureHref(href,content)
+	local extensions = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
+	local hrefs = {}
+	local linkList = findListHtml("a", content)
+	for k, v in pairs(linkList) do
+		local isAttachment = false
+		for _, ext in ipairs(extensions) do
+			if string.find(v, "%."..ext.."$") ~= nil then
+				isAttachment = true
+				break
+			end
+		end
+		if isAttachment then
+			local tempHref = findOneText("a:eq("..tostring(k-1).."):attr(href)", content)
+			if string.find(tempHref, "http") == nil then
+				tempHref = common.gethref(href, tempHref)
+			end
+			hrefs[k] = (string.gsub(tempHref, "\\", "/"))
+		end
+	end
+	return hrefs
+end
+
+-- Collect attachment links from content, judged by each anchor's href.
+-- An anchor counts when its href ends in one of the extensions below. The
+-- href is made absolute against href when it does not contain "http", and
+-- backslashes are turned into "/". The result is keyed by anchor position.
+function common.getEnclosureHrefByList(href,content)
+	local extensions = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
+	local hrefs = {}
+	local linkList = findListHtml("a", content)
+	for k in pairs(linkList) do
+		-- Look the href up once per anchor rather than once per extension.
+		local link = findOneText("a:eq("..tostring(k-1).."):attr(href)", content)
+		local isAttachment = false
+		for _, ext in ipairs(extensions) do
+			if string.find(link, "%."..ext.."$") ~= nil then
+				isAttachment = true
+				break
+			end
+		end
+		if isAttachment then
+			local tempHref = link
+			if string.find(tempHref, "http") == nil then
+				tempHref = common.gethref(href, tempHref)
+			end
+			hrefs[k] = (string.gsub(tempHref, "\\", "/"))
+		end
+	end
+	return hrefs
+end
+
+
+-- Download every link in fileLinkArray, naming each file with the entry of
+-- fileNameArray at the same key.
+-- A download that returns an empty url is retried up to five more times; if
+-- it is still empty after the last retry, saveErrLog is called for the link.
+-- Returns a list of {url, filename, size, ftype, fid} tables for the downloads
+-- that produced both a url and a name.
+function common.getFileAttachmentsArray(fileNameArray,fileLinkArray)
+	local attachments = {}
+	for i, fileLink in pairs(fileLinkArray) do
+		local url, name, size, ftype, fid = downloadFile(fileNameArray[i], fileLink, "get", {}, {}, "")
+		for retry = 1, 5 do
+			if url ~= "" then
+				break
+			end
+			url, name, size, ftype, fid = downloadFile(fileNameArray[i], fileLink, "get", {}, {}, "")
+			if retry == 5 and url == "" then
+				saveErrLog(fileLink, "comm附件下载失败")
+			end
+		end
+		local downloaded = url ~= nil and url ~= "" and name ~= nil and name ~= ""
+		if downloaded then
+			table.insert(attachments, {
+				["url"] = url,
+				["filename"] = name,
+				["size"] = size,
+				["ftype"] = ftype,
+				["fid"] = fid
+			})
+		end
+	end
+	return attachments
+end
+
+-- Find the attachment titles and links in content and download them.
+-- Links come from common.getEnclosureHrefByList, falling back to
+-- common.getEnclosureHref when table.getn reports none. Entries whose title
+-- is "" are dropped from both lists before downloading.
+function common.getFileAttachmentsArrayByHrefAndContent(href,content)
+	local fileTitles = common.getEnclosureTitle(href, content)
+	local fileLinks = common.getEnclosureHrefByList(href, content)
+	if table.getn(fileLinks) == 0 then
+		fileLinks = common.getEnclosureHref(href, content)
+	end
+	-- Length of the leading run 1..n, the same entries ipairs would visit.
+	local n = 0
+	while fileTitles[n + 1] ~= nil do
+		n = n + 1
+	end
+	-- Walk backwards: removing while walking forwards shifts the next entry
+	-- into the current slot, so it was never examined.
+	for i = n, 1, -1 do
+		if fileTitles[i] == "" then
+			table.remove(fileTitles, i)
+			table.remove(fileLinks, i)
+		end
+	end
+	return common.getFileAttachmentsArray(fileTitles, fileLinks)
+end
+
+
+-- Attachment lookup restricted to one part of the page (titles and hrefs).
+--   tags:    selector of the part of the page to search
+--   withend: true when the file extension must be the suffix (e.g. ".doc" at
+--            the end); false when it may appear anywhere
+-- File extensions that are recognised, tried in this order.
+filetype={"jpg","JPG","bid","pdf","png","PDF","docx","doc","xlsx","xls","zip","rar","swf","DOCX","DOC","PDF","XLSX","XLS","ZIP","RAR","SWF"}
+-- Returns a list of items with "title", "href" (made absolute against href)
+-- and "ftype" (the matching extension as a Lua pattern, e.g. "%.pdf") for
+-- every anchor whose href or title matches an extension and whose title is
+-- not empty.
+function common.getFilesLinkByTag(href,tags,content,withend)
+	local dhtml = findOneHtml(tags, content)
+	local alist = findListHtml(tags.." a", content)
+	local flist = {}
+	for k in pairs(alist) do
+		local index = tostring(k-1)
+		local item = findMap({
+			["href"] = "a:eq("..index.."):attr(href)",
+			["title"] = "a:eq("..index..")",
+		}, dhtml)
+		item["title"] = common.trim(tostring(item["title"]))
+		item["href"] = common.gethref(href, tostring(item["href"]))
+		item["href"] = string.gsub(item["href"], "\\", "/")
+		if string.find(item["href"], "http") == nil then
+			item["href"] = transCode("utf8", item["href"])
+		end
+		local statehref
+		for _, ftype in pairs(filetype) do
+			local suffix = "%."..ftype
+			local pattern = suffix
+			if withend then
+				pattern = suffix.."$"
+			end
+			-- Try the href first, then the title.
+			statehref = string.find(item["href"], pattern)
+			if statehref == nil then
+				statehref = string.find(item["title"], pattern)
+			end
+			item["ftype"] = suffix
+			if statehref then
+				break
+			end
+		end
+		if statehref ~= nil and item["title"] ~= "" then
+			table.insert(flist, item)
+		end
+	end
+	return flist
+end
+
+-- Download the attachments found by common.getFilesLinkByTag.
+-- param, head and ck are passed through to downloadFile; when param or head is
+-- nil all three are reset to {}, {} and "".
+-- A download that returns an empty url is retried up to two more times, and
+-- saveErrLog is called when the second retry also fails.
+-- Returns one entry per link: {url, filename, size, ftype, fid, org_url} on
+-- success, {filename, org_url} otherwise.
+function common.getFileAttachmentsArrayWithTag(href,tags,content,withend,param,head,ck)
+	if param == nil or head == nil then
+		param, head, ck = {}, {}, ""
+	end
+	local attachments = {}
+	local links = common.getFilesLinkByTag(href,tags,content,withend)
+	for _, link in ipairs(links) do
+		-- Use the title as the file name; when it does not end with the
+		-- extension, cut it after the last occurrence of the extension.
+		local file_name
+		if string.find(link["title"], link["ftype"].."$") == nil then
+			file_name = string.match(link["title"], "(.+"..link["ftype"]..")")
+		else
+			file_name = link["title"]
+		end
+		-- Original address of the attachment.
+		local init_url = link["href"]
+		local url, name, size, ftype, fid = downloadFile(file_name, link["href"], "get", param, head, ck)
+		if url == "" then
+			for attempt = 1, 2 do
+				url, name, size, ftype, fid = downloadFile(file_name, link["href"], "get", param, head, ck)
+				if url ~= "" and size ~= "" then
+					break
+				end
+				if attempt == 2 then
+					saveErrLog(link["href"], "comm附件下载失败")
+				end
+			end
+		end
+
+		if url == "" and size == "" then
+			name = file_name
+		end
+		if type(url) ~= "string" then
+			url = ""
+		end
+
+		local succeeded = url ~= nil and url ~= "" and name ~= nil and name ~= "" and size ~= ""
+		if succeeded then
+			table.insert(attachments, {
+				["url"] = url,
+				["filename"] = name,
+				["size"] = size,
+				["ftype"] = ftype,
+				["fid"] = fid,
+				["org_url"] = init_url
+			})
+		else
+			-- Download failed: keep only the name and the original address.
+			table.insert(attachments, {
+				["filename"] = name,
+				["org_url"] = init_url
+			})
+		end
+	end
+	return attachments
+end
+
+
+-- Return content with every HTML comment ("<!--" up to the next "-->") removed.
+-- An opener without a closing "-->" is left in place together with the text
+-- after it.
+function common.getPureContent(content)
+	local kept = {}
+	local pos = 1
+	while true do
+		local open = string.find(content, "<!--", pos, true)
+		if open == nil then
+			break
+		end
+		-- Search for the terminator from the opener onwards. Searching from
+		-- the start of the string could pick up a stray "-->" before the
+		-- opener, which made the text grow on every pass.
+		local _, close = string.find(content, "-->", open, true)
+		if close == nil then
+			break
+		end
+		kept[#kept + 1] = string.sub(content, pos, open - 1)
+		pos = close + 1
+	end
+	kept[#kept + 1] = string.sub(content, pos)
+	return table.concat(kept)
+end
+
+
+-- Extract an amount and its currency from a text such as "人民币12.5万元".
+-- Returns amount, currency:
+--   amount   the first number in the text, multiplied by 10000 when the text
+--            contains "万", rounded to two decimals
+--   currency "美元" when the text mentions 美元 and not 人民币, else "人民币"
+-- Raises an error when the text contains no number.
+function common.getMoneyAndType(orgStr)
+	-- Remove every match of each pattern, in order.
+	local function strip(text, patterns)
+		for _, pattern in ipairs(patterns) do
+			text = string.gsub(text, pattern, "")
+		end
+		return text
+	end
+
+	orgStr = common.trim(orgStr)
+	-- "(" and ")" are escaped because a bare parenthesis is capture syntax in
+	-- a Lua pattern; the full-width forms are removed as well.
+	orgStr = strip(orgStr, { "%(", "%)", ",", "(", ")", "," })
+
+	local multiplier = 1
+	if string.find(orgStr, "万") ~= nil then
+		orgStr = strip(orgStr, { "万元", "万" })
+		multiplier = 10000
+	end
+
+	local moneyType = "人民币"
+	if string.find(orgStr, "人民币") ~= nil then
+		orgStr = strip(orgStr, { "人民币", "¥", "¥" })
+	elseif string.find(orgStr, "美元") ~= nil then
+		-- "$" alone is the end-of-string anchor and removed nothing.
+		orgStr = strip(orgStr, { "美元", "%$", "$" })
+		moneyType = "美元"
+	end
+
+	local digits = string.match(orgStr, "[0-9]+%.*[0-9]*")
+	local num = digits and tonumber(digits)
+	if num == nil then
+		error("common.getMoneyAndType: no numeric amount found in input")
+	end
+	return tonumber(string.format("%.2f", num * multiplier)), moneyType
+end
+
+
+-- Fill in defaults on data for the fields jsondata, href, title, publishtime,
+-- detail and contenthtml, and return data.
+-- A nil field becomes "" ("{}" for jsondata). A jsondata table is replaced by
+-- its json.encode text, or by "{}" when it has no entries.
+function common.dataNil(data)
+	local fields = {"jsondata","href","title","publishtime","detail","contenthtml"}
+	for _, name in ipairs(fields) do
+		local value = data[name]
+		if name == "jsondata" then
+			if value == nil then
+				data[name] = "{}"
+			elseif type(value) == "table" then
+				if next(value) ~= nil then
+					data[name] = json.encode(value)
+				else
+					data[name] = "{}"
+				end
+			end
+		elseif value == nil then
+			data[name] = ""
+		end
+	end
+	return data
+end
+
+-- Tell whether a detail-page link stays on the same site as href.
+-- Returns url, same: url is itemHref made absolute with common.gethref; same
+-- is the result of common.isThisWeb for http/https URLs and false otherwise.
+-- Returns "", false when the resolved URL is empty.
+function common.hrefInThisWeb(href,itemHref)
+	local full = common.gethref(href, itemHref)
+	if full == nil or full == "" then
+		return "", false
+	end
+	-- Index of the first character after "https://" or "http://".
+	local hostStart
+	if string.find(full, "https") == 1 then
+		hostStart = 9
+	elseif string.find(full, "http") == 1 then
+		hostStart = 8
+	end
+	if hostStart == nil then
+		return full, false
+	end
+	return full, common.isThisWeb(href, full, hostStart)
+end
+
+-- Return true when the domain of itemHref occurs in href.
+--   i: index in itemHref of the first character after the scheme prefix.
+-- The domain is the text from i up to the first "/".
+function common.isThisWeb(href,itemHref,i)
+	local rest = string.sub(itemHref, i, string.len(itemHref))
+	local domain = common.split(rest, "/")[1]
+	if domain == nil or domain == "" then
+		return false
+	end
+	-- Plain search: as a pattern, "-" and "." in a domain are magic characters
+	-- and a name such as "my-site.com" would not match itself.
+	return string.find(href, domain, 1, true) ~= nil
+end
+
+--通用方法结束
+return common;

+ 647 - 0
src/res/util/comm_back.lua

@@ -0,0 +1,647 @@
+--[[
+企明星爬虫系统,公共文件
+Author:a7
+Date:2016/4/7
+]]
+
+common={}
+
+-- Evaluate a JSON-like string as a Lua expression.
+-- The text is rewritten by common.clearJson, compiled as "return <text>" and
+-- run; the chunk's result is returned, or nil when the text does not compile.
+-- NOTE(review): this executes whatever code the string contains; confirm that
+-- callers only pass trusted data.
+function common.eval(script)
+	local chunk = loadstring("return " .. common.clearJson(script))
+	if chunk ~= nil then
+		return chunk()
+	end
+	return nil
+end
+
+-- Print the dump() rendering of obj.
+function printf(obj)
+	local rendered = dump(obj)
+	print(rendered)
+end
+
+-- Render obj as Lua-literal-style text.
+-- Tables are expanded recursively, one entry per line, indented with tabs.
+-- A table whose keys are exactly 1..count is written as a list without keys;
+-- any other table is written as ["key"] = value pairs. Strings are wrapped in
+-- double quotes with tab, CR, LF, quote and backslash escaped. A non-table
+-- argument is passed through wrapVal (a number comes back unchanged).
+-- NOTE(review): no cycle detection is visible here, so a self-referencing
+-- table presumably recurses without end.
+function dump(obj)  
+    local getIndent, quoteStr, wrapKey, wrapVal, isArray, dumpObj  
+    -- One tab per nesting level.
+    getIndent = function(level)  
+        return string.rep("\t", level)  
+    end  
+    -- Quote a string; characters missing from the table are left as they are.
+    quoteStr = function(str)  
+        str = string.gsub(str, "[%c\\\"]", {  
+            ["\t"] = "\\t",  
+            ["\r"] = "\\r",  
+            ["\n"] = "\\n",  
+            ["\""] = "\\\"",  
+            ["\\"] = "\\\\",  
+        })  
+        return '"' .. str .. '"'  
+    end  
+    -- Format a table key as [key].
+    wrapKey = function(val)  
+        if type(val) == "number" then  
+            return "[" .. val .. "]"  
+        elseif type(val) == "string" then  
+            return "[" .. quoteStr(val) .. "]"  
+        else  
+            return "[" .. tostring(val) .. "]"  
+        end  
+    end  
+    -- Format a value; nested tables go back through dumpObj.
+    wrapVal = function(val, level)  
+        if type(val) == "table" then  
+            return dumpObj(val, level)  
+        elseif type(val) == "number" then  
+            return val  
+        elseif type(val) == "string" then  
+            return quoteStr(val)  
+        else  
+            return tostring(val)  
+        end  
+    end  
+    -- This second `local` shadows the isArray declared above; dumpObj, defined
+    -- after it, refers to this one.
+    -- Returns true and the entry count when every index 1..count is present.
+    local isArray = function(arr)  
+        local count = 0   
+        for k, v in pairs(arr) do  
+            count = count + 1   
+        end   
+        for i = 1, count do  
+            if arr[i] == nil then  
+                return false  
+            end   
+        end   
+        return true, count  
+    end  
+    -- Render one table (or pass a non-table to wrapVal) at the given depth.
+    dumpObj = function(obj, level)  
+        if type(obj) ~= "table" then  
+            return wrapVal(obj)  
+        end  
+        level = level + 1  
+        local tokens = {}  
+        tokens[#tokens + 1] = "{"  
+        local ret, count = isArray(obj)  
+        if ret then  
+            for i = 1, count do  
+                tokens[#tokens + 1] = getIndent(level) .. wrapVal(obj[i], level) .. ","  
+            end  
+        else  
+            for k, v in pairs(obj) do  
+                tokens[#tokens + 1] = getIndent(level) .. wrapKey(k) .. " = " .. wrapVal(v, level) .. ","  
+            end  
+        end  
+        tokens[#tokens + 1] = getIndent(level - 1) .. "}"  
+        return table.concat(tokens, "\n")  
+    end  
+    return dumpObj(obj, 0)  
+end  
+
+-- Rewrite JSON text towards Lua table syntax:
+-- "[" and "]" become "{" and "}", and every "key": becomes key=.
+function common.clearJson(json)
+	-- Both bracket kinds are swapped in a single pass.
+	local braces = { ["["] = "{", ["]"] = "}" }
+	local converted = string.gsub(json, "[%[%]]", braces)
+	-- Drop the quotes around keys and turn the colon into "=".
+	converted = string.gsub(converted, "\"([^\"]*)\":", "%1=")
+	return converted
+end
+-- Replace a fixed set of HTML entities in c with their characters.
+-- The rules run in the listed order. Note that &quot; is mapped to a single
+-- quote while &#34; is mapped to a double quote.
+function common.replaceEscString(c)
+	local rules = {
+		{ "&lt;", "<" },
+		{ "&gt;", ">" },
+		{ "&quot;", "'" },
+		{ "&amp;", "&" },
+		{ "&#34;", "\"" },
+	}
+	for _, rule in ipairs(rules) do
+		c = string.gsub(c, rule[1], rule[2])
+	end
+	return c
+end
+
+-- Current date and time formatted as "YYYY-MM-DD HH:MM:SS".
+function common.nowDate()
+	local now = os.time()
+	return os.date("%Y-%m-%d %H:%M:%S", now)
+end
+-- English month names and abbreviations mapped to two-digit month numbers.
+monthmap = {
+	["Jan"]="01", ["Feb"]="02", ["Mar"]="03", ["Apr"]="04",
+	["May"]="05", ["June"]="06", ["Jun"]="06", ["July"]="07",
+	["Jul"]="07", ["Aug"]="08", ["Sept"]="09", ["Sep"]="09",
+	["Oct"]="10", ["Nov"]="11", ["Dec"]="12",
+}
+
+-- Convert a space-separated timestamp into "<field6>-<MM>-<field3> <field4>",
+-- where MM is monthmap[<field2>].
+-- Presumably meant for strings like "Thu Apr 07 10:00:00 CST 2016"; confirm
+-- against callers. Raises when a field or the month name is missing.
+function common.timeStrByCST(strtime)
+	local fields = common.split(strtime, " ")
+	local year, month, day, clock = fields[6], monthmap[fields[2]], fields[3], fields[4]
+	return year .. "-" .. month .. "-" .. day .. " " .. clock
+end
+
+
+-- Parse the digit groups of datestr into "YYYY-MM-DD HH:MM:SS".
+--   datestr:  text containing the numbers of a date, with any separators.
+--   datetype: which groups to expect:
+--     "yyyyMMdd"        year, month, day; the current time of day is appended
+--     "yyyyMMddHHmmss"  year .. second
+--     "yyyyMMddHHmm"    year .. minute; seconds are "00"
+--     "MMdd"            month, day; current year and time of day are used
+-- Returns "0" when datestr is nil, when fewer groups are present than the
+-- format needs (this used to raise), or when datetype is not one of the above.
+-- Month, day, hour, minute and second are zero-padded to two digits.
+function common.parseDate(datestr,datetype)
+	if datestr == nil then
+		return "0"
+	end
+	local parts, count = {}, 0
+	for digits in string.gmatch(datestr, "(%d+)") do
+		count = count + 1
+		parts[count] = digits
+	end
+	-- Every supported format needs at least two numbers.
+	if count < 2 then
+		return "0"
+	end
+	local pad = common.padDigital
+	if datetype == "MMdd" then
+		return tostring(os.date("%Y", os.time())) .. "-" .. pad(parts[1]) .. "-" .. pad(parts[2]) .. os.date(" %H:%M:%S", os.time())
+	end
+	if count < 3 then
+		return "0"
+	end
+	local day = parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])
+	if datetype == "yyyyMMdd" then
+		return day .. os.date(" %H:%M:%S", os.time())
+	elseif datetype == "yyyyMMddHHmmss" and count >= 6 then
+		return day .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":" .. pad(parts[6])
+	elseif datetype == "yyyyMMddHHmm" and count >= 5 then
+		return day .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":00"
+	end
+	return "0"
+end
+
+-- Left-pad src with one "0" when it is shorter than two characters.
+function common.padDigital(src)
+	if string.len(src) >= 2 then
+		return src
+	end
+	return "0" .. src
+end
+--local datestr="2016年05月12日22:05:04"
+--print(parseDate(datestr,"yyyyMMddHHmm"))
+--print(parseDate("4月5日","MMdd"))
+
+-- Convert a "yyyy-MM-dd HH:mm:ss" string to a timestamp via os.time.
+-- Fields are read from fixed character positions. Strings shorter than 19
+-- characters return 0.
+function common.strToTimestamp(str)
+	if string.len(str) < 19 then
+		return 0
+	end
+	-- Numeric value of the characters first..last of str.
+	local function field(first, last)
+		return tonumber(string.sub(str, first, last))
+	end
+	return os.time({
+		year = field(1, 4),
+		month = field(6, 7),
+		day = field(9, 10),
+		hour = field(12, 13),
+		min = field(15, 16),
+		sec = field(18, 19),
+	})
+end
+
+-- Remove every CR, LF, space, tab and "|" character from s, anywhere in the
+-- string (not only at the ends). Returns "" for nil instead of raising.
+-- NOTE(review): "|" sits inside the character class and is therefore removed
+-- too; it may have been intended as alternation. Left unchanged because
+-- callers may rely on it.
+function common.trim(s)
+	if s == nil then
+		return ""
+	end
+	-- The parentheses keep only the string; without them gsub's replacement
+	-- count leaks out as a second return value.
+	return (string.gsub(s, "[\r|\n| |\t]+", ""))
+end
+
+-- Split str on delimiter and return the pieces as a list.
+-- delimiter is used as a Lua pattern, not as plain text, and is also appended
+-- literally to str before matching.
+-- Returns nil when str is nil or "" or delimiter is nil.
+function common.split(str, delimiter)
+	if str == nil or str == '' or delimiter == nil then
+		return nil
+	end
+	local pieces = {}
+	local padded = str .. delimiter
+	for piece in string.gmatch(padded, "(.-)" .. delimiter) do
+		pieces[#pieces + 1] = piece
+	end
+	return pieces
+end
+
+-- First result of string.match(con, reg), or "" when there is no match.
+function common.regTab(con,reg)
+	return string.match(con, reg) or ""
+end
+
+-- Check that every field named in tab1 is present and non-empty in tab2.
+-- Returns ok, report: ok is false when any field is nil or ""; report is the
+-- concatenation of "<field>:值空," for each such field.
+function common.checkData(tab1,tab2)
+	local missing = {}
+	for _, field in pairs(tab1) do
+		local value = tab2[field]
+		if value == nil or value == "" then
+			missing[#missing + 1] = field .. ":值空" .. ","
+		end
+	end
+	return #missing == 0, table.concat(missing)
+end
+
+-- Decode %XX escapes in s back into the bytes they stand for.
+function common.decodeURI(s)
+	local function fromHex(h)
+		return string.char(tonumber(h, 16))
+	end
+	local decoded = string.gsub(s, '%%(%x%x)', fromHex)
+	return decoded
+end
+
+-- Percent-encode s: every byte other than letters, digits, ".", "-" and space
+-- becomes %XX, and spaces become "+". Returns "" for nil instead of raising.
+function common.encodeURI(s)
+	if s == nil then
+		return ""
+	end
+	local escaped = string.gsub(s, "([^%w%.%- ])", function(c)
+		return string.format("%%%02X", string.byte(c))
+	end)
+	-- Parentheses keep only the string; gsub's replacement count used to leak
+	-- out as a second return value.
+	return (string.gsub(escaped, " ", "+"))
+end
+
+
+-- Resolve href against the page address channel and return an absolute URL.
+--   channel: URL of the page the link was found on; it has to start with
+--            http:// or https://, since the host and path are taken from the
+--            text after that prefix.
+--   href:    the link as written: absolute, "/rooted", "./relative",
+--            "../parent-relative" or a bare relative name.
+-- An href that already starts with "http" (any case) is returned unchanged.
+function common.gethref(channel,href)
+	local prehttp = "http://"
+	if string.lower(string.sub(channel,1,5)) == "https" then
+		prehttp = "https://"
+	end
+	if string.lower(string.sub(href,1,4)) == "http" then
+		return href
+	end
+
+	-- Host and path segments of the page URL, scheme removed.
+	channel = channel:match("https?://(.*)$")
+	local channelpath = common.split(channel,"/")
+	-- This used to be an accidental global; it is local now so that a caller's
+	-- own global `tmp` is not overwritten.
+	local tmp
+
+	local pre = string.sub(href,1,1)
+	if pre ~= "." and pre ~= "/" then
+		-- A bare relative name is treated like "./name".
+		href = "./"..href
+	end
+	pre = string.sub(href,1,2)
+	if pre == ".." then
+		-- Splitting on the pattern "%./" gives one piece per "./" in href; the
+		-- last piece is the remaining path plus a stray "%" left over from the
+		-- delimiter text that split appends.
+		local infopath = common.split(href,"%./")
+		for i=1,table.getn(infopath) do
+			-- Never drop the host segment itself, however many "../" steps
+			-- the link contains.
+			if table.getn(channelpath)==1 then
+				break
+			end
+			-- NOTE(review): position -1 is relied on to mean "last element";
+			-- that depends on the Lua runtime in use, confirm before changing.
+			table.remove(channelpath,-1)
+		end
+		tmp = ""
+		for i=1,table.getn(channelpath) do
+			tmp = tmp..channelpath[i].."/"
+		end
+		local infourl = infopath[table.getn(infopath)]
+		-- Cut the stray trailing "%" described above.
+		href = prehttp..tmp..string.sub(infourl,0,string.len(infourl)-1)
+	elseif pre == "./" then
+		-- Same directory: drop the page's last segment and append the rest.
+		table.remove(channelpath,-1)
+		tmp = prehttp
+		for i=1,table.getn(channelpath) do
+			tmp = tmp..channelpath[i].."/"
+		end
+		href = tmp..string.sub(href,3)
+	elseif string.sub(href,0,1) == "/" then
+		-- Rooted path: host plus href.
+		href = prehttp..channelpath[1]..href
+	else
+		href = prehttp..channelpath[1].."/"..href
+	end
+	return href
+end
+
+-- Split str on delimiter (used as a Lua pattern) and return the pieces.
+-- Returns nil when str is nil or "" or delimiter is nil.
+function common.splitf(str, delimiter)
+	if str == nil or str == '' or delimiter == nil then
+		return nil
+	end
+	local pieces = {}
+	local padded = str .. delimiter
+	for piece in string.gmatch(padded, "(.-)" .. delimiter) do
+		pieces[#pieces + 1] = piece
+	end
+	return pieces
+end
+
+
+-- Check content against a list of "selector==expected" rules, one per line.
+-- For each rule, the selector is looked up with findMap and compared with the
+-- expected text. Returns -1 when any rule does not match, otherwise 0
+-- (including when update is nil or "").
+function common.checkUpdate(content,update)
+	if update == "" or update == nil then
+		return 0
+	end
+	local mismatch = false
+	for _, rule in pairs(common.splitf(update, "\n")) do
+		local parts = common.splitf(rule, "==")
+		-- splitf returns nil for an empty line (for example after a trailing
+		-- newline); such lines are skipped instead of raising in table.getn.
+		if parts ~= nil and table.getn(parts) > 1 then
+			local found = findMap({ ["tmp"] = parts[1] }, content)["tmp"]
+			if found ~= parts[2] then
+				mismatch = true
+			end
+		end
+	end
+	if mismatch then
+		return -1
+	end
+	return 0
+end
+
+-- Collect the link text of every anchor in content whose href ends in one of
+-- the attachment extensions below.
+-- The result is keyed by the anchor's position in findListHtml("a", content),
+-- so it can have gaps. href is not used.
+function common.getEnclosureTitle(href,content)
+	local extensions = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
+	local fileTitles = {}
+	local linkList = findListHtml("a", content)
+	for k in pairs(linkList) do
+		local selector = "a:eq("..tostring(k-1)..")"
+		-- Look the href up once per anchor rather than once per extension.
+		local link = findOneText(selector..":attr(href)", content)
+		for _, ext in ipairs(extensions) do
+			if string.find(link, "%."..ext.."$") ~= nil then
+				fileTitles[k] = findOneText(selector, content)
+				break
+			end
+		end
+	end
+	return fileTitles
+end
+
+-- Gets the attachment links.
+-- An anchor counts as an attachment when the string findListHtml returned for it ends
+-- with one of the known extensions. Its href is then read from content, resolved
+-- through common.gethref against href when it does not contain "http", and has its
+-- backslashes turned into forward slashes.
+-- Returns a table keyed by the index findListHtml used for the anchor (may be sparse).
+function common.getEnclosureHref(href,content)
+	local suffixes = {"%.jpg$", "%.JPG$", "%.bid$", "%.pdf$", "%.doc$", "%.docx$", "%.xls$", "%.xlsx$", "%.zip$", "%.rar$"}
+	local hrefs = {}
+
+	for k,v in pairs(findListHtml("a", content)) do
+		local isAttachment = false
+		for _,suffix in ipairs(suffixes) do
+			if string.find(v, suffix) ~= nil then
+				isAttachment = true
+				break
+			end
+		end
+
+		if isAttachment then
+			local link = findOneText("a:eq("..tostring(k-1).."):attr(href)", content)
+			if string.find(link, "http") == nil then
+				link = common.gethref(href, link)
+			end
+			-- Only the first result of gsub (the string) is stored.
+			hrefs[k] = string.gsub(link, "\\", "/")
+		end
+	end
+	return hrefs
+end
+
+-- Gets the attachment links (variant 2).
+-- An anchor counts as an attachment when its href attribute ends with one of the known
+-- extensions. The href is resolved through common.gethref against href when it does not
+-- contain "http", and its backslashes are turned into forward slashes.
+-- Returns a table keyed by the index findListHtml used for the anchor (may be sparse).
+function common.getEnclosureHrefByList(href,content)
+	local suffixes = {"%.jpg$", "%.JPG$", "%.bid$", "%.pdf$", "%.doc$", "%.docx$", "%.xls$", "%.xlsx$", "%.zip$", "%.rar$"}
+	local hrefs = {}
+
+	local linkList = findListHtml("a", content)
+
+	for k,_ in pairs(linkList) do
+		-- Read the href once per anchor and reuse it for both the extension test and
+		-- the result, instead of repeating the lookup for every extension.
+		local link = findOneText("a:eq("..tostring(k-1).."):attr(href)", content)
+		local isAttachment = false
+		for _,suffix in ipairs(suffixes) do
+			if string.find(link, suffix) ~= nil then
+				isAttachment = true
+				break
+			end
+		end
+
+		if isAttachment then
+			if string.find(link, "http") == nil then
+				link = common.gethref(href, link)
+			end
+			-- Only the first result of gsub (the string) is stored.
+			hrefs[k] = string.gsub(link, "\\", "/")
+		end
+	end
+	return hrefs
+end
+
+
+-- Downloads several attachments.
+-- fileLinkArray maps a key to a link and fileNameArray holds the file name under the
+-- same key. Each link is downloaded with downloadFile; an empty url triggers up to five
+-- further attempts and an error-log entry when the last one is still empty.
+-- Returns an array of {url, filename, size, ftype, fid} tables for the downloads that
+-- produced both a url and a name.
+function common.getFileAttachmentsArray(fileNameArray,fileLinkArray)
+	local attachments = {}
+	for i,fileLink in pairs(fileLinkArray) do
+		local fileName = fileNameArray[i]
+		local url,name,size,ftype,fid = downloadFile(fileName, fileLink, "get",{},{},"")
+		-- Attempts 2..6 are retries of the call above.
+		for attempt = 2,6 do
+			if url ~= "" then
+				break
+			end
+			url,name,size,ftype,fid = downloadFile(fileName,fileLink,"get",{},{},"")
+			if attempt == 6 and url == "" then
+				saveErrLog(fileLink,"comm附件下载失败")
+			end
+		end
+
+		local downloaded = url ~= nil and url ~= "" and name ~= nil and name ~= ""
+		if downloaded then
+			table.insert(attachments, {
+				["url"]=url,
+				["filename"]=name,
+				["size"]=size,
+				["ftype"]=ftype,
+				["fid"]=fid
+			})
+		end
+	end
+	return attachments
+end
+
+-- Multi-attachment download that spares the caller from collecting the href and title
+-- tables first.
+-- Titles come from getEnclosureTitle and links from getEnclosureHrefByList, falling back
+-- to getEnclosureHref when the latter found nothing. Entries whose title is the empty
+-- string are dropped, then everything else is handed to getFileAttachmentsArray.
+-- Returns the attachment array produced by getFileAttachmentsArray.
+function common.getFileAttachmentsArrayByHrefAndContent(href,content)
+	local fileTitles = common.getEnclosureTitle(href, content)
+	local fileLinks = common.getEnclosureHrefByList(href, content)
+	-- The link table is keyed by anchor index and may have gaps, so its length is not a
+	-- reliable emptiness test; next() is.
+	if next(fileLinks) == nil then
+		fileLinks = common.getEnclosureHref(href, content)
+	end
+
+	-- Build filtered copies rather than calling table.remove while iterating: removing
+	-- during a forward ipairs skips the element after each removal and stops at the
+	-- first gap. Keys are kept so titles and links stay aligned.
+	local names, links = {}, {}
+	for k,link in pairs(fileLinks) do
+		if fileTitles[k] ~= "" then
+			names[k] = fileTitles[k]
+			links[k] = link
+		end
+	end
+
+	return common.getFileAttachmentsArray(names, links)
+end
+
+
+-- Attachment lookup for one section of the page (collects title and href).
+-- tags:    selector of the section
+-- withend: whether the file type must be the suffix, e.g. ".doc"; true = it must be the
+--          suffix, false = it may appear anywhere
+filetype={"jpg","JPG","bid","pdf","PDF","doc","docx","xls","xlsx","zip","rar","swf","DOCX","DOC","PDF","XLS","XLSX","ZIP","RAR","SWF"}	 
+function common.getFilesLinkByTag(href,tags,content,withend)
+	local sectionHtml = findOneHtml(tags, content)
+	local anchors = findListHtml(tags.." a", content)
+	-- "$" pins the extension pattern to the end of the text when withend is set.
+	local tail = withend and "$" or ""
+	local flist = {}
+	for k,_ in pairs(anchors) do
+		local nth = "a:eq("..tostring(k-1)..")"
+		local item = findMap({["href"]=nth..":attr(href)", ["title"]=nth}, sectionHtml)
+		item["title"] = common.trim(tostring(item["title"]))
+		item["href"] = common.gethref(href,tostring(item["href"]))
+		item["href"] = string.gsub(item["href"], "\\", "/")
+		if string.find(item["href"], "http") == nil then
+			item["href"] = transCode("utf8",item["href"])
+		end
+
+		-- The extension may show up in the href or, failing that, in the title.
+		local matched
+		for _,ftype in pairs(filetype) do
+			local pattern = "%."..ftype..tail
+			matched = string.find(item["href"], pattern)
+			if matched == nil then
+				matched = string.find(item["title"], pattern)
+			end
+			if matched then
+				break
+			end
+		end
+
+		if matched ~= nil and item["title"] ~= "" then
+			table.insert(flist,item)
+		end
+	end
+	return flist
+end
+
+-- Wrapper for the per-section attachment lookup: finds the attachments with
+-- getFilesLinkByTag and downloads each of them.
+-- param, head and ck are forwarded to downloadFile; when param or head is nil all three
+-- are replaced by empty defaults.
+-- Returns an array with one entry per attachment found: a successful download yields
+-- {url, filename, size, ftype, fid, org_url}, a failed one only {filename, org_url}.
+function common.getFileAttachmentsArrayWithTag(href,tags,content,withend,param,head,ck)
+	if param == nil or head == nil  then
+		param, head, ck = {}, {}, ""
+	end
+	local attachments = {}
+	for _,link in ipairs(common.getFilesLinkByTag(href,tags,content,withend)) do
+		local title = link["title"]
+		-- Original address of the attachment
+		local source = link["href"]
+		local url,name,size,ftype,fid = downloadFile(title, source, "get",param,head,ck)
+		if url == "" then
+			-- Up to six further attempts; the last failure is written to the error log.
+			for attempt = 1,6 do
+				url,name,size,ftype,fid = downloadFile(title,source,"get",param,head,ck)
+				if url ~= "" and size ~= "" then
+					break
+				end
+				if attempt == 6 then
+					saveErrLog(source,"comm附件下载失败")
+				end
+			end
+		end
+
+		if url == "" and size == "" then
+			name = title
+		end
+
+		if type(url) ~= "string" then
+			url = ""
+		end
+
+		local entry
+		if url ~= "" and name ~= nil and name ~= "" and size ~= "" then
+			-- Download succeeded: return the full record
+			entry = {
+				["url"]=url,
+				["filename"]=name,
+				["size"]=size,
+				["ftype"]=ftype,
+				["fid"]=fid,
+				["org_url"] = source
+			}
+		else
+			-- Download failed: keep the name and the original address only
+			entry = {
+				["filename"]=name,
+				["org_url"] = source
+			}
+		end
+		table.insert(attachments, entry)
+	end
+	return attachments
+end
+
+
+-- Removes every HTML comment ("<!-- ... -->") from content and returns the result.
+-- A comment that is never closed is treated as running to the end of the text, so
+-- everything from its opener onwards is dropped.
+function common.getPureContent(content)
+	local resContent = content
+	while true do
+		local startChar = string.find(resContent, "<!%-%-")
+		if startChar == nil then
+			break
+		end
+		-- Search for the terminator behind the opener only. Searching from the start of
+		-- the string would pick up a stray "-->" in front of the comment and never
+		-- terminate. Starting two characters in still accepts "<!-->".
+		local _,endChar = string.find(resContent, "%-%->", startChar+2)
+		if endChar == nil then
+			-- Unterminated comment: cut it off instead of failing on nil arithmetic.
+			resContent = string.sub(resContent, 1, startChar-1)
+			break
+		end
+		resContent = string.sub(resContent, 1, startChar-1)..string.sub(resContent, endChar+1)
+	end
+	return resContent
+end
+
+
+-- Extracts an amount and its currency from a piece of text.
+-- Brackets and thousands separators (ASCII and full-width) are stripped first. A "万"
+-- in the text multiplies the amount by 10000. The currency is "美元" when the text
+-- names it and "人民币" otherwise.
+-- Returns the amount rounded to two decimals and the currency name; the amount is 0
+-- when the text contains no number.
+function common.getMoneyAndType(orgStr)
+	orgStr = common.trim(orgStr)
+	-- "(" and ")" are pattern magic characters and must be escaped; unescaped they are
+	-- malformed patterns. The full-width forms are plain multi-byte text.
+	for _,junk in ipairs({"%(", "%)", ",", "(", ")", ","}) do
+		orgStr = string.gsub(orgStr, junk, "")
+	end
+
+	local multiplier = 1
+	if string.find(orgStr, "万") ~= nil then
+		orgStr = string.gsub(orgStr, "万元", "")
+		orgStr = string.gsub(orgStr, "万", "")
+		multiplier = 10000
+	end
+
+	local moneyType = "人民币"
+	if string.find(orgStr, "人民币") ~= nil then
+		orgStr = string.gsub(orgStr, "人民币", "")
+		orgStr = string.gsub(orgStr, "¥", "")
+		orgStr = string.gsub(orgStr, "¥", "")
+	elseif string.find(orgStr, "美元") ~= nil then
+		orgStr = string.gsub(orgStr, "美元", "")
+		-- "$" alone is the end-of-string anchor; "%$" is the literal dollar sign.
+		orgStr = string.gsub(orgStr, "%$", "")
+		moneyType = "美元"
+	end
+
+	-- At most one decimal point, so the captured text is always a valid number.
+	local num = 0
+	local i, j = string.find(orgStr, "[0-9]+%.?[0-9]*")
+	if i ~= nil then
+		num = (tonumber(string.sub(orgStr, i, j)) or 0) * multiplier
+	end
+
+	local resNum = tonumber(string.format("%.2f", num))
+
+	return resNum, moneyType
+end
+
+
+
+
+-- End of the common helpers: hand the common table to whoever loads this file.
+return common;

+ 79 - 0
src/res/util/ecps.lua

@@ -0,0 +1,79 @@
+--[[
+企明星爬虫系统,公共文件
+Author:zjk
+Date:2016/4/19
+]]
+
+ecps={}
+-- Key/value reversal between tables.
+-- tab:   values keyed by text; every key is used as a Lua pattern
+-- frtab: only its keys are used; the first key of tab that matches a key of frtab
+--        supplies the value remembered for that frtab key
+-- totab: maps result keys to keys of frtab
+-- Returns a table that holds, for every key of totab, the value remembered for the
+-- frtab key it points at. The value is replaced by "" when it equals the result key
+-- itself and is absent when nothing matched.
+function ecps.reversalFormat(tab,frtab,totab)
+	local byLabel = {}
+	for label,_ in pairs(frtab) do
+		for key,value in pairs(tab) do
+			local hit = string.match(label,key)
+			if hit ~= nil and hit ~= "" then
+				byLabel[label] = value
+				break
+			end
+		end
+	end
+
+	local result = {}
+	for field,label in pairs(totab) do
+		local value = byLabel[label]
+		if field == value then
+			result[field] = ""
+		else
+			result[field] = value
+		end
+	end
+	return result
+end
+
+-- Enterprise basic-information form.
+-- Maps label text (alternative labels separated by "/") to a field name.
+ecps.baseFm={
+	["统一社会信用代码/注册号/统一社会信用代码"]="RegNo",["名称"]="EntName",["类型"]="EntTypeName",
+	["注册资本/成员出资总额"]="RegCap",
+	["法定代表人/负责人/经营者/投资人/执行事务合伙人/执行事务合伙人(委派代表)/股东"]="LeRep", 
+	["成立日期/注册日期"]="EstDate",
+	["核准日期/发照日期/吊销日期"]="IssBLicDate",
+	["营业期限自/经营期限自/合伙期限自"]="OpFrom",
+	["营业期限至/经营期限至/合伙期限至"]="OpTo",
+	["住所"]="Dom",["经营场所/主要经营场所/营业场所"]="OpLoc",
+	["经营范围/业务范围"]="OpScope",
+	["登记机关"]="RegOrg",["登记状态/经营状态"]="OpState",
+} 
+-- Inverse of ecps.baseFm: maps each field name back to its label text.
+ecps.baseMap={
+	["RegNo"]="统一社会信用代码/注册号/统一社会信用代码",["EntName"]="名称",["EntTypeName"]="类型",
+	["RegCap"]="注册资本/成员出资总额",
+	["LeRep"]="法定代表人/负责人/经营者/投资人/执行事务合伙人/执行事务合伙人(委派代表)/股东",
+	["EstDate"]="成立日期/注册日期",
+	["IssBLicDate"]="核准日期/发照日期/吊销日期",
+	["OpFrom"]="营业期限自/经营期限自/合伙期限自",
+	["OpTo"]="营业期限至/经营期限至/合伙期限至",
+	["Dom"]="住所",["OpLoc"]="经营场所/主要经营场所/营业场所",
+	["OpScope"]="经营范围/业务范围",
+	["RegOrg"]="登记机关",["OpState"]="登记状态/经营状态",
+}
+-- Second label -> field-name table, same layout as ecps.baseFm.
+-- NOTE(review): "Nb" presumably stands for the annual report ("年报") form - confirm.
+ecps.baseNbFm={
+	["统一社会信用代码/注册号/统一社会信用代码"]="RegNo",
+	["企业名称"]="EntName",["企业联系电话"]="Tel",["邮政编码"]="postcode",
+	["企业通信地址"]="address",["电子邮箱/企业电子邮箱"]="email",
+	["有限责任公司本年度是否发生股东股权转让"]="equityTransfer",
+	["企业经营状态/企业登记状态"]="state", 
+	["是否有网站或网店"]="hasWebsite",
+	["企业是否有投资信息或购买其他公司股权/是否有投资信息或购买其他公司股权"]="hasInv",
+	["是否有对外担保信息"]="hasGuarantee",
+	["从业人数"]="numPeople",
+} 
+-- Inverse of ecps.baseNbFm: maps each field name back to its label text.
+ecps.baseNbMap={
+	["RegNo"]="统一社会信用代码/注册号/统一社会信用代码",
+	["EntName"]="企业名称",["Tel"]="企业联系电话",["postcode"]="邮政编码",
+	["address"]="企业通信地址",["email"]="电子邮箱/企业电子邮箱",
+	["equityTransfer"]="有限责任公司本年度是否发生股东股权转让",
+	["state"]="企业经营状态/企业登记状态",
+	["hasWebsite"]="是否有网站或网店",
+	["hasInv"]="企业是否有投资信息或购买其他公司股权/是否有投资信息或购买其他公司股权",
+	["hasGuarantee"]="是否有对外担保信息",
+	["numPeople"]="从业人数",
+} 
+-- End of the shared definitions: hand the ecps table to whoever loads this file.
+return ecps;
+
+

+ 417 - 0
src/res/util/json.lua

@@ -0,0 +1,417 @@
+-----------------------------------------------------------------------------
+-- JSON4Lua: JSON encoding / decoding support for the Lua language.
+-- json Module.
+-- Author: Craig Mason-Jones
+-- Homepage: http://github.com/craigmj/json4lua/
+-- Version: 1.0.0
+-- This module is released under the MIT License (MIT).
+-- Please see LICENCE.txt for details.
+--
+-- USAGE:
+-- This module exposes two functions:
+--   json.encode(o)
+--     Returns the table / string / boolean / number / nil / json.null value as a JSON-encoded string.
+--   json.decode(json_string)
+--     Returns a Lua object populated with the data encoded in the JSON string json_string.
+--
+-- REQUIREMENTS:
+--   compat-5.1 if using Lua 5.0
+--
+-- CHANGELOG
+--   0.9.20 Introduction of local Lua functions for private functions (removed _ function prefix). 
+--          Fixed Lua 5.1 compatibility issues.
+--      Introduced json.null to have null values in associative arrays.
+--          json.encode() performance improvement (more than 50%) through table.concat rather than ..
+--          Introduced decode ability to ignore /**/ comments in the JSON string.
+--   0.9.10 Fix to array encoding / decoding to correctly manage nil/null values in arrays.
+-----------------------------------------------------------------------------
+
+-----------------------------------------------------------------------------
+-- Imports and dependencies
+-----------------------------------------------------------------------------
+local math = require('math')
+local string = require("string")
+local table = require("table")
+
+-----------------------------------------------------------------------------
+-- Module declaration
+-----------------------------------------------------------------------------
+local json = {}             -- Public namespace
+local json_private = {}     -- Private namespace
+
+-- Public functions
+
+-- Private functions
+-- Forward declarations: the functions are assigned further down, and declaring the
+-- locals here lets them refer to each other regardless of definition order.
+local decode_scanArray
+local decode_scanComment
+local decode_scanConstant
+local decode_scanNumber
+local decode_scanObject
+local decode_scanString
+local decode_scanWhitespace
+-- NOTE: encodeString is never assigned in this file; the implementation is stored in
+-- json_private.encodeString instead.
+local encodeString
+local isArray
+local isEncodable
+
+-----------------------------------------------------------------------------
+-- PUBLIC FUNCTIONS
+-----------------------------------------------------------------------------
+--- Serialises a Lua value as JSON text.
+-- Strings are quoted and escaped, numbers and booleans go through tostring, tables
+-- become arrays or objects depending on isArray, and nil or the null function become
+-- the literal null. Any other type fails an assertion.
+-- @param v The Lua object / variable to be JSON encoded.
+-- @return String containing the JSON encoding in internal Lua string format (i.e. not unicode)
+function json.encode (v)
+  local vtype = type(v)
+
+  -- nil maps to the JSON literal
+  if vtype=='nil' then
+    return "null"
+  end
+
+  -- Strings are escaped and wrapped in double quotes
+  if vtype=='string' then
+    return '"' .. json_private.encodeString(v) .. '"'
+  end
+
+  -- Numbers and booleans use their default text form
+  if vtype=='number' or vtype=='boolean' then
+    return tostring(v)
+  end
+
+  if vtype=='table' then
+    local parts = {}
+    local bArray, maxCount = isArray(v)
+    if bArray then
+      -- Every index up to the highest one is emitted, holes included
+      for i = 1,maxCount do
+        table.insert(parts, json.encode(v[i]))
+      end
+      return '[' .. table.concat(parts,',') ..']'
+    end
+    -- Object: pairs that cannot be encoded are left out
+    for key,value in pairs(v) do
+      if isEncodable(key) and isEncodable(value) then
+        table.insert(parts, '"' .. json_private.encodeString(key) .. '":' .. json.encode(value))
+      end
+    end
+    return '{' .. table.concat(parts,',') .. '}'
+  end
+
+  -- The null function stands in for an explicit null
+  if vtype=='function' and v==null then
+    return 'null'
+  end
+
+  assert(false,'encode attempt to encode unsupported type ' .. vtype .. ':' .. tostring(v))
+end
+
+
+--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
+-- The first non-whitespace character decides which scanner handles the value; a
+-- leading /* */ comment is skipped and decoding resumes behind it.
+-- @param s The string to scan.
+-- @param [startPos] Optional starting position where the JSON string is located. Defaults to 1.
+-- @return Lua object, number The object that was scanned, as a Lua table / string / number / boolean or nil,
+-- and the position of the first character after
+-- the scanned JSON object.
+function json.decode(s, startPos)
+  startPos = startPos and startPos or 1
+  startPos = decode_scanWhitespace(s,startPos)
+  assert(startPos<=string.len(s), 'Unterminated JSON encoded object found at position in [' .. s .. ']')
+  local curChar = string.sub(s,startPos,startPos)
+  -- Object
+  if curChar=='{' then
+    return decode_scanObject(s,startPos)
+  end
+  -- Array
+  if curChar=='[' then
+    return decode_scanArray(s,startPos)
+  end
+  -- Number
+  if string.find("+-0123456789.e", curChar, 1, true) then
+    return decode_scanNumber(s,startPos)
+  end
+  -- String
+  if curChar==[["]] or curChar==[[']] then
+    return decode_scanString(s,startPos)
+  end
+  -- Comment: continue behind it. The recursion must go through json.decode; there is
+  -- no function called plain "decode" in this module.
+  if string.sub(s,startPos,startPos+1)=='/*' then
+    return json.decode(s, decode_scanComment(s,startPos))
+  end
+  -- Otherwise, it must be a constant
+  return decode_scanConstant(s,startPos)
+end
+
+--- The null function allows one to specify a null value in an associative array (which is otherwise
+-- discarded if you set the value with 'nil' in Lua). Simply set t = { first=json.null }
+function null()
+  return null -- so json.null() will also return null ;-)
+end
+-- Publish the sentinel under the documented name; the global "null" stays in place
+-- because the encoder compares against it.
+json.null = null
+-----------------------------------------------------------------------------
+-- Internal, PRIVATE functions.
+-- Following a Python-like convention, I have prefixed all these 'PRIVATE'
+-- functions with an underscore.
+-----------------------------------------------------------------------------
+
+--- Scans an array from JSON into a Lua object
+-- startPos begins at the start of the array.
+-- Returns the array and the next starting position
+-- @param s The string being scanned.
+-- @param startPos The starting position for the scan.
+-- @return table, int The scanned array as a table, and the position of the next character to scan.
+function decode_scanArray(s,startPos)
+  local array = {}  -- The return value
+  local stringLen = string.len(s)
+  assert(string.sub(s,startPos,startPos)=='[','decode_scanArray called but array does not start at position ' .. startPos .. ' in string:\n'..s )
+  startPos = startPos + 1
+  -- Infinite loop for array elements
+  repeat
+    startPos = decode_scanWhitespace(s,startPos)
+    assert(startPos<=stringLen,'JSON String ended unexpectedly scanning array.')
+    local curChar = string.sub(s,startPos,startPos)
+    if (curChar==']') then
+      return array, startPos+1
+    end
+    if (curChar==',') then
+      startPos = decode_scanWhitespace(s,startPos+1)
+    end
+    assert(startPos<=stringLen, 'JSON String ended unexpectedly scanning array.')
+    -- Kept local so decoding does not create or overwrite a global named "object".
+    local object
+    object, startPos = json.decode(s,startPos)
+    table.insert(array,object)
+  until false
+end
+
+--- Skips over a /* ... */ comment.
+-- Fails an assertion when s does not hold a comment opener at startPos or when the
+-- comment is never closed.
+-- @param string s The JSON string to scan.
+-- @param int startPos The starting position of the comment
+-- @return int The position of the first character behind the comment.
+function decode_scanComment(s, startPos)
+  local opener = string.sub(s,startPos,startPos+1)
+  assert( opener=='/*', "decode_scanComment called but comment does not start at position " .. startPos)
+  -- Start looking behind the opener so that "/*/" is not taken for a complete comment.
+  local closerAt = string.find(s,'*/',startPos+2)
+  assert(closerAt~=nil, "Unterminated comment in string at " .. startPos)
+  return closerAt+2
+end
+
+--- Recognises one of the literals true, false or null at startPos.
+-- Fails an assertion when none of them is found there.
+-- @param s The string being scanned.
+-- @param startPos The position in the string at which to start scanning.
+-- @return object, int The Lua value (true, false or nil) and the position of the first
+-- character behind the literal.
+function decode_scanConstant(s, startPos)
+  -- Literal text paired with its Lua value; null has no second element, i.e. nil.
+  local literals = {
+    { "true", true },
+    { "false", false },
+    { "null" },
+  }
+
+  for _,entry in ipairs(literals) do
+    local text = entry[1]
+    local stop = startPos + string.len(text)
+    if string.sub(s,startPos, stop - 1)==text then
+      return entry[2], stop
+    end
+  end
+  assert(nil, 'Failed to scan constant from string ' .. s .. ' at starting position ' .. startPos)
+end
+
+--- Reads a number that starts at startPos.
+-- The character at startPos is taken as given; every following character out of
+-- "+-0123456789.e" is added to it and the resulting text is evaluated as a Lua
+-- expression (so simple +/- sums are accepted too, which goes beyond the JSON spec).
+-- Fails an assertion when the text does not compile.
+-- @param s The string being scanned.
+-- @param startPos The position at which to start scanning.
+-- @return number, int The extracted number and the position of the next character to scan.
+function decode_scanNumber(s,startPos)
+  local numberChars = "+-0123456789.e"
+  local last = string.len(s)
+  local stop = startPos+1
+  while stop<=last and string.find(numberChars, string.sub(s,stop,stop), 1, true) do
+    stop = stop + 1
+  end
+  local chunk = 'return ' .. string.sub(s,startPos, stop-1)
+  local evaluator = loadstring(chunk)
+  assert(evaluator, 'Failed to scan number [ ' .. chunk .. '] in JSON string at position ' .. startPos .. ' : ' .. stop)
+  return evaluator(), stop
+end
+
+--- Reads a JSON object that starts at startPos into a Lua table.
+-- Keys and values are both read through json.decode; each key must be followed by ':'.
+-- Fails an assertion when s does not hold '{' at startPos, when the text ends early or
+-- when the ':' is missing.
+-- @param s The string being scanned.
+-- @param startPos The starting position of the scan.
+-- @return table, int The scanned object as a table and the position of the next character to scan.
+function decode_scanObject(s,startPos)
+  local stringLen = string.len(s)
+  assert(string.sub(s,startPos,startPos)=='{','decode_scanObject called but object does not start at position ' .. startPos .. ' in string:\n' .. s)
+  local object = {}
+  local key, value
+  local pos = startPos + 1
+  -- One pass per key-value pair; the closing brace ends the loop
+  while true do
+    pos = decode_scanWhitespace(s,pos)
+    assert(pos<=stringLen, 'JSON string ended unexpectedly while scanning object.')
+    local curChar = string.sub(s,pos,pos)
+    if curChar=='}' then
+      return object,pos+1
+    elseif curChar==',' then
+      pos = decode_scanWhitespace(s,pos+1)
+    end
+    assert(pos<=stringLen, 'JSON string ended unexpectedly scanning object.')
+    -- The key
+    key, pos = json.decode(s,pos)
+    assert(pos<=stringLen, 'JSON string ended unexpectedly searching for value of key ' .. key)
+    pos = decode_scanWhitespace(s,pos)
+    assert(pos<=stringLen, 'JSON string ended unexpectedly searching for value of key ' .. key)
+    assert(string.sub(s,pos,pos)==':','JSON object key-value assignment mal-formed at ' .. pos)
+    -- The value behind the colon
+    pos = decode_scanWhitespace(s,pos+1)
+    assert(pos<=stringLen, 'JSON string ended unexpectedly searching for value of key ' .. key)
+    value, pos = json.decode(s,pos)
+    object[key]=value
+  end
+end
+
+-- START SoniEx2
+-- Initialize some things used by decode_scanString
+-- You know, for efficiency
+-- Maps a two-character escape as it appears in the JSON text (backslash plus letter)
+-- to the character it stands for.
+local escapeSequences = {
+  ["\\t"] = "\t",
+  ["\\f"] = "\f",
+  ["\\r"] = "\r",
+  ["\\n"] = "\n",
+  ["\\b"] = "\b"
+}
+-- Fallback for every key that is not listed above: the first character (the backslash)
+-- is dropped and the rest is returned, so \" gives " and \\ gives \. An empty key gives
+-- an empty string.
+setmetatable(escapeSequences, {__index = function(t,k)
+  -- skip "\" aka strip escape
+  return string.sub(k,2)
+end})
+-- END SoniEx2
+
+--- Scans a JSON string from the opening inverted comma or single quote to the
+-- end of the string.
+-- Returns the string extracted as a Lua string,
+-- and the position of the next non-string character
+-- (after the closing inverted comma or single quote).
+-- Backslash escapes are resolved through escapeSequences; \uXXXX is converted to
+-- UTF-8 bytes (one to three bytes, each escape handled on its own).
+-- @param s The string being scanned.
+-- @param startPos The starting position of the scan.
+-- @return string, int The extracted string as a Lua string, and the next character to parse.
+function decode_scanString(s,startPos)
+  assert(startPos, 'decode_scanString(..) called without start position')
+  local startChar = string.sub(s,startPos,startPos)
+  -- START SoniEx2
+  -- PS: I don't think single quotes are valid JSON
+  assert(startChar == [["]] or startChar == [[']],'decode_scanString called for a non-string')
+  --assert(startPos, "String decoding failed: missing closing " .. startChar .. " for string at position " .. oldStart)
+  -- t collects the decoded pieces; j is the index of the last character consumed so far.
+  local t = {}
+  local i,j = startPos,startPos
+  -- Keep going until the closing quote is the very next character.
+  while string.find(s, startChar, j+1) ~= j+1 do
+    local oldj = j
+    -- i,j: next backslash escape (two characters); x,y: next quote character.
+    i,j = string.find(s, "\\.", j+1)
+    local x,y = string.find(s, startChar, oldj+1)
+    -- No escape left, or the quote comes first: take the plain text up to the quote.
+    -- i..j then denotes an empty range, which the lookup below turns into "".
+    if not i or x < i then
+      i,j = x,y-1
+    end
+    -- Literal text between the previous position and the escape (or quote).
+    table.insert(t, string.sub(s, oldj+1, i-1))
+    if string.sub(s, i, j) == "\\u" then
+      -- Four hex digits follow the \u.
+      local a = string.sub(s,j+1,j+4)
+      j = j + 4
+      local n = tonumber(a, 16)
+      assert(n, "String decoding failed: bad Unicode escape " .. a .. " at position " .. i .. " : " .. j)
+      -- math.floor(x/2^y) == lazy right shift
+      -- a % 2^b == bitwise_and(a, (2^b)-1)
+      -- 64 = 2^6
+      -- 4096 = 2^12 (or 2^6 * 2^6)
+      local x
+      if n < 0x80 then
+        x = string.char(n % 0x80)
+      elseif n < 0x800 then
+        -- [110x xxxx] [10xx xxxx]
+        x = string.char(0xC0 + (math.floor(n/64) % 0x20), 0x80 + (n % 0x40))
+      else
+        -- [1110 xxxx] [10xx xxxx] [10xx xxxx]
+        x = string.char(0xE0 + (math.floor(n/4096) % 0x10), 0x80 + (math.floor(n/64) % 0x40), 0x80 + (n % 0x40))
+      end
+      table.insert(t, x)
+    else
+      -- Any other escape (or the empty range from above) goes through the lookup table.
+      table.insert(t, escapeSequences[string.sub(s, i, j)])
+    end
+  end
+  --table.insert(t,string.sub(s, startChar, j+1))
+  assert(string.find(s, startChar, j+1), "String decoding failed: missing closing " .. startChar .. " at position " .. j .. "(for string at position " .. startPos .. ")")
+  -- j+1 is the closing quote, so parsing continues at j+2.
+  return table.concat(t,""), j+2
+  -- END SoniEx2
+end
+
+--- Advances past spaces, newlines, carriage returns and tabs.
+-- @param s The string being scanned
+-- @param startPos The starting position where we should begin removing whitespace.
+-- @return int The first position at or after startPos that holds a non-whitespace
+-- character, or string.len(s)+1 if the end of the string was reached.
+function decode_scanWhitespace(s,startPos)
+  local blanks = " \n\r\t"
+  local last = string.len(s)
+  local pos = startPos
+  while pos <= last and string.find(blanks, string.sub(s,pos,pos), 1, true) do
+    pos = pos + 1
+  end
+  return pos
+end
+
+--- Encodes a string to be JSON-compatible.
+-- Quotes, backslashes, forward slashes and the control characters that have a short
+-- escape are replaced from escapeList; every other control character below 0x20 is
+-- written as \u00XX, because JSON does not allow raw control characters in strings.
+-- @param s The string to return as a JSON encoded (i.e. backquoted string)
+-- @return The string appropriately escaped, followed by the second result of gsub
+-- (the number of characters it visited).
+
+local escapeList = {
+    ['"']  = '\\"',
+    ['\\'] = '\\\\',
+    ['/']  = '\\/', 
+    ['\b'] = '\\b',
+    ['\f'] = '\\f',
+    ['\n'] = '\\n',
+    ['\r'] = '\\r',
+    ['\t'] = '\\t'
+}
+
+function json_private.encodeString(s)
+ local s = tostring(s)
+ return s:gsub(".", function(c)
+   local escaped = escapeList[c]
+   if escaped == nil and string.byte(c) < 0x20 then
+     escaped = string.format("\\u%04x", string.byte(c))
+   end
+   -- nil keeps the character as it is
+   return escaped
+ end) -- SoniEx2: 5.0 compat
+end
+
+-- Decides whether a table is encoded as a JSON array or as an object.
+-- A table counts as an array when all of its encodable values sit under positive
+-- integer keys. A key 'n' is tolerated when it holds table.getn(t), and other keys are
+-- tolerated when their values cannot be encoded anyway.
+-- @param t The table to evaluate as an array
+-- @return boolean, number True plus the highest integer key when the table can be
+-- represented as an array, otherwise just false.
+function isArray(t)
+  local highest = 0
+  for key,value in pairs(t) do
+    local isIndex = type(key)=='number' and math.floor(key)==key and 1<=key
+    if isIndex then
+      -- Every array element must be encodable
+      if not isEncodable(value) then return false end
+      highest = math.max(highest,key)
+    elseif key=='n' then
+      -- 'n' is only acceptable as the element count
+      if value ~= table.getn(t) then return false end
+    elseif isEncodable(value) then
+      -- Encodable data under a non-index key makes this an object
+      return false
+    end
+  end
+  return true, highest
+end
+
+--- Tells whether a value takes part in JSON encoding.
+-- Strings, booleans, numbers, nil and tables do; a function only does when it is the
+-- null function. Everything else is ignored by the encoder.
+-- @param o The object to examine.
+-- @return boolean True if the object should be JSON encoded, false if it should be ignored.
+function isEncodable(o)
+  local kind = type(o)
+  if kind=='function' then
+    return o==null
+  end
+  return kind=='string' or kind=='boolean' or kind=='number' or kind=='nil' or kind=='table'
+end
+
+-- Hand the public namespace to whoever loads this file.
+return json

+ 268 - 0
src/spider/download.go

@@ -0,0 +1,268 @@
+/**
+GO代码相对简单,
+重点处理下载工具,爬虫启动,监控等。
+逻辑处理交给LUA处理
+*/
+package spider
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"math/rand"
+	mu "mfw/util"
+	"net/http"
+	"regexp"
+	lu "spiderutil"
+	"time"
+
+	"github.com/donnie4w/go-logger/logger"
+	"github.com/surfer/agent"
+)
+
+// regImgStr matches URLs that end in one of the listed image or office-document extensions.
+var regImgStr = "\\.(JPG|jpg|GIF|gif|PNG|png|BMP|bmp|doc|docx|pdf|xls|xlsx)$"
+// regImg is the compiled form of regImgStr; it is assigned in init.
+var regImg *regexp.Regexp
+
+// init compiles regImgStr once at package load.
+//
+// MustCompile is used instead of discarding the error from Compile, so an
+// invalid pattern stops the program at startup rather than leaving regImg nil
+// and failing on first use.
+func init() {
+	regImg = regexp.MustCompile(regImgStr)
+}
+
+// Download requests url through the message client and returns the response body.
+//
+// With an empty downloaderid the request goes to a randomly chosen receiver;
+// otherwise it is sent point-to-point to that downloader, and "" is returned
+// straight away when isAvailable reports that it is not registered. When head
+// is empty a random "common" User-Agent is added to it. For URLs matching
+// regImg the reply's "content" field is returned as received; for every other
+// URL it is base64-decoded first. A reply whose "code" is not "200" yields "".
+func Download(downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
+	defer mu.Catch()
+	msgid := mu.UUID(8)
+	if len(head) < 1 {
+		// A nil map cannot be written to; allocate one so the header can be set.
+		if head == nil {
+			head = map[string]interface{}{}
+		}
+		// rand.Intn panics for n <= 0, so only pick when agents are configured.
+		if agents := agent.UserAgents["common"]; len(agents) > 0 {
+			r := rand.New(rand.NewSource(time.Now().UnixNano()))
+			head["User-Agent"] = agents[r.Intn(len(agents))]
+		}
+	}
+	isImg := regImg.MatchString(url)
+	params := map[string]interface{}{
+		"url":      url,
+		"method":   method,
+		"head":     head,
+		"encoding": encoding,
+		"useproxy": useproxy,
+		"ishttps":  ishttps,
+	}
+	var ret []byte
+	var err error
+	switch {
+	case downloaderid == "":
+		ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, params, timeout)
+	case isAvailable(downloaderid):
+		ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, params, timeout)
+	default:
+		return ""
+	}
+	if err != nil {
+		str := code + "方法Download,url:" + url + ",err:" + err.Error()
+		logger.Error(str, timeout)
+		//AddCheckLogs(url, code, "net", str)
+	}
+	tmp := map[string]interface{}{}
+	// A failed decode leaves tmp empty, which the status check below treats as failure.
+	_ = json.Unmarshal(ret, &tmp)
+	if v, ok := tmp["code"].(string); !ok || v != "200" {
+		return ""
+	}
+	// Checked assertion: a missing or non-string content becomes "" instead of a panic.
+	content, _ := tmp["content"].(string)
+	if isImg {
+		return content
+	}
+	// The decode error is ignored on purpose: whatever was decoded is returned, as before.
+	bs, _ := base64.StdEncoding.DecodeString(content)
+	return string(bs)
+}
+
+// DownloadAdv is the extended form of Download: it also forwards request
+// parameters and cookies, and returns the cookies parsed from the reply.
+//
+// With an empty downloaderid the request goes to a randomly chosen receiver;
+// otherwise it is sent point-to-point to that downloader, and ("", nil) is
+// returned straight away when isAvailable reports that it is not registered.
+// When head is empty a random "common" User-Agent is added to it. For URLs
+// matching regImg the reply's "content" field is returned as received; for
+// every other URL it is base64-decoded first. A reply whose "code" is not
+// "200" yields ("", nil).
+func DownloadAdv(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie) {
+	defer mu.Catch()
+	msgid := mu.UUID(8)
+	if len(head) < 1 {
+		// A nil map cannot be written to; allocate one so the header can be set.
+		if head == nil {
+			head = map[string]interface{}{}
+		}
+		// rand.Intn panics for n <= 0, so only pick when agents are configured.
+		if agents := agent.UserAgents["common"]; len(agents) > 0 {
+			r := rand.New(rand.NewSource(time.Now().UnixNano()))
+			head["User-Agent"] = agents[r.Intn(len(agents))]
+		}
+	}
+	isImg := regImg.MatchString(url)
+	params := map[string]interface{}{
+		"url":      url,
+		"method":   method,
+		"head":     head,
+		"reqparam": reqparam,
+		"cookie":   mycookie,
+		"encoding": encoding,
+		"useproxy": useproxy,
+		"ishttps":  ishttps,
+	}
+	var ret []byte
+	var err error
+	switch {
+	case downloaderid == "":
+		ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, params, timeout)
+	case isAvailable(downloaderid):
+		ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, params, timeout)
+	default:
+		return "", nil
+	}
+	if err != nil {
+		str := code + "方法DownloadAdv,url:" + url + ",err:" + err.Error()
+		logger.Error(str, timeout)
+	}
+	tmp := map[string]interface{}{}
+	// A failed decode leaves tmp empty, which the status check below treats as failure.
+	_ = json.Unmarshal(ret, &tmp)
+	cooks := lu.ParseHttpCookie(tmp["cookie"])
+	if v, ok := tmp["code"].(string); !ok || v != "200" {
+		return "", nil
+	}
+	// Checked assertion: a missing or non-string content becomes "" instead of a panic.
+	content, _ := tmp["content"].(string)
+	if isImg {
+		return content, cooks
+	}
+	// The decode error is ignored on purpose: whatever was decoded is returned, as before.
+	bs, _ := base64.StdEncoding.DecodeString(content)
+	return string(bs), cooks
+}
+
+// DownloadFile_bak downloads an attachment through Msclient and returns the
+// base64-decoded "content" of the reply.
+//
+// With an empty downloaderid the request goes to a randomly chosen receiver;
+// otherwise it is sent point-to-point to that downloader, and nil is returned
+// straight away when isAvailable reports that it is not registered. When head
+// is empty a random "common" User-Agent is added to it. A reply whose "code"
+// is not "200" yields nil.
+func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
+	defer mu.Catch()
+	msgid := mu.UUID(8)
+	if len(head) < 1 {
+		// A nil map cannot be written to; allocate one so the header can be set.
+		if head == nil {
+			head = map[string]interface{}{}
+		}
+		// rand.Intn panics for n <= 0, so only pick when agents are configured.
+		if agents := agent.UserAgents["common"]; len(agents) > 0 {
+			r := rand.New(rand.NewSource(time.Now().UnixNano()))
+			head["User-Agent"] = agents[r.Intn(len(agents))]
+		}
+	}
+	params := map[string]interface{}{
+		"url":      url,
+		"method":   method,
+		"head":     head,
+		"reqparam": reqparam,
+		"cookie":   mycookie,
+		"encoding": encoding,
+		"useproxy": useproxy,
+		"ishttps":  ishttps,
+	}
+	var ret []byte
+	var err error
+	switch {
+	case downloaderid == "":
+		ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, params, timeout)
+	case isAvailable(downloaderid):
+		ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, params, timeout)
+	default:
+		return nil
+	}
+	if err != nil {
+		str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
+		logger.Error(str, timeout)
+	}
+	tmp := map[string]interface{}{}
+	// A failed decode leaves tmp empty, which the status check below treats as failure.
+	_ = json.Unmarshal(ret, &tmp)
+	if v, ok := tmp["code"].(string); !ok || v != "200" {
+		return nil
+	}
+	// Checked assertion: a missing or non-string content decodes as "" instead of panicking.
+	content, _ := tmp["content"].(string)
+	// The decode error is ignored on purpose: whatever was decoded is returned, as before.
+	bs, _ := base64.StdEncoding.DecodeString(content)
+	return bs
+}
+
+// DownloadFile downloads an attachment through MsclientFile and returns the
+// base64-decoded "content" of the reply. The caller's timeout is doubled
+// before the request is sent.
+//
+// With an empty downloaderid the request goes to a randomly chosen receiver;
+// otherwise it is sent point-to-point to that downloader, and nil is returned
+// straight away when isAvailableFile reports that it is not registered. When
+// head is empty a random "common" User-Agent is added to it. A reply whose
+// "code" is not "200" yields nil.
+func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
+	defer mu.Catch()
+	timeout = timeout * 2
+	msgid := mu.UUID(8)
+	if len(head) < 1 {
+		// A nil map cannot be written to; allocate one so the header can be set.
+		if head == nil {
+			head = map[string]interface{}{}
+		}
+		// rand.Intn panics for n <= 0, so only pick when agents are configured.
+		if agents := agent.UserAgents["common"]; len(agents) > 0 {
+			r := rand.New(rand.NewSource(time.Now().UnixNano()))
+			head["User-Agent"] = agents[r.Intn(len(agents))]
+		}
+	}
+	params := map[string]interface{}{
+		"url":      url,
+		"method":   method,
+		"head":     head,
+		"reqparam": reqparam,
+		"cookie":   mycookie,
+		"encoding": encoding,
+		"useproxy": useproxy,
+		"ishttps":  ishttps,
+	}
+	var ret []byte
+	var err error
+	switch {
+	case downloaderid == "":
+		ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, params, timeout)
+	case isAvailableFile(downloaderid):
+		ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, params, timeout)
+	default:
+		return nil
+	}
+	if err != nil {
+		str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
+		logger.Error(str, timeout)
+	}
+	tmp := map[string]interface{}{}
+	// A failed decode leaves tmp empty, which the status check below treats as failure.
+	_ = json.Unmarshal(ret, &tmp)
+	if v, ok := tmp["code"].(string); !ok || v != "200" {
+		return nil
+	}
+	// Checked assertion: a missing or non-string content decodes as "" instead of panicking.
+	content, _ := tmp["content"].(string)
+	// The decode error is ignored on purpose: whatever was decoded is returned, as before.
+	bs, _ := base64.StdEncoding.DecodeString(content)
+	return bs
+}
+
+// isAvailable reports whether code is currently a key of Alldownloader.
+// A direct map lookup replaces the former scan over every key.
+func isAvailable(code string) bool {
+	_, ok := Alldownloader[code]
+	return ok
+}
+
+// isAvailableFile reports whether code is currently a key of AlldownloaderFile.
+// A direct map lookup replaces the former scan over every key.
+func isAvailableFile(code string) bool {
+	_, ok := AlldownloaderFile[code]
+	return ok
+}

+ 22 - 0
src/spider/front.go

@@ -0,0 +1,22 @@
+package spider
+
+import (
+	mgo "mongodb"
+	qu "qfw/util"
+)
+
+// MongoDB handles shared across the spider package.
+var (
+	// MgoS is used in this package for spider_loadfail, spider_downlog and data_bak.
+	MgoS *mgo.MongodbSim
+	// MgoE is used in this package to read luaspecialcode and luaconfig.
+	MgoE *mgo.MongodbSim
+)
+
+// InitLuaCode rebuilds CodeChan from the "luaspecialcode" collection, mapping
+// each record's "code" to its "size". CodeChan is reset to an empty map first,
+// so it stays empty when the query returns nothing.
+func InitLuaCode() {
+	defer qu.Catch()
+	CodeChan = map[string]int{}
+	list, _ := MgoE.Find("luaspecialcode", nil, nil, nil, false, -1, -1)
+	// Check explicitly rather than relying on the deferred recover to absorb a nil dereference.
+	if list == nil {
+		return
+	}
+	for _, l := range *list {
+		CodeChan[qu.ObjToString(l["code"])] = qu.IntAll(l["size"])
+	}
+}

+ 385 - 0
src/spider/handler.go

@@ -0,0 +1,385 @@
+package spider
+
+import (
+	"fmt"
+	mu "mfw/util"
+	qu "qfw/util"
+
+	//"regexp"
+	util "spiderutil"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/donnie4w/go-logger/logger"
+	"github.com/yuin/gopher-lua"
+)
+
+// Package-level spider registries and loading state.
+var (
+	// Allspiders holds the main spider for each code; it records the download
+	// information of that code's additional spiders.
+	Allspiders sync.Map
+	// AllspidersMap holds the additional spider instances per code (multi-threading).
+	AllspidersMap = map[string][]*Spider{}
+	// AllspidersMapLock guards AllspidersMap.
+	AllspidersMapLock sync.Mutex
+	// LoopListPath maps a running index to one script-info map (filled by InitSpider).
+	LoopListPath sync.Map
+	// CodeChan is the number of threads opened for a given spider code.
+	CodeChan map[string]int
+
+	// CC and CC2 are channels of Lua states.
+	CC  chan *lua.LState
+	CC2 chan *lua.LState
+
+	// InitCount is the number of scripts InitSpider placed into LoopListPath.
+	InitCount int
+	// InitAllLuaOver signals that all scripts have finished loading.
+	InitAllLuaOver = make(chan bool, 1)
+)
+
+//var ChanDels = map[int]string{}
+//var lock sync.Mutex
+//var Chansize int
+//var regcode, _ = regexp.Compile(`="(.*)"`)
+
+// InitSpider loads every spider script from the database into LoopListPath
+// under consecutive integer keys, runs NoQueueScript when Working is 0
+// (high-performance mode), and finally records the script count in InitCount.
+func InitSpider() {
+	idx := 0
+	for _, info := range getSpiderScriptDB("all") {
+		LoopListPath.Store(idx, info)
+		idx++
+	}
+	if util.Config.Working == 0 {
+		NoQueueScript() // high-performance mode
+	}
+	InitCount = idx
+}
+
+//获取所有爬虫脚本--数据库
+func getSpiderScriptDB(code string) map[string]map[string]string {
+	scriptSpider := map[string]map[string]string{}
+	codes := []string{}
+	for c, _ := range CodeChan {
+		codes = append(codes, c)
+	}
+	query := map[string]interface{}{
+		"code": map[string]interface{}{
+			"$in": codes,
+		},
+	}
+	listdb, _ := MgoE.Find("luaconfig", query, nil, nil, false, -1, -1)
+	//临时历史附件
+	//listdb := mgu.Find("luaconfig_test", "editor", "editor", query, `{"_id":-1}`, nil, false, -1, -1)
+
+	for _, v := range *listdb {
+		old := qu.IntAll(v["old_lua"])
+		script := ""
+		if old == 1 {
+			script = fmt.Sprint(v["luacontent"])
+		} else {
+			if v["oldlua"] != nil {
+				if v["luacontent"] != nil {
+					script = v["luacontent"].(string)
+				}
+			} else {
+				script = GetScriptByTmp(v)
+			}
+		}
+		scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
+			"code":            fmt.Sprint(v["code"]),
+			"type":            fmt.Sprint(v["state"]),
+			"script":          script,
+			"createuser":      fmt.Sprint(v["createuser"]),
+			"createuseremail": fmt.Sprint(v["createuseremail"]),
+			"modifyuser":      fmt.Sprint(v["modifyuser"]),
+			"modifyemail":     fmt.Sprint(v["next"]),
+		}
+	}
+	return scriptSpider
+}
+
+// NoQueueScript is the high-performance loading mode: it builds a spider for
+// every script in LoopListPath, pausing one second after each.
+//
+// A spider that loads successfully is stored in Allspiders under its code and
+// its additional instances are created with NewMoreSpider. A failed load is
+// logged and recorded in "spider_loadfail". When the pass is finished a value
+// is sent on InitAllLuaOver and the number of loaded scripts is logged.
+func NoQueueScript() {
+	//list, _ := MgoS.Find("spider_ldtime", nil, nil, map[string]interface{}{"code": 1, "uplimit": 1, "lowlimit": 1}, false, -1, -1)
+	LoopListPath.Range(func(key, temp interface{}) bool {
+		info, ok := temp.(map[string]string)
+		if !ok {
+			return true
+		}
+		code := info["code"]
+		script := info["script"]
+		sp := NewSpider(code, script)
+		if sp != nil && sp.Code != "nil" { // script loaded successfully
+			// Index is set only after the nil check; NewSpider returns nil on a failed load.
+			sp.Index = qu.IntAll(key)
+			if info["createuser"] != "" {
+				sp.UserName = info["createuser"]
+			}
+			if info["createuseremail"] != "" {
+				sp.UserEmail = info["createuseremail"]
+			}
+			sp.MUserName = info["modifyuser"]
+			sp.MUserEmail = info["modifyemail"]
+			Allspiders.Store(sp.Code, sp)
+			// Create the additional spider objects for this code.
+			NewMoreSpider(code, script, sp)
+			// for _, tmp := range *list {
+			// 	if qu.ObjToString(tmp["code"]) == sp.Code {
+			// 		sp.UpperLimit = qu.IntAll(tmp["uplimit"])
+			// 		sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
+			// 		break
+			// 	}
+			// }
+		} else {
+			logger.Info(code, "脚本加载失败,请检查!")
+			MgoS.Save("spider_loadfail", map[string]interface{}{
+				"code":   code,
+				"type":   "初始化",
+				"script": script,
+				"intime": time.Now().Format(qu.Date_Full_Layout),
+				"event":  util.Config.Uploadevent,
+			})
+		}
+		time.Sleep(1 * time.Second)
+		return true
+	})
+	InitAllLuaOver <- true // spider initialisation finished
+	logger.Info("高性能模式:LUA加载完成")
+	numSpider := 0
+	Allspiders.Range(func(key, value interface{}) bool {
+		numSpider++
+		return true
+	})
+	logger.Info("总共加载脚本数:", numSpider)
+}
+
+// NewSpider loads the Lua script for code and builds a Spider from the
+// variables the script defines. It returns nil when LoadScript reports an
+// error. For the sleep-base, sleep-rand and timeout settings a script value of
+// -1 selects the defaults 1000, 1000 and 60. The download counters are
+// restored from GetDownloadLast for today's date when a record exists.
+func NewSpider(code, luafile string) *Spider {
+	defer mu.Catch()
+	sp := &Spider{}
+	if errMsg := sp.LoadScript(code, luafile, true); errMsg != "" {
+		return nil
+	}
+
+	// Identity and storage settings read from the script.
+	sp.Code = sp.GetVar("spiderCode")
+	sp.SCode = sp.Code
+	sp.Name = sp.GetVar("spiderName")
+	//sp.LastExecTime = GetLastExectime(sp.Code)
+	sp.DownDetail = sp.GetBoolVar("spiderDownDetailPage")
+	sp.Collection = sp.GetVar("spider2Collection")
+	sp.SpiderRunRate = int64(sp.GetIntVar("spiderRunRate"))
+	//sp.Thread = int64(sp.GetIntVar("spiderThread"))
+	sp.StoreToMsgEvent = sp.GetIntVar("spiderStoreToMsgEvent")
+	sp.StoreMode = sp.GetIntVar("spiderStoreMode")
+	sp.CoverAttr = sp.GetVar("spiderCoverAttr")
+
+	// Pacing settings: -1 means "not configured", so fall back to the default.
+	sp.SleepBase = sp.GetIntVar("spiderSleepBase")
+	if sp.SleepBase == -1 {
+		sp.SleepBase = 1000
+	}
+	sp.SleepRand = sp.GetIntVar("spiderSleepRand")
+	if sp.SleepRand == -1 {
+		sp.SleepRand = 1000
+	}
+	sp.Timeout = int64(sp.GetIntVar("spiderTimeout"))
+	if sp.Timeout == -1 {
+		sp.Timeout = 60
+	}
+	sp.TargetChannelUrl = sp.GetVar("spiderTargetChannelUrl")
+
+	// Restore the counters saved for today, if any.
+	today := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
+	if last := GetDownloadLast(sp.Code, today); len(last) > 0 {
+		sp.TodayDowncount = int32(qu.IntAll(last["todaydowncount"]))
+		sp.ToDayRequestNum = int32(qu.IntAll(last["todaydownreq"]))
+		sp.YesterdayDowncount = int32(qu.IntAll(last["yesdowncount"]))
+		sp.YestoDayRequestNum = int32(qu.IntAll(last["yesdownreq"]))
+		sp.TotalDowncount = sp.TodayDowncount + int32(qu.IntAll(last["totaldown"]))
+		sp.TotalRequestNum = sp.ToDayRequestNum + int32(qu.IntAll(last["totalreq"]))
+	}
+
+	sp.UserName = sp.GetVar("spiderUserName")
+	sp.UserEmail = sp.GetVar("spiderUserEmail")
+	sp.UploadTime = sp.GetVar("spiderUploadTime")
+	// Historical back-fill flags.
+	sp.IsHistoricalMend = sp.GetBoolVar("spiderIsHistoricalMend")
+	sp.IsMustDownload = sp.GetBoolVar("spiderIsMustDownload")
+
+	return sp
+}
+
+// NewMoreSpider creates CodeChan[code] additional Spider instances for code,
+// copies the user fields of sp onto each, and appends them to
+// AllspidersMap[code] under AllspidersMapLock.
+//
+// An instance whose script fails to load (NewSpider returns nil) is skipped,
+// so one failure no longer aborts the remaining iterations through a
+// recovered nil-pointer panic.
+func NewMoreSpider(code, script string, sp *Spider) {
+	defer qu.Catch()
+	for i := 1; i <= CodeChan[code]; i++ {
+		spTmp := NewSpider(code, script)
+		if spTmp == nil {
+			continue
+		}
+		spTmp.UserEmail = sp.UserEmail
+		spTmp.UserName = sp.UserName
+		spTmp.MUserEmail = sp.MUserEmail
+		spTmp.MUserName = sp.MUserName
+		AllspidersMapLock.Lock()
+		AllspidersMap[code] = append(AllspidersMap[code], spTmp)
+		AllspidersMapLock.Unlock()
+	}
+}
+
+// SaveDownCount upserts the download counters of code into "spider_downlog",
+// keyed by code and today's date. When addtotal is true the running totals
+// are also incremented by today's counts.
+//
+// The clock is read once, so date, year, month and day always describe the
+// same instant even when the call straddles midnight.
+func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
+	now := time.Now()
+	date := time.Unix(now.Unix(), 0).Format(qu.Date_Short_Layout)
+	updata := map[string]interface{}{
+		"$set": map[string]interface{}{
+			"yesdowncount":   yesterdayDowncount,
+			"yesdownreq":     yestoDayRequestNum,
+			"todaydowncount": todayDowncount,
+			"todaydownreq":   todayRequestNum,
+			"date":           date,
+			"year":           now.Year(),
+			"month":          now.Month(),
+			"day":            now.Day(),
+		},
+	}
+	if addtotal {
+		updata["$inc"] = map[string]interface{}{"totaldown": todayDowncount, "totalreq": todayRequestNum}
+	}
+	MgoS.Update("spider_downlog", map[string]interface{}{"code": code, "date": date}, updata, true, false)
+}
+
+// GetScriptByTmp assembles a complete Lua script from a luaconfig record.
+//
+// The common section always comes from the template. The time, list and
+// content sections come from the template when the matching type_* field
+// equals 0, and from the stored str_* text otherwise. Missing listcheck and
+// contentcheck values default to "" (this writes to luaconfig). A nil
+// luaconfig yields "". Type assertions on the record are unchecked; a
+// mismatch panics, is recovered by mu.Catch, and results in "".
+func GetScriptByTmp(luaconfig map[string]interface{}) string {
+	defer mu.Catch()
+	// Check before the default writes below: assigning into a nil map panics.
+	if luaconfig == nil {
+		return ""
+	}
+	if luaconfig["listcheck"] == nil {
+		luaconfig["listcheck"] = ""
+	}
+	if luaconfig["contentcheck"] == nil {
+		luaconfig["contentcheck"] = ""
+	}
+
+	common := luaconfig["param_common"].([]interface{})
+	// Pad for the later-added spiderIsHistoricalMend / spiderIsMustDownload parameters.
+	if len(common) == 15 {
+		common = append(common, "", "", "")
+	} else {
+		common = append(common, false, false, "", "", "")
+	}
+	for k, v := range common {
+		switch k {
+		case 4, 5, 6, 9, 10:
+			common[k] = qu.IntAll(v)
+		}
+	}
+	script, _ := GetTmpModel(map[string][]interface{}{"common": common})
+
+	// NOTE(review): the == 0 comparisons below are true only when the stored
+	// value has dynamic type int; confirm what the MongoDB driver decodes to.
+	scriptTime := ""
+	if luaconfig["type_time"] == 0 {
+		timeParams := luaconfig["param_time"].([]interface{})
+		scriptTime, _ = GetTmpModel(map[string][]interface{}{
+			"time": timeParams,
+		})
+	} else {
+		scriptTime = luaconfig["str_time"].(string)
+	}
+
+	scriptList := ""
+	if luaconfig["type_list"] == 0 {
+		list := luaconfig["param_list"].([]interface{})
+		// strings.Split with a non-empty separator always returns at least one
+		// element, so no empty-result branch is needed.
+		addrs := strings.Split(list[1].(string), "\n")
+		for k, v := range addrs {
+			addrs[k] = "'" + v + "'"
+		}
+		list[1] = strings.Join(addrs, ",")
+		scriptList, _ = GetTmpModel(map[string][]interface{}{
+			"list":      list,
+			"listcheck": []interface{}{luaconfig["listcheck"]},
+		})
+	} else {
+		scriptList = luaconfig["str_list"].(string)
+	}
+
+	scriptContent := ""
+	if luaconfig["type_content"] == 0 {
+		content := luaconfig["param_content"].([]interface{})
+		scriptContent, _ = GetTmpModel(map[string][]interface{}{
+			"content":      content,
+			"contentcheck": []interface{}{luaconfig["contentcheck"]},
+		})
+	} else {
+		scriptContent = luaconfig["str_content"].(string)
+	}
+
+	script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"])
+	// Same separators as the former raw string literal: a space, then newline plus three tabs.
+	script += " \n\t\t\t" + scriptTime + "\n\t\t\t" + scriptList + "\n\t\t\t" + scriptContent
+	return ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
+}
+
+// GetTmpModel renders the template sections named in param ("common", "time",
+// "list", "content") and returns them concatenated in that order. A section
+// with too few parameters is skipped and err is set to a message naming it. A
+// panic raised while rendering is captured by qu.Try and returned as err.
+func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
+	qu.Try(func() {
+		// Indexing a nil map is safe, so param needs no separate nil test.
+		if common := param["common"]; common != nil {
+			if len(common) >= 12 {
+				script = fmt.Sprintf(util.Tmp_common, common...)
+			} else {
+				err = "公共参数配置不全"
+			}
+		}
+		if timeArgs := param["time"]; timeArgs != nil {
+			if len(timeArgs) >= 3 {
+				script += fmt.Sprintf(util.Tmp_pubtime, timeArgs...)
+			} else {
+				err = "方法:time-参数配置不全"
+			}
+		}
+		if listArgs := param["list"]; listArgs != nil {
+			if len(listArgs) >= 7 {
+				// The list check goes first, followed by the list parameters.
+				args := append([]interface{}{param["listcheck"][0]}, listArgs...)
+				script += fmt.Sprintf(util.Tmp_pagelist, args...)
+				script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
+			} else {
+				err = "方法:list-参数配置不全"
+			}
+		}
+		if contentArgs := param["content"]; contentArgs != nil {
+			if len(contentArgs) >= 2 {
+				// The content check goes first, followed by the content parameters.
+				args := append([]interface{}{param["contentcheck"][0]}, contentArgs...)
+				script += fmt.Sprintf(util.Tmp_content, args...)
+			} else {
+				err = "方法:content-参数配置不全"
+			}
+		}
+	}, func(e interface{}) {
+		err = e
+	})
+	return script, err
+}
+
+//补充模型
+func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
+	defer mu.Catch()
+
+	//补充通用信息
+	commstr := `item["spidercode"]="` + comm[0].(string) + `";`
+	commstr += `item["site"]="` + comm[1].(string) + `";`
+	commstr += `item["channel"]="` + comm[2].(string) + `";`
+	script = strings.Replace(script, "--Common--", commstr, -1)
+	//补充模型信息
+	modelstr := ""
+	for k, v := range model {
+		modelstr += `item["` + k + `"]="` + v.(string) + `";`
+	}
+	script = strings.Replace(script, "--Model--", modelstr, -1)
+	return script
+}

+ 315 - 0
src/spider/msgservice.go

@@ -0,0 +1,315 @@
+// msgservice
+package spider
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"math/rand"
+	mu "mfw/util"
+	qu "qfw/util"
+
+	//"qfw/util/redis"
+	util "spiderutil"
+	"time"
+
+	"github.com/donnie4w/go-logger/logger"
+)
+
+// DynamicIPMap is one entry of the downloader tables Alldownloader and AlldownloaderFile.
+type DynamicIPMap struct {
+	Code        string // downloader code; the event handlers also use it as the map key
+	InvalidTime int64  // Unix time in seconds; the gc routines delete the entry once this is in the past
+}
+
+// Message clients and the downloader tables they maintain.
+var (
+	// Msclient is created by InitMsgClient.
+	Msclient *mu.Client
+	// MsclientFile is created by InitMsgClientFile.
+	MsclientFile *mu.Client
+	// Alldownloader holds the downloaders announced to Msclient, keyed by code.
+	Alldownloader = map[string]DynamicIPMap{}
+	// AlldownloaderFile holds the downloaders announced to MsclientFile, keyed by code.
+	AlldownloaderFile = map[string]DynamicIPMap{}
+)
+
+// InitMsgClient creates Msclient for the message server at serveraddr under
+// the client name name, with processevent as its event handler, and then
+// starts gc4Alldownloader in its own goroutine. The error returned by
+// mu.NewClient is discarded.
+func InitMsgClient(serveraddr, name string) {
+	cfg := &mu.ClientConfig{
+		ClientName:    name,
+		MsgServerAddr: serveraddr,
+		EventHandler:  processevent,
+		OnRequestConnect: func() {
+			log.Println("重连", serveraddr, name)
+		},
+		OnConnectSuccess: func() {
+			log.Println("重连成功")
+		},
+		CanHandleEvents: []int{mu.SERVICE_DOWNLOAD_APPEND_NODE, mu.SERVICE_DOWNLOAD_DELETE_NODE, util.Config.Uploadevent},
+		ReadBufferSize:  500,
+		WriteBufferSize: 500,
+	}
+	Msclient, _ = mu.NewClient(cfg)
+	go gc4Alldownloader()
+}
+
+// InitMsgClientFile creates MsclientFile for the message server at serveraddr
+// under the client name name, with processeventFile as its event handler, and
+// then starts gc4AlldownloaderFile in its own goroutine. The error returned by
+// mu.NewClient is discarded.
+func InitMsgClientFile(serveraddr, name string) {
+	cfg := &mu.ClientConfig{
+		ClientName:      name,
+		MsgServerAddr:   serveraddr,
+		EventHandler:    processeventFile,
+		CanHandleEvents: []int{mu.SERVICE_DOWNLOAD_APPEND_NODE, mu.SERVICE_DOWNLOAD_DELETE_NODE},
+		ReadBufferSize:  200,
+		WriteBufferSize: 200,
+	}
+	MsclientFile, _ = mu.NewClient(cfg)
+	go gc4AlldownloaderFile()
+}
+
+//
+func processevent(p *mu.Packet) {
+	defer mu.Catch()
+	var data []byte
+	switch p.Event {
+	case mu.SERVICE_DOWNLOAD_APPEND_NODE:
+		data = p.GetBusinessData()
+		//log.Println("获取动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			Alldownloader[code] = DynamicIPMap{
+				Code:        code,
+				InvalidTime: time.Now().Unix() + 60*10,
+			}
+		}
+	case mu.SERVICE_DOWNLOAD_DELETE_NODE:
+		data = p.GetBusinessData()
+		//log.Println("删除动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			delete(Alldownloader, code)
+		}
+		// case int32(util.Config.Uploadevent):
+		// 	param := map[string]interface{}{}
+		// 	json.Unmarshal(p.GetBusinessData(), &param)
+		// 	ret := map[string]interface{}{}
+		// 	if param["code"] != nil {
+		// 		b, err := UpdateSpiderByCodeState(param["code"].(string), param["state"].(string))
+		// 		ret["b"] = b
+		// 		ret["err"] = err
+		// 	} else {
+		// 		ret["b"] = false
+		// 		ret["err"] = "code或state值不存在"
+		// 	}
+		// 	Msclient.WriteObj(p.From, p.Msgid, mu.EVENT_RECIVE_CALLBACK, mu.SENDTO_TYPE_P2P, ret)
+	}
+}
+
+//
+func processeventFile(p *mu.Packet) {
+	defer mu.Catch()
+	var data []byte
+	switch p.Event {
+	case mu.SERVICE_DOWNLOAD_APPEND_NODE:
+		data = p.GetBusinessData()
+		//log.Println("获取动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			AlldownloaderFile[code] = DynamicIPMap{
+				Code:        code,
+				InvalidTime: time.Now().Unix() + 60*10,
+			}
+		}
+	case mu.SERVICE_DOWNLOAD_DELETE_NODE:
+		data = p.GetBusinessData()
+		//log.Println("删除动态地址:", len(data), string(data))
+		for i := 0; i < len(data)/8; i++ {
+			code := string(data[i*8 : (i+1)*8])
+			delete(AlldownloaderFile, code)
+		}
+		// case int32(util.Config.Uploadevent):
+		// 	param := map[string]interface{}{}
+		// 	json.Unmarshal(p.GetBusinessData(), &param)
+		// 	ret := map[string]interface{}{}
+		// 	if param["code"] != nil {
+		// 		b, err := UpdateSpiderByCodeState(param["code"].(string), param["state"].(string))
+		// 		ret["b"] = b
+		// 		ret["err"] = err
+		// 	} else {
+		// 		ret["b"] = false
+		// 		ret["err"] = "code或state值不存在"
+		// 	}
+		// 	MsclientFile.WriteObj(p.From, p.Msgid, mu.EVENT_RECIVE_CALLBACK, mu.SENDTO_TYPE_P2P, ret)
+		//
+	}
+}
+
+// gc4Alldownloader deletes every Alldownloader entry whose InvalidTime is
+// earlier than the current Unix time, then re-schedules itself through
+// util.TimeAfterFunc with a one-minute delay.
+func gc4Alldownloader() {
+	now := time.Now().Unix()
+	for _, entry := range Alldownloader {
+		if now > entry.InvalidTime {
+			delete(Alldownloader, entry.Code)
+		}
+	}
+	util.TimeAfterFunc(time.Minute, gc4Alldownloader, TimeChan)
+}
+
+// gc4AlldownloaderFile deletes every AlldownloaderFile entry whose InvalidTime
+// is earlier than the current Unix time, then re-schedules itself through
+// util.TimeAfterFunc with a one-minute delay.
+func gc4AlldownloaderFile() {
+	now := time.Now().Unix()
+	for _, entry := range AlldownloaderFile {
+		if now > entry.InvalidTime {
+			delete(AlldownloaderFile, entry.Code)
+		}
+	}
+	util.TimeAfterFunc(time.Minute, gc4AlldownloaderFile, TimeChan)
+}
+
+// GetOneDownloader returns the code of one randomly chosen entry of
+// Alldownloader, or "" when the table is empty.
+func GetOneDownloader() string {
+	total := len(Alldownloader)
+	if total < 1 {
+		return ""
+	}
+	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
+	// Count down through the iteration until the drawn position is reached.
+	skip := rng.Intn(total)
+	for code := range Alldownloader {
+		if skip == 0 {
+			return code
+		}
+		skip--
+	}
+	//log.Printf("Alldownloader-len:%d\n", len(Alldownloader))
+	return ""
+}
+
+// GetOneDownloaderFile returns the code of one randomly chosen entry of
+// AlldownloaderFile, or "" when the table is empty.
+func GetOneDownloaderFile() string {
+	total := len(AlldownloaderFile)
+	if total < 1 {
+		return ""
+	}
+	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
+	// Count down through the iteration until the drawn position is reached.
+	skip := rng.Intn(total)
+	for code := range AlldownloaderFile {
+		if skip == 0 {
+			return code
+		}
+		skip--
+	}
+	return ""
+}
+
+// SendMsgService sends a completion notice for event. Only
+// mu.SERVICE_YCML_SAVE is handled: the "area" of the first data record is
+// broadcast as a mu.SERVICE_YCML_NOTICE message. Other events are ignored.
+//
+// Empty data is logged and skipped; indexing data[0] would otherwise panic
+// with no recover in scope.
+func SendMsgService(event int, data []map[string]interface{}) {
+	switch event {
+	case mu.SERVICE_YCML_SAVE: // the abnormal-list download has finished
+		if len(data) == 0 {
+			logger.Error("SendMsgService: no data for event", event)
+			return
+		}
+		Msclient.WriteObj("", "", mu.SERVICE_YCML_NOTICE, mu.SENDTO_TYPE_ALL_RECIVER, qu.ObjToString(data[0]["area"]))
+	default:
+	}
+}
+
+// SaveObjBlak sends a batch of records through the message client.
+//
+// For mu.SERVICE_YCML_SAVE the JSON payload is broadcast. For
+// mu.SERVICE_INVNAME_ANALYSIS the record titles and the first non-empty area
+// are broadcast, provided an area was found. For any other event the payload
+// is sent with Msclient.Call, up to five attempts with a growing pause between
+// them; when every attempt fails each record is marked sendflag="false" and an
+// error is logged. In that default case the records are then bulk-saved to
+// "data_bak" whether or not delivery succeeded. The c parameter is unused.
+func SaveObjBlak(event int, checkAtrr string, c string, data []map[string]interface{}) {
+	defer mu.Catch()
+	payload, _ := json.Marshal([]interface{}{checkAtrr, data})
+	switch event {
+	case mu.SERVICE_YCML_SAVE: // abnormal list
+		Msclient.WriteObj("", "", mu.SERVICE_YCML_SAVE, mu.SENDTO_TYPE_ALL_RECIVER, payload)
+	case mu.SERVICE_INVNAME_ANALYSIS: // store into the company list (public notice)
+		area := ""
+		titles := []string{}
+		for _, record := range data {
+			if area == "" {
+				area = qu.ObjToString(record["area"])
+			}
+			titles = append(titles, qu.ObjToString(record["title"]))
+		}
+		if area != "" && len(titles) > 0 {
+			rep := map[string]interface{}{"names": titles, "area": area}
+			logger.Debug(rep)
+			Msclient.WriteObj("", "", mu.SERVICE_INVNAME_ANALYSIS, mu.SENDTO_TYPE_ALL_RECIVER, rep)
+		}
+	default:
+		delivered := false
+		for attempt := 1; attempt < 6; attempt++ {
+			resp, err := Msclient.Call("", mu.UUID(8), event, mu.SENDTO_TYPE_ALL_RECIVER, payload, 120)
+			if err == nil && string(resp) != "" {
+				delivered = true
+				break
+			}
+			util.TimeSleepFunc(time.Duration(5*attempt)*time.Second, TimeSleepChan)
+		}
+		if !delivered {
+			for i := range data {
+				data[i]["sendflag"] = "false"
+			}
+			logger.Error("未成功传送信息-批量", event, len(data), data[0]["spidercode"])
+		}
+		MgoS.SaveBulk("data_bak", data...)
+	}
+}
+
+// SaveObj sends a single record through the message client and keeps a copy in
+// "data_bak".
+//
+// A record whose JSON size in whole megabytes exceeds 10 is logged and
+// dropped. For mu.SERVICE_SPIDER_ECPS the record is broadcast as is. For any
+// other event it is sent with Msclient.Call, up to five attempts with a
+// growing pause between them; the outcome is stored in data["sendflag"] and
+// the record is saved. When delivery and saving both succeeded, saveredis is
+// set and the href is longer than 5 characters, the href is written to the
+// two Redis duplicate-check stores.
+func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis bool) {
+	raw, _ := json.Marshal(data)
+	if len(raw)/(1024*1024) > 10 {
+		log.Println(event, checkAtrr, data["href"], data["title"], len(raw))
+		return
+	}
+	defer mu.Catch()
+	payload, _ := json.Marshal([]interface{}{checkAtrr, []interface{}{data}})
+	switch event {
+	case mu.SERVICE_SPIDER_ECPS: // copyright and similar services
+		Msclient.WriteObj("", "", mu.SERVICE_SPIDER_ECPS, mu.SENDTO_TYPE_ALL_RECIVER, data)
+	default:
+		delivered := false
+		for attempt := 1; attempt < 6; attempt++ {
+			resp, err := Msclient.Call("", mu.UUID(8), event, mu.SENDTO_TYPE_ALL_RECIVER, payload, 30)
+			if err == nil && string(resp) != "" {
+				delivered = true
+				break
+			}
+			util.TimeSleepFunc(time.Duration(5*attempt)*time.Second, TimeSleepChan)
+		}
+		if delivered {
+			data["sendflag"] = "true"
+		} else {
+			data["sendflag"] = "false"
+			logger.Error("未成功传送信息", event, data["title"])
+		}
+		id := MgoS.Save("data_bak", data)
+		if !delivered || id == "" {
+			return
+		}
+		href := fmt.Sprint(data["href"])
+		if len(href) <= 5 || !saveredis { // not valid data, or Redis not requested
+			return
+		}
+		db := HexToBigIntMod(href) // choose the Redis db from the hash of href
+		// Incremental store.
+		util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*30)
+		// Full store: check for an existing key first so the stored id is not overwritten.
+		exists, _ := util.ExistRedis("title_repeat_fulljudgement", db, "url_repeat_"+href)
+		if !exists {
+			util.PutRedis("title_repeat_fulljudgement", db, "url_repeat_"+href, "", -1)
+		}
+	}
+}
+
+// GetCodeByWx sends img to the WeChat side through GetMsgFromWx and returns
+// the "content" field of the JSON reply together with the error of that
+// request. When the request fails, a second message with flag=false and no
+// image is sent under the same message id.
+func GetCodeByWx(img []byte) (string, error) {
+	msgid := mu.UUID(8)
+	reply, err := GetMsgFromWx(msgid, img, true, 300)
+	if err != nil {
+		GetMsgFromWx(msgid, nil, false, 20)
+	}
+	parsed := map[string]interface{}{}
+	json.Unmarshal(reply, &parsed)
+	return qu.ObjToString(parsed["content"]), err
+}
+
+// GetMsgFromWx calls the mu.SERVICE_DISTINGUISH service on all receivers with
+// the image and flag, waiting up to timeout, and returns the raw reply.
+func GetMsgFromWx(msgid string, img []byte, falg bool, timeout int64) ([]byte, error) {
+	request := map[string]interface{}{
+		"img":  img,
+		"flag": falg,
+	}
+	return Msclient.Call("", msgid, mu.SERVICE_DISTINGUISH, mu.SENDTO_TYPE_ALL_RECIVER, request, timeout)
+}

+ 829 - 0
src/spider/script.go

@@ -0,0 +1,829 @@
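+/**
+Wrapper for loading and invoking spider scripts.
+Initially scripts are loaded from the file system;
+later they are to be loaded from database configuration.
+Functions shared by several Lua scripts should be factored out, with the main script file loading the shared Lua file.
+*/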
+package spider
+
+import (
+	"bytes"
+	"compress/gzip"
+	"crypto/aes"
+	"encoding/base64"
+	"encoding/json"
+	"io/ioutil"
+	mu "mfw/util"
+	"net/http"
+	"net/url"
+	"path"
+
+	qu "qfw/util"
+	_ "qfw/util/redis"
+	"regexp"
+	util "spiderutil"
+	"strconv"
+	"strings"
+	"sync/atomic"
+	"time"
+
+	gq "github.com/PuerkitoBio/goquery"
+	"github.com/cjoudrey/gluahttp"
+	"github.com/donnie4w/go-logger/logger"
+	lujson "github.com/yuin/gopher-json"
+	"github.com/yuin/gopher-lua"
+	"golang.org/x/text/encoding/simplifiedchinese"
+	"golang.org/x/text/transform"
+)
+
+// Script is one Lua spider script together with its interpreter state and the
+// request statistics collected while it runs.
+type Script struct {
+	SCode      string // spider code the script belongs to
+	ScriptFile string // Lua source text (passed to DoString by LoadScript)
+	Encoding   string // page charset, read from the Lua global spiderPageEncoding
+	Userproxy  bool   // proxy switch, read from the Lua global spiderUserProxy
+	//Ishttps            bool
+	ErrorNum           int32           // number of errors
+	Downloader         string          // downloader in use
+	TotalRequestNum    int32           // total number of requests
+	ToDayRequestNum    int32           // number of requests today
+	YestoDayRequestNum int32           // number of requests yesterday
+	Timeout            int64           // timeout in seconds
+	L                  *lua.LState     // Lua interpreter state running the script
+	NoDownloadNum      int32           // number of unsuccessful downloads
+	LastThreeTimes     []time.Duration // durations of the most recent page downloads
+	FileLastThreeTimes []time.Duration // durations of the most recent attachment downloads
+}
+
+const (
+	MAX_STEP = 5 //计算时的最大步长
+)
+
+// workTime caches the latest result of util.IsWorkTime; it is refreshed by isWorkTime.
+var workTime = true
+
+// init starts the first isWorkTime run in its own goroutine.
+func init() {
+	go isWorkTime()
+}
+
+// TimeSleepChan is the channel handed to util.TimeSleepFunc by the delays in this package.
+var TimeSleepChan = make(chan bool, 1)
+
+// LoadScript binds the Lua source script_file to this Script, runs it in the
+// Lua state and registers the Go helper functions that spider scripts may call.
+//
+// code is the spider code; script_file is the Lua source text (not a path).
+// With util.Config.Working == 0 a fresh lua.LState is created only when
+// newstate is true; otherwise the existing s.L is reused. In the other mode a
+// state is taken from a pool: CC2 when newstate is true, CC when it is false.
+// A failure to execute the script is only logged. The function always returns
+// the empty string.
+//
+// Arguments of the exposed Lua functions are read from the top of the Lua
+// stack, so the last Lua argument is at index -1.
+func (s *Script) LoadScript(code, script_file string, newstate bool) string {
+	defer mu.Catch()
+	s.SCode = code
+	s.ScriptFile = script_file
+	if util.Config.Working == 0 {
+		if newstate {
+			s.L = lua.NewState(lua.Options{
+				RegistrySize:        256 * 20,
+				CallStackSize:       256,
+				IncludeGoStackTrace: false,
+			})
+		}
+	} else { // energy-saving mode: take a lua.LState from the CC pools
+		if newstate { // in queue mode newstate selects the pool: CC2 for detail-page spiders, CC for list-page spiders
+			lState := <-CC2
+			s.L = lState
+		} else {
+			lState := <-CC
+			s.L = lState
+		}
+		//logger.Debug("获取CC资源", script_file)
+	}
+	s.L.PreloadModule("http", gluahttp.NewHttpModule(&http.Client{}).Loader)
+	s.L.PreloadModule("json", lujson.Loader)
+
+	if err := s.L.DoString(script_file); err != nil {
+		logger.Debug(code + ",加载lua脚本错误:" + err.Error())
+		//panic(code + ",加载lua脚本错误:" + err.Error())
+	}
+	s.Encoding = s.GetVar("spiderPageEncoding")
+	s.Userproxy = s.GetBoolVar("spiderUserProxy")
+	// Expose Go functions to Lua.
+	// download(charset, ishttps, url, head): plain GET download, returns the page text.
+	s.L.SetGlobal("download", s.L.NewFunction(func(S *lua.LState) int {
+		if s.LastThreeTimes == nil {
+			s.LastThreeTimes = make([]time.Duration, 4)
+		}
+		if util.Config.IsDelay {
+			SleepTime(1, s.LastThreeTimes) // throttle based on recent durations
+		}
+		start := time.Now() // start of this request
+		head := S.ToTable(-1)
+		url := S.ToString(-2)
+		ishttps := S.ToBool(-3)
+		charset := S.ToString(-4)
+		if charset == "" {
+			charset = s.Encoding
+		}
+		ret := Download(s.Downloader, url, "get", util.GetTable(head), charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		S.Push(lua.LString(ret))
+		atomic.AddInt32(&s.ToDayRequestNum, 1)
+		atomic.AddInt32(&s.TotalRequestNum, 1)
+		end := time.Since(start)
+		// Keep a sliding window of the four most recent durations.
+		if len(s.LastThreeTimes) >= 4 {
+			s.LastThreeTimes = s.LastThreeTimes[1:]
+		}
+		s.LastThreeTimes = append(s.LastThreeTimes, end)
+		return 1
+	}))
+	// downloadAdv(charset, ishttps, url, method, param, head, cookie): advanced
+	// download, returns the page text and the response cookies as JSON.
+	s.L.SetGlobal("downloadAdv", s.L.NewFunction(func(S *lua.LState) int {
+		if s.LastThreeTimes == nil {
+			s.LastThreeTimes = make([]time.Duration, 4)
+		}
+		if util.Config.IsDelay {
+			SleepTime(1, s.LastThreeTimes) // throttle based on recent durations
+		}
+		start := time.Now() // start of this request
+		cookie := S.ToString(-1)
+		head := S.ToTable(-2)
+		param := S.ToTable(-3)
+		method := S.ToString(-4)
+		url := S.ToString(-5)
+		ishttps := S.ToBool(-6)
+		charset := S.ToString(-7)
+		if charset == "" {
+			charset = s.Encoding
+		}
+		var mycookie []*http.Cookie
+		json.Unmarshal([]byte(cookie), &mycookie)
+		var ret string
+		var retcookie []*http.Cookie
+		if param == nil {
+			// param was not a table: send it as raw text under the "text" key.
+			ptext := map[string]interface{}{"text": S.ToString(-3)}
+			ret, retcookie = DownloadAdv(s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		} else {
+			ret, retcookie = DownloadAdv(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		}
+		S.Push(lua.LString(ret))
+		scookie, _ := json.Marshal(retcookie)
+		S.Push(lua.LString(scookie))
+		atomic.AddInt32(&s.ToDayRequestNum, 1)
+		atomic.AddInt32(&s.TotalRequestNum, 1)
+		end := time.Since(start)
+		if len(s.LastThreeTimes) >= 4 {
+			s.LastThreeTimes = s.LastThreeTimes[1:]
+		}
+		s.LastThreeTimes = append(s.LastThreeTimes, end)
+		return 2
+	}))
+	// saveErrLog(code, name, url, content): store a verification error log.
+	s.L.SetGlobal("saveErrLog", s.L.NewFunction(func(S *lua.LState) int {
+		code := S.ToString(-4)
+		name := S.ToString(-3)
+		url := S.ToString(-2)
+		content := S.ToString(-1)
+		saveVerificationLog(code, name, url, content)
+		atomic.AddInt32(&s.ErrorNum, 1)
+		atomic.AddInt32(&s.NoDownloadNum, 1)
+		// Slow the script down so logs cannot be flooded maliciously.
+		util.TimeSleepFunc(5*time.Second, TimeSleepChan)
+		return 0
+	}))
+	// saveRevisionLog(url, str): log a site-revision notice.
+	s.L.SetGlobal("saveRevisionLog", s.L.NewFunction(func(S *lua.LState) int {
+		url := S.ToString(-2)
+		str := S.ToString(-1)
+		logger.Error(s.SCode, url, str)
+		return 0
+	}))
+	// findHasExit: existence lookup (obsolete), always returns false.
+	s.L.SetGlobal("findHasExit", s.L.NewFunction(func(S *lua.LState) int {
+		//c := S.ToString(-2)
+		//q := S.ToString(-1)
+		//b := findHasExit(c, q)
+		S.Push(lua.LBool(false))
+		return 1
+	}))
+	s.L.SetGlobal("findOneText", s.L.NewFunction(func(S *lua.LState) int {
+		nodetype := S.ToString(-3)
+		gpath := S.ToString(-2)
+		content := S.ToString(-1)
+		ret := util.FindOneText(gpath, content, nodetype)
+		S.Push(ret)
+		return 1
+	}))
+	s.L.SetGlobal("findContentText", s.L.NewFunction(func(S *lua.LState) int {
+		gpath := S.ToString(-2)
+		content := S.ToString(-1)
+		ret := util.FindContentText(gpath, content)
+		S.Push(ret)
+		return 1
+	}))
+	s.L.SetGlobal("findOneHtml", s.L.NewFunction(func(S *lua.LState) int {
+		nodetype := S.ToString(-3)
+		gpath := S.ToString(-2)
+		content := S.ToString(-1)
+		ret := util.FindOneHtml(gpath, content, nodetype)
+		S.Push(ret)
+		return 1
+	}))
+	s.L.SetGlobal("findListText", s.L.NewFunction(func(S *lua.LState) int {
+		gpath := S.ToString(-2)
+		content := S.ToString(-1)
+		ret := s.L.NewTable()
+		util.FindListText(gpath, content, ret)
+		S.Push(ret)
+		return 1
+	}))
+	s.L.SetGlobal("findListHtml", s.L.NewFunction(func(S *lua.LState) int {
+		gpath := S.ToString(-2)
+		content := S.ToString(-1)
+		ret := s.L.NewTable()
+		util.FindListHtml(gpath, content, ret)
+		S.Push(ret)
+		return 1
+	}))
+	// s.L.SetGlobal("findMgoData", s.L.NewFunction(func(S *lua.LState) int {
+	// 	update := [][]map[string]interface{}{}
+	// 	query := map[string]interface{}{"state": 0}
+	// 	data, _ := Mgo.Find(util.Config.TmpCollName, query, `{"_id":-1}`, nil, false, 0, 10)
+	// 	pageList := []interface{}{}
+	// 	for _, d := range *data {
+	// 		tmpMap := map[string]string{}
+	// 		tmpMap["title"] = qu.ObjToString(d["title"])
+	// 		tmpMap["detail"] = qu.ObjToString(d["detail"])
+	// 		tmpMap["href"] = qu.ObjToString(d["href"])
+	// 		publishtime := qu.Int64All(d["publishtime"])
+	// 		tmpMap["publishtime"] = qu.FormatDateByInt64(&publishtime, qu.Date_Full_Layout)
+	// 		tmpMap["_id"] = qu.BsonIdToSId(d["_id"])
+	// 		pageList = append(pageList, tmpMap)
+	// 		update = append(update, []map[string]interface{}{
+	// 			map[string]interface{}{"_id": d["_id"]},
+	// 			map[string]interface{}{"$set": map[string]interface{}{"state": 1}},
+	// 		})
+	// 	}
+	// 	ret := util.MapToTable(s.L, pageList)
+	// 	S.Push(ret)
+	// 	if len(update) > 0 {
+	// 		Mgo.UpdateBulk(util.Config.TmpCollName, update...)
+	// 	}
+	// 	return 1
+	// }))
+	s.L.SetGlobal("findMap", s.L.NewFunction(func(S *lua.LState) int {
+		qmap := S.ToTable(-2)
+		content := S.ToString(-1)
+		ret := s.L.NewTable()
+		util.FindMap(qmap, content, ret)
+		S.Push(ret)
+		return 1
+	}))
+	// getEcpsCode(area, content): captcha recognition for the public-notice sites;
+	// falls back to the WeChat channel when util.GetEcpsCode reports state "wx".
+	s.L.SetGlobal("getEcpsCode", s.L.NewFunction(func(S *lua.LState) int {
+		area := strings.ToUpper(S.ToString(-2))
+		content := S.ToString(-1)
+		code, state := util.GetEcpsCode(area, []byte(content))
+		if state == "wx" {
+			code, _ = GetCodeByWx([]byte(content))
+		}
+		S.Push(lua.LString(code))
+		return 1
+	}))
+	// jsvm(js): run JavaScript through the jsvm service, returns {val=..., err=...}.
+	s.L.SetGlobal("jsvm", s.L.NewFunction(func(S *lua.LState) int {
+		js := S.ToString(-1)
+		ret := s.L.NewTable()
+		if js == "" {
+			ret.RawSet(lua.LString("val"), lua.LString(""))
+			ret.RawSet(lua.LString("err"), lua.LString("js is null"))
+		} else {
+			rep := util.JsVmPost(util.Config.JsVmUrl, js)
+			ret.RawSet(lua.LString("val"), lua.LString(qu.ObjToString(rep["val"])))
+			ret.RawSet(lua.LString("err"), lua.LString(qu.ObjToString(rep["err"])))
+		}
+		S.Push(ret)
+		return 1
+	}))
+	// changeDownloader(): pick a downloader and return its name.
+	s.L.SetGlobal("changeDownloader", s.L.NewFunction(func(S *lua.LState) int {
+		s.Downloader = GetOneDownloader()
+		S.Push(lua.LString(s.Downloader))
+		return 1
+	}))
+	// changeDownloaderFile(): pick a file downloader and return its name.
+	s.L.SetGlobal("changeDownloaderFile", s.L.NewFunction(func(S *lua.LState) int {
+		s.Downloader = GetOneDownloaderFile()
+		S.Push(lua.LString(s.Downloader))
+		return 1
+	}))
+	// timeSleep(): manual delay; the requested duration is ignored, the delay is fixed at 2 seconds.
+	s.L.SetGlobal("timeSleep", s.L.NewFunction(func(S *lua.LState) int {
+		// if workTime {
+		// 	util.TimeSleepFunc(time.Duration(S.ToInt(-1))*time.Second, TimeSleepChan)
+		// } else {
+		// 	util.TimeSleepFunc(1*time.Second, TimeSleepChan)
+		// }
+		util.TimeSleepFunc(time.Second*2, TimeSleepChan)
+		return 0
+	}))
+	// transCode(codeType, str): encode/decode str according to codeType.
+	s.L.SetGlobal("transCode", s.L.NewFunction(func(S *lua.LState) int {
+		codeType := strings.ToLower(S.ToString(-2))
+		str := S.CheckString(-1)
+		switch codeType {
+		case "unicode":
+			str = transUnic(str)
+		case "urlencode_gbk":
+			data, _ := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(str)), simplifiedchinese.GBK.NewEncoder()))
+			l, err := url.Parse("http://a.com/?" + string(data))
+			if err != nil {
+				// url.Parse returns a nil URL on failure; calling Query on it would panic.
+				logger.Warn(s.SCode, "transCode urlencode_gbk:", err.Error())
+				str = ""
+				break
+			}
+			// The value is parsed as a query key with an empty value, so Encode
+			// appends "="; the last character is cut off again.
+			tmpstr := l.Query().Encode()
+			if len(tmpstr) > 1 {
+				str = tmpstr[0 : len(tmpstr)-1]
+			} else {
+				str = ""
+			}
+		case "urlencode_utf8":
+			l, err := url.Parse("http://a.com/?" + str)
+			if err != nil {
+				logger.Warn(s.SCode, "transCode urlencode_utf8:", err.Error())
+				str = ""
+				break
+			}
+			tmpstr := l.Query().Encode()
+			if len(tmpstr) > 1 {
+				str = tmpstr[0 : len(tmpstr)-1]
+			} else {
+				str = ""
+			}
+		case "urldecode_utf8":
+			str, _ = url.QueryUnescape(str)
+		case "decode64":
+			str = util.DecodeB64(str)
+		case "encodemd5":
+			str = qu.GetMd5String(str)
+		case "htmldecode": // decimal HTML character references (&#NNNN;)
+			//txt := `<div align="left" style="margin-left: 0pt;"><span style='font-family:; font-size:13px; color:#000000'>&#22826;&#38451;&#23707;&#29305;&#21220;&#28040;&#38450;&#31449;&#12289;&#26494;&#28006;&#29305;&#21220;&#28040;&#38450;&#31449;&#24314;&#35774;&#39033;&#30446;&#35774;&#35745;&#20013;&#26631;&#20844;&#31034;</span></div>`
+			str = S.ToString(-1)
+			reg, _ := regexp.Compile("&#\\d+;")
+			str = reg.ReplaceAllStringFunc(str, func(src string) string {
+				v, _ := strconv.Atoi(src[2 : len(src)-1])
+				return string(rune(v))
+			})
+		}
+		S.Push(lua.LString(str))
+		return 1
+	}))
+
+	// unGzip(html): decompress a gzip-compressed response body. Input that is not
+	// valid gzip data is returned unchanged instead of crashing on a nil reader.
+	s.L.SetGlobal("unGzip", s.L.NewFunction(func(S *lua.LState) int {
+		html := S.ToString(-1)
+		bs := []byte(html)
+		gzipreader, err := gzip.NewReader(bytes.NewReader(bs))
+		if err != nil {
+			logger.Warn(s.SCode, "unGzip:", err.Error())
+			S.Push(lua.LString(html))
+			return 1
+		}
+		defer gzipreader.Close()
+		// Best effort: whatever could be decompressed before an error is returned.
+		bs, _ = ioutil.ReadAll(gzipreader)
+		S.Push(lua.LString(bs))
+		return 1
+	}))
+
+	// getSimpleListPage(html, date_pattern, pageListUrl): generic list-page analysis
+	// provided for luamaker; returns a table of {title, href, publishtime} entries.
+	s.L.SetGlobal("getSimpleListPage", s.L.NewFunction(func(S *lua.LState) int {
+
+		html := S.ToString(-3)
+		date_pattern := S.ToString(-2)
+		pageListUrl := S.ToString(-1) // URL of the list page
+		bs := []byte(html)
+		tmparr := []string{}
+		tmpret := []int{}
+		re, _ := regexp.Compile(`采购|招标|公示|公告|意见|结果|通知|工程`)
+		doc, _ := gq.NewDocumentFromReader(bytes.NewReader(bs))
+		// Mark every sufficiently long link text with 1 (keyword hit) or 0.
+		doc.Find("a").Each(func(i int, sq *gq.Selection) {
+			text := sq.Text()
+			if len(text) < 30 {
+				return
+			}
+			tmparr = append(tmparr, text)
+			if re.MatchString(text) {
+				tmpret = append(tmpret, 1)
+				//logger.Debug(text)
+			} else {
+				tmpret = append(tmpret, 0)
+			}
+		})
+		logger.Debug(tmpret)
+		// Linear analysis: for every hit count the consecutive hits around it,
+		// looking at most MAX_STEP positions to each side.
+		tmplen, thepos, themax := len(tmpret), -1, 0
+		for i := 0; i < tmplen; i++ {
+			if tmpret[i] == 0 {
+				continue
+			}
+			start, end := i-MAX_STEP, i+MAX_STEP
+			if start < 0 {
+				start = 0
+			}
+			if end > tmplen {
+				end = tmplen
+			}
+			tmp := 0
+			// Walk left and right from the current position over consecutive hits.
+			for j := i; j > start; j-- {
+				if tmpret[j] == 1 {
+					tmp++
+				} else {
+					break
+				}
+			}
+			for j := i; j < end; j++ {
+				if tmpret[j] == 1 {
+					tmp++
+				} else {
+					break
+				}
+			}
+			if tmp > themax {
+				themax = tmp
+				thepos = i
+			}
+		} //end of for...
+		//logger.Debug("找位置完成")
+		// Validate that a position was found.
+		if thepos == -1 {
+			logger.Error("完蛋,找不到")
+			panic("不支持啊,失败啊")
+		}
+		// Locate the link at the best position, then look for its list container.
+		var thelink *gq.Selection
+		doc.Find("a").Each(func(i int, sq *gq.Selection) {
+			if sq.Text() == tmparr[thepos] {
+				thelink = sq
+			}
+		})
+		isfind := false
+		// Climb the parents, at most MAX_STEP levels.
+		for i := 0; i < MAX_STEP; i++ {
+			thelink = thelink.Parent()
+			clen := getChildrenLen(thelink)
+			if clen >= themax-1 {
+				isfind = true
+				break
+			}
+			//logger.Debug("TAG:::", thelink.Nodes[0].Data, clen)
+		}
+		// Build the list from the children of the container.
+		pageList := []interface{}{}
+		if isfind {
+			thelink.Children().Each(func(i int, sq *gq.Selection) {
+				page := map[string]string{}
+				link_sq := sq.Find("a")
+				href := link_sq.AttrOr("href", "")
+				text := link_sq.Text()
+				page["title"] = text
+				page["href"] = dealHref(pageListUrl, href)
+				page["publishtime"] = dealPublishTime(strings.TrimSpace(sq.Text()), date_pattern)
+				//logger.Debug(i)
+				pageList = append(pageList, page)
+			})
+		} else {
+			logger.Error("完蛋,找父亲节点失败啊")
+			//panic("不支持啊,失败啊")
+		}
+
+		ret := util.MapToTable(s.L, pageList)
+		S.Push(ret)
+		return 1
+	}))
+
+	// titleRepeatJudgement: title de-duplication for tender notices; stubbed, always false.
+	s.L.SetGlobal("titleRepeatJudgement", s.L.NewFunction(func(S *lua.LState) int {
+		S.Push(lua.LBool(false))
+		return 1
+	}))
+	// urlRepeatJudgement: newer URL de-duplication (2016-12-14 wanghuidong); stubbed, always false.
+	s.L.SetGlobal("urlRepeatJudgement", s.L.NewFunction(func(S *lua.LState) int {
+		S.Push(lua.LBool(false))
+		return 1
+	}))
+
+	// putUrl2Redis: put a URL into the cache (2016-12-14 wanghuidong); now a no-op.
+	// NOTE(review): reports one return value without pushing anything, so the
+	// script presumably receives whatever is on top of the stack — confirm.
+	s.L.SetGlobal("putUrl2Redis", s.L.NewFunction(func(S *lua.LState) int {
+		//url := S.ToString(-1)
+		return 1
+	}))
+	// officeAnalysis(base64, extension): send a word/pdf attachment to the office
+	// analysis service. The first byte of the message encodes the file type.
+	// NOTE(review): the service reply is discarded and nothing is pushed although 1 is returned.
+	s.L.SetGlobal("officeAnalysis", s.L.NewFunction(func(S *lua.LState) int {
+		ext := map[string]byte{"pdf": byte(0), "doc": byte(1), "docx": byte(2)}
+		str := S.ToString(-2)
+		extension := S.ToString(-1)
+		bs, _ := base64.StdEncoding.DecodeString(str)
+		bs = append([]byte{ext[extension]}, bs...)
+		msgid := mu.UUID(8)
+		Msclient.Call("", msgid, mu.SERVICE_OFFICE_ANALYSIS, mu.SENDTO_TYPE_ALL_RECIVER, bs, 60)
+		return 1
+	}))
+
+	// downloadFile(fileName, url, method, param, head, cookie): download an
+	// attachment, upload it, and return url, name, size, type and file id.
+	s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
+		if s.FileLastThreeTimes == nil {
+			s.FileLastThreeTimes = make([]time.Duration, 4)
+		}
+		if util.Config.IsDelay {
+			SleepTime(3, s.FileLastThreeTimes) // throttle based on recent durations
+		}
+		start := time.Now() // start of this request
+		cookie := S.ToString(-1)
+		head := S.ToTable(-2)
+		param := S.ToTable(-3)
+		method := S.ToString(-4)
+		url := S.ToString(-5)
+		fileName := S.ToString(-6)
+		ishttps := strings.Contains(url, "https")
+		var mycookie []*http.Cookie
+		if cookie != "{}" {
+			json.Unmarshal([]byte(cookie), &mycookie)
+		} else {
+			mycookie = make([]*http.Cookie, 0)
+		}
+		fileName = strings.TrimSpace(fileName)
+		url = strings.TrimSpace(url)
+		ret := DownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, s.Userproxy, ishttps, s.SCode, s.Timeout)
+		url, name, size, ftype, fid := util.UploadFile(s.SCode, fileName, url, ret)
+		// Fall back to the file extension when the upload did not report a type.
+		if strings.TrimSpace(ftype) == "" {
+			if len(path.Ext(name)) > 0 {
+				ftype = path.Ext(name)[1:]
+			}
+		}
+		S.Push(lua.LString(url))
+		S.Push(lua.LString(name))
+		S.Push(lua.LString(size))
+		S.Push(lua.LString(ftype))
+		S.Push(lua.LString(fid))
+		atomic.AddInt32(&s.ToDayRequestNum, 1)
+		atomic.AddInt32(&s.TotalRequestNum, 1)
+
+		end := time.Since(start)
+		if len(s.FileLastThreeTimes) >= 4 {
+			s.FileLastThreeTimes = s.FileLastThreeTimes[1:]
+		}
+		s.FileLastThreeTimes = append(s.FileLastThreeTimes, end)
+		return 5
+	}))
+	// clearMemoeryCache: formerly removed a title from the cache; now a no-op.
+	s.L.SetGlobal("clearMemoeryCache", s.L.NewFunction(func(S *lua.LState) int {
+		/*title := S.ToString(-1)
+		isExist, _ := redis.Exists("title_repeat_judgement", "title_repeat_"+title)
+		if isExist {
+			redis.Del("title_repeat_judgement", "title_repeat_"+title)
+		}*/
+		return 1
+	}))
+	// regexp(text, regstr, index): for every match return text[v[index]:v[index+1]],
+	// where v is the submatch index slice of that match (index 0 is the whole
+	// match, index 2 the first group). An invalid pattern yields an empty table;
+	// matches whose requested range does not exist are skipped.
+	s.L.SetGlobal("regexp", s.L.NewFunction(func(S *lua.LState) int {
+		index := int(S.ToNumber(-1))
+		regstr := S.ToString(-2)
+		text := S.ToString(-3)
+		ret := s.L.NewTable()
+		reg, err := regexp.Compile(regstr)
+		if err != nil {
+			// The pattern comes from the script; do not panic on a bad one.
+			logger.Warn(s.SCode, "regexp:", err.Error())
+			S.Push(ret)
+			return 1
+		}
+		reps := reg.FindAllStringSubmatchIndex(text, -1)
+		number := 0
+		for _, v := range reps {
+			// Skip out-of-range indexes and groups that did not participate (-1).
+			if index < 0 || index+1 >= len(v) || v[index] < 0 || v[index+1] < v[index] {
+				continue
+			}
+			number++
+			ret.Insert(number, lua.LString(text[v[index]:v[index+1]]))
+		}
+		S.Push(ret)
+		return 1
+	}))
+	// replace(text, old, repl): replace every occurrence of old in text.
+	s.L.SetGlobal("replace", s.L.NewFunction(func(S *lua.LState) int {
+		text := S.ToString(-3)
+		old := S.ToString(-2)
+		repl := S.ToString(-1)
+		text = strings.Replace(text, old, repl, -1)
+		S.Push(lua.LString(text))
+		return 1
+	}))
+	// pagefilterword(list): keep the entries whose title matches the configured
+	// keyword pattern and does not match the exclusion pattern.
+	s.L.SetGlobal("pagefilterword", s.L.NewFunction(func(S *lua.LState) int {
+		keyWordReg := regexp.MustCompile(util.Config.Word["keyword"])
+		notKeyWordReg := regexp.MustCompile(util.Config.Word["notkeyword"])
+		data := S.ToTable(-1)
+		dataMap := util.TableToMap(data)
+		ret := s.L.NewTable()
+		num := 1
+		for _, v := range dataMap {
+			tmp := v.(map[string]interface{})
+			isOk := false
+			if title := qu.ObjToString(tmp["title"]); title != "" {
+				if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
+					isOk = true
+				}
+			}
+			if isOk {
+				ret.Insert(num, util.MapToLuaTable(S, tmp))
+				num++
+			}
+		}
+		S.Push(ret)
+		return 1
+	}))
+	// detailfilterword(data): same keyword/exclusion check for a single record;
+	// returns true when the title passes.
+	s.L.SetGlobal("detailfilterword", s.L.NewFunction(func(S *lua.LState) int {
+		keyWordReg := regexp.MustCompile(util.Config.Word["keyword"])
+		notKeyWordReg := regexp.MustCompile(util.Config.Word["notkeyword"])
+		data := S.ToTable(-1)
+		dataMap := util.TableToMap(data)
+		if title := qu.ObjToString(dataMap["title"]); title != "" {
+			if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
+				S.Push(lua.LBool(true))
+				return 1
+			} else {
+				qu.Debug(s.SCode, dataMap["href"], "	title error")
+			}
+		} else {
+			qu.Debug(s.SCode, dataMap["href"], "	title error")
+		}
+		S.Push(lua.LBool(false))
+		return 1
+	}))
+	// filterdetail(detail): detail text filter.
+	s.L.SetGlobal("filterdetail", s.L.NewFunction(func(S *lua.LState) int {
+		/*
+			1. Length check (special phrases: "see the original page", "none",
+			   "no related content", "no body text", "see attachment")
+			2. Whether the text contains Chinese characters
+		*/
+		reg1 := regexp.MustCompile("(原网页|无|无相关内容|无正文内容|见附件|详见附件)")
+		reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
+		detail := S.ToString(-1)
+		if reg1.MatchString(detail) {
+			S.Push(lua.LBool(true))
+			return 1
+		}
+		if len([]rune(detail)) < 50 || !reg2.MatchString(detail) {
+			S.Push(lua.LBool(false))
+			return 1
+		}
+		// NOTE(review): this path returns false as well, so the length/Chinese
+		// check above has no effect on the result — confirm the intended value.
+		S.Push(lua.LBool(false))
+		return 1
+	}))
+	// matchan(detail): report whether detail still contains Chinese characters
+	// after the "see attachment" phrases are removed.
+	s.L.SetGlobal("matchan", s.L.NewFunction(func(S *lua.LState) int {
+		reg1 := regexp.MustCompile("(见附件|详见附件)")
+		reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
+		detail := S.ToString(-1)
+		detail = reg1.ReplaceAllString(detail, "")
+		ok := reg2.MatchString(detail)
+		S.Push(lua.LBool(ok))
+		return 1
+	}))
+	// aesEncryptECB(origData, key): AES encryption block by block (ECB), padded
+	// with the pad length in every pad byte, result base64-encoded.
+	s.L.SetGlobal("aesEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
+		origData := S.ToString(-2)
+		key := S.ToString(-1)
+		bytekey := []byte(key)
+		byteorigData := []byte(origData)
+		cipher, _ := aes.NewCipher(generateKey([]byte(bytekey)))
+		length := (len(byteorigData) + aes.BlockSize) / aes.BlockSize
+		plain := make([]byte, length*aes.BlockSize)
+		copy(plain, byteorigData)
+		pad := byte(len(plain) - len(byteorigData))
+		for i := len(byteorigData); i < len(plain); i++ {
+			plain[i] = pad
+		}
+		encrypted := make([]byte, len(plain))
+		// Encrypt one block at a time.
+		for bs, be := 0, cipher.BlockSize(); bs <= len(byteorigData); bs, be = bs+cipher.BlockSize(), be+cipher.BlockSize() {
+			cipher.Encrypt(encrypted[bs:be], plain[bs:be])
+		}
+		result := base64.StdEncoding.EncodeToString(encrypted)
+		S.Push(lua.LString(result))
+		return 1
+	}))
+	// getPublishtime(detail, contenthtml): derive the publish time from the body.
+	s.L.SetGlobal("getPublishtime", s.L.NewFunction(func(S *lua.LState) int {
+		detail := S.ToString(-2)
+		contenthtml := S.ToString(-1)
+		publishtime := util.GetPublishtime([]string{contenthtml, detail})
+		S.Push(lua.LString(publishtime))
+		return 1
+	}))
+	return ""
+}
+func dealHref(pageListUrl, href string) string {
+	returnUrl := ""
+	if href != "" {
+		r, _ := regexp.Compile("^./")
+		match := r.MatchString(href)
+		if match {
+			url2 := r.ReplaceAllString(href, "")
+			returnUrl = pageListUrl + url2
+		}
+		r2, _ := regexp.Compile("^/")
+		match2 := r2.MatchString(href)
+		if match2 {
+			r3, _ := regexp.Compile("http://[^/]*/")
+			domain := r3.FindString(pageListUrl)
+			//fmt.Println(domain)
+			url2 := r2.ReplaceAllString(href, "")
+			returnUrl = domain + url2
+		}
+
+	}
+	return returnUrl
+}
+func dealPublishTime(content string, pattern string) string {
+	publishTime := ""
+	if pattern == "yyyy-MM-dd HH:mm:ss" {
+		r, _ := regexp.Compile("\\d{4}-\\d{2}-\\d{2}\\s*\\d{2}:\\d{2}:\\d{2}")
+		publishTime = r.FindString(content)
+	} else if pattern == "yyyy-MM-dd" {
+		r, _ := regexp.Compile("\\d{4}-\\d{2}-\\d{2}")
+		publishTime = r.FindString(content)
+	} else if pattern == "MM-dd" {
+		r, _ := regexp.Compile("\\d{2}-\\d{2}")
+		publishTime = r.FindString(content)
+	}
+	return publishTime
+}
+// getChildrenLen returns the index of the last child element of sq, i.e. the
+// number of children minus one, and 0 when sq has no children. Despite its
+// name it does not return the child count itself.
+func getChildrenLen(sq *gq.Selection) (ret int) {
+	if count := sq.Children().Length(); count > 0 {
+		ret = count - 1
+	}
+	return ret
+}
+
+// Reload closes the current Lua state and loads the stored script again under
+// the stored spider code, passing newstate=false to LoadScript.
+// NOTE(review): with util.Config.Working == 0 and newstate=false LoadScript
+// does not create a new state, so the just-closed s.L appears to be reused —
+// confirm this is intended.
+func (s *Script) Reload() {
+	s.L.Close()
+	s.LoadScript(s.SCode, s.ScriptFile, false)
+}
+
+//unicode转码
+func transUnic(str string) string {
+	buf := bytes.NewBuffer(nil)
+	i, j := 0, len(str)
+	for i < j {
+		x := i + 6
+		if x > j {
+			buf.WriteString(str[i:])
+			break
+		}
+		if str[i] == '\\' && str[i+1] == 'u' {
+			hex := str[i+2 : x]
+			r, err := strconv.ParseUint(hex, 16, 64)
+			if err == nil {
+				buf.WriteRune(rune(r))
+			} else {
+				logger.Warn(err.Error())
+				buf.WriteString(str[i:x])
+			}
+			i = x
+		} else {
+			buf.WriteByte(str[i])
+			i++
+		}
+	}
+	return buf.String()
+}
+
+// GetVar returns the string form of the Lua global named key.
+// NOTE(review): an unset global presumably stringifies as "nil" rather than
+// "" — confirm against gopher-lua before relying on an empty result.
+func (s *Script) GetVar(key string) string {
+	return s.L.GetGlobal(key).String()
+}
+
+// GetIntVar returns the Lua global named key truncated to an int, or -1 when
+// the global is not a Lua number.
+func (s *Script) GetIntVar(key string) int {
+	num, isNumber := s.L.GetGlobal(key).(lua.LNumber)
+	if !isNumber {
+		return -1
+	}
+	return int(num)
+}
+
+// GetBoolVar returns the Lua global named key as a bool, or false when the
+// global is not a Lua boolean.
+func (s *Script) GetBoolVar(key string) bool {
+	flag, isBool := s.L.GetGlobal(key).(lua.LBool)
+	if !isBool {
+		return false
+	}
+	return bool(flag)
+}
+
+// isWorkTime stores the current result of util.IsWorkTime in workTime and
+// hands itself to util.TimeAfterFunc with a 10-minute interval and TimeChan
+// (presumably re-running it every 10 minutes — confirm against spiderutil).
+func isWorkTime() {
+	workTime = util.IsWorkTime()
+	util.TimeAfterFunc(10*time.Minute, isWorkTime, TimeChan)
+}
+
+// SleepTime throttles the caller according to the four most recent request
+// durations in times (oldest first). A duration counts as slow when it is
+// longer than basetime minutes.
+//
+//   - last two slow: sleep 1 minute plus 1 minute for each of the two older
+//     durations that was slow as well (1 to 3 minutes);
+//   - last one slow, the one before fast, the two oldest slow: sleep 1 minute;
+//   - otherwise: return immediately.
+//
+// A slice with fewer than four entries carries too little history and is
+// ignored instead of causing an index-out-of-range panic.
+func SleepTime(basetime int, times []time.Duration) {
+	if len(times) < 4 {
+		return
+	}
+	st := 0 // minutes to sleep
+	base := float64(basetime * 60)
+	if times[3].Seconds() > base { // the most recent request was slow
+		if times[2].Seconds() > base {
+			n := 0
+			if times[0].Seconds() > base {
+				n++
+			}
+			if times[1].Seconds() > base {
+				n++
+			}
+			st = n + 1
+		} else if times[2].Seconds() < base && times[0].Seconds() > base && times[1].Seconds() > base {
+			st = 1
+		}
+	}
+	if st > 0 {
+		time.Sleep(time.Duration(st) * time.Minute)
+	}
+}
+// generateKey folds key into a 16-byte AES key: the first 16 bytes are copied
+// (zero-padded when key is shorter) and every further byte is XORed into the
+// position given by its index modulo 16.
+func generateKey(key []byte) (genKey []byte) {
+	genKey = make([]byte, 16)
+	copy(genKey, key)
+	for pos := 16; pos < len(key); pos++ {
+		genKey[pos%16] ^= key[pos]
+	}
+	return genKey
+}

+ 343 - 0
src/spider/spider.go

@@ -0,0 +1,343 @@
+/**
+爬虫,脚本接口,需要扩展
+*/
+package spider
+
+import (
+	"crypto/sha1"
+	"crypto/sha256"
+	"fmt"
+	"io"
+	"log"
+	"math/big"
+	"math/rand"
+	mu "mfw/util"
+	qu "qfw/util"
+	"regexp"
+	util "spiderutil"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/donnie4w/go-logger/logger"
+	"github.com/yuin/gopher-lua"
+)
+
+// Spider is one spider instance: the embedded Script plus scheduling state,
+// download counters, storage settings and ownership information.
+type Spider struct {
+	Script
+	Code               string // spider code
+	Name               string // spider name
+	DownDetail         bool   // whether detail pages are downloaded
+	Stop               bool   // stop flag
+	Pass               bool   // pause flag
+	LastPubshTime      int64  // latest publish time
+	LastHeartbeat      int64  // latest heartbeat time
+	SpiderRunRate      int64  // run frequency
+	ExecuteOkTime      int64  // time the task succeeded/finished
+	Collection         string // target collection name
+	Thread             int64  // number of threads
+	LastExecTime       int64  // latest execution time
+	LastDowncount      int32  // downloads of the latest run
+	TodayDowncount     int32  // downloads today
+	YesterdayDowncount int32  // downloads yesterday
+	TotalDowncount     int32  // total downloads
+	RoundCount         int32  // number of rounds executed
+	StoreMode          int    // storage mode
+	StoreToMsgEvent    int    // message event type
+	CoverAttr          string // attribute used for de-duplication
+	SleepBase          int    // base delay (used as milliseconds by DownloadDetailPage)
+	SleepRand          int    // random delay bound (used as milliseconds by DownloadDetailPage)
+	TargetChannelUrl   string // channel page URL
+	UpperLimit         int    // upper bound of the normal value range
+	LowerLimit         int    // lower bound of the normal value range
+	UserName           string // developer name
+	UserEmail          string // developer e-mail
+	UploadTime         string // script upload time
+	MUserName          string // maintainer name
+	MUserEmail         string // maintainer e-mail
+	Index              int    // array index
+	// historical back-fill
+	IsHistoricalMend bool // whether this is a historical back-fill spider
+	IsMustDownload   bool // whether download is forced
+}
+
+// TimeChan is the channel handed to util.TimeAfterFunc by isWorkTime.
+var TimeChan = make(chan bool, 1)
+
+// Reg matches the scheme and dotted host of an http/https URL, with an
+// optional trailing slash; GetHas1 uses it to extract the site prefix.
+var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
+
+// DetailData waits until all Lua scripts are loaded and then starts the
+// detail-page collection. It only does so in high-performance mode
+// (util.Config.Working == 0) on nodes where util.Config.IsHistoryEvent is
+// false (per the original note, only the 7000 node sets it to true).
+func DetailData() {
+	defer qu.Catch()
+	<-InitAllLuaOver // proceed once script loading has finished
+	highPerformance := util.Config.Working == 0
+	if !highPerformance || util.Config.IsHistoryEvent {
+		return
+	}
+	GetListDataDownloadDetail()
+}
+// GetListDataDownloadDetail starts one DownloadHighDetail goroutine for every
+// key in Allspiders, waiting 2 seconds between starts.
+func GetListDataDownloadDetail() {
+	defer qu.Catch()
+	logger.Info("+++++++++++++++++++Download Detail+++++++++++++++++++")
+	Allspiders.Range(func(k, v interface{}) bool {
+		go DownloadHighDetail(k.(string))
+		time.Sleep(2 * time.Second) // stagger the start of the workers
+		return true
+	})
+}
+
+// DownloadHighDetail is the high-performance-mode worker for one spider code.
+// In an endless loop it fetches pending list-page records (state 0) from
+// "spider_highlistdata", downloads their detail pages with the spider
+// instances registered under code in AllspidersMap, stores the results and
+// updates the record state (1 = done, -1 = failed three times). After every
+// round the script is reloaded; when nothing is pending it sleeps 2 minutes.
+//
+// AllspidersMap is read only while AllspidersMapLock is held, and a round is
+// skipped when no instance is registered, because waiting on the empty
+// instance channel would block this worker forever.
+func DownloadHighDetail(code string) {
+	defer qu.Catch()
+	for {
+		//logger.Info("爬虫代码:", s.Code, "已下架:", s.Stop)
+		//if !s.Stop { //爬虫是运行状态
+		/*
+			1. Every round first queries the records that came in today.
+			2. When a query returns nothing, the window moves back one day at a
+			   time (up to util.Config.DayNum days).
+		*/
+		o := map[string]interface{}{"_id": 1} // sort order
+		f := map[string]interface{}{          // projection: fields excluded from the result
+			"state":      0,
+			"comeintime": 0,
+			"event":      0,
+		}
+		q := map[string]interface{}{
+			"spidercode": code,
+			"state":      0, // 0: stored, pending; -1: download failed; 1: done
+		}
+		list := &[]map[string]interface{}{} // query result
+		for day := 0; day <= util.Config.DayNum; day++ {
+			comeintime := map[string]interface{}{"$gte": GetTime(-day)} // lower bound of the time window
+			if day != 0 {                                               // not today: add the upper bound
+				comeintime["$lt"] = GetTime(-day + 1)
+			}
+			q["comeintime"] = comeintime
+			list, _ = MgoS.Find("spider_highlistdata", q, o, f, false, 0, 100)
+			//logger.Debug("code:", code, "query:", q, "当前查询数据量:", len(*list))
+			if list != nil && len(*list) > 0 {
+				break
+			} else {
+				time.Sleep(1 * time.Second)
+			}
+		}
+		if list != nil && len(*list) > 0 {
+			// Snapshot the spider instances into a channel that serves as a pool.
+			AllspidersMapLock.Lock()
+			spChan := make(chan *Spider, len(AllspidersMap[code]))
+			for _, sp := range AllspidersMap[code] {
+				spChan <- sp
+			}
+			AllspidersMapLock.Unlock()
+			if len(spChan) == 0 { // no instance available: retry later
+				time.Sleep(2 * time.Minute)
+				continue
+			}
+			wg := &sync.WaitGroup{}
+			for _, l := range *list {
+				spTmp := <-spChan // blocks until an instance is free
+				wg.Add(1)
+				go func(tmp map[string]interface{}, sp *Spider) {
+					defer func() {
+						spChan <- sp // hand the instance back to the pool
+						wg.Done()
+					}()
+					_id := tmp["_id"]
+					query := map[string]interface{}{"_id": _id}
+					competehref := qu.ObjToString(tmp["competehref"])
+					if competehref != "" { // third-party site record: check whether it was collected already
+						title := qu.ObjToString(tmp["title"])
+						one, _ := MgoS.FindOne("data_bak", map[string]interface{}{"title": title})
+						if one != nil && len(*one) > 0 { // already collected: drop this record
+							set := map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": true}} // existing record: state 1
+							MgoS.Update("spider_highlistdata", query, set, false, false)
+							return
+						}
+					}
+					times := qu.IntAll(tmp["times"])
+					success := true // whether the download succeeded
+					delete(tmp, "_id")
+					delete(tmp, "times")
+					href := qu.ObjToString(tmp["href"])
+					data := map[string]interface{}{}
+					var err interface{}
+					for k, v := range tmp {
+						data[k] = v
+					}
+					// download, parse, store
+					data, err = sp.DownloadDetailPage(tmp, data)
+					if err != nil || data == nil {
+						success = false
+						times++
+						if err != nil {
+							logger.Error(sp.Code, err, tmp)
+							if len(tmp) > 0 {
+								SaveErrorData(sp.MUserName, tmp, err) // store the error record
+							}
+						} /*else if data == nil && times >= 3 { //下载问题,建editor任务
+							DownloadErrorData(s.Code, tmp)
+						}*/
+					} else if tmphref := qu.ObjToString(data["href"]); tmphref != href { // the detail page replaced the href
+						log.Println("beforeHref:", href, "afterHref:", tmphref)
+						// incremental de-duplication store
+						util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+tmphref, tmphref, 3600*24*30)
+						// full de-duplication store
+						db := HexToBigIntMod(tmphref)
+						isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, "url_repeat_"+tmphref)
+						if !isExist {
+							util.PutRedis("title_repeat_fulljudgement", db, "url_repeat_"+tmphref, "", -1)
+						}
+					}
+					if !success { // failed: update the retry counter and the state
+						ss := map[string]interface{}{"times": times}
+						if times >= 3 { // three failures: give up on this record, state -1
+							ss["state"] = -1
+						}
+						set := map[string]interface{}{"$set": ss}
+						MgoS.Update("spider_highlistdata", query, set, false, false)
+						return
+					}
+					t1 := util.ParseDate2Int64(qu.ObjToString(data["publishtime"]))
+					if t1 > time.Now().Unix() { // guard against publish times in the future
+						data["publishtime"] = time.Now().Unix()
+					}
+					delete(data, "exit")
+					delete(data, "checkpublishtime")
+					data["comeintime"] = time.Now().Unix()
+					// counters
+					tmpsp1, b := Allspiders.Load(sp.Code)
+					if b {
+						sp1, ok := tmpsp1.(*Spider)
+						if ok {
+							atomic.AddInt32(&sp1.LastDowncount, 1)
+							atomic.AddInt32(&sp1.TodayDowncount, 1)
+							atomic.AddInt32(&sp1.TotalDowncount, 1)
+						}
+					}
+					data["spidercode"] = sp.Code
+					data["dataging"] = 0
+					Store(sp.StoreMode, sp.StoreToMsgEvent, sp.Collection, sp.CoverAttr, data, true)
+					set := map[string]interface{}{"$set": map[string]interface{}{"state": 1}} // download succeeded: state 1
+					MgoS.Update("spider_highlistdata", query, set, false, false)
+				}(l, spTmp)
+			}
+			wg.Wait()
+			// reload the script after every round
+			ReloadScript(code)
+		} else { // nothing pending
+			time.Sleep(2 * time.Minute)
+		}
+	}
+}
+
+// DownloadDetailPage runs the Lua function downloadDetailPage of this spider
+// for one list-page record and merges the returned table into data.
+//
+// param is converted into the Lua argument table; only string, int64, int32,
+// float64, float32 and bool values are passed on. From the result table the
+// entries with a string key and a string, number or table value are copied
+// into data. The function returns (data, err) when the Lua call fails,
+// (data, nil) on success and (nil, nil) when the script did not return a
+// table. Before the call it sleeps SleepBase plus a random part of SleepRand
+// milliseconds.
+func (s *Spider) DownloadDetailPage(param map[string]interface{}, data map[string]interface{}) (map[string]interface{}, interface{}) {
+	defer mu.Catch()
+	s.LastHeartbeat = time.Now().Unix()
+	pause := time.Duration(s.SleepBase+GetRandMath(s.SleepRand)) * time.Millisecond
+	util.TimeSleepFunc(pause, TimeSleepChan)
+	args := s.L.NewTable()
+	for name, raw := range param {
+		switch val := raw.(type) {
+		case string:
+			args.RawSet(lua.LString(name), lua.LString(val))
+		case int64:
+			args.RawSet(lua.LString(name), lua.LNumber(val))
+		case int32:
+			args.RawSet(lua.LString(name), lua.LNumber(val))
+		case float64:
+			args.RawSet(lua.LString(name), lua.LNumber(val))
+		case float32:
+			args.RawSet(lua.LString(name), lua.LNumber(val))
+		case bool:
+			args.RawSet(lua.LString(name), lua.LBool(val))
+		}
+	}
+	call := lua.P{
+		Fn:      s.L.GetGlobal("downloadDetailPage"),
+		NRet:    1,
+		Protect: true,
+	}
+	if err := s.L.CallByParam(call, args); err != nil {
+		//panic(s.Code + "," + err.Error())
+		log.Println(s.Code + "," + err.Error())
+		atomic.AddInt32(&s.Script.ErrorNum, 1)
+		return data, err
+	}
+	result := s.L.Get(-1)
+	s.L.Pop(1)
+	table, isTable := result.(*lua.LTable)
+	if !isTable {
+		return nil, nil
+	}
+	// Copy the supported entries of the result table into data.
+	table.ForEach(func(k, v lua.LValue) {
+		name, isString := k.(lua.LString)
+		if !isString {
+			return
+		}
+		key := string(name)
+		switch value := v.(type) {
+		case lua.LString:
+			data[key] = string(value)
+		case lua.LNumber:
+			data[key] = value
+		case *lua.LTable:
+			data[key] = util.TableToMap(value)
+		}
+	})
+	return data, nil
+}
+
+// ReloadScript reads the current script record of code from the database and,
+// when one exists, refreshes every spider instance registered under code in
+// AllspidersMap: script text, creator (only when set), maintainer, and a
+// LoadScript call with newstate=true. The whole update runs under
+// AllspidersMapLock.
+func ReloadScript(code string) {
+	info := getSpiderScriptDB(code)[code]
+	if info == nil {
+		return
+	}
+	AllspidersMapLock.Lock()
+	for _, sp := range AllspidersMap[code] {
+		sp.ScriptFile = info["script"]
+		if creator := info["createuser"]; creator != "" {
+			sp.UserName = creator
+		}
+		if creatorEmail := info["createuseremail"]; creatorEmail != "" {
+			sp.UserEmail = creatorEmail
+		}
+		sp.MUserName = info["modifyuser"]
+		sp.MUserEmail = info["modifyemail"]
+		sp.LoadScript(code, sp.ScriptFile, true)
+	}
+	AllspidersMapLock.Unlock()
+
+	// for k, v := range scriptMap {
+	// 	if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
+	// 		sp := spd.(*Spider)
+	// 		sp.ScriptFile = v["script"]
+	// 		if v["createuser"] != "" {
+	// 			sp.UserName = v["createuser"]
+	// 		}
+	// 		if v["createuseremail"] != "" {
+	// 			sp.UserEmail = v["createuseremail"]
+	// 		}
+	// 		sp.MUserName = v["modifyuser"]
+	// 		sp.MUserEmail = v["modifyemail"]
+	// 		//sp.LoadScript(k, sp.ScriptFile, true) //更新上架,重载脚本
+	// 		Allspiders.Store(k, sp)
+	// 		logger.Info("上架重载脚本", sp.Code)
+	// 	}
+	// }
+}
+
+// GetRandMath returns a random integer in [0, num), seeded from the current
+// time. For num <= 0 it returns 0, because rand.Intn panics on a
+// non-positive bound (e.g. a spider configured without a random delay).
+func GetRandMath(num int) int {
+	if num <= 0 {
+		return 0
+	}
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	return r.Intn(num)
+}
+
+// GetHas1 builds a hash code for data: the URL prefix that Reg finds in data
+// (a "/" is appended when the prefix does not end with one, so the result
+// starts with "/" when no URL is found) followed by the hexadecimal SHA-1 of
+// data.
+func GetHas1(data string) string {
+	digest := sha1.Sum([]byte(data))
+	prefix := Reg.FindString(data)
+	if !strings.HasSuffix(prefix, "/") {
+		prefix += "/"
+	}
+	return prefix + fmt.Sprintf("%x", digest[:])
+}
+
+// HexToBigIntMod maps href to a number in [0, 16): the SHA-256 of href is
+// rendered as hexadecimal, the first two hex digits are dropped, the rest is
+// read as a big integer and reduced modulo 16. Callers in this package use the
+// result to pick the Redis db for the href.
+func HexToBigIntMod(href string) int {
+	// hash
+	hasher := sha256.New()
+	io.WriteString(hasher, href)
+	digest := fmt.Sprintf("%x", hasher.Sum(nil))
+	// modulo
+	value, _ := new(big.Int).SetString(digest[2:], 16)
+	remainder := value.Mod(value, big.NewInt(16))
+	return int(remainder.Int64())
+}

+ 245 - 0
src/spider/store.go

@@ -0,0 +1,245 @@
+package spider
+
+import (
+	"encoding/json"
+	"fmt"
+	mu "mfw/util"
+	"qfw/util"
+	lu "spiderutil"
+	"strings"
+	"sync/atomic"
+	"time"
+
+	"github.com/cron"
+
+	"github.com/donnie4w/go-logger/logger"
+	"github.com/yuin/gopher-lua"
+)
+
+// LogMap groups the fields of one spider log entry: the spider's identity,
+// the URLs involved, one int per problem category (Type_*), error details and
+// download statistics.
+// NOTE(review): whether the Type_* fields are flags or counters is not visible
+// in this file section — confirm against the code that fills them.
+type LogMap struct {
+	Code        string // spider code
+	Name        string // spider name
+	ChannelUrl  string // channel (column) URL
+	Url         string // target page URL
+	Type_web    int    // category: website redesign
+	Type_script int    // category: script
+	Type_net    int    // category: network
+	Type_check  int    // category: verification
+	Type_other  int    // category: other
+	UploadTime  string // script upload time
+	ErrInfo     string // error message
+	Errtime     int64  // time of the error
+
+	Round         int // download round
+	DownloadCount int // number of downloads
+}
+
+// //数据存储批量
+// func StoreBlak(mode, event int, c, coverAttr string, data []map[string]interface{}) {
+// 	defer mu.Catch()
+// 	for _, v := range data {
+// 		if t, err := time.ParseInLocation(util.Date_Full_Layout, util.ObjToString(v["publishtime"]), time.Local); err == nil {
+// 			v["publishtime"] = t.Unix()
+// 			v["T"] = c
+// 		}
+// 	}
+// 	if mode == 1 {
+// 		b := mgu.SaveBulk(c, "spider", "spider", data...)
+// 		if b {
+// 			logger.Debug(c, mode, "保存成功")
+// 		} else {
+// 			logger.Warn(c, mode, "保存失败")
+// 		}
+// 	} else if mode == 2 {
+// 		SaveObjBlak(event, coverAttr, c, data)
+// 	}
+// }
+
+// Store persists one crawled record.
+//
+// If data["publishtime"] parses with util.Date_Full_Layout in the local zone
+// it is replaced by its Unix timestamp. Then, depending on mode:
+//   - mode 1: the record is written to collection c through MgoS. Collections
+//     whose name starts with "ecps" are upserted with signvalue set to 1; all
+//     others are saved as new documents. When flag is set and the href is
+//     longer than 5 bytes, the href is also recorded in the Redis
+//     de-duplication stores.
+//   - mode 2: data["T"] is set to c and the record is handed to SaveObj.
+//
+// For "ecps" collections a message carrying the title is finally sent through
+// SendMsgService, whatever the mode. Panics are handled by mu.Catch.
+func Store(mode, event int, c, coverAttr string, data map[string]interface{}, flag bool) {
+	defer mu.Catch()
+	rawPublish := util.ObjToString(data["publishtime"])
+	if parsed, parseErr := time.ParseInLocation(util.Date_Full_Layout, rawPublish, time.Local); parseErr == nil {
+		data["publishtime"] = parsed.Unix()
+	}
+	isEcps := strings.HasPrefix(c, "ecps")
+	switch mode {
+	case 1:
+		if isEcps {
+			// Marker value for public-notice records.
+			data["signvalue"] = int(1)
+
+			// NOTE(review): the query key is the literal "coverAttr", not the
+			// coverAttr parameter — confirm this is intended.
+			query := map[string]interface{}{"coverAttr": data["title"]}
+			update := map[string]interface{}{"$set": data}
+			if MgoS.Update(c, query, update, true, false) {
+				logger.Debug(c, data["title"], mode, "保存成功")
+			}
+		} else if id := MgoS.Save(c, data); id != "" {
+			logger.Debug(c, mode, "保存成功")
+		} else {
+			logger.Warn(c, mode, "保存失败", data)
+		}
+		// Only hrefs longer than 5 bytes count as valid data.
+		if href := fmt.Sprint(data["href"]); len(href) > 5 && flag {
+			// The Redis db for the full store is picked from the href hash.
+			db := HexToBigIntMod(href)
+			redisKey := "url_repeat_" + href
+			// Incremental store.
+			lu.PutRedis("title_repeat_judgement", 0, redisKey, href, 3600*24*30)
+			// Full store: written only when the key is not there yet.
+			if exists, _ := lu.ExistRedis("title_repeat_fulljudgement", db, redisKey); !exists {
+				lu.PutRedis("title_repeat_fulljudgement", db, redisKey, "", -1)
+			}
+		}
+	case 2:
+		data["T"] = c
+		SaveObj(event, coverAttr, data, flag)
+	}
+	// Public-notice data: notify the other service.
+	if isEcps {
+		msg := map[string]interface{}{"title": data["title"], "type": "ecps"}
+		SendMsgService(mu.SERVICE_SPIDER_ECPS, []map[string]interface{}{msg})
+	}
+}
+
+// saveVerificationLog writes one verification-error entry to the
+// "spider_errlog" collection. Besides the four arguments the entry carries the
+// current Unix time ("comeintime") and lu.Config.Uploadevent ("event").
+// Panics are handled by mu.Catch.
+func saveVerificationLog(code, name, url, content string) {
+	defer mu.Catch()
+	entry := map[string]interface{}{
+		"code":       code,
+		"name":       name,
+		"url":        url,
+		"content":    content,
+		"comeintime": time.Now().Unix(),
+		"event":      lu.Config.Uploadevent,
+	}
+	MgoS.Save("spider_errlog", entry)
+}
+
+//查找信息是否存在
+// func findHasExit(c, q string) bool {
+// 	defer mu.Catch()
+// 	ret := mgu.FindOne(c, "spider", "spider", q)
+// 	if *ret != nil {
+// 		return true
+// 	} else {
+// 		return false
+// 	}
+// }
+
+// spider_ldtime is a map of records keyed by a string; in this part of the
+// file it is referenced only by the commented-out last-publish-time and
+// last-exec-time helpers below, which key it by spider code.
+var spider_ldtime = map[string]map[string]interface{}{}
+
+// func GetLastPubtime(code string) int64 {
+// 	defer mu.Catch()
+// 	if len(spider_ldtime) < 1 {
+// 		list := MgoS.Find("spider_ldtime", nil, nil, nil, false, -1, -1)
+// 		for _, v := range *list {
+// 			spider_ldtime[fmt.Sprint(v["code"])] = v
+// 		}
+// 	}
+// 	if spider_ldtime[code] != nil {
+// 		lastpubtime := spider_ldtime[code]["lastpubtime"]
+// 		return util.Int64All(lastpubtime)
+// 	} else {
+// 		return 0
+// 	}
+// }
+
+//获取最后执行时间
+// func GetLastExectime(code string) int64 {
+// 	defer mu.Catch()
+// 	if len(spider_ldtime) < 1 {
+// 		list := MgoS.Find("spider_ldtime", nil, nil, nil, false, -1, -1)
+// 		for _, v := range *list {
+// 			spider_ldtime[fmt.Sprint(v["code"])] = v
+// 		}
+// 	}
+// 	if spider_ldtime[code] != nil {
+// 		lastexectime := spider_ldtime[code]["lastexectime"]
+// 		return util.Int64All(lastexectime)
+// 	} else {
+// 		return 0
+// 	}
+// }
+
+// spider_downlog caches "spider_downlog" records, keyed by their "code" field.
+var spider_downlog = map[string]map[string]interface{}{}
+
+// GetDownloadLast returns the cached download-log record for code, or nil when
+// there is none.
+//
+// While the cache is empty, every "spider_downlog" record whose "date" equals
+// date is loaded into it first.
+// NOTE(review): once the cache holds any entry this function never reloads it,
+// so date is ignored on later calls — confirm it is reset elsewhere.
+func GetDownloadLast(code, date string) map[string]interface{} {
+	defer mu.Catch()
+	if len(spider_downlog) == 0 {
+		query := map[string]interface{}{"date": date}
+		records, _ := MgoS.Find("spider_downlog", query, nil, nil, false, -1, -1)
+		for _, rec := range *records {
+			spider_downlog[fmt.Sprint(rec["code"])] = rec
+		}
+	}
+	// A missing key yields a nil map, which is what callers get for "not found".
+	return spider_downlog[code]
+}
+
+// GcCount rolls the per-spider daily counters over and re-schedules itself.
+//
+// When the current local hour is 0 it walks Allspiders and, for each spider,
+// passes the current counters to SaveDownCount, moves the "today" download and
+// request counters into their "yesterday" counterparts, and zeroes the today
+// counters, LastDowncount and RoundCount. It then calls lu.TimeSleepFunc with
+// one hour and TimeSleepChan. Whatever the hour, it finally calls
+// lu.TimeAfterFunc to run GcCount again after 30 minutes.
+func GcCount() {
+	if time.Now().Hour() == 0 {
+		Allspiders.Range(func(key, value interface{}) bool {
+			v := value.(*Spider)
+			// These counters are written through sync/atomic, so they are read
+			// the same way instead of with plain (racy) loads.
+			SaveDownCount(v.Code, true,
+				atomic.LoadInt32(&v.TodayDowncount),
+				atomic.LoadInt32(&v.ToDayRequestNum),
+				atomic.LoadInt32(&v.YesterdayDowncount),
+				atomic.LoadInt32(&v.YestoDayRequestNum))
+			// Swap reads and resets in a single step, so an increment landing
+			// between the read and the reset is not silently dropped.
+			downloads := atomic.SwapInt32(&v.TodayDowncount, 0)
+			atomic.StoreInt32(&v.YesterdayDowncount, downloads)
+			requests := atomic.SwapInt32(&v.ToDayRequestNum, 0)
+			atomic.StoreInt32(&v.YestoDayRequestNum, requests)
+			atomic.StoreInt32(&v.LastDowncount, 0)
+			atomic.StoreInt32(&v.RoundCount, 0)
+			return true
+		})
+		lu.TimeSleepFunc(1*time.Hour, TimeSleepChan)
+	}
+	lu.TimeAfterFunc(30*time.Minute, GcCount, TimeChan)
+}
+
+// SaveErrorData records a failed item in the "regatherdata" collection so it
+// can be downloaded again.
+//
+// Nothing is written when pd has no non-empty "href". Otherwise pd is
+// annotated in place (state 0, from "lua", current Unix time, modifyuser, and
+// the Lua error text when err is a *lua.ApiError), a string "publishtime" is
+// converted with lu.ParseDate2Int64, a JSON string in "jsondata" is decoded
+// into a map, and the result is upserted keyed by href.
+// Panics are handled by util.Catch.
+func SaveErrorData(modifyuser string, pd map[string]interface{}, err interface{}) {
+	defer util.Catch()
+	href := util.ObjToString(pd["href"])
+	if href == "" {
+		return
+	}
+	pd["state"] = 0
+	pd["from"] = "lua"
+	pd["comeintime"] = time.Now().Unix()
+	pd["modifyuser"] = modifyuser
+	if apiErr, isAPIErr := err.(*lua.ApiError); isAPIErr && apiErr != nil {
+		pd["error"] = apiErr.Object.String()
+	}
+	if pubStr, isStr := pd["publishtime"].(string); isStr {
+		pd["publishtime"] = lu.ParseDate2Int64(pubStr)
+	}
+	if raw := util.ObjToString(pd["jsondata"]); raw != "" && raw != "null" {
+		decoded := map[string]interface{}{}
+		// NOTE(review): the decode error is ignored, so invalid JSON replaces
+		// the original string with whatever was decoded (possibly nothing).
+		json.Unmarshal([]byte(raw), &decoded)
+		pd["jsondata"] = decoded
+	}
+	MgoS.Update(
+		"regatherdata",
+		map[string]interface{}{"href": href},
+		map[string]interface{}{"$set": pd},
+		true, false,
+	)
+}
+
+// TimeTask creates a cron scheduler and starts it. No jobs are registered
+// here: the branch for upload event 7100 is currently empty.
+func TimeTask() {
+	scheduler := cron.New()
+	if lu.Config.Uploadevent == 7100 {
+		// Jobs meant to run only on the 7100 node would go here.
+	}
+	scheduler.Start()
+}
+
+// GetTime returns the Unix timestamp for the start of the day that lies day
+// days from now (negative values go back in time).
+//
+// The date is formatted with util.Date_Short_Layout and parsed back in
+// time.Local, which drops whatever the layout does not contain — presumably
+// the time of day; verify against util.Date_Short_Layout. A parse error is
+// ignored, in which case the zero time's Unix value is returned.
+func GetTime(day int) int64 {
+	target := time.Now().AddDate(0, 0, day)
+	dayStr := util.FormatDate(&target, util.Date_Short_Layout)
+	truncated, _ := time.ParseInLocation(util.Date_Short_Layout, dayStr, time.Local)
+	return truncated.Unix()
+}