Browse Source

中国招标投标公共服务平台异常附件问题处理

maxiaoshan 2 years ago
parent
commit
1545c82c33
4 changed files with 241 additions and 6 deletions
  1. 6 6
      src/config.json
  2. 216 0
      src/logs/spider.log
  3. 18 0
      src/spider/script.go
  4. 1 0
      src/spider/util.go

+ 6 - 6
src/config.json

@@ -1,23 +1,23 @@
 {
-    "webport": "7100",
+    "webport": "7400",
     "mongodb_spider": "192.168.3.207:27092",
     "spider_dbsize": 50,
     "bideditor": {
-        "addr": "192.168.3.207:27001",
+        "addr": "192.168.3.207:27092",
         "db": "editor",
         "size": 5,
-        "username": "root",
-        "password": "root"
+        "username": "",
+        "password": ""
     },
     "editoraddr": "http://127.0.0.1:6011/spider/infos",
     "msgname": "爬虫采集平台7100",
-    "msgserveraddr": "spdata.jianyu360.com:803",
+    "msgserveraddr": "spdata.jianyu360.com:801",
     "msgserveraddrfile": "spdata.jianyu360.com:802",
 	"isdelay":false,
     "working": 0,
     "chansize": 4,
     "detailchansize": 20,
-    "uploadevent": 7100,
+    "uploadevent": 7400,
     "logLevel": 1,
     "daynum": 6,
     "modal": 1,

+ 216 - 0
src/logs/spider.log

@@ -191298,3 +191298,219 @@ stack traceback:
 2022/09/19 16:36:32 spider.go:779: info  Running Code: a_jazyjsxy_tzgg Stop: false
 2022/09/19 16:36:34 spider.go:779: info  Running Code: a_gysgxnyrzdbgfyxgs_xwzx_tzgg Stop: false
 2022/09/19 16:36:37 spider.go:779: info  Running Code: a_hbyyxyzcglc_tzgg Stop: false
+2022/10/28 14:04:03 main.go:139: debug  7100
+2022/10/28 14:04:03 spider.go:1089: info  Detail Download All Thread: 0
+2022/10/28 14:04:03 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:04:03 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:04:03 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:04:03 handler.go:142: info  总共加载脚本数: 0
+2022/10/28 14:04:30 main.go:139: debug  7100
+2022/10/28 14:04:30 spider.go:1089: info  Detail Download All Thread: 0
+2022/10/28 14:04:30 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:04:30 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:04:30 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:04:30 handler.go:142: info  总共加载脚本数: 0
+2022/10/28 14:05:17 main.go:139: debug  7100
+2022/10/28 14:05:17 spider.go:1089: info  Detail Download All Thread: 0
+2022/10/28 14:05:17 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:05:17 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:05:17 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:05:17 handler.go:142: info  总共加载脚本数: 0
+2022/10/28 14:05:43 main.go:139: debug  7100
+2022/10/28 14:05:43 spider.go:1089: info  Detail Download All Thread: 0
+2022/10/28 14:05:43 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:05:43 handler.go:405: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:05:43 spider.go:181: debug  sd_sdsggzyjyw_zfcgzzl 山东省公共资源交易网 频率: 30 , 150
+2022/10/28 14:05:43 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:05:43 handler.go:142: info  总共加载脚本数: 1
+2022/10/28 14:05:44 spider.go:411: info  sd_sdsggzyjyw_zfcgzzl 本轮列表页采集详情: 0 0 110 false
+2022/10/28 14:05:45 spider.go:166: debug  sd_sdsggzyjyw_zfcgzzl 山东省公共资源交易网 ok,本轮下载量: 0 ,轮询数据长度: 1 ,下线数量: 0 ,下线爬虫: []
+2022/10/28 14:06:29 main.go:139: debug  7100
+2022/10/28 14:06:29 spider.go:1089: info  Detail Download All Thread: 0
+2022/10/28 14:06:29 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:06:30 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:06:30 spider.go:181: debug  sd_sdsggzyjyw_zfcgzzl 山东省公共资源交易网 频率: 30 , 150
+2022/10/28 14:06:30 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:06:30 handler.go:142: info  总共加载脚本数: 1
+2022/10/28 14:07:29 main.go:139: debug  7100
+2022/10/28 14:07:29 spider.go:1090: info  Detail Download All Thread: 0
+2022/10/28 14:07:29 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:07:29 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:07:30 spider.go:181: debug  sd_sdsggzyjyw_zfcgzzl 山东省公共资源交易网 频率: 30 , 150
+2022/10/28 14:07:30 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:07:30 handler.go:142: info  总共加载脚本数: 1
+2022/10/28 14:08:29 spider.go:1090: info  Detail Download All Thread: 0
+2022/10/28 14:08:56 main.go:139: debug  7100
+2022/10/28 14:08:56 spider.go:1090: info  Detail Download All Thread: 0
+2022/10/28 14:08:56 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:08:57 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:08:57 spider.go:181: debug  sd_sdsggzyjyw_zfcgzzl 山东省公共资源交易网 频率: 30 , 150
+2022/10/28 14:08:57 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:08:57 handler.go:142: info  总共加载脚本数: 1
+2022/10/28 14:09:27 spider.go:1090: info  Detail Download All Thread: 0
+2022/10/28 14:09:27 main.go:139: debug  7100
+2022/10/28 14:09:27 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:09:28 handler.go:404: info  节点 7100 脚本文件爬虫数 0
+2022/10/28 14:09:28 spider.go:181: debug  sd_sdsggzyjyw_zfcgzzl 山东省公共资源交易网 频率: 30 , 150
+2022/10/28 14:09:28 handler.go:136: info  高性能模式:LUA加载完成
+2022/10/28 14:09:28 handler.go:142: info  总共加载脚本数: 1
+2022/10/28 14:10:27 spider.go:1090: info  Detail Download All Thread: 0
+2022/10/28 14:11:27 spider.go:1090: info  Detail Download All Thread: 0
+2022/11/01 10:53:58 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 10:53:58 main.go:139: debug  7100
+2022/11/01 10:53:58 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:53:59 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:53:59 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 10:53:59 handler.go:142: info  总共加载脚本数: 2
+2022/11/01 10:53:59 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 10:53:59 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 10:54:00 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 10:54:08 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 10:54:13 upload.go:114: debug  上传文件成功! a_zgzbtbggfwpt_zhbhxrgs2 	 https://details.cebpubservice.com:7443/bulletin/getBulletin/8a9494757a859f1701823393ef287237 	 98033bff22afc8ac4e835399a4613a1dcb64f5bc0f10c1a0e29dd2b05ca30f18.pdf 	 金牛湖野生动物王国AAAA创建暨综合提升项目(消防系统升级)项目专项设计中标候选人公示.pdf 177 KB
+2022/11/01 10:55:00 main.go:139: debug  7100
+2022/11/01 10:55:00 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 10:55:00 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:55:01 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:55:02 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 10:55:02 handler.go:142: info  总共加载脚本数: 2
+2022/11/01 10:55:02 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 10:55:02 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 10:55:04 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 10:55:04 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 10:55:17 handler.go:1284: error  send to editor:  Post "http://127.0.0.1:6011/spider/infos": dial tcp 127.0.0.1:6011: connectex: No connection could be made because the target machine actively refused it.
+2022/11/01 10:55:55 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 10:55:55 main.go:139: debug  7100
+2022/11/01 10:55:55 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:55:55 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:55:56 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 10:55:56 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 10:55:56 handler.go:142: info  总共加载脚本数: 2
+2022/11/01 10:55:56 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 10:55:58 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 10:55:58 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 10:56:12 handler.go:1284: error  send to editor:  Post "http://127.0.0.1:6011/spider/infos": dial tcp 127.0.0.1:6011: connectex: No connection could be made because the target machine actively refused it.
+2022/11/01 10:56:34 main.go:139: debug  7100
+2022/11/01 10:56:34 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 10:56:34 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:56:34 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:56:35 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 10:56:35 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 10:56:35 handler.go:142: info  总共加载脚本数: 2
+2022/11/01 10:56:35 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 10:56:37 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 10:56:37 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 10:57:26 main.go:139: debug  7100
+2022/11/01 10:57:26 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 10:57:26 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:57:26 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:57:27 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 10:57:27 handler.go:142: info  总共加载脚本数: 2
+2022/11/01 10:57:27 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 10:57:27 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 10:57:27 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 10:57:29 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 10:57:43 handler.go:1284: error  send to editor:  Post "http://127.0.0.1:6011/spider/infos": dial tcp 127.0.0.1:6011: connectex: No connection could be made because the target machine actively refused it.
+2022/11/01 10:59:47 main.go:139: debug  7100
+2022/11/01 10:59:47 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 10:59:47 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:59:47 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 10:59:47 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 10:59:47 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 10:59:47 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 10:59:47 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 10:59:47 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 11:00:05 handler.go:1284: error  send to editor:  Post "http://127.0.0.1:6011/spider/infos": dial tcp 127.0.0.1:6011: connectex: No connection could be made because the target machine actively refused it.
+2022/11/01 11:00:17 handler.go:1316: info  更新心跳个数: 1
+2022/11/01 11:00:52 main.go:139: debug  7100
+2022/11/01 11:00:52 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:00:52 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:00:53 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:00:53 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:00:53 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:00:53 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:00:53 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 11:00:53 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 11:01:09 handler.go:1284: error  send to editor:  Post "http://127.0.0.1:6011/spider/infos": dial tcp 127.0.0.1:6011: connectex: No connection could be made because the target machine actively refused it.
+2022/11/01 11:02:27 main.go:139: debug  7100
+2022/11/01 11:02:27 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:02:27 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:02:27 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:02:27 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:02:27 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:02:27 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:02:27 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:02:27 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbjggs2 	count: 1 	thread num: 0
+2022/11/01 11:02:44 handler.go:1284: error  send to editor:  Post "http://127.0.0.1:6011/spider/infos": dial tcp 127.0.0.1:6011: connectex: No connection could be made because the target machine actively refused it.
+2022/11/01 11:07:20 main.go:139: debug  7100
+2022/11/01 11:07:20 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:07:20 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:07:20 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:07:20 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:07:20 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:07:20 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:07:20 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:07:20 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbjggs2 	count: 1 	thread num: 0
+2022/11/01 11:07:50 handler.go:1316: info  更新心跳个数: 1
+2022/11/01 11:09:05 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:09:05 main.go:139: debug  7400
+2022/11/01 11:09:05 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:09:05 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:09:05 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:09:05 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:09:05 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:09:05 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:09:05 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbjggs2 	count: 1 	thread num: 0
+2022/11/01 11:09:14 upload.go:114: debug  上传文件成功! a_zgzbtbggfwpt_zhbjggs2 	 https://details.cebpubservice.com:7443/bulletin/getBulletin/8a9494757a859f1701824264e8a02f26 	 93268ca9e2f6de8eb71d1d248ed94617ad2fec978feab45c8119dd601f0c5e44.pdf 	 金牛湖野生动物王国AAAA创建暨综合提升项目(消防系统升级)项目专项设计中标结果公示.pdf 137 KB
+2022/11/01 11:09:14 script.go:281: info  111111111111111 中国招标投标公共服务平台
+2022/11/01 11:09:14 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:09:35 handler.go:1316: info  更新心跳个数: 1
+2022/11/01 11:11:07 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:11:07 main.go:139: debug  7400
+2022/11/01 11:11:07 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:11:07 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:11:07 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:11:07 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:11:07 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:11:07 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:11:07 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbjggs2 	count: 1 	thread num: 0
+2022/11/01 11:11:13 upload.go:114: debug  上传文件成功! a_zgzbtbggfwpt_zhbjggs2 	 https://details.cebpubservice.com:7443/bulletin/getBulletin/8a9494757a859f1701824264e8a02f26 	 93268ca9e2f6de8eb71d1d248ed94617ad2fec978feab45c8119dd601f0c5e44.pdf 	 金牛湖野生动物王国AAAA创建暨综合提升项目(消防系统升级)项目专项设计中标结果公示.pdf 137 KB
+2022/11/01 11:11:13 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:11:37 main.go:139: debug  7400
+2022/11/01 11:11:37 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:11:37 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:11:37 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:11:37 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:11:37 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:11:37 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:11:37 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:11:53 main.go:139: debug  7400
+2022/11/01 11:11:53 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:11:53 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:11:53 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:11:53 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:11:53 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:11:53 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:11:53 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbjggs2 Stop: false
+2022/11/01 11:11:53 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbjggs2 	count: 1 	thread num: 0
+2022/11/01 11:12:23 handler.go:1316: info  更新心跳个数: 1
+2022/11/01 11:12:26 upload.go:114: debug  上传文件成功! a_zgzbtbggfwpt_zhbjggs2 	 https://details.cebpubservice.com:7443/bulletin/getBulletin/8a9494757a859f1701824264e8a02f26 	 93268ca9e2f6de8eb71d1d248ed94617ad2fec978feab45c8119dd601f0c5e44.pdf 	 金牛湖野生动物王国AAAA创建暨综合提升项目(消防系统升级)项目专项设计中标结果公示.pdf 137 KB
+2022/11/01 11:13:49 main.go:139: debug  7400
+2022/11/01 11:13:49 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:13:49 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:13:50 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:13:50 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:13:50 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 11:13:50 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:13:50 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:13:50 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 11:14:06 upload.go:114: debug  上传文件成功! a_zgzbtbggfwpt_zhbhxrgs2 	 https://details.cebpubservice.com:7443/bulletin/getBulletin/8a9494757a859f1701823393ef287237 	 98033bff22afc8ac4e835399a4613a1dcb64f5bc0f10c1a0e29dd2b05ca30f18.pdf 	 金牛湖野生动物王国AAAA创建暨综合提升项目(消防系统升级)项目专项设计中标候选人公示.pdf 177 KB
+2022/11/01 11:14:53 main.go:139: debug  7400
+2022/11/01 11:14:53 spider.go:1088: info  Detail Download All Thread: 0
+2022/11/01 11:14:53 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:14:53 handler.go:405: info  节点 7400 脚本文件爬虫数 0
+2022/11/01 11:14:53 handler.go:136: info  高性能模式:LUA加载完成
+2022/11/01 11:14:53 spider.go:764: info  +++++++++++++++++++Download Detail+++++++++++++++++++
+2022/11/01 11:14:53 handler.go:142: info  总共加载脚本数: 1
+2022/11/01 11:14:53 spider.go:777: info  Running Code: a_zgzbtbggfwpt_zhbhxrgs2 Stop: false
+2022/11/01 11:14:53 spider.go:843: info  Thread Info:	Code: a_zgzbtbggfwpt_zhbhxrgs2 	count: 1 	thread num: 0
+2022/11/01 11:15:04 upload.go:114: debug  上传文件成功! a_zgzbtbggfwpt_zhbhxrgs2 	 https://details.cebpubservice.com:7443/bulletin/getBulletin/8a9494757a859f1701823393ef287237 	 98033bff22afc8ac4e835399a4613a1dcb64f5bc0f10c1a0e29dd2b05ca30f18.pdf 	 金牛湖野生动物王国AAAA创建暨综合提升项目(消防系统升级)项目专项设计中标候选人公示.pdf 177 KB

+ 18 - 0
src/spider/script.go

@@ -278,6 +278,15 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 				ftype = path.Ext(name)[1:]
 			}
 		}
+		//特殊处理中国招标投标公共服务平台异常附件过滤
+		if *site == "中国招标投标公共服务平台" {
+			if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
+				size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
+			} else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
+				logger.Info("Error File Type:", bttype, url)
+				size, ftype, fid = "", "", ""
+			}
+		}
 		S.Push(lua.LString(url))
 		S.Push(lua.LString(name))
 		S.Push(lua.LString(size))
@@ -920,6 +929,15 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 				ftype = path.Ext(name)[1:]
 			}
 		}
+		//特殊处理中国招标投标公共服务平台异常附件过滤
+		if *site == "中国招标投标公共服务平台" {
+			if fid != "" && strings.Contains(fid, ErrFid) { //限制访问的附件
+				size, ftype, fid = "", "", "" //信息置空,AnalysisProjectInfo方法将判断数据下载失败重新下载
+			} else if bttype := qu.GetFileType(ret); bttype != "pdf" { //由字节流解析的附件类型不是pdf
+				logger.Info("Error File Type:", bttype, url)
+				size, ftype, fid = "", "", ""
+			}
+		}
 		S.Push(lua.LString(url))
 		S.Push(lua.LString(name))
 		S.Push(lua.LString(size))

+ 1 - 0
src/spider/util.go

@@ -7,6 +7,7 @@ import (
 // var SpaceReg = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
 // var HtmlReg = regexp.MustCompile("<[^>]*?>")
 // var HanReg = regexp.MustCompile("[\u4e00-\u9fa5]+")
+var ErrFid = "a6879f0a8570256aa21fb978e6dabb50429a30dfacff697cf0b898abbc5c262e" // fid of an attachment whose access is restricted; LoadScript matches it with strings.Contains
 
 //初始化延迟采集站点集合
 func InitOther() {