|
@@ -1,1082 +0,0 @@
|
|
|
-Thread-5|2022-01-28 17:06:38,101|parser_control.py|run|line:56|DEBUG| parser 等待任务...
|
|
|
-Details|2022-01-28 17:06:38,102|scheduler.py|<lambda>|line:112|INFO|
|
|
|
-********** feapder begin **********
|
|
|
-Details|2022-01-28 17:06:38,103|scheduler.py|__add_task|line:215|INFO| 检查到有待做任务 8 条,不重下发新任务,将接着上回异常终止处继续抓取
|
|
|
-Thread-4|2022-01-28 17:06:47,221|collector.py|__input_data|line:108|INFO| 重置丢失任务完毕,共8条
|
|
|
-Thread-5|2022-01-28 17:06:48,223|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c/
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@id="fjxz"]/p[@class="mar-L30 fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Editor"]//p/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp'], 'file_type': 'doxc', 'url_key': 'http', 'host': 'http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,270|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,270|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error HTTPConnectionPool(host='cz.fjzfcg.gov.cn', port=80): Max retries exceeded with url: /3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c/ (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000259309BDAF0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95S\x05\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03_id\x94'
|
|
|
- b'\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93\x94)\x81\x94'
|
|
|
- b'C\x0ca\xf3a\xae\x95G\xb8\xb7\xd1\r\xc04\x94b\x8c\x05parse\x94'
|
|
|
- b'\x8c\x0fself.detail_get\x94\x8c\x04item\x94}\x94(\x8c\x05ti'
|
|
|
- b'tle\x94\x8c]\xe4\xb9\xa1\xe9\x95\x87\xe6\x95\xac\xe8'
|
|
|
- b'\x80\x81\xe9\x99\xa2\xe5\xba\x8a\xe4\xbd\x8d\xe4'
|
|
|
- b'\xbd\xbf\xe7\x94\xa8\xe7\x8e\x87\xe8\xbe\xbe\xe6'
|
|
|
- b'\xa0\x87\xe5\x8e\xbf\xef\xbc\x88\xe5\xb8\x82\xe3'
|
|
|
- b'\x80\x81\xe5\x8c\xba\xef\xbc\x89\xe7\xac\xac\xe4'
|
|
|
- b'\xb8\x89\xe6\x96\xb9\xe8\xaf\x84\xe4\xbc\xb0\xe9'
|
|
|
- b'\x87\x87\xe8\xb4\xad\xe9\xa1\xb9\xe7\x9b\xae\xe9'
|
|
|
- b'\x87\x87\xe8\xb4\xad\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublis'
|
|
|
- b'htime\x94\x8c\x132019-07-17 16:14:02\x94\x8c\nspidercode'
|
|
|
- b'\x94\x8c\x0efj_fjsmzt_tzgg\x94\x8c\x04site\x94\x8c\x12\xe7'
|
|
|
- b'\xa6\x8f\xe5\xbb\xba\xe7\x9c\x81\xe6\xb0\x91\xe6'
|
|
|
- b'\x94\xbf\xe5\x8e\x85\x94\x8c\x07channel\x94\x8c\x0c\xe9\x80'
|
|
|
- b'\x9a\xe7\x9f\xa5\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04are'
|
|
|
- b'a\x94\x8c\x06\xe7\xa6\x8f\xe5\xbb\xba\x94\x8c\x04cit'
|
|
|
- b'y\x94\x8c\x00\x94\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8c'
|
|
|
- b'fhttp://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672'
|
|
|
- b'964c633ce/7c36067afe5b449ea66bae09d11cf45c/\x94\x8c\x0bpublis'
|
|
|
- b'hdept\x94h\x18\x8c\tiscompete\x94\x88\x8c\x04type\x94'
|
|
|
- b'h\x18\x8c\x01T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishti'
|
|
|
- b'me\x94h\x18\x8c\ncomeintime\x94h\x18\x8c\x08sendflag\x94\x8c'
|
|
|
- b'\x05false\x94\x8c\x02_d\x94\x8c\ncomeintime\x94\x8c\x0bconte'
|
|
|
- b'nthtml\x94h\x18\x8c\x06detail\x94h\x18\x8c\x0bprojectinfo\x94Nu'
|
|
|
- b'\x8c\x0bparser_name\x94\x8c\x07details\x94\x8c\x04date\x94\x8c'
|
|
|
- b'\x132022-01-28 11:23:26\x94\x8c\x0bdeal_detail\x94]'
|
|
|
- b'\x94(\x8c\x17//div[@class="xl_main"]\x94\x8c\x19//div[@class="'
|
|
|
- b'big-box-B"]\x94e\x8c\x0bcreate_time\x94N\x8c\tparse_url\x94'
|
|
|
- b'\x8cfhttp://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a63367'
|
|
|
- b'2964c633ce/7c36067afe5b449ea66bae09d11cf45c/\x94\x8c\x0ereque'
|
|
|
- b'st_params\x94}\x94\x8c\x06failed\x94K\x04\x8c\x06author\x94'
|
|
|
- b'\x8c\x07details\x94\x8c\x05ex_js\x94h\x18\x8c\tex_python\x94'
|
|
|
- b'N\x8c\x03pri\x94K\x01\x8c\x07proxies\x94\x89\x8c\x05files\x94'
|
|
|
- b'}\x94(\x8c\nlist_xpath\x94\x8ce//div[@id="fjxz"]/p[@class="ma'
|
|
|
- b'r-L30 fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Ed'
|
|
|
- b'itor"]//p/a\x94\x8c\turl_xpath\x94\x8c\x07./@href\x94\x8c\nname'
|
|
|
- b'_xpath\x94\x8c\x08./text()\x94\x8c\nfiles_type\x94]'
|
|
|
- b'\x94(\x8c\x03zip\x94\x8c\x04doxc\x94\x8c\x03ftp\x94e\x8c\tfile'
|
|
|
- b'_type\x94\x8c\x04doxc\x94\x8c\x07url_key\x94\x8c\x04htt'
|
|
|
- b'p\x94\x8c\x04host\x94\x8cehttp://cz.fjzfcg.gov.cn/3500/notice/1'
|
|
|
- b'c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11c'
|
|
|
- b'f45c\x94u\x8c\x05error\x94N\x8c\x04code\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95;\x00\x00\x00\x00\x00\x00\x00]\x94(\x8c\x17//di'
|
|
|
- b'v[@class="xl_main"]\x94\x8c\x19//div[@class="big-box-B"]\x94'
|
|
|
- b'e.',
|
|
|
- 'error_msg': 'requests.exceptions.ConnectionError: '
|
|
|
- "HTTPConnectionPool(host='cz.fjzfcg.gov.cn', port=80): Max "
|
|
|
- 'retries exceeded with url: '
|
|
|
- '/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c/ '
|
|
|
- '(Caused by '
|
|
|
- "NewConnectionError('<urllib3.connection.HTTPConnection object "
|
|
|
- 'at 0x0000016835E6B850>: Failed to establish a new connection: '
|
|
|
- "[Errno 11001] getaddrinfo failed'))",
|
|
|
- 'files': {'file_type': 'doxc',
|
|
|
- 'files_type': ['zip', 'doxc', 'ftp'],
|
|
|
- 'host': 'http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c',
|
|
|
- 'list_xpath': '//div[@id="fjxz"]/p[@class="mar-L30 '
|
|
|
- 'fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Editor"]//p/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95$\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8c]\xe4\xb9\xa1\xe9\x95\x87\xe6\x95\xac\xe8\x80\x81'
|
|
|
- b'\xe9\x99\xa2\xe5\xba\x8a\xe4\xbd\x8d\xe4\xbd\xbf\xe7\x94\xa8\xe7'
|
|
|
- b'\x8e\x87\xe8\xbe\xbe\xe6\xa0\x87\xe5\x8e\xbf\xef\xbc\x88\xe5\xb8'
|
|
|
- b'\x82\xe3\x80\x81\xe5\x8c\xba\xef\xbc\x89\xe7\xac\xac\xe4\xb8\x89'
|
|
|
- b'\xe6\x96\xb9\xe8\xaf\x84\xe4\xbc\xb0\xe9\x87\x87\xe8\xb4\xad\xe9'
|
|
|
- b'\xa1\xb9\xe7\x9b\xae\xe9\x87\x87\xe8\xb4\xad\xe5\x85\xac\xe5\x91'
|
|
|
- b'\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132019-07-17 16:14:02\x94\x8c\n'
|
|
|
- b'spidercode\x94\x8c\x0efj_fjsmzt_tzgg\x94\x8c\x04site\x94\x8c'
|
|
|
- b'\x12\xe7\xa6\x8f\xe5\xbb\xba\xe7\x9c\x81\xe6\xb0\x91\xe6\x94\xbf'
|
|
|
- b'\xe5\x8e\x85\x94\x8c\x07channel\x94\x8c\x0c\xe9\x80\x9a\xe7'
|
|
|
- b'\x9f\xa5\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04area\x94\x8c\x06\xe7\xa6'
|
|
|
- b'\x8f\xe5\xbb\xba\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompetehref'
|
|
|
- b'\x94N\x8c\x04href\x94\x8cfhttp://cz.fjzfcg.gov.cn/3500/notice/1c4f9'
|
|
|
- b'44709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c'
|
|
|
- b'/\x94\x8c\x0bpublishdept\x94h\x0e\x8c\tiscompete\x94\x88\x8c\x04typ'
|
|
|
- b'e\x94h\x0e\x8c\x01T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishtime'
|
|
|
- b'\x94h\x0e\x8c\ncomeintime\x94h\x0e\x8c\x08sendflag\x94\x8c\x05false'
|
|
|
- b'\x94\x8c\x02_d\x94\x8c\ncomeintime\x94\x8c\x0bcontenthtml'
|
|
|
- b'\x94h\x0e\x8c\x06detail\x94h\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c/'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,294|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8/
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@id="fjxz"]/p[@class="mar-L30 fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Editor"]//p/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp'], 'file_type': 'doxc', 'url_key': 'http', 'host': 'http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,333|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,334|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error HTTPConnectionPool(host='cz.fjzfcg.gov.cn', port=80): Max retries exceeded with url: /3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8/ (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000259309ECA30>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x955\x05\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03_id\x94'
|
|
|
- b'\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93\x94)\x81\x94'
|
|
|
- b'C\x0ca\xf3a\xae\x95G\xb8\xb7\xd1\r\xc0B\x94b\x8c\x05parse\x94'
|
|
|
- b'\x8c\x0fself.detail_get\x94\x8c\x04item\x94}\x94(\x8c\x05ti'
|
|
|
- b'tle\x94\x8c?\xe7\xa6\x8f\xe5\xbb\xba\xe7\x9c\x81\xe5'
|
|
|
- b'\x85\xbb\xe8\x80\x81\xe6\x9c\x8d\xe5\x8a\xa1\xe7'
|
|
|
- b'\xbb\xbc\xe5\x90\x88\xe4\xbf\xa1\xe6\x81\xaf\xe5'
|
|
|
- b'\xb9\xb3\xe5\x8f\xb0\xe9\x87\x87\xe8\xb4\xad\xe9'
|
|
|
- b'\xa1\xb9\xe7\x9b\xae\xe6\x8b\x9b\xe6\xa0\x87\xe5'
|
|
|
- b'\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132019-0'
|
|
|
- b'5-22 16:01:08\x94\x8c\nspidercode\x94\x8c\x0efj_fjsmzt_tzgg\x94'
|
|
|
- b'\x8c\x04site\x94\x8c\x12\xe7\xa6\x8f\xe5\xbb\xba\xe7'
|
|
|
- b'\x9c\x81\xe6\xb0\x91\xe6\x94\xbf\xe5\x8e\x85\x94\x8c\x07channe'
|
|
|
- b'l\x94\x8c\x0c\xe9\x80\x9a\xe7\x9f\xa5\xe5\x85\xac\xe5\x91\x8a'
|
|
|
- b'\x94\x8c\x04area\x94\x8c\x06\xe7\xa6\x8f\xe5\xbb\xba'
|
|
|
- b'\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompetehref'
|
|
|
- b'\x94N\x8c\x04href\x94\x8cfhttp://cz.fjzfcg.gov.cn/3500/notice/d'
|
|
|
- b'2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da'
|
|
|
- b'31a8/\x94\x8c\x0bpublishdept\x94h\x18\x8c\tiscompet'
|
|
|
- b'e\x94\x88\x8c\x04type\x94h\x18\x8c\x01T\x94\x8c\x07biddin'
|
|
|
- b'g\x94\x8c\x10l_np_publishtime\x94h\x18\x8c\ncomeintime\x94'
|
|
|
- b'h\x18\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\n'
|
|
|
- b'comeintime\x94\x8c\x0bcontenthtml\x94h\x18\x8c\x06detail\x94'
|
|
|
- b'h\x18\x8c\x0bprojectinfo\x94Nu\x8c\x0bparser_name\x94\x8c\x07de'
|
|
|
- b'tails\x94\x8c\x04date\x94\x8c\x132022-01-28 11:23:26\x94\x8c'
|
|
|
- b'\x0bdeal_detail\x94]\x94(\x8c\x17//div[@class="xl_main"'
|
|
|
- b']\x94\x8c\x19//div[@class="big-box-B"]\x94e\x8c\x0bcreate_time'
|
|
|
- b'\x94N\x8c\tparse_url\x94\x8cfhttp://cz.fjzfcg.gov.cn/3500/not'
|
|
|
- b'ice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877'
|
|
|
- b'162da31a8/\x94\x8c\x0erequest_params\x94}\x94\x8c\x06fail'
|
|
|
- b'ed\x94K\x04\x8c\x06author\x94\x8c\x07details\x94\x8c\x05ex'
|
|
|
- b'_js\x94h\x18\x8c\tex_python\x94N\x8c\x03pri\x94K\x01\x8c\x07pro'
|
|
|
- b'xies\x94\x89\x8c\x05files\x94}\x94(\x8c\nlist_xpath\x94\x8ce'
|
|
|
- b'//div[@id="fjxz"]/p[@class="mar-L30 fjwz"]/a|//div[@id="resu'
|
|
|
- b'lt"]//u/a|//div[@class="TRS_Editor"]//p/a\x94\x8c\turl_xpat'
|
|
|
- b'h\x94\x8c\x07./@href\x94\x8c\nname_xpath\x94\x8c\x08./tex'
|
|
|
- b't()\x94\x8c\nfiles_type\x94]\x94(\x8c\x03zip\x94\x8c\x04doxc'
|
|
|
- b'\x94\x8c\x03ftp\x94e\x8c\tfile_type\x94\x8c\x04doxc\x94\x8c'
|
|
|
- b'\x07url_key\x94\x8c\x04http\x94\x8c\x04host\x94\x8cehttp://cz.f'
|
|
|
- b'jzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91'
|
|
|
- b'255ff3752c4bc48770877162da31a8\x94u\x8c\x05error\x94N\x8c\x04c'
|
|
|
- b'ode\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95;\x00\x00\x00\x00\x00\x00\x00]\x94(\x8c\x17//di'
|
|
|
- b'v[@class="xl_main"]\x94\x8c\x19//div[@class="big-box-B"]\x94'
|
|
|
- b'e.',
|
|
|
- 'error_msg': 'requests.exceptions.ConnectionError: '
|
|
|
- "HTTPConnectionPool(host='cz.fjzfcg.gov.cn', port=80): Max "
|
|
|
- 'retries exceeded with url: '
|
|
|
- '/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8/ '
|
|
|
- '(Caused by '
|
|
|
- "NewConnectionError('<urllib3.connection.HTTPConnection object "
|
|
|
- 'at 0x0000016835E877F0>: Failed to establish a new connection: '
|
|
|
- "[Errno 11001] getaddrinfo failed'))",
|
|
|
- 'files': {'file_type': 'doxc',
|
|
|
- 'files_type': ['zip', 'doxc', 'ftp'],
|
|
|
- 'host': 'http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8',
|
|
|
- 'list_xpath': '//div[@id="fjxz"]/p[@class="mar-L30 '
|
|
|
- 'fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Editor"]//p/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95\x06\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8c?\xe7\xa6\x8f\xe5\xbb\xba\xe7\x9c\x81\xe5\x85\xbb'
|
|
|
- b'\xe8\x80\x81\xe6\x9c\x8d\xe5\x8a\xa1\xe7\xbb\xbc\xe5\x90\x88\xe4'
|
|
|
- b'\xbf\xa1\xe6\x81\xaf\xe5\xb9\xb3\xe5\x8f\xb0\xe9\x87\x87\xe8\xb4'
|
|
|
- b'\xad\xe9\xa1\xb9\xe7\x9b\xae\xe6\x8b\x9b\xe6\xa0\x87\xe5\x85\xac'
|
|
|
- b'\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132019-05-22 16:01'
|
|
|
- b':08\x94\x8c\nspidercode\x94\x8c\x0efj_fjsmzt_tzgg\x94\x8c\x04site'
|
|
|
- b'\x94\x8c\x12\xe7\xa6\x8f\xe5\xbb\xba\xe7\x9c\x81\xe6\xb0\x91\xe6'
|
|
|
- b'\x94\xbf\xe5\x8e\x85\x94\x8c\x07channel\x94\x8c\x0c\xe9\x80'
|
|
|
- b'\x9a\xe7\x9f\xa5\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04area\x94\x8c\x06'
|
|
|
- b'\xe7\xa6\x8f\xe5\xbb\xba\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bc'
|
|
|
- b'ompetehref\x94N\x8c\x04href\x94\x8cfhttp://cz.fjzfcg.gov.cn/3500/no'
|
|
|
- b'tice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31'
|
|
|
- b'a8/\x94\x8c\x0bpublishdept\x94h\x0e\x8c\tiscompete\x94\x88\x8c\x04t'
|
|
|
- b'ype\x94h\x0e\x8c\x01T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishti'
|
|
|
- b'me\x94h\x0e\x8c\ncomeintime\x94h\x0e\x8c\x08sendflag\x94\x8c\x05fal'
|
|
|
- b'se\x94\x8c\x02_d\x94\x8c\ncomeintime\x94\x8c\x0bcontenthtml\x94h'
|
|
|
- b'\x0e\x8c\x06detail\x94h\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8/'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,380|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7e3a9c7e946b44017e9f51af707454.html
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@class="info-article in active"]//div/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp', 'pdf'], 'url_key': 'http'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,394|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,395|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error dictionary update sequence element #0 has length 1; 2 is required
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95\x84\x04\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03'
|
|
|
- b'_id\x94\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93'
|
|
|
- b'\x94)\x81\x94C\x0ca\xf3\xa1a\x81\xdbV\xa5\x9f\xf9hq\x94b'
|
|
|
- b'\x8c\x05parse\x94\x8c\x0fself.detail_get\x94\x8c\x04item'
|
|
|
- b'\x94}\x94(\x8c\x05title\x94\x8cQ\xe5\xb9\xbf\xe4\xb8\x9c'
|
|
|
- b'\xe8\xbd\xbb\xe5\xb7\xa5\xe8\x81\x8c\xe4\xb8\x9a'
|
|
|
- b'\xe6\x8a\x80\xe6\x9c\xaf\xe5\xad\xa6\xe9\x99\xa2'
|
|
|
- b'\xe6\x96\xb0\xe8\x83\xbd\xe6\xba\x90\xe6\xb1\xbd'
|
|
|
- b'\xe8\xbd\xa6\xe6\xa3\x80\xe6\xb5\x8b\xe5\xae\x9e'
|
|
|
- b'\xe8\xae\xad\xe8\xae\xbe\xe5\xa4\x87\xe8\xb4\xad'
|
|
|
- b'\xe7\xbd\xae\xe6\x8b\x9b\xe6\xa0\x87\xe5\x85\xac'
|
|
|
- b'\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132022-01-28 15:09'
|
|
|
- b':43\x94\x8c\nspidercode\x94\x8c\x13gd_gdszfcgwxwz_cggg\x94\x8c'
|
|
|
- b'\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c'
|
|
|
- b'\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4'
|
|
|
- b'\xad\xe7\xbd\x91\x94\x8c\x07channel\x94\x8c\x0c\xe9\x87\x87'
|
|
|
- b'\xe8\xb4\xad\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04area'
|
|
|
- b'\x94\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04city'
|
|
|
- b'\x94\x8c\x00\x94\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8c`'
|
|
|
- b'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8'
|
|
|
- b'a7e3a9c7e946b44017e9f51af707454.html\x94\x8c\x0bpublishde'
|
|
|
- b'pt\x94h\x18\x8c\tiscompete\x94\x88\x8c\x04type\x94h\x18\x8c'
|
|
|
- b'\x01T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishtime\x94'
|
|
|
- b'h\x18\x8c\ncomeintime\x94h\x18\x8c\x08sendflag\x94\x8c\x05fa'
|
|
|
- b'lse\x94\x8c\x02_d\x94\x8c\ncomeintime\x94\x8c\x0bcontenth'
|
|
|
- b'tml\x94h\x18\x8c\x06detail\x94h\x18\x8c\x0bprojectinfo\x94N'
|
|
|
- b'u\x8c\x0bparser_name\x94\x8c\x07details\x94\x8c\x04date\x94'
|
|
|
- b'\x8c\x132022-01-28 15:55:12\x94\x8c\x0bdeal_detail\x94'
|
|
|
- b']\x94\x8c&//div[@class="info-article in active"]\x94a\x8c\x0bcr'
|
|
|
- b'eate_time\x94N\x8c\tparse_url\x94\x8c`https://gdgpo.czt.gd.go'
|
|
|
- b'v.cn/freecms/site/gd/ggxx/info/2022/8a7e3a9c7e946b44017e9f51'
|
|
|
- b'af707454.html\x94\x8c\x0erequest_params\x94}\x94\x8c\x06faile'
|
|
|
- b'd\x94K\x02\x8c\x06author\x94\x8c\x07details\x94\x8c\x05ex_'
|
|
|
- b'js\x94h\x18\x8c\tex_python\x94N\x8c\x03pri\x94K\x01\x8c\x07prox'
|
|
|
- b'ies\x94\x89\x8c\x05files\x94}\x94(\x8c\nlist_xpath\x94\x8c-/'
|
|
|
- b'/div[@class="info-article in active"]//div/a\x94\x8c\turl_xpath'
|
|
|
- b'\x94\x8c\x07./@href\x94\x8c\nname_xpath\x94\x8c\x08./text'
|
|
|
- b'()\x94\x8c\nfiles_type\x94]\x94(\x8c\x03zip\x94\x8c\x04doxc\x94'
|
|
|
- b'\x8c\x03ftp\x94\x8c\x03pdf\x94e\x8c\x07url_key\x94\x8c\x04htt'
|
|
|
- b'p\x94u\x8c\x05error\x94N\x8c\x04code\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95-\x00\x00\x00\x00\x00\x00\x00]\x94\x8c&//div[@cl'
|
|
|
- b'ass="info-article in active"]\x94a.',
|
|
|
- 'error_msg': 'ValueError: dictionary update sequence element #0 has length 1; '
|
|
|
- '2 is required',
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95\x1d\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8cQ\xe5\xb9\xbf\xe4\xb8\x9c\xe8\xbd\xbb\xe5\xb7\xa5'
|
|
|
- b'\xe8\x81\x8c\xe4\xb8\x9a\xe6\x8a\x80\xe6\x9c\xaf\xe5\xad\xa6\xe9'
|
|
|
- b'\x99\xa2\xe6\x96\xb0\xe8\x83\xbd\xe6\xba\x90\xe6\xb1\xbd\xe8\xbd'
|
|
|
- b'\xa6\xe6\xa3\x80\xe6\xb5\x8b\xe5\xae\x9e\xe8\xae\xad\xe8\xae\xbe'
|
|
|
- b'\xe5\xa4\x87\xe8\xb4\xad\xe7\xbd\xae\xe6\x8b\x9b\xe6\xa0\x87\xe5'
|
|
|
- b'\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132022-01-28'
|
|
|
- b' 15:09:43\x94\x8c\nspidercode\x94\x8c\x13gd_gdszfcgwxwz_cggg'
|
|
|
- b'\x94\x8c\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe6'
|
|
|
- b'\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4\xad\xe7\xbd\x91\x94\x8c'
|
|
|
- b'\x07channel\x94\x8c\x0c\xe9\x87\x87\xe8\xb4\xad\xe5\x85\xac'
|
|
|
- b'\xe5\x91\x8a\x94\x8c\x04area\x94\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94'
|
|
|
- b'\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompetehref\x94N\x8c\x04href\x94'
|
|
|
- b'\x8c`https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7'
|
|
|
- b'e3a9c7e946b44017e9f51af707454.html\x94\x8c\x0bpublishdept'
|
|
|
- b'\x94h\x0e\x8c\tiscompete\x94\x88\x8c\x04type\x94h\x0e\x8c\x01T'
|
|
|
- b'\x94\x8c\x07bidding\x94\x8c\x10l_np_publishtime\x94h\x0e\x8c\ncomein'
|
|
|
- b'time\x94h\x0e\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c'
|
|
|
- b'\ncomeintime\x94\x8c\x0bcontenthtml\x94h\x0e\x8c\x06detail'
|
|
|
- b'\x94h\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7e3a9c7e946b44017e9f51af707454.html'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,446|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7edc7d7e9e62ca017e9f00529a7d80.html
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@class="info-article in active"]//div/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp', 'pdf'], 'url_key': 'http'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,458|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,459|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error dictionary update sequence element #0 has length 1; 2 is required
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95\x92\x04\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03'
|
|
|
- b'_id\x94\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93'
|
|
|
- b'\x94)\x81\x94C\x0ca\xf3\xa1c\x81\xdbV\xa5\x9f\xf9hv\x94b'
|
|
|
- b'\x8c\x05parse\x94\x8c\x0fself.detail_get\x94\x8c\x04item'
|
|
|
- b'\x94}\x94(\x8c\x05title\x94\x8cW\xe5\xb9\xbf\xe4\xb8\x9c'
|
|
|
- b'\xe7\x9c\x81\xe8\x8b\xb1\xe5\xbe\xb7\xe7\x9b\x91'
|
|
|
- b'\xe7\x8b\xb1\xe8\x81\x8c\xe5\xb7\xa5\xe9\xa5\xad'
|
|
|
- b'\xe5\xa0\x82\xe8\xbf\x90\xe8\x90\xa5\xe6\x9c\x8d'
|
|
|
- b'\xe5\x8a\xa1\xe9\x87\x87\xe8\xb4\xad\xe9\xa1\xb9\xe7\x9b\xae('
|
|
|
- b'GZSW21201FG4176A)\xe7\xbb\x93\xe6\x9e\x9c\xe5\x85\xac\xe5\x91'
|
|
|
- b'\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132022-01-28 14:51:5'
|
|
|
- b'6\x94\x8c\nspidercode\x94\x8c\x15gd_gdszfcgwxwz_zbcjgg\x94\x8c'
|
|
|
- b'\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c'
|
|
|
- b'\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4'
|
|
|
- b'\xad\xe7\xbd\x91\x94\x8c\x07channel\x94\x8c\x12\xe4\xb8\xad'
|
|
|
- b'\xe6\xa0\x87\xe6\x88\x90\xe4\xba\xa4\xe5\x85\xac'
|
|
|
- b'\xe5\x91\x8a\x94\x8c\x04area\x94\x8c\x06\xe5\xb9\xbf'
|
|
|
- b'\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompeteh'
|
|
|
- b'ref\x94N\x8c\x04href\x94\x8c`https://gdgpo.czt.gd.gov.cn/freecm'
|
|
|
- b's/site/gd/ggxx/info/2022/8a7edc7d7e9e62ca017e9f00529a7d80.ht'
|
|
|
- b'ml\x94\x8c\x0bpublishdept\x94h\x18\x8c\tiscompete\x94\x88'
|
|
|
- b'\x8c\x04type\x94h\x18\x8c\x01T\x94\x8c\x07bidding\x94\x8c'
|
|
|
- b'\x10l_np_publishtime\x94h\x18\x8c\ncomeintime\x94h\x18\x8c'
|
|
|
- b'\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\ncomeint'
|
|
|
- b'ime\x94\x8c\x0bcontenthtml\x94h\x18\x8c\x06detail\x94h\x18\x8c'
|
|
|
- b'\x0bprojectinfo\x94Nu\x8c\x0bparser_name\x94\x8c\x07detai'
|
|
|
- b'ls\x94\x8c\x04date\x94\x8c\x132022-01-28 15:55:14\x94\x8c\x0bde'
|
|
|
- b'al_detail\x94]\x94\x8c&//div[@class="info-article in active"]'
|
|
|
- b'\x94a\x8c\x0bcreate_time\x94N\x8c\tparse_url\x94\x8c`https://g'
|
|
|
- b'dgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7edc7d7e'
|
|
|
- b'9e62ca017e9f00529a7d80.html\x94\x8c\x0erequest_params'
|
|
|
- b'\x94}\x94\x8c\x06failed\x94K\x02\x8c\x06author\x94\x8c\x07det'
|
|
|
- b'ails\x94\x8c\x05ex_js\x94h\x18\x8c\tex_python\x94N\x8c\x03pr'
|
|
|
- b'i\x94K\x01\x8c\x07proxies\x94\x89\x8c\x05files\x94}\x94(\x8c\n'
|
|
|
- b'list_xpath\x94\x8c-//div[@class="info-article in active"]//div'
|
|
|
- b'/a\x94\x8c\turl_xpath\x94\x8c\x07./@href\x94\x8c\nname_xpat'
|
|
|
- b'h\x94\x8c\x08./text()\x94\x8c\nfiles_type\x94]\x94(\x8c\x03z'
|
|
|
- b'ip\x94\x8c\x04doxc\x94\x8c\x03ftp\x94\x8c\x03pdf\x94e\x8c'
|
|
|
- b'\x07url_key\x94\x8c\x04http\x94u\x8c\x05error\x94N\x8c\x04code'
|
|
|
- b'\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95-\x00\x00\x00\x00\x00\x00\x00]\x94\x8c&//div[@cl'
|
|
|
- b'ass="info-article in active"]\x94a.',
|
|
|
- 'error_msg': 'ValueError: dictionary update sequence element #0 has length 1; '
|
|
|
- '2 is required',
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95+\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8cW\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe8\x8b\xb1'
|
|
|
- b'\xe5\xbe\xb7\xe7\x9b\x91\xe7\x8b\xb1\xe8\x81\x8c\xe5\xb7\xa5\xe9'
|
|
|
- b'\xa5\xad\xe5\xa0\x82\xe8\xbf\x90\xe8\x90\xa5\xe6\x9c\x8d\xe5\x8a'
|
|
|
- b'\xa1\xe9\x87\x87\xe8\xb4\xad\xe9\xa1\xb9\xe7\x9b\xae(GZSW21201FG417'
|
|
|
- b'6A)\xe7\xbb\x93\xe6\x9e\x9c\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bpu'
|
|
|
- b'blishtime\x94\x8c\x132022-01-28 14:51:56\x94\x8c\nspidercode'
|
|
|
- b'\x94\x8c\x15gd_gdszfcgwxwz_zbcjgg\x94\x8c\x04site\x94'
|
|
|
- b'\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe6\x94\xbf\xe5\xba'
|
|
|
- b'\x9c\xe9\x87\x87\xe8\xb4\xad\xe7\xbd\x91\x94\x8c\x07channel'
|
|
|
- b'\x94\x8c\x12\xe4\xb8\xad\xe6\xa0\x87\xe6\x88\x90\xe4\xba\xa4\xe5'
|
|
|
- b'\x85\xac\xe5\x91\x8a\x94\x8c\x04area\x94\x8c\x06\xe5\xb9\xbf\xe4\xb8'
|
|
|
- b'\x9c\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompetehref\x94N\x8c'
|
|
|
- b'\x04href\x94\x8c`https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx'
|
|
|
- b'/info/2022/8a7edc7d7e9e62ca017e9f00529a7d80.html\x94\x8c\x0bpubli'
|
|
|
- b'shdept\x94h\x0e\x8c\tiscompete\x94\x88\x8c\x04type\x94h\x0e\x8c'
|
|
|
- b'\x01T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishtime\x94h\x0e\x8c\n'
|
|
|
- b'comeintime\x94h\x0e\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d'
|
|
|
- b'\x94\x8c\ncomeintime\x94\x8c\x0bcontenthtml\x94h\x0e\x8c\x06deta'
|
|
|
- b'il\x94h\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7edc7d7e9e62ca017e9f00529a7d80.html'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,484|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7ee7dd7e9e4962017e9f56e40058a5.html
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@class="info-article in active"]//div/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp', 'pdf'], 'url_key': 'http'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,503|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,504|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error dictionary update sequence element #0 has length 1; 2 is required
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95\x8f\x04\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03'
|
|
|
- b'_id\x94\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93'
|
|
|
- b'\x94)\x81\x94C\x0ca\xf3\xa1c\x81\xdbV\xa5\x9f\xf9hw\x94b'
|
|
|
- b'\x8c\x05parse\x94\x8c\x0fself.detail_get\x94\x8c\x04item'
|
|
|
- b'\x94}\x94(\x8c\x05title\x94\x8cT\xe4\xbd\x9b\xe5\xb1\xb1'
|
|
|
- b'\xe5\xb8\x82\xe9\xa1\xba\xe5\xbe\xb7\xe5\x8c\xba'
|
|
|
- b'\xe4\xba\xba\xe6\xb0\x91\xe6\xb3\x95\xe9\x99\xa2'
|
|
|
- b'\xe4\xbf\xa1\xe6\x81\xaf\xe5\x8c\x96\xe8\xbd\xaf'
|
|
|
- b'\xe7\xa1\xac\xe4\xbb\xb6\xe8\xae\xbe\xe5\xa4\x87'
|
|
|
- b'\xe7\xbb\xb4\xe6\x8a\xa4\xe6\x9c\x8d\xe5\x8a\xa1'
|
|
|
- b'\xe9\xa1\xb9\xe7\x9b\xae\xe7\xbb\x93\xe6\x9e\x9c'
|
|
|
- b'\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132'
|
|
|
- b'022-01-28 14:37:36\x94\x8c\nspidercode\x94\x8c\x15gd_gdszfcg'
|
|
|
- b'wxwz_zbcjgg\x94\x8c\x04site\x94\x8c\x18\xe5\xb9\xbf'
|
|
|
- b'\xe4\xb8\x9c\xe7\x9c\x81\xe6\x94\xbf\xe5\xba\x9c'
|
|
|
- b'\xe9\x87\x87\xe8\xb4\xad\xe7\xbd\x91\x94\x8c\x07channel\x94'
|
|
|
- b'\x8c\x12\xe4\xb8\xad\xe6\xa0\x87\xe6\x88\x90\xe4'
|
|
|
- b'\xba\xa4\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04area\x94'
|
|
|
- b'\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04city\x94'
|
|
|
- b'\x8c\x00\x94\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8c`https'
|
|
|
- b'://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7ee7'
|
|
|
- b'dd7e9e4962017e9f56e40058a5.html\x94\x8c\x0bpublishdept\x94h\x18'
|
|
|
- b'\x8c\tiscompete\x94\x88\x8c\x04type\x94h\x18\x8c\x01'
|
|
|
- b'T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishtime\x94h'
|
|
|
- b'\x18\x8c\ncomeintime\x94h\x18\x8c\x08sendflag\x94\x8c\x05fal'
|
|
|
- b'se\x94\x8c\x02_d\x94\x8c\ncomeintime\x94\x8c\x0bcontentht'
|
|
|
- b'ml\x94h\x18\x8c\x06detail\x94h\x18\x8c\x0bprojectinfo\x94Nu'
|
|
|
- b'\x8c\x0bparser_name\x94\x8c\x07details\x94\x8c\x04date\x94\x8c'
|
|
|
- b'\x132022-01-28 15:55:14\x94\x8c\x0bdeal_detail\x94]\x94\x8c&/'
|
|
|
- b'/div[@class="info-article in active"]\x94a\x8c\x0bcreate_time'
|
|
|
- b'\x94N\x8c\tparse_url\x94\x8c`https://gdgpo.czt.gd.gov.cn/free'
|
|
|
- b'cms/site/gd/ggxx/info/2022/8a7ee7dd7e9e4962017e9f56e40058a5.'
|
|
|
- b'html\x94\x8c\x0erequest_params\x94}\x94\x8c\x06failed'
|
|
|
- b'\x94K\x02\x8c\x06author\x94\x8c\x07details\x94\x8c\x05ex_j'
|
|
|
- b's\x94h\x18\x8c\tex_python\x94N\x8c\x03pri\x94K\x01\x8c\x07proxi'
|
|
|
- b'es\x94\x89\x8c\x05files\x94}\x94(\x8c\nlist_xpath\x94\x8c-//'
|
|
|
- b'div[@class="info-article in active"]//div/a\x94\x8c\turl_xp'
|
|
|
- b'ath\x94\x8c\x07./@href\x94\x8c\nname_xpath\x94\x8c\x08./text('
|
|
|
- b')\x94\x8c\nfiles_type\x94]\x94(\x8c\x03zip\x94\x8c\x04do'
|
|
|
- b'xc\x94\x8c\x03ftp\x94\x8c\x03pdf\x94e\x8c\x07url_ke'
|
|
|
- b'y\x94\x8c\x04http\x94u\x8c\x05error\x94N\x8c\x04code\x94K\x00'
|
|
|
- b'u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95-\x00\x00\x00\x00\x00\x00\x00]\x94\x8c&//div[@cl'
|
|
|
- b'ass="info-article in active"]\x94a.',
|
|
|
- 'error_msg': 'ValueError: dictionary update sequence element #0 has length 1; '
|
|
|
- '2 is required',
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95(\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8cT\xe4\xbd\x9b\xe5\xb1\xb1\xe5\xb8\x82\xe9\xa1\xba'
|
|
|
- b'\xe5\xbe\xb7\xe5\x8c\xba\xe4\xba\xba\xe6\xb0\x91\xe6\xb3\x95\xe9'
|
|
|
- b'\x99\xa2\xe4\xbf\xa1\xe6\x81\xaf\xe5\x8c\x96\xe8\xbd\xaf\xe7\xa1'
|
|
|
- b'\xac\xe4\xbb\xb6\xe8\xae\xbe\xe5\xa4\x87\xe7\xbb\xb4\xe6\x8a\xa4'
|
|
|
- b'\xe6\x9c\x8d\xe5\x8a\xa1\xe9\xa1\xb9\xe7\x9b\xae\xe7\xbb\x93\xe6'
|
|
|
- b'\x9e\x9c\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c'
|
|
|
- b'\x132022-01-28 14:37:36\x94\x8c\nspidercode\x94\x8c\x15gd_gdszfcgwx'
|
|
|
- b'wz_zbcjgg\x94\x8c\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8'
|
|
|
- b'\x9c\xe7\x9c\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4\xad'
|
|
|
- b'\xe7\xbd\x91\x94\x8c\x07channel\x94\x8c\x12\xe4\xb8\xad\xe6'
|
|
|
- b'\xa0\x87\xe6\x88\x90\xe4\xba\xa4\xe5\x85\xac\xe5\x91\x8a\x94\x8c'
|
|
|
- b'\x04area\x94\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04cit'
|
|
|
- b'y\x94\x8c\x00\x94\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8c`htt'
|
|
|
- b'ps://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7ee7dd7e9e'
|
|
|
- b'4962017e9f56e40058a5.html\x94\x8c\x0bpublishdept\x94h\x0e\x8c\tisco'
|
|
|
- b'mpete\x94\x88\x8c\x04type\x94h\x0e\x8c\x01T\x94\x8c\x07biddin'
|
|
|
- b'g\x94\x8c\x10l_np_publishtime\x94h\x0e\x8c\ncomeintime\x94'
|
|
|
- b'h\x0e\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\ncome'
|
|
|
- b'intime\x94\x8c\x0bcontenthtml\x94h\x0e\x8c\x06detail\x94'
|
|
|
- b'h\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7ee7dd7e9e4962017e9f56e40058a5.html'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,552|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7efa517e9032e5017e9b37b0c50534.html
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@class="info-article in active"]//div/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp', 'pdf'], 'url_key': 'http'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,564|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,565|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error dictionary update sequence element #0 has length 1; 2 is required
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95\x8d\x04\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03'
|
|
|
- b'_id\x94\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93'
|
|
|
- b'\x94)\x81\x94C\x0ca\xf2\x95\xb9{\xdc<\xbf\xf2)V\xe6\x94b'
|
|
|
- b'\x8c\x05parse\x94\x8c\x0fself.detail_get\x94\x8c\x04item'
|
|
|
- b'\x94}\x94(\x8c\x05title\x94\x8cZ\xe5\xb9\xbf\xe4\xb8\x9c'
|
|
|
- b'\xe7\x9c\x81\xe4\xba\xba\xe5\x8a\x9b\xe8\xb5\x84'
|
|
|
- b'\xe6\xba\x90\xe5\xb8\x82\xe5\x9c\xba\xe8\xae\xbe'
|
|
|
- b'\xe6\x96\xbd\xe8\xae\xbe\xe5\xa4\x87\xe8\xb4\xad'
|
|
|
- b'\xe7\xbd\xae\xe9\x9b\x86\xe6\x88\x90\xe5\x8f\x8a'
|
|
|
- b'\xe5\xb1\x95\xe9\x99\x88\xe5\xb8\x83\xe7\xbd\xae'
|
|
|
- b'\xe6\x9c\x8d\xe5\x8a\xa1\xe9\xa1\xb9\xe7\x9b\xae'
|
|
|
- b'\xe6\x8b\x9b\xe6\xa0\x87\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bp'
|
|
|
- b'ublishtime\x94\x8c\x132022-01-27 19:14:45\x94\x8c\nspidercod'
|
|
|
- b'e\x94\x8c\x13gd_gdszfcgwxwz_cggg\x94\x8c\x04site\x94\x8c'
|
|
|
- b'\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe6\x94'
|
|
|
- b'\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4\xad\xe7\xbd'
|
|
|
- b'\x91\x94\x8c\x07channel\x94\x8c\x0c\xe9\x87\x87\xe8\xb4\xad'
|
|
|
- b'\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04area\x94\x8c\x06'
|
|
|
- b'\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00'
|
|
|
- b'\x94\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8c`https://gdg'
|
|
|
- b'po.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7efa517e90'
|
|
|
- b'32e5017e9b37b0c50534.html\x94\x8c\x0bpublishdept\x94h\x18\x8c\t'
|
|
|
- b'iscompete\x94\x88\x8c\x04type\x94h\x18\x8c\x01T\x94\x8c\x07bi'
|
|
|
- b'dding\x94\x8c\x10l_np_publishtime\x94h\x18\x8c\ncomeintime\x94'
|
|
|
- b'h\x18\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\n'
|
|
|
- b'comeintime\x94\x8c\x0bcontenthtml\x94h\x18\x8c\x06detail\x94'
|
|
|
- b'h\x18\x8c\x0bprojectinfo\x94Nu\x8c\x0bparser_name\x94\x8c\x07de'
|
|
|
- b'tails\x94\x8c\x04date\x94\x8c\x132022-01-27 20:53:12\x94\x8c'
|
|
|
- b'\x0bdeal_detail\x94]\x94\x8c&//div[@class="info-article in a'
|
|
|
- b'ctive"]\x94a\x8c\x0bcreate_time\x94N\x8c\tparse_url\x94\x8c`ht'
|
|
|
- b'tps://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7'
|
|
|
- b'efa517e9032e5017e9b37b0c50534.html\x94\x8c\x0erequest_par'
|
|
|
- b'ams\x94}\x94\x8c\x06failed\x94K\x13\x8c\x06author\x94\x8c\x07'
|
|
|
- b'details\x94\x8c\x05ex_js\x94h\x18\x8c\tex_python\x94N\x8c'
|
|
|
- b'\x03pri\x94K\x01\x8c\x07proxies\x94\x89\x8c\x05files\x94}\x94'
|
|
|
- b'(\x8c\nlist_xpath\x94\x8c-//div[@class="info-article in active'
|
|
|
- b'"]//div/a\x94\x8c\turl_xpath\x94\x8c\x07./@href\x94\x8c\nname_x'
|
|
|
- b'path\x94\x8c\x08./text()\x94\x8c\nfiles_type\x94]\x94('
|
|
|
- b'\x8c\x03zip\x94\x8c\x04doxc\x94\x8c\x03ftp\x94\x8c\x03pdf'
|
|
|
- b'\x94e\x8c\x07url_key\x94\x8c\x04http\x94u\x8c\x05error\x94'
|
|
|
- b'N\x8c\x04code\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95-\x00\x00\x00\x00\x00\x00\x00]\x94\x8c&//div[@cl'
|
|
|
- b'ass="info-article in active"]\x94a.',
|
|
|
- 'error_msg': 'ValueError: dictionary update sequence element #0 has length 1; '
|
|
|
- '2 is required',
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95&\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8cZ\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe4\xba\xba'
|
|
|
- b'\xe5\x8a\x9b\xe8\xb5\x84\xe6\xba\x90\xe5\xb8\x82\xe5\x9c\xba\xe8'
|
|
|
- b'\xae\xbe\xe6\x96\xbd\xe8\xae\xbe\xe5\xa4\x87\xe8\xb4\xad\xe7\xbd'
|
|
|
- b'\xae\xe9\x9b\x86\xe6\x88\x90\xe5\x8f\x8a\xe5\xb1\x95\xe9\x99\x88'
|
|
|
- b'\xe5\xb8\x83\xe7\xbd\xae\xe6\x9c\x8d\xe5\x8a\xa1\xe9\xa1\xb9\xe7'
|
|
|
- b'\x9b\xae\xe6\x8b\x9b\xe6\xa0\x87\xe5\x85\xac\xe5\x91\x8a\x94\x8c'
|
|
|
- b'\x0bpublishtime\x94\x8c\x132022-01-27 19:14:45\x94\x8c\nspiderc'
|
|
|
- b'ode\x94\x8c\x13gd_gdszfcgwxwz_cggg\x94\x8c\x04site\x94\x8c\x18\xe5'
|
|
|
- b'\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87'
|
|
|
- b'\x87\xe8\xb4\xad\xe7\xbd\x91\x94\x8c\x07channel\x94\x8c\x0c'
|
|
|
- b'\xe9\x87\x87\xe8\xb4\xad\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x04area\x94'
|
|
|
- b'\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00\x94\x8c'
|
|
|
- b'\x0bcompetehref\x94N\x8c\x04href\x94\x8c`https://gdgpo.czt.gd.gov.'
|
|
|
- b'cn/freecms/site/gd/ggxx/info/2022/8a7efa517e9032e5017e9b37b0c50534.h'
|
|
|
- b'tml\x94\x8c\x0bpublishdept\x94h\x0e\x8c\tiscompete\x94\x88\x8c\x04t'
|
|
|
- b'ype\x94h\x0e\x8c\x01T\x94\x8c\x07bidding\x94\x8c\x10l_np_publishti'
|
|
|
- b'me\x94h\x0e\x8c\ncomeintime\x94h\x0e\x8c\x08sendflag\x94\x8c\x05fal'
|
|
|
- b'se\x94\x8c\x02_d\x94\x8c\ncomeintime\x94\x8c\x0bcontenthtml\x94h'
|
|
|
- b'\x0e\x8c\x06detail\x94h\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7efa517e9032e5017e9b37b0c50534.html'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,615|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/9b5da8cc-1545-438c-8ad6-ccdd8bd71b10.html
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@class="info-article in active"]//div/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp', 'pdf'], 'url_key': 'http'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,628|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,628|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error dictionary update sequence element #0 has length 1; 2 is required
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95\x86\x04\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03'
|
|
|
- b'_id\x94\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93'
|
|
|
- b'\x94)\x81\x94C\x0ca\xf3\xa1\\\x81\xdbV\xa5\x9f\xf9hU\x94b'
|
|
|
- b'\x8c\x05parse\x94\x8c\x0fself.detail_get\x94\x8c\x04item'
|
|
|
- b'\x94}\x94(\x8c\x05title\x94\x8cE\xe9\x9f\xb6\xe5\x85\xb3'
|
|
|
- b'\xe5\xb8\x82\xe6\xad\xa6\xe6\xb1\x9f\xe5\x8c\xba'
|
|
|
- b'\xe4\xba\xba\xe6\xb0\x91\xe6\xa3\x80\xe5\xaf\x9f'
|
|
|
- b'\xe9\x99\xa2\xe7\x94\xb5\xe5\xad\x90\xe5\x8d\x96'
|
|
|
- b'\xe5\x9c\xba\xe7\x9b\xb4\xe6\x8e\xa5\xe8\xae\xa2'
|
|
|
- b'\xe8\xb4\xad\xe6\x88\x90\xe4\xba\xa4\xe5\x85\xac'
|
|
|
- b'\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132022-01-28 15:48'
|
|
|
- b':52\x94\x8c\nspidercode\x94\x8c\x13gd_gdszfcgwxwz_ysgg\x94\x8c'
|
|
|
- b'\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c'
|
|
|
- b'\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4'
|
|
|
- b'\xad\xe7\xbd\x91\x94\x8c\x07channel\x94\x8c\x12\xe7\x94\xb5'
|
|
|
- b'\xe5\xad\x90\xe5\x8d\x96\xe5\x9c\xba\xe4\xbf\xa1'
|
|
|
- b'\xe6\x81\xaf\x94\x8c\x04area\x94\x8c\x06\xe5\xb9\xbf'
|
|
|
- b'\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompeteh'
|
|
|
- b'ref\x94N\x8c\x04href\x94\x8cdhttps://gdgpo.czt.gd.gov.cn/freecm'
|
|
|
- b's/site/gd/ggxx/info/2022/9b5da8cc-1545-438c-8ad6-ccdd8bd71b1'
|
|
|
- b'0.html\x94\x8c\x0bpublishdept\x94h\x18\x8c\tiscompete\x94\x88'
|
|
|
- b'\x8c\x04type\x94h\x18\x8c\x01T\x94\x8c\x07bidding\x94\x8c'
|
|
|
- b'\x10l_np_publishtime\x94h\x18\x8c\ncomeintime\x94h\x18\x8c'
|
|
|
- b'\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\ncomeint'
|
|
|
- b'ime\x94\x8c\x0bcontenthtml\x94h\x18\x8c\x06detail\x94h\x18\x8c'
|
|
|
- b'\x0bprojectinfo\x94Nu\x8c\x0bparser_name\x94\x8c\x07detai'
|
|
|
- b'ls\x94\x8c\x04date\x94\x8c\x132022-01-28 15:55:07\x94\x8c\x0bde'
|
|
|
- b'al_detail\x94]\x94\x8c&//div[@class="info-article in active"]'
|
|
|
- b'\x94a\x8c\x0bcreate_time\x94N\x8c\tparse_url\x94\x8cdhttps://g'
|
|
|
- b'dgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/9b5da8cc-1'
|
|
|
- b'545-438c-8ad6-ccdd8bd71b10.html\x94\x8c\x0erequest_params'
|
|
|
- b'\x94}\x94\x8c\x06failed\x94K\x02\x8c\x06author\x94\x8c\x07det'
|
|
|
- b'ails\x94\x8c\x05ex_js\x94h\x18\x8c\tex_python\x94N\x8c\x03pr'
|
|
|
- b'i\x94K\x01\x8c\x07proxies\x94\x89\x8c\x05files\x94}\x94(\x8c\n'
|
|
|
- b'list_xpath\x94\x8c-//div[@class="info-article in active"]//div'
|
|
|
- b'/a\x94\x8c\turl_xpath\x94\x8c\x07./@href\x94\x8c\nname_xpat'
|
|
|
- b'h\x94\x8c\x08./text()\x94\x8c\nfiles_type\x94]\x94(\x8c\x03z'
|
|
|
- b'ip\x94\x8c\x04doxc\x94\x8c\x03ftp\x94\x8c\x03pdf\x94e\x8c'
|
|
|
- b'\x07url_key\x94\x8c\x04http\x94u\x8c\x05error\x94N\x8c\x04code'
|
|
|
- b'\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95-\x00\x00\x00\x00\x00\x00\x00]\x94\x8c&//div[@cl'
|
|
|
- b'ass="info-article in active"]\x94a.',
|
|
|
- 'error_msg': 'ValueError: dictionary update sequence element #0 has length 1; '
|
|
|
- '2 is required',
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95\x1b\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8cE\xe9\x9f\xb6\xe5\x85\xb3\xe5\xb8\x82\xe6\xad\xa6'
|
|
|
- b'\xe6\xb1\x9f\xe5\x8c\xba\xe4\xba\xba\xe6\xb0\x91\xe6\xa3\x80\xe5'
|
|
|
- b'\xaf\x9f\xe9\x99\xa2\xe7\x94\xb5\xe5\xad\x90\xe5\x8d\x96\xe5\x9c'
|
|
|
- b'\xba\xe7\x9b\xb4\xe6\x8e\xa5\xe8\xae\xa2\xe8\xb4\xad\xe6\x88\x90'
|
|
|
- b'\xe4\xba\xa4\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94'
|
|
|
- b'\x8c\x132022-01-28 15:48:52\x94\x8c\nspidercode\x94\x8c\x13gd_gdsz'
|
|
|
- b'fcgwxwz_ysgg\x94\x8c\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c'
|
|
|
- b'\xe7\x9c\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4\xad\xe7'
|
|
|
- b'\xbd\x91\x94\x8c\x07channel\x94\x8c\x12\xe7\x94\xb5\xe5\xad'
|
|
|
- b'\x90\xe5\x8d\x96\xe5\x9c\xba\xe4\xbf\xa1\xe6\x81\xaf\x94\x8c\x04area'
|
|
|
- b'\x94\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00\x94'
|
|
|
- b'\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8cdhttps://gdgpo.czt.gd'
|
|
|
- b'.gov.cn/freecms/site/gd/ggxx/info/2022/9b5da8cc-1545-438c-8ad6-ccdd8'
|
|
|
- b'bd71b10.html\x94\x8c\x0bpublishdept\x94h\x0e\x8c\tiscompete'
|
|
|
- b'\x94\x88\x8c\x04type\x94h\x0e\x8c\x01T\x94\x8c\x07bidding'
|
|
|
- b'\x94\x8c\x10l_np_publishtime\x94h\x0e\x8c\ncomeintime\x94h'
|
|
|
- b'\x0e\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\ncomei'
|
|
|
- b'ntime\x94\x8c\x0bcontenthtml\x94h\x0e\x8c\x06detail\x94h'
|
|
|
- b'\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/9b5da8cc-1545-438c-8ad6-ccdd8bd71b10.html'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,677|request.py|get_response|line:305|DEBUG|
|
|
|
- -------------- Details.detail_get request for ----------------
|
|
|
- url = https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/ab73c655-102a-4923-b4cb-dadfdc82c913.html
|
|
|
- method = GET
|
|
|
- body = {'files': {'list_xpath': '//div[@class="info-article in active"]//div/a', 'url_xpath': './@href', 'name_xpath': './text()', 'files_type': ['zip', 'doxc', 'ftp', 'pdf'], 'url_key': 'http'}, 'proxies': False, 'timeout': 22, 'stream': True, 'verify': False, 'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36'}}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,691|tools.py|dumps_json|line:843|ERROR| Object of type bytes is not JSON serializable
|
|
|
-Thread-5|2022-01-28 17:06:48,691|parser_control.py|deal_requests|line:249|ERROR|
|
|
|
- -------------- Details.detail_get error -------------
|
|
|
- error dictionary update sequence element #0 has length 1; 2 is required
|
|
|
- response None
|
|
|
- deal request {'base_info': b'\x80\x04\x95\x86\x04\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03'
|
|
|
- b'_id\x94\x8c\rbson.objectid\x94\x8c\x08ObjectId\x94\x93'
|
|
|
- b'\x94)\x81\x94C\x0ca\xf3Z\x94J\xa3\xe2Z\x12\xe9\t\x00\x94b'
|
|
|
- b'\x8c\x05parse\x94\x8c\x0fself.detail_get\x94\x8c\x04item'
|
|
|
- b'\x94}\x94(\x8c\x05title\x94\x8cE\xe5\xb9\xbf\xe4\xb8\x9c'
|
|
|
- b'\xe7\x9c\x81\xe4\xbd\x9b\xe5\xb1\xb1\xe8\x88\xaa'
|
|
|
- b'\xe9\x81\x93\xe4\xba\x8b\xe5\x8a\xa1\xe4\xb8\xad'
|
|
|
- b'\xe5\xbf\x83\xe7\x94\xb5\xe5\xad\x90\xe5\x8d\x96'
|
|
|
- b'\xe5\x9c\xba\xe7\x9b\xb4\xe6\x8e\xa5\xe8\xae\xa2'
|
|
|
- b'\xe8\xb4\xad\xe6\x88\x90\xe4\xba\xa4\xe5\x85\xac'
|
|
|
- b'\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94\x8c\x132022-01-28 10:00'
|
|
|
- b':38\x94\x8c\nspidercode\x94\x8c\x13gd_gdszfcgwxwz_ysgg\x94\x8c'
|
|
|
- b'\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c'
|
|
|
- b'\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4'
|
|
|
- b'\xad\xe7\xbd\x91\x94\x8c\x07channel\x94\x8c\x12\xe7\x94\xb5'
|
|
|
- b'\xe5\xad\x90\xe5\x8d\x96\xe5\x9c\xba\xe4\xbf\xa1'
|
|
|
- b'\xe6\x81\xaf\x94\x8c\x04area\x94\x8c\x06\xe5\xb9\xbf'
|
|
|
- b'\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00\x94\x8c\x0bcompeteh'
|
|
|
- b'ref\x94N\x8c\x04href\x94\x8cdhttps://gdgpo.czt.gd.gov.cn/freecm'
|
|
|
- b's/site/gd/ggxx/info/2022/ab73c655-102a-4923-b4cb-dadfdc82c91'
|
|
|
- b'3.html\x94\x8c\x0bpublishdept\x94h\x18\x8c\tiscompete\x94\x88'
|
|
|
- b'\x8c\x04type\x94h\x18\x8c\x01T\x94\x8c\x07bidding\x94\x8c'
|
|
|
- b'\x10l_np_publishtime\x94h\x18\x8c\ncomeintime\x94h\x18\x8c'
|
|
|
- b'\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\ncomeint'
|
|
|
- b'ime\x94\x8c\x0bcontenthtml\x94h\x18\x8c\x06detail\x94h\x18\x8c'
|
|
|
- b'\x0bprojectinfo\x94Nu\x8c\x0bparser_name\x94\x8c\x07detai'
|
|
|
- b'ls\x94\x8c\x04date\x94\x8c\x132022-01-28 10:53:07\x94\x8c\x0bde'
|
|
|
- b'al_detail\x94]\x94\x8c&//div[@class="info-article in active"]'
|
|
|
- b'\x94a\x8c\x0bcreate_time\x94N\x8c\tparse_url\x94\x8cdhttps://g'
|
|
|
- b'dgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/ab73c655-1'
|
|
|
- b'02a-4923-b4cb-dadfdc82c913.html\x94\x8c\x0erequest_params'
|
|
|
- b'\x94}\x94\x8c\x06failed\x94K\x0b\x8c\x06author\x94\x8c\x07det'
|
|
|
- b'ails\x94\x8c\x05ex_js\x94h\x18\x8c\tex_python\x94N\x8c\x03pr'
|
|
|
- b'i\x94K\x01\x8c\x07proxies\x94\x89\x8c\x05files\x94}\x94(\x8c\n'
|
|
|
- b'list_xpath\x94\x8c-//div[@class="info-article in active"]//div'
|
|
|
- b'/a\x94\x8c\turl_xpath\x94\x8c\x07./@href\x94\x8c\nname_xpat'
|
|
|
- b'h\x94\x8c\x08./text()\x94\x8c\nfiles_type\x94]\x94(\x8c\x03z'
|
|
|
- b'ip\x94\x8c\x04doxc\x94\x8c\x03ftp\x94\x8c\x03pdf\x94e\x8c'
|
|
|
- b'\x07url_key\x94\x8c\x04http\x94u\x8c\x05error\x94N\x8c\x04code'
|
|
|
- b'\x94K\x00u.',
|
|
|
- 'callback': 'detail_get',
|
|
|
- 'deal_detail': b'\x80\x04\x95-\x00\x00\x00\x00\x00\x00\x00]\x94\x8c&//div[@cl'
|
|
|
- b'ass="info-article in active"]\x94a.',
|
|
|
- 'error_msg': 'ValueError: dictionary update sequence element #0 has length 1; '
|
|
|
- '2 is required',
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'filter_repeat': False,
|
|
|
- 'item': b'\x80\x04\x95\x1b\x02\x00\x00\x00\x00\x00\x00}\x94(\x8c\x05titl'
|
|
|
- b'e\x94\x8cE\xe5\xb9\xbf\xe4\xb8\x9c\xe7\x9c\x81\xe4\xbd\x9b'
|
|
|
- b'\xe5\xb1\xb1\xe8\x88\xaa\xe9\x81\x93\xe4\xba\x8b\xe5\x8a\xa1\xe4'
|
|
|
- b'\xb8\xad\xe5\xbf\x83\xe7\x94\xb5\xe5\xad\x90\xe5\x8d\x96\xe5\x9c'
|
|
|
- b'\xba\xe7\x9b\xb4\xe6\x8e\xa5\xe8\xae\xa2\xe8\xb4\xad\xe6\x88\x90'
|
|
|
- b'\xe4\xba\xa4\xe5\x85\xac\xe5\x91\x8a\x94\x8c\x0bpublishtime\x94'
|
|
|
- b'\x8c\x132022-01-28 10:00:38\x94\x8c\nspidercode\x94\x8c\x13gd_gdsz'
|
|
|
- b'fcgwxwz_ysgg\x94\x8c\x04site\x94\x8c\x18\xe5\xb9\xbf\xe4\xb8\x9c'
|
|
|
- b'\xe7\x9c\x81\xe6\x94\xbf\xe5\xba\x9c\xe9\x87\x87\xe8\xb4\xad\xe7'
|
|
|
- b'\xbd\x91\x94\x8c\x07channel\x94\x8c\x12\xe7\x94\xb5\xe5\xad'
|
|
|
- b'\x90\xe5\x8d\x96\xe5\x9c\xba\xe4\xbf\xa1\xe6\x81\xaf\x94\x8c\x04area'
|
|
|
- b'\x94\x8c\x06\xe5\xb9\xbf\xe4\xb8\x9c\x94\x8c\x04city\x94\x8c\x00\x94'
|
|
|
- b'\x8c\x0bcompetehref\x94N\x8c\x04href\x94\x8cdhttps://gdgpo.czt.gd'
|
|
|
- b'.gov.cn/freecms/site/gd/ggxx/info/2022/ab73c655-102a-4923-b4cb-dadfd'
|
|
|
- b'c82c913.html\x94\x8c\x0bpublishdept\x94h\x0e\x8c\tiscompete'
|
|
|
- b'\x94\x88\x8c\x04type\x94h\x0e\x8c\x01T\x94\x8c\x07bidding'
|
|
|
- b'\x94\x8c\x10l_np_publishtime\x94h\x0e\x8c\ncomeintime\x94h'
|
|
|
- b'\x0e\x8c\x08sendflag\x94\x8c\x05false\x94\x8c\x02_d\x94\x8c\ncomei'
|
|
|
- b'ntime\x94\x8c\x0bcontenthtml\x94h\x0e\x8c\x06detail\x94h'
|
|
|
- b'\x0e\x8c\x0bprojectinfo\x94Nu.',
|
|
|
- 'parser_name': 'Details',
|
|
|
- 'proxies': False,
|
|
|
- 'response': 'None',
|
|
|
- 'retry_times': 2,
|
|
|
- 'url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/ab73c655-102a-4923-b4cb-dadfdc82c913.html'}
|
|
|
-
|
|
|
-Thread-5|2022-01-28 17:06:48,741|parser_control.py|run|line:56|DEBUG| parser 等待任务...
|
|
|
-Thread-3|2022-01-28 17:06:49,216|tools.py|dumps_json|line:843|ERROR| Object of type ObjectId is not JSON serializable
|
|
|
-Thread-3|2022-01-28 17:06:49,222|item_buffer.py|__add_item_to_db|line:300|DEBUG|
|
|
|
- -------------- item 批量入库 --------------
|
|
|
- 表名: mgp_list
|
|
|
- datas: [{'_id': ObjectId('61f361ae9547b8b7d10dc034'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 11:23:26',
|
|
|
- 'deal_detail': ['//div[@class="xl_main"]', '//div[@class="big-box-B"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 5,
|
|
|
- 'files': {'file_type': 'doxc',
|
|
|
- 'files_type': ['zip', 'doxc', 'ftp'],
|
|
|
- 'host': 'http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c',
|
|
|
- 'list_xpath': '//div[@id="fjxz"]/p[@class="mar-L30 '
|
|
|
- 'fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Editor"]//p/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '福建',
|
|
|
- 'channel': '通知公告',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c/',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2019-07-17 16:14:02',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '福建省民政厅',
|
|
|
- 'spidercode': 'fj_fjsmzt_tzgg',
|
|
|
- 'title': '乡镇敬老院床位使用率达标县(市、区)第三方评估采购项目采购公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'http://cz.fjzfcg.gov.cn/3500/notice/1c4f944709d047a7a633672964c633ce/7c36067afe5b449ea66bae09d11cf45c/',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f361ae9547b8b7d10dc042'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 11:23:26',
|
|
|
- 'deal_detail': ['//div[@class="xl_main"]', '//div[@class="big-box-B"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 5,
|
|
|
- 'files': {'file_type': 'doxc',
|
|
|
- 'files_type': ['zip', 'doxc', 'ftp'],
|
|
|
- 'host': 'http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8',
|
|
|
- 'list_xpath': '//div[@id="fjxz"]/p[@class="mar-L30 '
|
|
|
- 'fjwz"]/a|//div[@id="result"]//u/a|//div[@class="TRS_Editor"]//p/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '福建',
|
|
|
- 'channel': '通知公告',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8/',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2019-05-22 16:01:08',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '福建省民政厅',
|
|
|
- 'spidercode': 'fj_fjsmzt_tzgg',
|
|
|
- 'title': '福建省养老服务综合信息平台采购项目招标公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'http://cz.fjzfcg.gov.cn/3500/notice/d2bad35854053876b45269f56e50dee2/91255ff3752c4bc48770877162da31a8/',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f3a16181db56a59ff96871'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 15:55:12',
|
|
|
- 'deal_detail': ['//div[@class="info-article in active"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 3,
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '广东',
|
|
|
- 'channel': '采购公告',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7e3a9c7e946b44017e9f51af707454.html',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2022-01-28 15:09:43',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '广东省政府采购网',
|
|
|
- 'spidercode': 'gd_gdszfcgwxwz_cggg',
|
|
|
- 'title': '广东轻工职业技术学院新能源汽车检测实训设备购置招标公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7e3a9c7e946b44017e9f51af707454.html',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f3a16381db56a59ff96876'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 15:55:14',
|
|
|
- 'deal_detail': ['//div[@class="info-article in active"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 3,
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '广东',
|
|
|
- 'channel': '中标成交公告',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7edc7d7e9e62ca017e9f00529a7d80.html',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2022-01-28 14:51:56',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '广东省政府采购网',
|
|
|
- 'spidercode': 'gd_gdszfcgwxwz_zbcjgg',
|
|
|
- 'title': '广东省英德监狱职工饭堂运营服务采购项目(GZSW21201FG4176A)结果公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7edc7d7e9e62ca017e9f00529a7d80.html',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f3a16381db56a59ff96877'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 15:55:14',
|
|
|
- 'deal_detail': ['//div[@class="info-article in active"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 3,
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '广东',
|
|
|
- 'channel': '中标成交公告',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7ee7dd7e9e4962017e9f56e40058a5.html',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2022-01-28 14:37:36',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '广东省政府采购网',
|
|
|
- 'spidercode': 'gd_gdszfcgwxwz_zbcjgg',
|
|
|
- 'title': '佛山市顺德区人民法院信息化软硬件设备维护服务项目结果公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7ee7dd7e9e4962017e9f56e40058a5.html',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f295b97bdc3cbff22956e6'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-27 20:53:12',
|
|
|
- 'deal_detail': ['//div[@class="info-article in active"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 20,
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '广东',
|
|
|
- 'channel': '采购公告',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7efa517e9032e5017e9b37b0c50534.html',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2022-01-27 19:14:45',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '广东省政府采购网',
|
|
|
- 'spidercode': 'gd_gdszfcgwxwz_cggg',
|
|
|
- 'title': '广东省人力资源市场设施设备购置集成及展陈布置服务项目招标公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/8a7efa517e9032e5017e9b37b0c50534.html',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f3a15c81db56a59ff96855'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 15:55:07',
|
|
|
- 'deal_detail': ['//div[@class="info-article in active"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 3,
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '广东',
|
|
|
- 'channel': '电子卖场信息',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/9b5da8cc-1545-438c-8ad6-ccdd8bd71b10.html',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2022-01-28 15:48:52',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '广东省政府采购网',
|
|
|
- 'spidercode': 'gd_gdszfcgwxwz_ysgg',
|
|
|
- 'title': '韶关市武江区人民检察院电子卖场直接订购成交公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/9b5da8cc-1545-438c-8ad6-ccdd8bd71b10.html',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}},
|
|
|
- {'_id': ObjectId('61f35a944aa3e25a12e90900'),
|
|
|
- 'author': None,
|
|
|
- 'code': 0,
|
|
|
- 'create_time': None,
|
|
|
- 'date': '2022-01-28 10:53:07',
|
|
|
- 'deal_detail': ['//div[@class="info-article in active"]'],
|
|
|
- 'error': None,
|
|
|
- 'ex_js': '',
|
|
|
- 'ex_python': None,
|
|
|
- 'failed': 12,
|
|
|
- 'files': {'files_type': ['zip', 'doxc', 'ftp', 'pdf'],
|
|
|
- 'list_xpath': '//div[@class="info-article in active"]//div/a',
|
|
|
- 'name_xpath': './text()',
|
|
|
- 'url_key': 'http',
|
|
|
- 'url_xpath': './@href'},
|
|
|
- 'item': {'T': 'bidding',
|
|
|
- '_d': 'comeintime',
|
|
|
- 'area': '广东',
|
|
|
- 'channel': '电子卖场信息',
|
|
|
- 'city': '',
|
|
|
- 'comeintime': '',
|
|
|
- 'competehref': None,
|
|
|
- 'contenthtml': '',
|
|
|
- 'detail': '',
|
|
|
- 'href': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/ab73c655-102a-4923-b4cb-dadfdc82c913.html',
|
|
|
- 'iscompete': True,
|
|
|
- 'l_np_publishtime': '',
|
|
|
- 'projectinfo': None,
|
|
|
- 'publishdept': '',
|
|
|
- 'publishtime': '2022-01-28 10:00:38',
|
|
|
- 'sendflag': 'false',
|
|
|
- 'site': '广东省政府采购网',
|
|
|
- 'spidercode': 'gd_gdszfcgwxwz_ysgg',
|
|
|
- 'title': '广东省佛山航道事务中心电子卖场直接订购成交公告',
|
|
|
- 'type': ''},
|
|
|
- 'parse': 'self.detail_get',
|
|
|
- 'parse_url': 'https://gdgpo.czt.gd.gov.cn/freecms/site/gd/ggxx/info/2022/ab73c655-102a-4923-b4cb-dadfdc82c913.html',
|
|
|
- 'parser_name': 'details',
|
|
|
- 'pri': 1,
|
|
|
- 'proxies': False,
|
|
|
- 'request_params': {}}]
|
|
|
-
|
|
|
-Thread-3|2022-01-28 17:06:49,723|mongo_pipeline.py|save_items|line:49|INFO| 共导出 8 条数据到 mgp_list, 新增 8条, 重复 0 条
|
|
|
-Details|2022-01-28 17:06:53,273|scheduler.py|<lambda>|line:117|INFO|
|
|
|
-********** feapder end **********
|
|
|
-Details|2022-01-28 17:06:53,275|scheduler.py|spider_end|line:520|INFO| 《magp:details1》爬虫结束,耗时 3分20秒
|
|
|
-Details|2022-01-28 17:06:53,276|scheduler.py|delete_tables|line:444|INFO| 正在删除key magp:details1:z_spider_status
|