|
@@ -32,6 +32,21 @@ class ListPageSpider(feapder.AirSpider):
|
|
|
def proxy(self):
    """Return the value produced by calling ``swordfish_proxy()``.

    A new call is made on every access; nothing is cached on the instance.
    """
    result = swordfish_proxy()
    return result
|
|
|
|
|
|
@staticmethod
def extract_address(region):
    """Split a region string into an ``(area, city)`` pair.

    The first whitespace-separated token is taken as the area and the
    second one, when present, as the city; any further tokens are ignored.
    Every occurrence of the character '省' is removed from the area.

    Args:
        region: Region text such as '广东省 深圳市'. May be empty or None.

    Returns:
        A ``(area, city)`` tuple of strings. ``('全国', '')`` is returned
        when ``region`` is empty, None, or contains only whitespace.
    """
    # str.split() without an argument collapses runs of whitespace and
    # ignores leading/trailing blanks, so no token is ever an empty string.
    # Splitting on a single ' ' used to shift the area/city positions when
    # the input had a leading space or a doubled separator.
    tokens = region.split() if region else []
    if not tokens:
        return '全国', ''

    area = tokens[0].replace('省', '')
    city = tokens[1] if len(tokens) > 1 else ''
    return area, city
|
|
|
+
|
|
|
def start_callback(self):
|
|
|
self._task_coll_name = 'zgzb_list'
|
|
|
self._proxies = None
|
|
@@ -42,12 +57,11 @@ class ListPageSpider(feapder.AirSpider):
|
|
|
page_size = 1000
|
|
|
# today = datetime.date.today()
|
|
|
# end_day = today - datetime.timedelta(days=-3)
|
|
|
- url = 'http://www.cebpubservice.com/ctpsp_iiss/searchbusinesstypebeforedooraction/getStringMethod.do'
|
|
|
task_menus = [
|
|
|
Menu('未按数据规范-招标公告', 'a_zgzbtbggfwpt_wasjgf_zbgg', '招标公告', 'tenderBulletin'),
|
|
|
- # Menu('未按数据规范-开标记录', 'a_zgzbtbggfwpt_wasjgf_kbjl', '开标记录', 'openBidRecord'),
|
|
|
- # Menu('未按数据规范-评标公示', 'a_zgzbtbggfwpt_wasjgf_pbgs', '评标公示', 'winCandidateBulletin'),
|
|
|
- # Menu('未按数据规范-中标公告', 'a_zgzbtbggfwpt_wasjgf_zhbgg', '中标公告', 'winBidBulletin'),
|
|
|
+ Menu('未按数据规范-开标记录', 'a_zgzbtbggfwpt_wasjgf_kbjl', '开标记录', 'openBidRecord'),
|
|
|
+ Menu('未按数据规范-评标公示', 'a_zgzbtbggfwpt_wasjgf_pbgs', '评标公示', 'winCandidateBulletin'),
|
|
|
+ Menu('未按数据规范-中标公告', 'a_zgzbtbggfwpt_wasjgf_zhbgg', '中标公告', 'winBidBulletin'),
|
|
|
]
|
|
|
for menu in task_menus:
|
|
|
business_type = menu.type
|
|
@@ -71,12 +85,13 @@ class ListPageSpider(feapder.AirSpider):
|
|
|
'msg': f'{business_type}-第{page}页',
|
|
|
'interval': 1, # 切换代理间隔时长
|
|
|
}
|
|
|
- yield feapder.Request(url, timeout=5, data=data, meta=meta,
|
|
|
- menu=menu)
|
|
|
+ yield feapder.Request(data=data, meta=meta, menu=menu)
|
|
|
|
|
|
def download_midware(self, request):
|
|
|
+ request.url = 'http://www.cebpubservice.com/ctpsp_iiss/searchbusinesstypebeforedooraction/getStringMethod.do'
|
|
|
request.proxies = self._proxies
|
|
|
request.method = 'POST'
|
|
|
+ request.timeout = 5
|
|
|
request.headers = {
|
|
|
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
|
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,sq;q=0.7',
|
|
@@ -127,21 +142,6 @@ class ListPageSpider(feapder.AirSpider):
|
|
|
logger.error(f"{msg}--请求失败")
|
|
|
logger.exception(f'异常原因:{e}')
|
|
|
|
|
|
- @staticmethod
|
|
|
- def extract_address(region):
|
|
|
- if region:
|
|
|
- args = region.split(' ')
|
|
|
- if len(args) == 2:
|
|
|
- area, city = args
|
|
|
- elif len(args) == 1:
|
|
|
- area, city = args[0], ''
|
|
|
- else:
|
|
|
- area, city, *argi = args
|
|
|
- else:
|
|
|
- area, city = '全国', ''
|
|
|
- area, city = area.strip().replace('省', ''), city.strip()
|
|
|
- return area, city
|
|
|
-
|
|
|
def parse(self, request, response):
|
|
|
menu = request.menu
|
|
|
resp_json = response.json
|