|
@@ -19,44 +19,47 @@ headers = {
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
|
|
"roleId;": ""
|
|
|
}
|
|
|
+cookie_pool = WebCookiePool(redis_key="zgydcgyzbw_ck",
|
|
|
+ page_url="http://www.chinaunicombidding.cn/bidInformation",
|
|
|
+ cookie_key="jqmEwVYRfTEJT",
|
|
|
+ headless=True)
|
|
|
+
|
|
|
+
|
|
|
+class Spider(feapder.BiddingDetailSpider):
|
|
|
|
|
|
-class Details(feapder.BiddingDetailSpider):
|
|
|
- ct = 0
|
|
|
- cookie_pool = WebCookiePool(redis_key="zgydcgyzbw_ck", page_url="http://www.chinaunicombidding.cn/bidInformation",
|
|
|
- cookie_key="jqmEwVYRfTEJT", driver_type="FIREFOX",
|
|
|
- usages_local_driver=True,headless=True)
|
|
|
def start_requests(self):
|
|
|
data_lsit = self.get_tasks_by_rabbitmq(limit=50)
|
|
|
for item in data_lsit:
|
|
|
request_params = item.get("request_params")
|
|
|
- timeout = request_params.get('timeout', 10)
|
|
|
- request_params.pop('timeout', None)
|
|
|
-
|
|
|
- yield feapder.Request(url=item.get("parse_url"), item=item,
|
|
|
- deal_detail=item.get("deal_detail"), callback=eval(item.get("parse")),
|
|
|
- **request_params, timeout=timeout, proxies=False)
|
|
|
+ timeout = request_params.pop('timeout', 10)
|
|
|
+ yield feapder.Request(url=item.get("parse_url"),
|
|
|
+ proxies=False,
|
|
|
+ timeout=timeout,
|
|
|
+ callback=eval(item.get("parse")),
|
|
|
+ item=item,
|
|
|
+ deal_detail=item.get("deal_detail"),
|
|
|
+ **request_params)
|
|
|
|
|
|
def download_midware(self, request):
|
|
|
request.headers = headers
|
|
|
- request.cookies = self.cookie_pool.get_cookie()
|
|
|
+ request.cookies = cookie_pool.get_cookie()
|
|
|
|
|
|
- def detail_get(self, request, response):
|
|
|
- if self.ct > 5:
|
|
|
- return
|
|
|
+ def validate(self, request, response):
|
|
|
if response.status_code != 200:
|
|
|
- self.ct += 1
|
|
|
- self.cookie_pool.del_cookie(self.cookie_pool.get_cookie())
|
|
|
- yield request
|
|
|
- else:
|
|
|
- self.ct = 0
|
|
|
- items = request.item
|
|
|
- list_item = DataBakItem(**items)
|
|
|
+ raise ConnectionRefusedError
|
|
|
+
|
|
|
+ def detail_get(self, request, response):
|
|
|
+ items = request.item
|
|
|
+ data_item = DataBakItem(**items)
|
|
|
|
|
|
- html = response.json.get('data').get('annoText')
|
|
|
- list_item.contenthtml = html
|
|
|
+ html = response.json.get('data').get('annoText')
|
|
|
+ data_item.contenthtml = html
|
|
|
+ yield data_item
|
|
|
|
|
|
- yield list_item
|
|
|
+ def exception_request(self, request, response):
|
|
|
+ cookie_pool.del_cookie(cookie_pool.get_cookie())
|
|
|
+ yield request
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- Details(redis_key="lzz:zgydcgyzbw_cgxqgs").start()
|
|
|
+ Spider(redis_key="lzz:zgydcgyzbw_cgxqgs").start()
|