Procházet zdrojové kódy

代码问题处理

dzr před 1 měsícem
rodič
revize
eef5c978f4
Změněn 1 soubor: 29 přidání a 26 odebrání
  1. 29 26
      a_zgltcgyzbw_cgxq_new/招标信息-详情页.py

+ 29 - 26
a_zgltcgyzbw_cgxq_new/招标信息-详情页.py

@@ -19,44 +19,47 @@ headers = {
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
     "roleId;": ""
 }
+cookie_pool = WebCookiePool(redis_key="zgydcgyzbw_ck",
+                            page_url="http://www.chinaunicombidding.cn/bidInformation",
+                            cookie_key="jqmEwVYRfTEJT",
+                            headless=True)
+
+
+class Spider(feapder.BiddingDetailSpider):
 
-class Details(feapder.BiddingDetailSpider):
-    ct = 0
-    cookie_pool = WebCookiePool(redis_key="zgydcgyzbw_ck", page_url="http://www.chinaunicombidding.cn/bidInformation",
-                                         cookie_key="jqmEwVYRfTEJT", driver_type="FIREFOX",
-                                         usages_local_driver=True,headless=True)
     def start_requests(self):
         data_lsit = self.get_tasks_by_rabbitmq(limit=50)
         for item in data_lsit:
             request_params = item.get("request_params")
-            timeout = request_params.get('timeout', 10)
-            request_params.pop('timeout', None)
-
-            yield feapder.Request(url=item.get("parse_url"), item=item,
-                                  deal_detail=item.get("deal_detail"), callback=eval(item.get("parse")),
-                                  **request_params, timeout=timeout, proxies=False)
+            timeout = request_params.pop('timeout', 10)
+            yield feapder.Request(url=item.get("parse_url"),
+                                  proxies=False,
+                                  timeout=timeout,
+                                  callback=eval(item.get("parse")),
+                                  item=item,
+                                  deal_detail=item.get("deal_detail"),
+                                  **request_params)
 
     def download_midware(self, request):
         request.headers = headers
-        request.cookies = self.cookie_pool.get_cookie()
+        request.cookies = cookie_pool.get_cookie()
 
-    def detail_get(self, request, response):
-        if self.ct > 5:
-            return
+    def validate(self, request, response):
         if response.status_code != 200:
-            self.ct += 1
-            self.cookie_pool.del_cookie(self.cookie_pool.get_cookie())
-            yield request
-        else:
-            self.ct = 0
-            items = request.item
-            list_item = DataBakItem(**items)
+            raise ConnectionRefusedError
+
+    def detail_get(self, request, response):
+        items = request.item
+        data_item = DataBakItem(**items)
 
-            html = response.json.get('data').get('annoText')
-            list_item.contenthtml = html
+        html = response.json.get('data').get('annoText')
+        data_item.contenthtml = html
+        yield data_item
 
-            yield list_item
+    def exception_request(self, request, response):
+        cookie_pool.del_cookie(cookie_pool.get_cookie())
+        yield request
 
 
 if __name__ == "__main__":
-    Details(redis_key="lzz:zgydcgyzbw_cgxqgs").start()
+    Spider(redis_key="lzz:zgydcgyzbw_cgxqgs").start()