Browse Source

details_webcookie

maguopeng 3 năm trước
mục cha
commit
4ca1c80041
2 tập tin đã thay đổi với 59 bổ sung và 11 xóa
  1. 9 11
      Details/details_webcookie.py
  2. 50 0
      FworkSpider/untils/WebCookiePool.py

+ 9 - 11
Details/details_webcookie.py

@@ -32,7 +32,7 @@ class Details(feapder.Spider):
 
     def start_requests(self):
         while True:
-            data_lsit = self.to_db.find(self.db_name,{"parser_name":"details_webcookie"},sort={"date":-1},limit=1)
+            data_lsit = self.to_db.find(self.db_name,{"parser_name":"details_webcookie"},sort={"date":-1},limit=100)
             for item in data_lsit:
                 request_params = item.get("request_params")
 
@@ -120,7 +120,7 @@ class Details(feapder.Spider):
             cookie_pool = WebCookiePool(redis_key=key, page_url=page_url, cookie_key=cookie_key)
             cookie_pool.del_cookie(request.cookies)
             yield request
-        if response.code in (request.down_mid.get("code")):
+        elif response.status_code in request.down_mid.get("code"):
             '''失败处理,response——code不为正确的状态码时,删除当前cookie并重新生产cookie'''
             down_mid = copy.copy(request.down_mid)
             key = down_mid.get("key")
@@ -129,15 +129,13 @@ class Details(feapder.Spider):
             cookie_pool = WebCookiePool(redis_key=key, page_url=page_url, cookie_key=cookie_key)
             cookie_pool.del_cookie(request.cookies)
             yield request
-        items = request.item
-        list_item = DataBakItem()
-        for key in items:
-            list_item.__setitem__(key,items[key])
-        html = ''
-        exec(request.deal_detail)
-
-        list_item.contenthtml = html
-        yield list_item
+        else:
+            items = request.item
+            list_item = DataBakItem()
+            for key in items:
+                list_item.__setitem__(key,items[key])
+            exec(request.deal_detail)
+            yield list_item
 
     def failed_request(self, request, response):
         '''请求、解析次数超过上限后,将原信息重新保存至mongo,并修改failed字段'''

+ 50 - 0
FworkSpider/untils/WebCookiePool.py

@@ -0,0 +1,50 @@
+import json
+import sys
+import requests
+import re,execjs
+
+
+sys.path.append('C:/Users/topnet/Desktop/crawlab_feader/FworkSpider')
+sys.path.append('/app/spiders/sword_feapder/FworkSpider')
+# from utils.cookie_pool import PageCookiePool
+from feapder.utils.webdriver import WebDriverPool
+from feapder.utils.log import log
+from FworkSpider.untils.cookie_pool import PageCookiePool
+
+class WebCookiePool(PageCookiePool):
+    def __init__(self, redis_key, page_url=None,cookie_key=None,
+                 min_cookies=10000, must_contained_keys=(), keep_alive=False, **kwargs):
+        super(WebCookiePool, self).__init__(redis_key, page_url=None,
+                                           min_cookies=10000, must_contained_keys=(), keep_alive=False, **kwargs)
+        self.page_url = page_url
+        self.cookie_key = cookie_key
+        self._kwargs = kwargs
+        self._kwargs.setdefault("load_images", False)
+        self._kwargs.setdefault("headless", True)
+        self._kwargs.setdefault("executable_path", "D:\\geckodriver.exe")
+        self._kwargs.setdefault("driver_type", "FIREFOX")
+
+
+    def create_cookie(self):
+        with WebDriverPool(**self._kwargs).get() as driver_pool:
+            # driver = driver_pool.driver
+            driver_pool.get(self.page_url)
+            import time
+            try:
+                count = 0
+                while self.cookie_key not in driver_pool.cookies.keys():
+                    time.sleep(1)
+                    count+=1
+                    if count>=30:
+                        # driver_pool.close()
+                        return
+                cookies = driver_pool.cookies
+                # driver_pool.close()
+                return cookies
+                # driver_pool.close()
+            except Exception as e:
+                log.error(f"获取cookie失败,{e}")
+
+
+if __name__ == '__main__':
+    WebCookiePool(redis_key='gdcookie',cookie_key='SUB',page_url="https://weibo.com/p/1005051203448454/home?from=page_100505_profile&wvr=6&mod=data&is_all=1#place").create_cookie()