123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051 |
- import json
- import sys
- import requests
- import re,execjs
- sys.path.append('/app/spiders/sword_feapder/FworkSpider')
- # from utils.cookie_pool import PageCookiePool
- from feapder.utils.webdriver import WebDriver
- from feapder.utils.log import log
- from untils.cookie_pool import PageCookiePool
class WebCookiePool(PageCookiePool):
    """Cookie pool that harvests cookies by loading a page in a browser.

    ``create_cookie`` opens ``page_url`` with a ``WebDriver`` and polls the
    browser's cookies until one named ``cookie_key`` shows up.
    """

    # Polling budget used by ``create_cookie``: at most MAX_WAIT_ATTEMPTS
    # checks, sleeping POLL_INTERVAL_SECONDS after each unsuccessful one.
    # Subclasses may override these.
    MAX_WAIT_ATTEMPTS = 30
    POLL_INTERVAL_SECONDS = 1

    def __init__(self, redis_key, page_url=None, cookie_key=None,
                 min_cookies=10000, must_contained_keys=(), keep_alive=False,
                 **kwargs):
        """Initialise the pool.

        Args:
            redis_key: Forwarded unchanged to ``PageCookiePool``.
            page_url: URL the browser loads in order to obtain cookies.
            cookie_key: Name of the cookie whose presence signals that the
                page has finished setting its cookies.
            min_cookies: Forwarded to ``PageCookiePool``.
            must_contained_keys: Forwarded to ``PageCookiePool``.
            keep_alive: Forwarded to ``PageCookiePool``.
            **kwargs: Forwarded to ``PageCookiePool`` and later passed as
                keyword arguments to ``WebDriver``.
        """
        # Forward the caller's values; previously hard-coded defaults were
        # passed here, so any non-default argument was silently ignored.
        super().__init__(
            redis_key,
            page_url=page_url,
            min_cookies=min_cookies,
            must_contained_keys=must_contained_keys,
            keep_alive=keep_alive,
            **kwargs,
        )
        self.page_url = page_url
        self.cookie_key = cookie_key
        self._kwargs = kwargs
        # Browser defaults, applied only when the caller did not choose.
        self._kwargs.setdefault("load_images", False)
        self._kwargs.setdefault("headless", True)
        self._kwargs.setdefault("driver_type", "FIREFOX")

    def create_cookie(self):
        """Load ``page_url`` in a browser and wait for ``cookie_key``.

        Returns:
            The browser's cookies once ``cookie_key`` is among them, or
            ``None`` if the key never appeared within the polling budget
            or an exception was raised (the exception is logged).
        """
        import time

        with WebDriver(**self._kwargs) as driver:
            try:
                driver.get(self.page_url)
                for _ in range(self.MAX_WAIT_ATTEMPTS):
                    # Return the very snapshot that contained the key
                    # instead of reading the cookies a second time.
                    cookies = driver.cookies
                    if self.cookie_key in cookies:
                        return cookies
                    time.sleep(self.POLL_INTERVAL_SECONDS)
                log.warning(
                    f"cookie {self.cookie_key!r} not found after "
                    f"{self.MAX_WAIT_ATTEMPTS} attempts: {self.page_url}"
                )
                return None
            except Exception as e:
                log.error(f"获取cookie失败,{e}")
                return None
if __name__ == '__main__':
    # Manual smoke test: make ten attempts, cycling round-robin through
    # three sample sites, each with the cookie name to wait for.
    demo_targets = (
        {
            "redis_key": 'gdcookie',
            "cookie_key": 'SUB',
            "page_url": "https://weibo.com/p/1005051203448454/home?from=page_100505_profile&wvr=6&mod=data&is_all=1#place",
        },
        {
            "redis_key": 'gd2cookie',
            "cookie_key": 'locale',
            "page_url": "https://www.jianshu.com/p/4c5bc85fc3fd",
        },
        {
            "redis_key": 'gd3cookie',
            "cookie_key": 'cna',
            "page_url": "https://docs-next.crawlab.cn/zh/guide/installation/docker.html#%E5%A4%96%E9%83%A8-mongodb",
        },
    )
    for attempt in range(10):
        print(f'开始第{attempt+1}次获取cookie')
        target = demo_targets[attempt % 3]
        WebCookiePool(**target).create_cookie()
|