# -*- coding: utf-8 -*-
"""
Created on 2025-05-26
---------
@summary: 浏览器 Cookie 池
---------
@author: Dzr
"""
from feapder.network import user_agent as user_agent_pool
from feapder.network.proxy_pool import SpringBoardProxyPool
from feapder.utils.log import log
from feapder.utils.webdriver import DrissionPageDriver

from untils.cookie_pool import PageCookiePool
  14. DRISSIONPAGE = dict(
  15. singleton_tab=True, # 一个标签页是否支持多例操作,True=单例;False=多例
  16. headless=False, # 是否为无头浏览器
  17. load_images=False, # 是否加载图片
  18. user_agent=None, # 字符串
  19. proxy=None, # xxx.xxx.xxx.xxx:xxxx
  20. timeout=15, # 请求超时时间,用于元素等待、alert 等待、WebPage的 s 模式连接等等
  21. retry=1, # 连接失败重试次数
  22. interval=0.5, # 连接失败重试间隔(秒)
  23. page_load=30, # 页面加载超时时间(秒)
  24. window_size=(1024, 800), # 窗口大小
  25. driver_type="chromium",
  26. load_mode="normal", # 网页加载策略, 可选值:"normal", "eager", "none"
  27. browser_path=None, # 浏览器可执行文件路径
  28. download_path=None, # 下载文件的路径
  29. custom_argument=[
  30. "--no-sandbox",
  31. "--ignore-certificate-errors"
  32. ]
  33. )
  34. class BrowserCookiePool(PageCookiePool):
  35. def __init__(self, redis_key, page_url, cookie_key, **kwargs):
  36. proxy_api = kwargs.pop("proxy_api", None)
  37. self._retry = kwargs.pop("retry", 3)
  38. self._interval = kwargs.pop("interval", 1.5)
  39. self._render_time = kwargs.pop("render_time", 3)
  40. self._proxies = kwargs.pop("proxies") # 仅支持字符串
  41. self._enable_proxy = kwargs.pop("enable_proxy", False)
  42. self._proxy = None
  43. if self._proxies is None and self._enable_proxy:
  44. self._proxy = SpringBoardProxyPool(proxy_api=proxy_api)
  45. DRISSIONPAGE["proxy"] = self._proxy.get_proxy()["http"]
  46. else:
  47. DRISSIONPAGE["proxy"] = self._proxies
  48. DRISSIONPAGE["user_agent"] = user_agent_pool.get("chrome")
  49. DRISSIONPAGE["load_images"] = kwargs.pop("load_images", False)
  50. super(BrowserCookiePool, self).__init__(redis_key, **kwargs)
  51. self.page_url = page_url
  52. self.cookie_key = cookie_key
  53. def create_cookie(self):
  54. nums = 0
  55. with DrissionPageDriver(**DRISSIONPAGE) as driver:
  56. while True:
  57. try:
  58. driver.tab.get(self.page_url,
  59. retry=DRISSIONPAGE["retry"],
  60. timeout=DRISSIONPAGE["timeout"],
  61. interval=DRISSIONPAGE["interval"])
  62. driver.wait_for_dom_load(timeout=self._render_time, raise_err=True)
  63. cookies = driver.tab.cookies().as_dict()
  64. if self.cookie_key in cookies.keys():
  65. return cookies
  66. driver.tab.wait(self._interval)
  67. nums += 1
  68. if nums >= self._retry:
  69. return
  70. except Exception as e:
  71. log.error(f"获取cookie失败,{e}")
  72. driver.tab.clear_cache()
  73. if self._enable_proxy:
  74. self._proxy.change_proxy(force=True)
  75. WebCookiePool = BrowserCookiePool