# WebCookiePool.py
# -*- coding: utf-8 -*-
"""
Created on 2025-05-26
---------
@summary: Browser cookie pool
---------
@author: Dzr
"""
  9. from feapder.network import user_agent as user_agent_pool
  10. from feapder.network.proxy_pool import SpringBoardProxyPool
  11. from feapder.utils.log import log
  12. from feapder.utils.webdriver import DrissionPageDriver
  13. from untils.cookie_pool import PageCookiePool
  14. DRISSIONPAGE = dict(
  15. singleton_tab=True, # 一个标签页是否支持多例操作,True=单例;False=多例
  16. headless=True, # 是否为无头浏览器
  17. load_images=False, # 是否加载图片
  18. user_agent=None, # 字符串
  19. proxy=None, # xxx.xxx.xxx.xxx:xxxx
  20. timeout=15, # 请求超时时间,用于元素等待、alert 等待、WebPage的 s 模式连接等等
  21. retry=1, # 连接失败重试次数
  22. interval=0.5, # 连接失败重试间隔(秒)
  23. page_load=30, # 页面加载超时时间(秒)
  24. window_size=(1024, 800), # 窗口大小
  25. driver_type="chromium",
  26. load_mode="normal", # 网页加载策略, 可选值:"normal", "eager", "none"
  27. browser_path=None, # 浏览器可执行文件路径
  28. download_path=None, # 下载文件的路径
  29. custom_argument=[
  30. "--no-sandbox",
  31. "--ignore-certificate-errors"
  32. ]
  33. )
  34. class BrowserCookiePool(PageCookiePool):
  35. def __init__(self, redis_key, page_url, cookie_key, **kwargs):
  36. self._retry = kwargs.pop("retry", 3)
  37. self._interval = kwargs.pop("interval", 1.5)
  38. self._render_time = kwargs.pop("render_time", 3)
  39. self._enable_proxy = kwargs.pop("enable_proxy", False)
  40. self._proxies = kwargs.pop("proxies", None) # 仅支持字符串
  41. self._proxy = None
  42. proxy_api = kwargs.pop("proxy_api", None)
  43. if self._enable_proxy and self._proxies is None:
  44. self._proxy = SpringBoardProxyPool(proxy_api=proxy_api)
  45. DRISSIONPAGE["proxy"] = self._proxy.get_proxy()["http"]
  46. else:
  47. DRISSIONPAGE["proxy"] = self._proxies
  48. DRISSIONPAGE["user_agent"] = kwargs.pop("user_agent", None) or user_agent_pool.get("chrome")
  49. DRISSIONPAGE["load_images"] = kwargs.pop("load_images", False)
  50. super(BrowserCookiePool, self).__init__(redis_key, **kwargs)
  51. self.page_url = page_url
  52. self.cookie_key = cookie_key
  53. def proxies(self, proxy):
  54. DRISSIONPAGE["proxy"] = proxy
  55. return self
  56. def user_agent(self, ua):
  57. DRISSIONPAGE["user_agent"] = ua
  58. return self
  59. def headless(self, on_off=True):
  60. DRISSIONPAGE["headless"] = on_off
  61. return self
  62. def create_cookie(self):
  63. with DrissionPageDriver(**DRISSIONPAGE) as driver:
  64. for _ in range(self._retry):
  65. try:
  66. driver.tab.get(self.page_url,
  67. retry=DRISSIONPAGE["retry"],
  68. timeout=DRISSIONPAGE["timeout"],
  69. interval=DRISSIONPAGE["interval"])
  70. driver.wait_for_dom_load(timeout=self._render_time, raise_err=True)
  71. cookies = driver.tab.cookies().as_dict()
  72. if self.cookie_key in cookies.keys():
  73. return cookies
  74. driver.tab.wait(self._interval)
  75. except Exception as e:
  76. log.error(f"获取cookie失败,{e}")
  77. driver.tab.clear_cache()
  78. if self._enable_proxy:
  79. self._proxy.change_proxy(force=True)
# Alias matching the module's filename — presumably kept so existing callers
# importing `WebCookiePool` keep working; verify before removing.
WebCookiePool = BrowserCookiePool