jsl_clearance_s.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2023-04-24
  4. ---------
  5. @summary: jsl通用模板
  6. ---------
  7. @author: jsl
  8. """
  9. import json
  10. import re
  11. import execjs
  12. import requests
  13. from untils.cookie_pool import PageCookiePool
  14. class DTCookiePool(PageCookiePool):
  15. def __init__(self, redis_key, header, page_url=None, **kwargs):
  16. super(DTCookiePool, self).__init__(redis_key, page_url=None,
  17. min_cookies=10000,
  18. must_contained_keys=(),
  19. keep_alive=False, **kwargs)
  20. self.headers = header
  21. self.page_url = page_url
  22. self.proxies = kwargs.get('proxies') or False
  23. def create_cookie(self):
  24. session = requests.Session()
  25. session.proxies = self.proxies
  26. start_url = self.page_url
  27. res = session.get(start_url, headers=self.headers, timeout=120, verify=False)
  28. js_func = "".join(re.findall("document.cookie=(.*?)location.href", res.text))
  29. js_func = 'function sd() { return ' + js_func + "}"
  30. ctx = execjs.compile(js_func)
  31. sss = ctx.call("sd")
  32. cookie = {}
  33. for temp, index in res.cookies.get_dict().items():
  34. cookie[temp] = index
  35. for item in sss.split(";"):
  36. if '=' in item:
  37. cookie[item.split("=")[0]] = item.split("=")[-1]
  38. res = session.get(start_url, cookies=cookie,headers=self.headers,timeout=120,verify=False)
  39. html_str = res.content.decode()
  40. js_do_data = "".join(re.findall('};go\((.*?)\)', html_str))
  41. js_func = re.sub("<(/*?)script>", "", html_str)
  42. location = re.compile('location(.*?)}}else')
  43. location2 = re.compile('location(.*?)}else')
  44. setTimeout = re.compile('setTimeout(.*?)document')
  45. gox = re.compile('};go(.*?)\)')
  46. js_func = re.sub(location, "}}else", js_func)
  47. js_func = re.sub(location2, "}else", js_func)
  48. js_func = re.sub(setTimeout, "document", js_func)
  49. js_func = re.sub('0x5dc;}(.*?)\(document', "0x5dc;}document", js_func)
  50. js_func = re.sub(gox, "return document['cookie']\n};", js_func)
  51. js_func = '''const jsdom = require("jsdom");
  52. const {JSDOM} = jsdom;
  53. const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`,
  54. {
  55. url: "https://example.org/",
  56. referrer: "https://example.com/",
  57. contentType: "text/html",
  58. });
  59. window = dom.window;
  60. document = window.document;
  61. location = window.location;
  62. ''' + js_func
  63. ctx = execjs.compile(js_func)
  64. # with open('wzjyjt_xxgg_pm.js', 'w+', encoding='utf-8') as f:
  65. # f.write(js_func)
  66. try:
  67. ss = ctx.call("go", json.loads(js_do_data))
  68. for item in ss.split(";"):
  69. if '=' in item:
  70. session.cookies.setdefault(item.split("=")[0], item.split("=")[-1])
  71. session.get(start_url,headers=self.headers,timeout=120,verify=False)
  72. cookies = requests.utils.dict_from_cookiejar(session.cookies)
  73. return cookies
  74. except Exception as e:
  75. pass