123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- # -*- coding: utf-8 -*-
- """
- Created on 2023-12-25
- ---------
- @summary: jsl+创宇云盾 通用模板
- ---------
- @author: jsl、创宇5秒盾
- """
- import json
- import re
- import execjs
- import requests
- from feapder.network.cookie_pool import PageCookiePool
class DTCookiePool(PageCookiePool):
    """Cookie pool that solves a two-stage JavaScript cookie challenge.

    Per the module header this is a generic template for the "jsl" and
    Chuangyu shield ("5-second shield") protections.  The pool fetches
    ``page_url`` with ``requests``, evaluates the challenge scripts returned
    by the server through ``execjs`` and returns the resulting cookies.

    Args:
        redis_key: Forwarded unchanged to ``PageCookiePool``.
        header: Request headers sent with every HTTP request.
        page_url: URL protected by the challenge.
        cwd: Working directory handed to ``execjs.compile`` for the
            second-stage script (that script calls ``require("jsdom")``).
        save_js: When true, dump the raw and the cleaned second-stage script
            to ``./source_code.js`` and ``./clean_code.js`` for debugging.
        **kwargs: Forwarded to ``PageCookiePool``; ``proxies`` is additionally
            read here and used for the ``requests`` session.
    """

    # Timeout, in seconds, applied to every HTTP request.
    _REQUEST_TIMEOUT = 120

    # Stage 1: the expression assigned to ``document.cookie``.
    _STAGE1_COOKIE_RE = re.compile(r"document\.cookie=(.*?)location\.href")
    # Stage 2: a complete HTML document embedded in the response body.
    _HTML_DOC_RE = re.compile(r"<!DOCTYPE html>[\s\S]*?</html>")
    # Stage 2: the JSON argument of the trailing ``go(...)`` invocation.
    _GO_ARGS_RE = re.compile(r"};go\((.*?)\)")
    # Stage 2: rewrite patterns, applied in exactly this order by
    # ``_build_stage2_script``.
    _SCRIPT_TAG_RE = re.compile(r"<(/*?)script>")
    _LOCATION_RE = re.compile(r"location(.*?)}}else")
    _LOCATION2_RE = re.compile(r"location(.*?)}else")
    _SET_TIMEOUT_RE = re.compile(r"0x5dc;}}(.*?)setTimeout,function\(\)\{")
    _SET_TIMEOUT2_RE = re.compile(r"0x5dc;}(.*?)setTimeout\(function\(\)\{")
    _GO_CALL_RE = re.compile(r"};go(.*?)\)")

    # Node.js prelude giving the challenge script ``window``, ``document``
    # and ``location`` globals backed by jsdom.
    _JSDOM_PRELUDE = (
        'const jsdom = require("jsdom");\n'
        "const {JSDOM} = jsdom;\n"
        "const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`,\n"
        "{\n"
        'url: "https://example.org/",\n'
        'referrer: "https://example.com/",\n'
        'contentType: "text/html",\n'
        "});\n"
        "window = dom.window;\n"
        "document = window.document;\n"
        "location = window.location;\n"
    )

    def __init__(self, redis_key, header, page_url=None, cwd=None, save_js=False, **kwargs):
        # ``page_url`` is deliberately passed to the base class as None and
        # kept on this instance instead.
        # NOTE(review): presumably this keeps the base class from using the
        # URL for its own cookie flow -- confirm against PageCookiePool.
        super().__init__(
            redis_key,
            page_url=None,
            min_cookies=10000,
            must_contained_keys=(),
            keep_alive=False,
            **kwargs
        )
        self.headers = header
        self.page_url = page_url
        self.proxies = kwargs.get('proxies') or False
        self.cwd = cwd
        self.is_save_js = save_js

    @staticmethod
    def _parse_cookie_pairs(cookie_string):
        """Split a ``name=value;name=value`` string into ``(name, value)`` pairs.

        Only the first ``=`` separates name from value, so values that contain
        ``=`` themselves are preserved.  Segments without ``=`` or with an
        empty name are skipped.  Every ``name=value`` segment is returned,
        including cookie attributes such as ``path``.
        """
        pairs = []
        for segment in (cookie_string or "").split(";"):
            name, separator, value = segment.partition("=")
            name = name.strip()
            if separator and name:
                pairs.append((name, value.strip()))
        return pairs

    def _fetch(self, session, cookies=None):
        """GET ``self.page_url`` through ``session`` with the pool's headers."""
        return session.get(
            self.page_url,
            cookies=cookies,
            headers=self.headers,
            timeout=self._REQUEST_TIMEOUT,
            verify=False,
        )

    def _dump_js(self, path, content):
        """Write ``content`` to ``path`` when JS dumping is enabled."""
        if self.is_save_js:
            with open(path, "w", encoding="utf-8") as f:
                f.write(content)

    def _build_stage2_script(self, page):
        """Turn the second-stage response into a script exposing ``go``.

        The rewrites remove the ``<script>`` tags, the ``location`` redirects
        and the ``setTimeout`` wrappers following ``0x5dc`` (1500), and
        replace the trailing ``go(...)`` invocation with a
        ``return document['cookie']`` statement, so that calling ``go`` from
        Python yields the cookie string instead of navigating.  The order of
        the substitutions is significant.
        """
        script = self._SCRIPT_TAG_RE.sub("", page)
        script = self._LOCATION_RE.sub("}}else", script)
        script = self._LOCATION2_RE.sub("}else", script)
        script = self._SET_TIMEOUT_RE.sub("0x5dc;}}", script)
        script = self._SET_TIMEOUT2_RE.sub("0x5dc;}", script)
        script = self._GO_CALL_RE.sub("return document['cookie']\n};", script)
        return self._JSDOM_PRELUDE + script

    def create_cookie(self):
        """Solve the challenge for ``self.page_url`` and return its cookies.

        Returns:
            dict: Cookie names mapped to values, taken from the session after
            the final request; an empty dict when any step fails (the error
            is printed, not raised).
        """
        try:
            # The context manager releases pooled connections when done.
            with requests.Session() as session:
                if self.proxies:
                    session.proxies = self.proxies

                # Stage 1: the first response assigns a JS expression to
                # document.cookie; evaluate that expression to get the cookie.
                response = self._fetch(session)
                cookie_expr = "".join(self._STAGE1_COOKIE_RE.findall(response.text))
                if not cookie_expr:
                    raise ValueError("first-stage challenge script not found in response")
                stage1_ctx = execjs.compile("function sd() { return " + cookie_expr + "}")
                stage1_cookies = dict(response.cookies.get_dict())
                stage1_cookies.update(self._parse_cookie_pairs(stage1_ctx.call("sd")))

                # Stage 2: with the first cookie the server answers with a
                # script that computes the final cookie in ``go(<json>)``.
                response = self._fetch(session, cookies=stage1_cookies)
                page = response.content.decode()
                if "<!DOCTYPE html>" in page:
                    # Drop embedded HTML documents so only the script remains.
                    # No count/flags argument: the pattern already spans
                    # newlines via [\s\S].
                    page = self._HTML_DOC_RE.sub("", page.strip())
                self._dump_js('./source_code.js', page)

                go_args = "".join(self._GO_ARGS_RE.findall(page))
                if not go_args:
                    raise ValueError("second-stage go(...) arguments not found in response")
                script = self._build_stage2_script(page)
                self._dump_js('./clean_code.js', script)

                stage2_ctx = execjs.compile(script, cwd=self.cwd)
                stage2_cookie = stage2_ctx.call("go", json.loads(go_args))
                for name, value in self._parse_cookie_pairs(stage2_cookie):
                    # setdefault: cookies already set by the server win.
                    session.cookies.setdefault(name, value)

                # Final request with the solved cookies, so that any cookies
                # the server sets in response end up in the session jar too.
                self._fetch(session)
                return requests.utils.dict_from_cookiejar(session.cookies)
        except Exception as e:
            # Best-effort by design: report the failure and return no cookies.
            print("cookie生产错误:", e)
            return {}
|