# -*- coding: utf-8 -*-
"""
Created on 2023-12-25
---------
@summary: Generic template for jsl + Chuangyu Cloud Shield
---------
@author: jsl, Chuangyu 5-second shield
"""
import json
import re

import execjs
import requests
from feapder.network.cookie_pool import PageCookiePool

# JavaScript prepended to the cleaned page script so that `window`,
# `document` and `location` exist when it is executed through execjs.
# NOTE(review): the template literal handed to JSDOM ("Hello world" between
# blank lines) looks like it lost its HTML markup somewhere upstream; it is
# reproduced exactly as found -- confirm against the original file.
_JSDOM_PREAMBLE = (
    'const jsdom = require("jsdom"); '
    'const {JSDOM} = jsdom; '
    'const dom = new JSDOM(`\n\nHello world\n\n`, { '
    'url: "https://example.org/", '
    'referrer: "https://example.com/", '
    'contentType: "text/html", '
    '}); '
    'window = dom.window; '
    'document = window.document; '
    'location = window.location; '
)

# Expression assigned to document.cookie by the first challenge page.
_FIRST_COOKIE_EXPR = re.compile("document.cookie=(.*?)location.href")
# Argument passed to go(...) at the end of the second challenge page.
_GO_ARGUMENT = re.compile(r'};go\((.*?)\)')
# Opening / closing script tags wrapping the second challenge page.
_SCRIPT_TAG = re.compile("<(/*?)script>")
# Fragments cut out of the second challenge script before it is executed.
_LOCATION_NESTED = re.compile('location(.*?)}}else')
_LOCATION_FLAT = re.compile('location(.*?)}else')
_TIMEOUT_NESTED = re.compile(r'0x5dc;}}(.*?)setTimeout,function\(\)\{')
_TIMEOUT_FLAT = re.compile(r'0x5dc;}(.*?)setTimeout\(function\(\)\{')
# Trailing go(...) invocation, replaced by a `return document['cookie']`.
_GO_CALL = re.compile(r'};go(.*?)\)')


def _iter_cookie_pairs(cookie_string):
    """Yield ``(name, value)`` for every ``name=value`` item of *cookie_string*.

    Items are separated by ``;``. Each item is split on its FIRST ``=`` only,
    so values that themselves contain ``=`` are kept whole, and surrounding
    whitespace is trimmed from both name and value. Items without ``=`` are
    skipped.
    """
    for item in cookie_string.split(";"):
        name, separator, value = item.partition("=")
        if separator:
            yield name.strip(), value.strip()


class DTCookiePool(PageCookiePool):
    """Cookie pool that builds cookies by executing the page's own JavaScript.

    ``create_cookie`` requests ``page_url`` three times with one
    ``requests.Session``, running the JavaScript found in the first two
    responses through execjs to obtain the cookies needed by the next request.
    """

    def __init__(self, redis_key, header, page_url=None, cwd=None, save_js=False, **kwargs):
        """Store the request settings used by ``create_cookie``.

        Args:
            redis_key: Forwarded to ``PageCookiePool.__init__``.
            header: Headers sent with every request.
            page_url: URL requested to obtain the cookies.
            cwd: Working directory passed to ``execjs.compile`` for the
                second script.
            save_js: When true, the fetched page and the cleaned script are
                written to ``./source_code.js`` and ``./clean_code.js``.
            **kwargs: Forwarded to ``PageCookiePool.__init__``; the
                ``proxies`` entry, if any, is also used for the requests.
        """
        # The parent is deliberately given page_url=None; the real URL is
        # stored on the instance afterwards.
        super().__init__(
            redis_key,
            page_url=None,
            min_cookies=10000,
            must_contained_keys=(),
            keep_alive=False,
            **kwargs
        )
        self.headers = header
        self.page_url = page_url
        self.proxies = kwargs.get('proxies') or False
        self.cwd = cwd
        self.is_save_js = save_js

    def create_cookie(self):
        """Run the cookie challenge and return the resulting cookies.

        Returns:
            dict: The session's cookies after the third request, or ``{}``
            when any step raises (the error is printed, not propagated).
        """
        try:
            # The context manager closes the session's pooled connections
            # even when a step below raises.
            with requests.Session() as session:
                session.proxies = self.proxies
                start_url = self.page_url

                # Request 1: the page assigns an expression to
                # document.cookie; evaluate that expression.
                res = session.get(start_url, headers=self.headers, timeout=120, verify=False)
                cookie_expr = "".join(_FIRST_COOKIE_EXPR.findall(res.text))
                ctx = execjs.compile('function sd() { return ' + cookie_expr + "}")
                first_cookie_string = ctx.call("sd")

                # Response cookies first, then the JS-computed ones on top.
                cookie = dict(res.cookies.get_dict())
                cookie.update(_iter_cookie_pairs(first_cookie_string))

                # Request 2: returns the second script.
                res = session.get(start_url, cookies=cookie, headers=self.headers, timeout=120, verify=False)
                html_str = res.content.decode()

                # NOTE(review): as written, `"" in html_str` is always true,
                # the pattern contains no literal delimiters, and `re.S` sits
                # in re.sub's positional `count` slot rather than `flags`.
                # Both literals look like they lost markup text (e.g. an HTML
                # tag pair) before reaching this file. The statement is kept
                # unchanged until the intended pattern is confirmed.
                if "" in html_str:
                    html_str = re.sub(r"[\s\S]*?", "", html_str.strip(), re.S)

                if self.is_save_js:
                    with open('./source_code.js', 'w+', encoding='utf-8') as f:
                        f.write(html_str)

                # JSON argument the page would pass to go(...).
                js_do_data = "".join(_GO_ARGUMENT.findall(html_str))

                # Clean the script: drop the script tags, cut the location
                # and setTimeout fragments, and make go() return the cookie.
                # The substitutions are order-dependent.
                js_func = _SCRIPT_TAG.sub("", html_str)
                js_func = _LOCATION_NESTED.sub("}}else", js_func)
                js_func = _LOCATION_FLAT.sub("}else", js_func)
                js_func = _TIMEOUT_NESTED.sub("0x5dc;}}", js_func)
                js_func = _TIMEOUT_FLAT.sub("0x5dc;}", js_func)
                js_func = _GO_CALL.sub("return document['cookie']\n};", js_func)
                js_func = _JSDOM_PREAMBLE + js_func

                ctx = execjs.compile(js_func, cwd=self.cwd)
                if self.is_save_js:
                    with open('./clean_code.js', 'w+', encoding='utf-8') as f:
                        f.write(js_func)

                second_cookie_string = ctx.call("go", json.loads(js_do_data))
                # setdefault: cookies already present in the session are
                # not overwritten.
                for name, value in _iter_cookie_pairs(second_cookie_string):
                    session.cookies.setdefault(name, value)

                # Request 3: sent with the completed cookie set; the
                # response body itself is not used.
                session.get(start_url, headers=self.headers, timeout=120, verify=False)
                return requests.utils.dict_from_cookiejar(session.cookies)
        except Exception as e:
            # Best-effort boundary: report the failure ("cookie production
            # error") and hand back an empty cookie dict.
            print("cookie生产错误:", e)
            return {}