3
0
lizongze 2 жил өмнө
parent
commit
41a285646a

+ 90 - 0
FworkSpider/untils/jsl_5s.py

@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-08-08
+---------
+@summary: Generic template for sites protected by jsl + Chuangyu Cloud Shield (创宇云盾)
+---------
+@author: jsl, Chuangyu 5-second shield (创宇5秒盾)
+"""
+import re
+import sys
+import json
+import execjs
+import requests
+sys.path.append('/app/spiders/sword_feapder/FworkSpider')
+from untils.cookie_pool import PageCookiePool
+
+
+
+class DTCookiePool(PageCookiePool):
+    def __init__(self,redis_key,header,page_url=None,**kwargs):
+        super(DTCookiePool, self).__init__(redis_key,page_url=None,
+        min_cookies=10000,must_contained_keys=(),keep_alive=False,**kwargs)
+        self.headers=header
+        self.page_url = page_url
+        self.proxies = kwargs.get('proxies') or False
+
+    def create_cookie(self):
+
+        proxies = self.proxies
+
+        session = requests.Session()
+        session.proxies = proxies
+        start_url = self.page_url
+        res = session.get(start_url, headers=self.headers,timeout=120, verify=False)
+        js_func = "".join(re.findall("document.cookie=(.*?)location.href", res.text))
+        js_func = 'function sd() { return ' + js_func + "}"
+        ctx = execjs.compile(js_func)
+        sss = ctx.call("sd")
+        cookie = {}
+        for temp, index in res.cookies.get_dict().items():
+            cookie[temp] = index
+
+        for item in sss.split(";"):
+            if '=' in item:
+                cookie[item.split("=")[0]] = item.split("=")[-1]
+
+        res = session.get(start_url, cookies=cookie,headers=self.headers,timeout=120,verify=False)
+        html_str = res.content.decode()
+        if "<!DOCTYPE html>" in html_str:
+            html_str = re.sub("<!DOCTYPE html>[\s\S]*?</html>", "", html_str.strip(),re.S)
+        js_do_data = "".join(re.findall('};go\((.*?)\)', html_str))
+        js_func = re.sub("<(/*?)script>", "", html_str)
+        location = re.compile('location(.*?)}}else')
+        location2 = re.compile('location(.*?)}else')
+        setTimeout = re.compile('0x5dc;}}(.*?)setTimeout,function\(\)\{')
+        setTimeout2 = re.compile('0x5dc;}(.*?)setTimeout\(function\(\)\{')
+        gox = re.compile('};go(.*?)\)')
+        js_func = re.sub(location, "}}else", js_func)
+        js_func = re.sub(location2, "}else", js_func)
+        js_func = re.sub(setTimeout, "0x5dc;}}", js_func)
+        js_func = re.sub(setTimeout2, "0x5dc;}", js_func)
+        js_func = re.sub(gox, "return document['cookie']\n};", js_func)
+
+        js_func = '''const jsdom = require("jsdom");
+                    const {JSDOM} = jsdom;
+                    const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`,
+                                        {
+                                            url: "https://example.org/",
+                                            referrer: "https://example.com/",
+                                            contentType: "text/html",
+                                        });
+                    window = dom.window;
+                    document = window.document;
+                    location = window.location;
+                    ''' + js_func
+        ctx = execjs.compile(js_func)
+        # with open('ex_js.js', 'w+', encoding='utf-8') as f:
+        #     f.write(js_func)
+        try:
+            ss = ctx.call("go", json.loads(js_do_data))
+            for item in ss.split(";"):
+                if '=' in item:
+                    session.cookies.setdefault(item.split("=")[0], item.split("=")[-1])
+            session.get(start_url,headers=self.headers,timeout=120,verify=False)
+            cookies = requests.utils.dict_from_cookiejar(session.cookies)
+            return cookies
+        except Exception as e:
+            pass
+
+