瀏覽代碼

jsl_5s 优化

lizongze 1 年之前
父節點
當前提交
3317378e5e
共有 2 個文件被更改,包括 17 次插入和 6 次删除
  1. + 15 - 5
      FworkSpider/crawl_func/jsl_5s.py
  2. + 2 - 1
      FworkSpider/untils/tools.py

+ 15 - 5
FworkSpider/crawl_func/jsl_5s.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2023-08-08
+Created on 2023-12-25
 ---------
 @summary: jsl+创宇云盾 通用模板
 ---------
@@ -24,6 +24,9 @@ class DTCookiePool(PageCookiePool):
         self.headers = header
         self.page_url = page_url
         self.proxies = kwargs.get('proxies') or False
+        self.cwd = kwargs.get('cwd') or None
+        self.is_save_js = kwargs.get('is_save_js') or False
+
 
     def create_cookie(self):
 
@@ -49,6 +52,11 @@ class DTCookiePool(PageCookiePool):
             html_str = res.content.decode()
             if "<!DOCTYPE html>" in html_str:
                 html_str = re.sub("<!DOCTYPE html>[\s\S]*?</html>", "", html_str.strip(),re.S)
+
+            if self.is_save_js:
+                with open('./source_code.js', 'w+', encoding='utf-8') as f:
+                    f.write(html_str)
+
             js_do_data = "".join(re.findall('};go\((.*?)\)', html_str))
             js_func = re.sub("<(/*?)script>", "", html_str)
             location = re.compile('location(.*?)}}else')
@@ -74,9 +82,11 @@ class DTCookiePool(PageCookiePool):
                         document = window.document;
                         location = window.location;
                         ''' + js_func
-            ctx = execjs.compile(js_func)
-            # with open('wzjyjt_xxgg_pm.js', 'w+', encoding='utf-8') as f:
-            #     f.write(js_func)
+            ctx = execjs.compile(js_func,cwd=self.cwd)
+
+            if self.is_save_js:
+                with open('./clean_code.js', 'w+', encoding='utf-8') as f:
+                    f.write(js_func)
 
             ss = ctx.call("go", json.loads(js_do_data))
             for item in ss.split(";"):
@@ -86,6 +96,6 @@ class DTCookiePool(PageCookiePool):
             cookies = requests.utils.dict_from_cookiejar(session.cookies)
             return cookies
         except Exception as e:
+            print("cookie生产错误:",e)
             return {}
 
-

+ 2 - 1
FworkSpider/untils/tools.py

@@ -293,7 +293,7 @@ def njpc_fields_extract_special(html, data_item):
 
 def get_proxy(scheme=None, default=None, socks5h=False):
     proxies = get_proxy_from_jyapi()
-    print(f"切换代理:{proxies}")
+    print(f"\r切换代理:{proxies}",end="")
     if proxies is not None:
         if socks5h:
             proxyh = {
@@ -301,6 +301,7 @@ def get_proxy(scheme=None, default=None, socks5h=False):
                 "https": proxies.get("http").replace("socks5", "socks5h")
             }
             proxies = proxyh
+        print(f"\r切换代理:{proxies}", end="")
         if not scheme:
             return proxies
         else: