lizongze · 1 year ago
Commit b9f66c90d5

+ 1 - 1
qlm/utils/clean_file.py

@@ -152,7 +152,7 @@ headers = {
 
 # Captcha-solving endpoint
 def get_code(file_path: str) -> dict:
-    upload_address = "http://123.57.163.80:2119/v1/images/verify"
+    upload_address = "http://pycaptcha.spdata.jianyu360.com/v1/images/verify"
     with open(file_path, 'rb') as f:
         image_bytes = f.read()
     content = {'file': image_bytes}
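
For context, a minimal sketch of how this upload endpoint is presumably called end to end, assuming a plain multipart POST and a JSON response (the timeout and the response shape are assumptions, not shown in this diff):

    import requests

    def get_code(file_path: str) -> dict:
        # Captcha endpoint introduced by this commit
        upload_address = "http://pycaptcha.spdata.jianyu360.com/v1/images/verify"
        with open(file_path, 'rb') as f:
            image_bytes = f.read()
        # multipart/form-data upload; the service is assumed to answer with JSON
        response = requests.post(upload_address, files={'file': image_bytes}, timeout=10)
        return response.json()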

+ 1 - 1
ybw/crawler/crawl_scheduler.py

@@ -164,7 +164,7 @@ class Scheduler:
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         logger.info(f'[closing scheduler]')
-        release_account(self.account_id, self.crawl_type,disable_log=True)
+        release_account(self.account_id, self.crawl_type)
         self.crawl_start = False
 
         if exc_type is not None:
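
Because release_account is called from __exit__, the account is returned to the pool even when the crawl body raises; after this commit the release is logged normally again (the disable_log=True flag is gone). A self-contained sketch of the pattern, with release_account stubbed out and hypothetical constructor arguments (neither is shown in this diff):

    from loguru import logger

    def release_account(account_id, crawl_type):
        # hypothetical stub; the real helper lives in the ybw package
        logger.info('released account {} ({})', account_id, crawl_type)

    class Scheduler:
        def __init__(self, account_id, crawl_type):
            self.account_id = account_id
            self.crawl_type = crawl_type
            self.crawl_start = False

        def __enter__(self):
            self.crawl_start = True
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            logger.info('[closing scheduler]')
            release_account(self.account_id, self.crawl_type)
            self.crawl_start = False

    with Scheduler('demo-account', 'detail') as sc:
        pass  # an exception here would still trigger the release in __exit__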

+ 3 - 4
ybw/crawler/login.py

@@ -119,7 +119,7 @@ def recognition_captcha(image_stream):
     @param image_stream: captcha image byte stream
     @return:
     """
-    url = "http://123.57.163.80:2119/v1/images/verify"
+    url = "http://pycaptcha.spdata.jianyu360.com/v1/images/verify"
     img_headers = {'accept': 'application/json'}
     image_file = {'file': image_stream}
     r = requests.post(url, headers=img_headers, files=image_file, stream=True)
@@ -147,7 +147,6 @@ def download_captcha(image, session: Session, save_to_local=False):
         'Referer': 'https://www.chinabidding.cn/public/2020/html/login.html?source=1',
     }
     r = session.get(url, headers=headers, stream=True)
-    # TODO 405 IP ban, to be handled
     stream = io.BytesIO()
     stream.write(r.content)
     if save_to_local:
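
The surrounding function buffers the captcha image in memory before recognition. A hedged sketch of the flow implied by this hunk (treating the image parameter as the captcha URL, and with a hypothetical local file name; the real url variable and save path sit outside this hunk):

    import io
    import time
    import requests

    def download_captcha(image, session: requests.Session, save_to_local=False):
        # 'image' is assumed here to be the captcha image URL
        headers = {
            'Referer': 'https://www.chinabidding.cn/public/2020/html/login.html?source=1',
        }
        r = session.get(image, headers=headers, stream=True)
        stream = io.BytesIO()
        stream.write(r.content)
        if save_to_local:
            # hypothetical file name; the real save path is outside this hunk
            with open(f'captcha_{int(time.time())}.jpg', 'wb') as f:
                f.write(stream.getvalue())
        stream.seek(0)
        return stream
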
@@ -315,7 +314,7 @@ def login_check(account: str = None, refer=None, allow_output_log=True):
 
     cookies = load_login_cookies(account)
     if cookies is None:
-        '''No cookies for this account; please check the login_cookie.json.json config file'''
+        '''No cookies for this account; please check the login_cookie.json config file'''
         return True
 
     ts = int(time.time())
@@ -327,7 +326,7 @@ def login_check(account: str = None, refer=None, allow_output_log=True):
         raise CrawlError(code=10021, reason="System busy; please wait a moment, it will refresh automatically.")
 
     if allow_output_log:
-        logger.info("账号信息:{}".format(json.dumps(member, indent=4, ensure_ascii=False)))
+        logger.info("账号信息:{}", json.dumps(member, indent=4, ensure_ascii=False))
 
     '''Process the locally cached cookies'''
     login_cookies: dict = dict_from_cookiejar(r.cookies)
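
The logging change above drops str.format in favour of loguru's brace-style argument passing: the template and its argument reach the logger separately, and loguru performs the substitution itself. A minimal illustration, assuming logger is loguru's (the brace-style call only interpolates because loguru uses str.format semantics; the stdlib logging module expects %-style placeholders):

    import json
    from loguru import logger

    member = {"account": "demo", "balance": 42}

    # Pre-formatted: the string is fully built before the logger sees it.
    logger.info("Account info: {}".format(json.dumps(member, indent=4, ensure_ascii=False)))

    # Brace-style: loguru receives the template plus the argument and formats it.
    logger.info("Account info: {}", json.dumps(member, indent=4, ensure_ascii=False))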

+ 3 - 7
ybw/detail_spider.py

@@ -138,11 +138,6 @@ class DetailSpider:
                     if code == 200:
                         retries += 1
                     else:
-                        # if proxy is None:
-                        #     proxy = Proxy(True)
-                        # else:
-                        #     proxy.switch()
-                        # proxies = proxy.proxies
                         time.sleep(1800)
                         retries += 1
                     continue
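
With the commented-out proxy rotation removed, the failure path is now a flat 30-minute back-off. A simplified sketch of that idea, not the exact loop (the fetch helper and the retry bound are hypothetical):

    import time

    def fetch_with_backoff(fetch, max_retries=3):
        for _ in range(max_retries):
            if fetch() == 200:   # fetch() is assumed to return an HTTP status code
                return True
            time.sleep(1800)     # flat back-off replaces the removed proxy switch
        return False
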
@@ -185,8 +180,8 @@ class DetailSpider:
         item["contenthtml"] = html
         special = {
             '若附件无法下载,你可以尝试使用360极速浏览器进行下载!': '',
-            # 'DD000E;|EE000F;|FF000E;|DD000F;|EE000E;|AA000E;': '',
-            '[(]?[)]?[A-Z]{2}000[A-Z]{1};[(]?[\d{1,4}]*[;]?[)]?[;]?':''
+            # 'DD000E;|EE000F;|FF000E;': '',
+            '[(]?[)]?[A-Z]{2}000[A-Z]{1};[(]?[\d{1,4}]*[;]?[)]?[;]?': '',
         }
         item["detail"] = cleaner(html, special)
         item["comeintime"] = int2long(int(time.time()))
@@ -202,6 +197,7 @@ class DetailSpider:
     def crawl_spider(self, sc: Scheduler):
         while True:
             next_task_interval = None
+            logger.info(f"[count:]{str(sc.count)}")
             if sc.count >= sc.total:
                 return True
             item = sc.crawl_task
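
One aside on the progress line added here: the str() call is redundant inside an f-string, since f-strings already convert interpolated values with str():

    count = 7
    assert f"[count:]{str(count)}" == f"[count:]{count}" == "[count:]7"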

+ 1 - 1
zbytb/utils/clean_file.py

@@ -156,7 +156,7 @@ headers = {
 
 # Captcha-solving endpoint
 def get_code(file_path: str) -> dict:
-    upload_address = "http://123.57.163.80:2119/v1/images/verify"
+    upload_address = "http://pycaptcha.spdata.jianyu360.com/v1/images/verify"
     with open(file_path, 'rb') as f:
         image_bytes = f.read()
     content = {'file': image_bytes}