lizongze 3 years ago
parent
commit
75af5ecbfe

+ 11 - 7
ybw/crawler/account.py

@@ -5,6 +5,8 @@ import requests
 
 from utils.log import logger
 from utils.tools import wait
+from utils.databases import mongo_table
+from bson.objectid import ObjectId
 
 ROOT_PATH = Path(__file__).parent.parent
 
@@ -31,7 +33,7 @@ def read_account():
 
 
 def get_account(site, crawl_type):
-    url = "http://cc.spdata.jianyu360.com/competing_goods/account/fetch"
+    url = "http://172.17.4.232:1405/competing_goods/account/fetch"
     params = {
         "site": site,
         "crawl_type": crawl_type
@@ -40,19 +42,20 @@ def get_account(site, crawl_type):
         response = requests.get(url,
                                 headers=_headers,
                                 params=params,
-                                timeout=10)
-        print(response.json())
+                                timeout=60)
         data = response.json()['data']
+        logger.info("当前账号状态:{}".format(data['crawl_detail']))
     except requests.RequestException:
         # 网络不通信时,无法获取账号
+        logger.info("网络异常,获取账号失败")
         data = None
     return data
 
 
 def release_account(uid, crawl_type, disable_log=False):
 
-
-    url = "http://cc.spdata.jianyu360.com/competing_goods/account/release"
+    # url = "http://cc.spdata.jianyu360.com/competing_goods/account/release"
+    url = 'http://172.17.4.232:1405/competing_goods/account/release'
     if uid is not None:
         params = {
             "uid": uid,
@@ -63,10 +66,11 @@ def release_account(uid, crawl_type, disable_log=False):
                 response = requests.get(url,
                                         headers=_headers,
                                         params=params,
-                                        timeout=10)
+                                        timeout=60)
                 if response.status_code == 200:
+                    acc_status = mongo_table('py_spider', 'match_account').find_one({'_id': ObjectId(uid)})['crawl_detail']
                     if not disable_log:
-                        logger.info(f"release_account >>> {response.json()}")
+                        logger.info(f"release_account >>> {response.json()}, status : {acc_status}")
                     break
             except requests.RequestException:
                 logger.error("网络异常,归还账号失败")

+ 1 - 1
ybw/crawler/crawl_scheduler.py

@@ -115,7 +115,7 @@ class Scheduler:
 
     def finished(self, interval=None):
         logger.info("任务结束")
-        release_account(self.account_id, self.crawl_type)
+        # release_account(self.account_id, self.crawl_type)
         wait(interval)
 
     @staticmethod

+ 1 - 1
ybw/crawler/login.py

@@ -326,7 +326,7 @@ def login_check(account: str = None, refer=None, allow_output_log=True):
         raise CrawlError(code=10021,reason="系统繁忙,请等待一会儿,自动刷新。")
 
     if allow_output_log:
-        logger.info("账号信息:{}", json.dumps(member, indent=4, ensure_ascii=False))
+        logger.info("账号信息:{}".format(json.dumps(member, indent=4, ensure_ascii=False)))
 
     '''处理本地 cookies'''
     login_cookies: dict = dict_from_cookiejar(r.cookies)
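The brace-positional form on the removed line relies on loguru-style formatting; calling `.format` up front makes the call work with any logger backend, since the standard-library logger %-interpolates its arguments and would report a formatting error instead of filling the placeholder. A minimal illustration with the stdlib logger as a stand-in (the `member` payload is hypothetical):

```python
import json
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("login_check_demo")

member = {"account": "demo", "vip": 1}  # hypothetical payload

# loguru-only style: with the stdlib logger the message is %-formatted, so the
# brace placeholder is never filled and logging reports a formatting error:
# logger.info("账号信息:{}", json.dumps(member, ensure_ascii=False))

# Backend-agnostic: format the message before handing it to the logger.
logger.info("账号信息:{}".format(json.dumps(member, indent=4, ensure_ascii=False)))
```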

+ 14 - 7
zbytb/crawler/account.py

@@ -5,6 +5,8 @@ import requests
 
 from utils.log import logger
 from utils.tools import wait
+from utils.databases import mongo_table
+from bson.objectid import ObjectId
 
 ROOT_PATH = Path(__file__).parent.parent
 
@@ -31,7 +33,7 @@ def read_account():
 
 
 def get_account(site, crawl_type):
-    url = "http://cc.spdata.jianyu360.com/competing_goods/account/fetch"
+    url = "http://172.17.4.232:1405/competing_goods/account/fetch"
     params = {
         "site": site,
         "crawl_type": crawl_type
@@ -40,17 +42,20 @@ def get_account(site, crawl_type):
         response = requests.get(url,
                                 headers=_headers,
                                 params=params,
-                                timeout=10)
-        print(response.json())
+                                timeout=60)
         data = response.json()['data']
+        logger.info("当前账号状态:{}".format(data['crawl_detail']))
     except requests.RequestException:
         # 网络不通信时,无法获取账号
+        logger.info("网络异常,获取账号失败")
         data = None
     return data
 
 
-def release_account(uid, crawl_type):
-    url = "http://cc.spdata.jianyu360.com/competing_goods/account/release"
+def release_account(uid, crawl_type, disable_log=False):
+
+    # url = "http://cc.spdata.jianyu360.com/competing_goods/account/release"
+    url = 'http://172.17.4.232:1405/competing_goods/account/release'
     if uid is not None:
         params = {
             "uid": uid,
@@ -61,9 +66,11 @@ def release_account(uid, crawl_type):
                 response = requests.get(url,
                                         headers=_headers,
                                         params=params,
-                                        timeout=10)
+                                        timeout=60)
                 if response.status_code == 200:
-                    logger.debug(f"release_account >>> {response.json()}")
+                    acc_status = mongo_table('py_spider', 'match_account').find_one({'_id': ObjectId(uid)})['crawl_detail']
+                    if not disable_log:
+                        logger.info(f"release_account >>> {response.json()}, status : {acc_status}")
                     break
             except requests.RequestException:
                 logger.error("网络异常,归还账号失败")

+ 1 - 1
zbytb/main.py

@@ -31,4 +31,4 @@ def detail_page_spider():
 
 
 if __name__ == '__main__':
-    detail_page_spider()
+    list_page_spider()

+ 2 - 1
zbytb/release_account.py

@@ -1,8 +1,9 @@
 from crawler.account import read_account, release_account
-
+from utils.log import logger
 
 def _send_message():
     account = read_account()
+    logger.info("[容器重启]{}".format(account))
     if account is not None:
         release_account(**account)
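Calling `release_account(**account)` assumes `read_account()` returns a dict whose keys line up with the function's `uid` and `crawl_type` parameters. A small sketch of that contract (the values below are hypothetical, not real account data):

```python
# Hypothetical return value of read_account(); only the keys matter, since
# they are expanded into release_account(uid=..., crawl_type=...).
# logger and release_account come from the imports shown above.
account = {"uid": "62b000000000000000000000", "crawl_type": "detail"}

logger.info("[容器重启]{}".format(account))
if account is not None:
    release_account(**account)
```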