dongzhaorui 3 gadi atpakaļ
vecāks
revīzija
cac604c32f
1 mainītis faili ar 17 papildinājumiem un 18 dzēšanām
  1. 17 18
      codes_hospital/defaults.py

+ 17 - 18
codes_hospital/defaults.py

@@ -15,6 +15,7 @@ urllib3.disable_warnings()
 
 zktest_unexists_name = mongo_table('py_spider', 'zktest_unexists_name')
 f_hospital_codes = mongo_table('py_theme', 'f_hospital_codes')
+openid = None  # 全局openid
 
 
 class TimerError(IOError):
@@ -40,11 +41,9 @@ def spider_listener(func):
     def wrapper(*args, **kwargs):
         if all([
             0 <= datetime.datetime.now().weekday() <= 4,  # 周一到周五
-            9 <= datetime.datetime.now().hour <= 17  # 早9点到晚5
+            9 <= datetime.datetime.now().hour <= 18  # 早9点到晚19
         ]):
-            # print("进入执行++++++++++++++++ ", func.__name__)
             result = func(*args, **kwargs)
-            # print("执行完毕------------------- ", func.__name__)
             return result
         raise TimerError('小程序接口停止运营')
     return wrapper
@@ -160,19 +159,21 @@ def quote(data):
     return quote_str
 
 
-def check_response(response):
+def check_response(response, *args):
     resp_json = response.json()
     logger.debug(json.dumps(resp_json, indent=4, ensure_ascii=False))
+    open_id = f" OpenId:{args[0]}" if args else ""  # empty suffix (not None) when no OpenId is passed, so the concatenations below cannot raise TypeError
     if 'resultType' in resp_json and resp_json['resultType'] == 'ipError':
-        raise CrawlError(resp_json['resultTypeMemo'])
+        raise CrawlError(resp_json['resultTypeMemo'] + open_id)
     if 'code' in resp_json and resp_json["code"] != '0':
-        raise CrawlError(resp_json['msg'])
+        raise CrawlError(resp_json['msg'] + open_id)
     if len(resp_json) == 0:
-        raise CrawlError(f"详情页请求结果为空")
+        raise CrawlError("详情页请求结果为空" + open_id)
 
 
 @spider_listener
 def callback_requests(func, *args, **kwargs):
+    global openid
     proxy = kwargs.pop('proxy', None)
     openid = kwargs.pop('openid')
     openid_dq = kwargs.pop('openid_dq')
@@ -190,7 +191,7 @@ def callback_requests(func, *args, **kwargs):
             if proxy is not None:
                 proxy.switch()
         except (CrawlError, AssertionError) as e:
-            logger.error(f"[OpenId异常]:{e}")
+            logger.error(f"[查询异常]:{e}")
             openid = get_openid(openid_dq)
             time.sleep(3)
 
@@ -237,7 +238,7 @@ def get_jgdm(query, proxies, openid):
         raise RequestError(f"'{query}'jgdm请求失败, 原因:{e}")
 
     # print(response)
-    check_response(response)
+    check_response(response, openid)
     resp_json = response.json()
     assert 'resultType' in resp_json and resp_json['resultType'] != 'ipError'
     documents = resp_json['jginfoList']["documents"]
@@ -283,7 +284,7 @@ def get_hospital(query, jgdm, proxies, openid):
     except requests.RequestException as e:
         raise RequestError(f"'{jgdm}'医院详情请求失败, 原因:{e}")
 
-    check_response(response)
+    check_response(response, openid)
     resp_json = response.json()
     # print(json.dumps(resp_json, indent=4, ensure_ascii=False))
     assert "code" in resp_json and resp_json["code"] == '0'
@@ -320,22 +321,20 @@ def get_hospital(query, jgdm, proxies, openid):
 
 
 def query_hospital(tasks, proxy, openid_deque):
+    global openid
     while len(tasks) > 0:
         task = tasks.pop(0)
         query = task['name']
-        logger.info(f"[开始查询]{query}")
         openid = get_openid(openid_deque)
-        request_params = dict(
-            proxy=proxy,
-            openid=openid,
-            openid_dq=openid_deque
-        )
+        logger.info(f"[开始查询]{query}")
+        params = dict(proxy=proxy, openid=openid, openid_dq=openid_deque)
         # 列表页
-        jgdm_lst = callback_requests(get_jgdm, query, **request_params)
+        jgdm_lst = callback_requests(get_jgdm, query, **params)
         time.sleep(3)
         # 详情页
         for jgdm in jgdm_lst:
-            callback_requests(get_hospital, query, jgdm, **request_params)
+            params.update(dict(openid=openid))
+            callback_requests(get_hospital, query, jgdm, **params)
             logger.info(f"[保存数据]jgdm:{jgdm}")
             time.sleep(15)
         # 更新采集任务状态