dongzhaorui 3 жил өмнө
parent
commit
55dea5d6a5

+ 39 - 34
codes_hospital/crawl_hospital.py

@@ -14,12 +14,24 @@ urllib3.disable_warnings()
 
 zktest_unexists_name = mongo_table('py_spider', 'zktest_unexists_name')
 f_hospital_codes = mongo_table('py_theme', 'f_hospital_codes')
-openid_deque = deque([
+OPENID_DEQUE = deque([  # pool of openid strings that get_openid() hands out in rotation
+    'o0VVO5ck5WeNXrXGjscVGc74bXok',  # not authorized
+    'o0VVO5ZVwVYgKcvkgtDYE24entOo',  # not authorized
     "o0VVO5QnhbdQfl4fkZWw8faTGkZM",
     "o0VVO5V2LyoV6gn24F02czJqapfo",
+    "o0VVO5Qj5EZzjeaKjCQUhhiYprBw",
+    "o0VVO5VMXL0AWbzsnBkwddWJ74us",
+    "o0VVO5TjthOUa3xC1ufF0G8kxK7c",
 ])
 
 
+def get_openid():  # Return the openid at the front of OPENID_DEQUE and move it to the back, so successive calls cycle through the pool.
+    global OPENID_DEQUE  # the deque is only mutated in place below, never rebound
+    openid = OPENID_DEQUE.popleft()  # take the entry at the front of the pool ...
+    OPENID_DEQUE.append(openid)  # ... and re-queue it at the back for a later call
+    return openid
+
+
 def md5_hex(val):
     salt = "A523B4A5C52203AA9C2D97F6CB45CB35"
     val = val + salt
@@ -111,7 +123,27 @@ def quote(data):
     return quote_str
 
 
-def get_jgdm(query, proxies):
+def callback_requests(func, *args, **kwargs):  # Call func(*args, **kwargs) and return its result, retrying whenever it raises IOError or AssertionError.
+    proxy = kwargs.pop('proxy', None)  # optional keyword; only its .proxies attribute and .switch() method are used here
+    openid = kwargs.pop('openid')  # required keyword: raises KeyError if the caller omits it
+    while True:  # no retry cap: loops until func returns or raises an exception other than the two caught below
+        kwargs['openid'] = openid  # func receives the current openid as a keyword argument
+        logger.debug(f"[当前openid]:{openid}")  # log label reads "current openid"
+        proxies = proxy.proxies if proxy is not None else None  # read again on every attempt; proxy.switch() runs between attempts
+        kwargs['proxies'] = proxies  # func receives the proxies value (or None) as a keyword argument
+        logger.debug(f"[当前代理]:{proxies}")  # log label reads "current proxy"
+        try:
+            return func(*args, **kwargs)
+        except (IOError, AssertionError) as e:
+            if not isinstance(e, AssertionError):  # an AssertionError is retried without writing an error log
+                logger.error(f"[访问异常]:{e}")  # log label reads "access exception"
+            time.sleep(3)  # pause 3 seconds before the next attempt
+            openid = get_openid()  # retry with the next openid from the pool
+            if proxy is not None:
+                proxy.switch()  # presumably rotates to another proxy - confirm against the proxy class
+
+
+def get_jgdm(query, proxies, openid):
     results = []
     url = "https://ss.cods.org.cn/MiniProService/search/searchRMini"
     headers = {
@@ -131,7 +163,7 @@ def get_jgdm(query, proxies):
         "mobile": "",
         "isDeepSearch": False,
         "platform": "weixin",
-        "openid": "o0VVO5Wjhblu4tgm4OkMaJecvsO4"
+        "openid": openid
     }
     json_str = quote(val)
     sign = md5_hex(json_str)
@@ -165,15 +197,7 @@ def get_jgdm(query, proxies):
     return results
 
 
-def get_openid():
-    global openid_deque
-    openid = openid_deque.popleft()
-    pid = openid
-    openid_deque.append(openid)
-    return pid
-
-
-def get_hospital(query, jgdm, proxies):
+def get_hospital(query, jgdm, proxies, openid):
     url = "https://ss.cods.org.cn/MiniProService/detailPage/detail.base"
     headers = {
         "Host": "ss.cods.org.cn",
@@ -184,8 +208,6 @@ def get_hospital(query, jgdm, proxies):
         "Referer": "https://servicewechat.com/wxa97584cd2e4d83ad/10/page-frame.html",
         "Connection": "keep-alive"
     }
-    openid = get_openid()
-    print('openid >> ', openid)
     val = {
         "jgdm": jgdm,
         "keyword": query,
@@ -241,33 +263,16 @@ def get_hospital(query, jgdm, proxies):
     return hospital
 
 
-def callback_requests(func, *args, **kwargs):
-    proxy = kwargs.pop('proxy', None)
-    while True:
-        try:
-            proxies = proxy.proxies if proxy is not None else None
-            logger.debug(f"[当前代理]:{proxies}")
-            if kwargs.get('proxies') is None:
-                kwargs.setdefault('proxies', proxies)
-            else:
-                kwargs.update({'proxies': proxies})
-            return func(*args, **kwargs)
-        except (IOError, AssertionError) as e:
-            logger.error(f"[访问异常]:{e}")
-            time.sleep(3)
-            if proxy is not None:
-                proxy.switch()
-
-
 def query_hospital(tasks, proxy):
     while len(tasks) > 0:
         task = tasks.pop(0)
         query = task['name']
         logger.info(f"[开始查询]{query}")
-        jgdm_lst = callback_requests(get_jgdm, query, proxy=proxy)
+        openid = get_openid()
+        jgdm_lst = callback_requests(get_jgdm, query, proxy=proxy, openid=openid)
         time.sleep(3)
         for jgdm in jgdm_lst:
-            callback_requests(get_hospital, query, jgdm, proxy=proxy)
+            callback_requests(get_hospital, query, jgdm, proxy=proxy, openid=openid)
             time.sleep(30)
 
         total = len(jgdm_lst)  # 事业单位的数量