dongzhaorui vor 3 Jahren
Ursprung
Commit
b928fc84a5
1 geänderte Datei mit 18 neuen und 3 gelöschten Zeilen
  1. 18 3
      codes_hospital/crawl_hospital.py

+ 18 - 3
codes_hospital/crawl_hospital.py

@@ -1,5 +1,6 @@
 import json
 import time
+from collections import deque
 
 import execjs
 import requests
@@ -13,6 +14,10 @@ urllib3.disable_warnings()
 
 zktest_unexists_name = mongo_table('py_spider', 'zktest_unexists_name')
 f_hospital_codes = mongo_table('py_theme', 'f_hospital_codes')
+openid_deque = deque([
+    "o0VVO5QnhbdQfl4fkZWw8faTGkZM",
+    "o0VVO5V2LyoV6gn24F02czJqapfo",
+])
 
 
 def md5_hex(val):
@@ -149,7 +154,7 @@ def get_jgdm(query, proxies):
 
     # print(response)
     resp_json = response.json()
-    # print(json.dumps(resp_json, indent=4, ensure_ascii=False))
+    print(json.dumps(resp_json, indent=4, ensure_ascii=False))
     assert 'resultType' in resp_json and resp_json['resultType'] != 'ipError'
     documents = resp_json['jginfoList']["documents"]
     for item in documents:
@@ -160,6 +165,14 @@ def get_jgdm(query, proxies):
     return results
 
 
+def get_openid():
+    """Return the next openid from the pool in round-robin order.
+
+    The id at the front of the module-level ``openid_deque`` is moved to
+    the back and returned, so successive calls cycle through every id in
+    the pool before repeating.
+
+    Raises:
+        IndexError: if ``openid_deque`` is empty.
+    """
+    # rotate(-1) shifts the front element to the tail in one call. The deque
+    # is mutated in place (never rebound), so no ``global`` is required.
+    openid_deque.rotate(-1)
+    return openid_deque[-1]
+
+
 def get_hospital(query, jgdm, proxies):
     url = "https://ss.cods.org.cn/MiniProService/detailPage/detail.base"
     headers = {
@@ -171,11 +184,13 @@ def get_hospital(query, jgdm, proxies):
         "Referer": "https://servicewechat.com/wxa97584cd2e4d83ad/10/page-frame.html",
         "Connection": "keep-alive"
     }
+    openid = get_openid()
+    print('openid >> ', openid)
     val = {
         "jgdm": jgdm,
         "keyword": query,
         "platform": "weixin",
-        "openid": "o0VVO5QnhbdQfl4fkZWw8faTGkZM"
+        "openid": openid
     }
     json_str = quote(val)
     data = {
@@ -253,7 +268,7 @@ def query_hospital(tasks, proxy):
         time.sleep(3)
         for jgdm in jgdm_lst:
             callback_requests(get_hospital, query, jgdm, proxy=proxy)
-            time.sleep(5)
+            time.sleep(30)
 
         total = len(jgdm_lst)  # 事业单位的数量
         zktest_unexists_name.update_one(