|
@@ -14,12 +14,24 @@ urllib3.disable_warnings()
|
|
|
|
|
|
zktest_unexists_name = mongo_table('py_spider', 'zktest_unexists_name')
|
|
|
f_hospital_codes = mongo_table('py_theme', 'f_hospital_codes')
|
|
|
-openid_deque = deque([
|
|
|
+OPENID_DEQUE = deque([
|
|
|
+ 'o0VVO5ck5WeNXrXGjscVGc74bXok', # 未授权
|
|
|
+ 'o0VVO5ZVwVYgKcvkgtDYE24entOo', # 未授权
|
|
|
"o0VVO5QnhbdQfl4fkZWw8faTGkZM",
|
|
|
"o0VVO5V2LyoV6gn24F02czJqapfo",
|
|
|
+ "o0VVO5Qj5EZzjeaKjCQUhhiYprBw",
|
|
|
+ "o0VVO5VMXL0AWbzsnBkwddWJ74us",
|
|
|
+ "o0VVO5TjthOUa3xC1ufF0G8kxK7c",
|
|
|
])
|
|
|
|
|
|
|
|
|
+def get_openid():
|
|
|
+ global OPENID_DEQUE
|
|
|
+ openid = OPENID_DEQUE.popleft()
|
|
|
+ OPENID_DEQUE.append(openid)
|
|
|
+ return openid
|
|
|
+
|
|
|
+
|
|
|
def md5_hex(val):
|
|
|
salt = "A523B4A5C52203AA9C2D97F6CB45CB35"
|
|
|
val = val + salt
|
|
@@ -111,7 +123,27 @@ def quote(data):
|
|
|
return quote_str
|
|
|
|
|
|
|
|
|
-def get_jgdm(query, proxies):
|
|
|
+def callback_requests(func, *args, **kwargs):
|
|
|
+ proxy = kwargs.pop('proxy', None)
|
|
|
+ openid = kwargs.pop('openid')
|
|
|
+ while True:
|
|
|
+ kwargs['openid'] = openid
|
|
|
+ logger.debug(f"[当前openid]:{openid}")
|
|
|
+ proxies = proxy.proxies if proxy is not None else None
|
|
|
+ kwargs['proxies'] = proxies
|
|
|
+ logger.debug(f"[当前代理]:{proxies}")
|
|
|
+ try:
|
|
|
+ return func(*args, **kwargs)
|
|
|
+ except (IOError, AssertionError) as e:
|
|
|
+ if not isinstance(e, AssertionError):
|
|
|
+ logger.error(f"[访问异常]:{e}")
|
|
|
+ time.sleep(3)
|
|
|
+ openid = get_openid()
|
|
|
+ if proxy is not None:
|
|
|
+ proxy.switch()
|
|
|
+
|
|
|
+
|
|
|
+def get_jgdm(query, proxies, openid):
|
|
|
results = []
|
|
|
url = "https://ss.cods.org.cn/MiniProService/search/searchRMini"
|
|
|
headers = {
|
|
@@ -131,7 +163,7 @@ def get_jgdm(query, proxies):
|
|
|
"mobile": "",
|
|
|
"isDeepSearch": False,
|
|
|
"platform": "weixin",
|
|
|
- "openid": "o0VVO5Wjhblu4tgm4OkMaJecvsO4"
|
|
|
+ "openid": openid
|
|
|
}
|
|
|
json_str = quote(val)
|
|
|
sign = md5_hex(json_str)
|
|
@@ -165,15 +197,7 @@ def get_jgdm(query, proxies):
|
|
|
return results
|
|
|
|
|
|
|
|
|
-def get_openid():
|
|
|
- global openid_deque
|
|
|
- openid = openid_deque.popleft()
|
|
|
- pid = openid
|
|
|
- openid_deque.append(openid)
|
|
|
- return pid
|
|
|
-
|
|
|
-
|
|
|
-def get_hospital(query, jgdm, proxies):
|
|
|
+def get_hospital(query, jgdm, proxies, openid):
|
|
|
url = "https://ss.cods.org.cn/MiniProService/detailPage/detail.base"
|
|
|
headers = {
|
|
|
"Host": "ss.cods.org.cn",
|
|
@@ -184,8 +208,6 @@ def get_hospital(query, jgdm, proxies):
|
|
|
"Referer": "https://servicewechat.com/wxa97584cd2e4d83ad/10/page-frame.html",
|
|
|
"Connection": "keep-alive"
|
|
|
}
|
|
|
- openid = get_openid()
|
|
|
- print('openid >> ', openid)
|
|
|
val = {
|
|
|
"jgdm": jgdm,
|
|
|
"keyword": query,
|
|
@@ -241,33 +263,16 @@ def get_hospital(query, jgdm, proxies):
|
|
|
return hospital
|
|
|
|
|
|
|
|
|
-def callback_requests(func, *args, **kwargs):
|
|
|
- proxy = kwargs.pop('proxy', None)
|
|
|
- while True:
|
|
|
- try:
|
|
|
- proxies = proxy.proxies if proxy is not None else None
|
|
|
- logger.debug(f"[当前代理]:{proxies}")
|
|
|
- if kwargs.get('proxies') is None:
|
|
|
- kwargs.setdefault('proxies', proxies)
|
|
|
- else:
|
|
|
- kwargs.update({'proxies': proxies})
|
|
|
- return func(*args, **kwargs)
|
|
|
- except (IOError, AssertionError) as e:
|
|
|
- logger.error(f"[访问异常]:{e}")
|
|
|
- time.sleep(3)
|
|
|
- if proxy is not None:
|
|
|
- proxy.switch()
|
|
|
-
|
|
|
-
|
|
|
def query_hospital(tasks, proxy):
|
|
|
while len(tasks) > 0:
|
|
|
task = tasks.pop(0)
|
|
|
query = task['name']
|
|
|
logger.info(f"[开始查询]{query}")
|
|
|
- jgdm_lst = callback_requests(get_jgdm, query, proxy=proxy)
|
|
|
+ openid = get_openid()
|
|
|
+ jgdm_lst = callback_requests(get_jgdm, query, proxy=proxy, openid=openid)
|
|
|
time.sleep(3)
|
|
|
for jgdm in jgdm_lst:
|
|
|
- callback_requests(get_hospital, query, jgdm, proxy=proxy)
|
|
|
+ callback_requests(get_hospital, query, jgdm, proxy=proxy, openid=openid)
|
|
|
time.sleep(30)
|
|
|
|
|
|
total = len(jgdm_lst) # 事业单位的数量
|