|
@@ -1,6 +1,8 @@
|
|
|
+import datetime
|
|
|
import json
|
|
|
import time
|
|
|
from collections import deque
|
|
|
+from functools import wraps
|
|
|
|
|
|
import execjs
|
|
|
import requests
|
|
@@ -15,16 +17,42 @@ urllib3.disable_warnings()
|
|
|
zktest_unexists_name = mongo_table('py_spider', 'zktest_unexists_name')
|
|
|
f_hospital_codes = mongo_table('py_theme', 'f_hospital_codes')
|
|
|
OPENID_DEQUE = deque([
|
|
|
- 'o0VVO5ck5WeNXrXGjscVGc74bXok', # 未授权
|
|
|
- 'o0VVO5ZVwVYgKcvkgtDYE24entOo', # 未授权
|
|
|
+ # 'o0VVO5ck5WeNXrXGjscVGc74bXok', # 未授权
|
|
|
+ # 'o0VVO5ZVwVYgKcvkgtDYE24entOo', # 未授权
|
|
|
"o0VVO5QnhbdQfl4fkZWw8faTGkZM",
|
|
|
"o0VVO5V2LyoV6gn24F02czJqapfo",
|
|
|
"o0VVO5Qj5EZzjeaKjCQUhhiYprBw",
|
|
|
"o0VVO5VMXL0AWbzsnBkwddWJ74us",
|
|
|
"o0VVO5TjthOUa3xC1ufF0G8kxK7c",
|
|
|
+ "o0VVO5aA5dedD_hDSjOVRHtSMMsY",
|
|
|
+ "o0VVO5fB9K2hDEtqx3LnnoSh5lJY",
|
|
|
+ "o0VVO5R05eei1I4CxIqNUyJIMplA",
|
|
|
+ "o0VVO5aRpkS8mkVZOoG3h8YGzGFE",
|
|
|
+ "o0VVO5UvD12EERomhUUNL134itfc",
|
|
|
])
|
|
|
|
|
|
|
|
|
+class TimerError(IOError):
|
|
|
+
|
|
|
+ def __init__(self, *args, **kwargs):
|
|
|
+ self.msg = args[0]
|
|
|
+
|
|
|
+
|
|
|
+def crawl_timer(func):
|
|
|
+ @wraps(func)
|
|
|
+ def wrapper(*args, **kwargs):
|
|
|
+ if all([
|
|
|
+ 0 <= datetime.datetime.now().weekday() <= 4, # 周一到周五
|
|
|
+            9 <= datetime.datetime.now().hour <= 17  # 早9点到晚6点前（含17:00-17:59）
|
|
|
+ ]):
|
|
|
+ # print("进入++++++++++++++++ ", func.__name__)
|
|
|
+ result = func(*args, **kwargs)
|
|
|
+ # print("执行完毕------------------- ", func.__name__)
|
|
|
+ return result
|
|
|
+ raise TimerError('小程序接口停止运营')
|
|
|
+ return wrapper
|
|
|
+
|
|
|
+
|
|
|
def get_openid():
|
|
|
global OPENID_DEQUE
|
|
|
openid = OPENID_DEQUE.popleft()
|
|
@@ -123,6 +151,7 @@ def quote(data):
|
|
|
return quote_str
|
|
|
|
|
|
|
|
|
+@crawl_timer
|
|
|
def callback_requests(func, *args, **kwargs):
|
|
|
proxy = kwargs.pop('proxy', None)
|
|
|
openid = kwargs.pop('openid')
|
|
@@ -267,20 +296,20 @@ def query_hospital(tasks, proxy):
|
|
|
while len(tasks) > 0:
|
|
|
task = tasks.pop(0)
|
|
|
query = task['name']
|
|
|
- logger.info(f"[开始查询]{query}")
|
|
|
openid = get_openid()
|
|
|
+ logger.info(f"[开始查询]{query}")
|
|
|
jgdm_lst = callback_requests(get_jgdm, query, proxy=proxy, openid=openid)
|
|
|
+ total = len(jgdm_lst) # 事业单位的数量
|
|
|
+ logger.info(f"[查询成功]获取{total}条'{query}'相关信息")
|
|
|
time.sleep(3)
|
|
|
for jgdm in jgdm_lst:
|
|
|
callback_requests(get_hospital, query, jgdm, proxy=proxy, openid=openid)
|
|
|
+ logger.info(f"[保存数据]jgdm:{jgdm}")
|
|
|
time.sleep(30)
|
|
|
-
|
|
|
- total = len(jgdm_lst) # 事业单位的数量
|
|
|
zktest_unexists_name.update_one(
|
|
|
{'_id': task['_id']},
|
|
|
{'$set': {'is_crawl': True, 'count': total}}
|
|
|
)
|
|
|
- logger.info(f"[查询成功]获取{total}条'{query}'相关信息")
|
|
|
time.sleep(20)
|
|
|
|
|
|
|
|
@@ -303,7 +332,11 @@ def crawl_spider():
|
|
|
break
|
|
|
|
|
|
logger.info(f'获取{len(tasks)}条新任务')
|
|
|
- query_hospital(tasks, proxy)
|
|
|
+ try:
|
|
|
+ query_hospital(tasks, proxy)
|
|
|
+ except TimerError as e:
|
|
|
+ logger.info(f'[消息通知]{e.msg}')
|
|
|
+ time.sleep(3600)
|
|
|
proxy.switch()
|
|
|
|
|
|
|