dongzhaorui 3 年之前
父節點
當前提交
b5776c24a7
共有 1 個文件被更改,包括 40 次插入和 7 次删除
  1. 40 7
      codes_hospital/crawl_hospital.py

+ 40 - 7
codes_hospital/crawl_hospital.py

@@ -1,6 +1,8 @@
+import datetime
 import json
 import time
 from collections import deque
+from functools import wraps
 
 import execjs
 import requests
@@ -15,16 +17,42 @@ urllib3.disable_warnings()
 zktest_unexists_name = mongo_table('py_spider', 'zktest_unexists_name')
 f_hospital_codes = mongo_table('py_theme', 'f_hospital_codes')
 OPENID_DEQUE = deque([
-    'o0VVO5ck5WeNXrXGjscVGc74bXok',  # 未授权
-    'o0VVO5ZVwVYgKcvkgtDYE24entOo',  # 未授权
+    # 'o0VVO5ck5WeNXrXGjscVGc74bXok',  # 未授权
+    # 'o0VVO5ZVwVYgKcvkgtDYE24entOo',  # 未授权
     "o0VVO5QnhbdQfl4fkZWw8faTGkZM",
     "o0VVO5V2LyoV6gn24F02czJqapfo",
     "o0VVO5Qj5EZzjeaKjCQUhhiYprBw",
     "o0VVO5VMXL0AWbzsnBkwddWJ74us",
     "o0VVO5TjthOUa3xC1ufF0G8kxK7c",
+    "o0VVO5aA5dedD_hDSjOVRHtSMMsY",
+    "o0VVO5fB9K2hDEtqx3LnnoSh5lJY",
+    "o0VVO5R05eei1I4CxIqNUyJIMplA",
+    "o0VVO5aRpkS8mkVZOoG3h8YGzGFE",
+    "o0VVO5UvD12EERomhUUNL134itfc",
 ])
 
 
class TimerError(IOError):
    """Raised when a crawl is attempted outside the allowed service window.

    The crawl loop catches this and backs off (see ``crawl_spider``), reading
    the human-facing reason from ``.msg``.
    """

    def __init__(self, *args, **kwargs):
        # Forward to IOError so str(exc)/args behave normally.
        super().__init__(*args)
        # Original code did `args[0]` unconditionally, which raised
        # IndexError when constructed with no arguments; default to ''.
        self.msg = args[0] if args else ''
+
+
def crawl_timer(func):
    """Decorator gating *func* to the mini-program's operating hours.

    Allows execution Monday-Friday (weekday 0-4) while the hour is in
    9..17 inclusive (i.e. 09:00 up to 17:59); otherwise raises
    ``TimerError`` so the caller can pause and retry later.

    Raises:
        TimerError: when invoked outside the allowed window.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Read the clock once so both checks see the same instant;
        # two separate now() calls could straddle a window boundary.
        now = datetime.datetime.now()
        if 0 <= now.weekday() <= 4 and 9 <= now.hour <= 17:
            return func(*args, **kwargs)
        raise TimerError('小程序接口停止运营')
    return wrapper
+
+
 def get_openid():
     global OPENID_DEQUE
     openid = OPENID_DEQUE.popleft()
@@ -123,6 +151,7 @@ def quote(data):
     return quote_str
 
 
+@crawl_timer
 def callback_requests(func, *args, **kwargs):
     proxy = kwargs.pop('proxy', None)
     openid = kwargs.pop('openid')
@@ -267,20 +296,20 @@ def query_hospital(tasks, proxy):
     while len(tasks) > 0:
         task = tasks.pop(0)
         query = task['name']
-        logger.info(f"[开始查询]{query}")
         openid = get_openid()
+        logger.info(f"[开始查询]{query}")
         jgdm_lst = callback_requests(get_jgdm, query, proxy=proxy, openid=openid)
+        total = len(jgdm_lst)  # 事业单位的数量
+        logger.info(f"[查询成功]获取{total}条'{query}'相关信息")
         time.sleep(3)
         for jgdm in jgdm_lst:
             callback_requests(get_hospital, query, jgdm, proxy=proxy, openid=openid)
+            logger.info(f"[保存数据]jgdm:{jgdm}")
             time.sleep(30)
-
-        total = len(jgdm_lst)  # 事业单位的数量
         zktest_unexists_name.update_one(
             {'_id': task['_id']},
             {'$set': {'is_crawl': True, 'count': total}}
         )
-        logger.info(f"[查询成功]获取{total}条'{query}'相关信息")
         time.sleep(20)
 
 
@@ -303,7 +332,11 @@ def crawl_spider():
             break
 
         logger.info(f'获取{len(tasks)}条新任务')
-        query_hospital(tasks, proxy)
+        try:
+            query_hospital(tasks, proxy)
+        except TimerError as e:
+            logger.info(f'[消息通知]{e.msg}')
+            time.sleep(3600)
         proxy.switch()