
Fix duplicated detail data in the aggregated list-crawler query; improve the accuracy of the detail-data statistics; remove the py_spider statistics

dongzhaorui 1 year ago
commit e2ed9e900b
1 changed file with 109 additions and 190 deletions

+ 109 - 190
数据处理/sync_data/monitor.py
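
The central change in this diff is that heartbeat statistics are grouped by batch_no rather than spider_id, so each crawl batch is counted once and detail records no longer overlap across runs. A minimal sketch of that grouping, assuming a pymongo client pointed at the pyspider_heartbeat collection named below (host, port and database name here are placeholders, not the production settings):

from pymongo import MongoClient

# Placeholder connection; swap in the real host/port and database.
client = MongoClient("127.0.0.1", 27017)
heartbeat = client["py_spider"]["pyspider_heartbeat"]


def count_by_batch(runtime):
    """Sum download and insert counts per crawl batch for one statistics day."""
    pipeline = [
        {"$match": {"runtime": runtime}},  # e.g. "2023-04-05"
        {"$group": {
            "_id": "$batch_no",                   # one bucket per batch, not per spider_id
            "count": {"$sum": "$count"},          # downloaded records
            "rel_count": {"$sum": "$rel_count"},  # records kept after deduplication
        }},
    ]
    return list(heartbeat.aggregate(pipeline))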

@@ -7,14 +7,14 @@ Created on 2023-04-05
 @author: Dzr
 """
 import hashlib
-from datetime import datetime, time, timedelta
+from datetime import datetime, timedelta
+from operator import itemgetter
 
 from bson.int64 import Int64
 from bson.son import SON
 from pymongo import MongoClient
 
 from log import logger
-from operator import itemgetter
 
 # mongo
 MONGO_HOST = "172.17.4.87"
@@ -27,19 +27,13 @@ MONGO_DB2 = "editor"
 mongodb1 = client[MONGO_DB1]
 mongodb2 = client[MONGO_DB2]
 
-# 爬虫数据表
+# 爬虫数据生产表
 data_bak = mongodb1["data_bak"]
-
-# 心跳表
-spider_heartbeat = mongodb1["spider_heartbeat"]
-
-# py_spiders列表
-py_spiders_crawl_list = mongodb1["crawl_data"]
-
-# 列表页汇总表
+# 爬虫心跳表
+spider_heartbeat = mongodb1["pyspider_heartbeat"]
+# 日采集详情汇总表
 spider_monitor = mongodb1["spider_monitor"]
-
-# luaconfig表
+# 采集任务表
 spider_lua_config = mongodb2["luaconfig"]
 
 # 特殊网站
@@ -63,7 +57,7 @@ def get_md5(*args, **kwargs):
                 data_lst.append(v)
 
         if not data_lst or len(data_lst) != 3:
-            logger.error(f"[Monitor]条件缺失{conditions},当前内容:{data_lst}")
+            logger.error(f"组合条件缺失:{conditions},当前内容:{data_lst}")
             return None
 
         data_lst = sorted(data_lst)
@@ -81,31 +75,30 @@ def get_runtime(datestr=None):
     return datestr
 
 
-def save(documents, collection):
+def save(documents, collection, ordered=False):
     """保存数据"""
     is_list = isinstance(documents, list)
     documents = documents if is_list else [documents]
 
-    count = 0
     data_lst = []
     for items in documents:
         items.pop("_id", None)
         items.pop("business_type", None)
         items["comeintime"] = Int64(datetime.now().timestamp())
         data_lst.append(items)
-        count += 1
-        if len(data_lst) % 100 == 0:
-            collection.insert_many(data_lst)
-            data_lst.clear()
-            logger.info(f"[Monitor]{collection.name}-批量上传{count}条数据--已保存")
+        if len(data_lst) == 100:
+            ret = collection.insert_many(data_lst, ordered=ordered)
+            logger.info(f"MongoDB {collection.name} 保存 {len(ret.inserted_ids)} 条数据")
+            data_lst = []
 
     # 提交剩余数据
-    if len(data_lst) > 0:
-        collection.insert_many(data_lst)
-    logger.info(f"[Monitor]{collection.name}-批量上传{count}条数据--已保存")
+    if data_lst:
+        collection.insert_many(data_lst, ordered=ordered)
 
+    logger.info(f"MongoDB {collection.name} 保存 {len(documents)} 条数据")
 
-def get_crawler_basic_information():
+
+def get_spider_lst():
     """爬虫基础信息"""
     crawler_lst = []
     q = {"platform": "python", "state": 11}
@@ -126,7 +119,7 @@ def get_crawler_basic_information():
             })
     finally:
         client.close()
-        logger.info(f"[Monitor]爬虫采集监控--共计{len(crawler_lst)}个爬虫")
+        logger.info(f"爬虫监控 - 已上线 {len(crawler_lst)} 个爬虫")
         yield from crawler_lst
 
 
@@ -152,9 +145,9 @@ def aggregate_query(collection, pipeline, is_print_error=False):
         yield from results
 
 
-def aggregate_query_crawl_count(runtime):
+def aggregate_query_count(runtime):
     """
-    feapder爬虫列表和详情采集聚合查询结果统计
+    feapder爬虫列表和详情采集数据量的聚合查询统计
 
     @param runtime: 运行时间
     @return:
@@ -164,8 +157,8 @@ def aggregate_query_crawl_count(runtime):
         {"$match": {"runtime": runtime}},
         {
             "$group": {
-                "_id": "$spider_id",
-                "rel_count": {"$sum": "$rel_count"},  # 入库量
+                "_id": "$batch_no",
+                "rel_count": {"$sum": "$rel_count"},  # 入库量(去重)
                 "count": {"$sum": "$count"},  # 下载量
                 "spider_item": {
                     "$addToSet": {
@@ -181,8 +174,8 @@ def aggregate_query_crawl_count(runtime):
     ]
     results = aggregate_query(spider_heartbeat, pipeline)
     for doc in results:
+        spider_dict = {}
         spider_item = doc["spider_item"]
-        label_dict = {}
         for items in spider_item:
             site = items["site"]
             channel = items["channel"]
@@ -190,21 +183,19 @@ def aggregate_query_crawl_count(runtime):
             business_type = items["business_type"]
 
             if len(spider_item) > 1 and site not in special_sites:
-                logger.warning(f"[Monitor]{spidercode} -> {site} --存在风险, {len(spider_item)}")
-
-            is_list = str(business_type).endswith("List")
+                logger.warning(f"{spidercode} -> {site} --存在风险, {len(spider_item)}")
 
             hash_key = get_md5(**items)  # 防止多站点对应1个spidercode,数据相互重叠
             if not hash_key:
-                # logger.error(f"[Monitor]{site}-{channel}-{spidercode}--监控异常")
-                logger.error(f"[Monitor]{doc['_id']}--爬虫异常")
+                logger.error(f"异常批次号 {doc['_id']}")
                 continue
 
+            is_list = str(business_type).endswith("List")
             if not aggregate_items.get(hash_key):
                 data = {
-                    "spider_id": doc["_id"],
+                    "batch_no": doc["_id"],
                     "site": site,
-                    "channel": items["channel"],
+                    "channel": channel,
                     "spidercode": spidercode,
                     "business_type": business_type,
                     "runtime": runtime,
@@ -241,96 +232,26 @@ def aggregate_query_crawl_count(runtime):
                 aggregate_items.update({hash_key: data})
 
             # 监控爬虫任务,当 spidercode_at_site_num > 1
-            # 表明创建的爬虫任务存在问题,问题反馈数据寻源人员
+            # 表明创建的爬虫任务存在问题,需反馈数据寻源相关人员
             if site not in special_sites:
-                label = f"{business_type}_{spidercode}"
-                if label not in label_dict:
-                    aggregate_items[hash_key]["spidercode_at_site_num"] = 1
-                    conditions = {"keys": [hash_key], "websites": [site]}
-                    label_dict.setdefault(label, conditions)
+                spider = aggregate_items[hash_key]
+                if spidercode not in spider_dict:
+                    spider["spidercode_at_site_num"] = 1
+                    values = {"keys": [hash_key], "sites": [site]}
+                    spider_dict.setdefault(spidercode, values)
                 else:
-                    # 相同spidercode但site不同的爬虫进行计数+1
-                    websites = label_dict[label]["websites"]
-                    if site not in websites:
-                        keys = label_dict[label]["keys"]
+                    # 相同 spidercode 但 site 不同的爬虫进行计数+1
+                    sites = spider_dict[spidercode]["sites"]
+                    if site not in sites:
+                        keys = spider_dict[spidercode]["keys"]
                         for key in keys:
+                            # 更新相同 spidercode 的 spidercode_at_site_num
                             aggregate_items[key]["spidercode_at_site_num"] += 1
-                        # 记录身份id - hash_key
-                        keys.append(hash_key)
-                        # 记录站点
-                        websites.append(site)
-                        aggregate_items[hash_key]["spidercode_at_site_num"] = len(websites)
-
-    return aggregate_items
-
-
-def aggregate_query_py_spiders(runtime):
-    """py_spiders爬虫列表和详情采集聚合查询结果统计"""
-    aggregate_items = {}
-    if runtime is not None:
-        today = datetime.fromisoformat(runtime).date()
-    else:
-        today = datetime.now().date()
-    yesterday = today + timedelta(days=-1)
 
-    runtime = yesterday.strftime("%Y-%m-%d")
-    start_time = int(datetime.combine(yesterday, time()).timestamp())
-    end_time = int(datetime.combine(today, time()).timestamp())
+                        keys.append(hash_key)  # 记录新爬虫
+                        sites.append(site)  # 添加新站点
+                        spider["spidercode_at_site_num"] = len(sites)
 
-    pipeline = [
-        {
-            "$addFields": {
-                "rel_count": {
-                    "$cond": {
-                        "if": {"$ne": ["$finished", True]},
-                        "then": 1,
-                        "else": 0
-                    }
-                }
-            }
-        },
-        {"$match": {"comeintime": {"$gte": start_time, "$lt": end_time}}},
-        {
-            "$group": {
-                "_id": "$spidercode",
-                "count": {"$sum": 1},  # 当天采集总数
-                "rel_count": {"$sum": 1},  # 当天采集总数
-                "spider_item": {
-                    "$addToSet": {
-                        "site": "$site",
-                        "channel": "$channel",
-                        "spidercode": "$spidercode",
-                        "business_type": "List"
-                    }
-                }
-            }
-        },
-    ]
-    results = aggregate_query(py_spiders_crawl_list, pipeline)
-    for doc in results:
-        spider_item = doc["spider_item"]
-        for items in spider_item:
-            site = items["site"]
-            channel = items["channel"]
-            spidercode = items["spidercode"]
-            business_type = items["business_type"]
-            hask_key = get_md5(site, channel, spidercode)
-            spider_id = get_md5(spidercode + business_type + runtime)
-            data = {
-                "spider_id": spider_id,
-                "site": site,
-                "channel": items["channel"],
-                "spidercode": spidercode,
-                "business_type": business_type,
-                "runtime": runtime,
-                "list_count": doc["count"],
-                "list_rel_count": doc["rel_count"],
-                "detail_count": 0,
-                "detail_rel_count": 0,
-                "spidercode_at_site_num": 1,  # 爬虫代码与站点对应的关系数量
-                "frame": "py_spiders"
-            }
-            aggregate_items[hask_key] = data
     return aggregate_items
 
 
@@ -338,51 +259,52 @@ def aggregate_query_crawl_list(runtime):
     """feapder列表爬虫采集聚合查询结果统计"""
     aggregate_items = {}
     pipeline = [
-        {"$match": {"runtime": runtime, "business_type": {"$regex": "List"}}},
+        {
+            "$match": {
+                "runtime": runtime,
+                "business_type": {"$regex": "List"},
+                "status_code": {"$ne": -1}
+            }
+        },
         {
             "$group": {
-                "_id": "$spider_id",
-                "spider_item": {
-                    "$addToSet": {
-                        "site": "$site",
-                        "channel": "$channel",
-                        "spidercode": "$spidercode",
-                        "count": {"$ifNull": ["$count", 0]},
-                        "rel_count": {"$ifNull": ["$rel_count", 0]},
-                    }
-                }
+                "_id": "$batch_no",
+                "count": {"$sum": "$count"},
+                "rel_count": {"$sum": "$rel_count"},
+                "site": {"$first": "$site"},
+                "channel": {"$first": "$channel"},
+                "spidercode": {"$first": "$spidercode"},
             }
         }
     ]
     results = aggregate_query(spider_heartbeat, pipeline)
-    for doc in results:
-        spider_item = doc["spider_item"]
-        for item in spider_item:
-            hask_key = get_md5(**item)
-            if not aggregate_items.get(hask_key):
-                values = {"list_allintimes": 0, "list_nodatatimes": 0}
-                aggregate_items.setdefault(hask_key, values)
-
-            if all([
-                item["count"] > 0,
-                item["rel_count"] > 0,
-                item["count"] == item["rel_count"]
-            ]):
-                aggregate_items[hask_key]["list_allintimes"] += 1
-
-            if item["count"] == 0:
-                aggregate_items[hask_key]["list_nodatatimes"] = 1
+    for items in results:
+        hash_key = get_md5(**items)
+        if not aggregate_items.get(hash_key):
+            values = {"list_allintimes": 0, "list_nodatatimes": 0}
+            aggregate_items.setdefault(hash_key, values)
+
+        if all([
+            items["count"] > 0,
+            items["rel_count"] > 0,
+            items["count"] == items["rel_count"]
+        ]):
+            aggregate_items[hash_key]["list_allintimes"] += 1
+
+        if items["count"] == 0:
+            aggregate_items[hash_key]["list_nodatatimes"] += 1
+
     return aggregate_items
 
 
-def aggregate_query_crawlab_info(runtime):
+def aggregate_query_crawlab_information(runtime):
     """feapder爬虫采集聚合查询crawlab平台运行信息统计"""
     aggregate_items = {}
     pipeline = [
         {
             "$project": {
                 "_id": 0,
-                "spider_id": 1,
+                "batch_no": 1,
                 "spidercode": 1,
                 "runtime": 1,
                 "node_ip": 1,
@@ -393,7 +315,7 @@ def aggregate_query_crawlab_info(runtime):
         {"$match": {"runtime": runtime}},
         {
             "$group": {
-                "_id": "$spider_id",
+                "_id": "$batch_no",
                 "crawlab_item": {
                     "$addToSet": {
                         "spidercode": "$spidercode",
@@ -412,27 +334,26 @@ def aggregate_query_crawlab_info(runtime):
         spidercode = items.pop("spidercode")
         if not aggregate_items.get(spidercode):
             aggregate_items.setdefault(spidercode, items)
+
     return aggregate_items
 
 
-_runtime = get_runtime()
-aggregate_results = aggregate_query_crawl_count(_runtime)
-aggregate_query_py_spiders = aggregate_query_py_spiders(_runtime)
-aggregate_list_results = aggregate_query_crawl_list(_runtime)
-aggregate_query_crawlab_results = aggregate_query_crawlab_info(_runtime)
+_runtime = get_runtime()  # 统计时间
+aggregate_count_items = aggregate_query_count(_runtime)
+aggregate_crawl_list_items = aggregate_query_crawl_list(_runtime)
+aggregate_query_crawlab_items = aggregate_query_crawlab_information(_runtime)
 
 
 def get_node_and_taskid(spidercode, default=None):
     """获取最新爬虫工作节点和任务id"""
-    if aggregate_query_crawlab_results.get(spidercode):
-        default = aggregate_query_crawlab_results[spidercode]
+    if aggregate_query_crawlab_items.get(spidercode):
+        default = aggregate_query_crawlab_items[spidercode]
     return default
 
 
 def get_list_isgetdata(hash_key, default=0):
     """列表页是否采集数据"""
-    query_result = (aggregate_results.get(hash_key)
-                    or aggregate_query_py_spiders.get(hash_key))
+    query_result = aggregate_count_items.get(hash_key)
     if query_result:
         default = query_result["list_count"]
     return True if default > 0 else False
@@ -440,36 +361,36 @@ def get_list_isgetdata(hash_key, default=0):
 
 def get_list_allintimes(hash_key, default=0):
     """日采集列表数量与入库数量相等的次数(扣除标题去重数量 + 增量(全量)去重数量)"""
-    if aggregate_list_results.get(hash_key):
-        default = aggregate_list_results[hash_key]["list_allintimes"]
+    if aggregate_crawl_list_items.get(hash_key):
+        default = aggregate_crawl_list_items[hash_key]["list_allintimes"]
     return default
 
 
 def get_list_runtimes(hash_key, default=0):
     """列表采集运行频次"""
-    if aggregate_results.get(hash_key):
-        default = aggregate_results[hash_key]["list_runtimes"]
+    if aggregate_count_items.get(hash_key):
+        default = aggregate_count_items[hash_key]["list_runtimes"]
     return default
 
 
 def get_list_nodatatimes(hash_key, default=-1):
     """列表页采集无数据次数(过滤后)"""
-    if aggregate_list_results.get(hash_key):
-        default = aggregate_list_results[hash_key]["list_nodatatimes"]
+    if aggregate_crawl_list_items.get(hash_key):
+        default = aggregate_crawl_list_items[hash_key]["list_nodatatimes"]
     return default
 
 
 def get_detail_downloadnum(hash_key, default=0):
     """详情页下载量"""
-    if aggregate_results.get(hash_key):
-        default = aggregate_results[hash_key]["detail_count"]
+    if aggregate_count_items.get(hash_key):
+        default = aggregate_count_items[hash_key]["detail_count"]
     return default
 
 
 def get_detail_downloadsuccessnum(hash_key, default=0):
     """详情页下载成功量"""
-    if aggregate_results.get(hash_key):
-        default = aggregate_results[hash_key]["detail_rel_count"]
+    if aggregate_count_items.get(hash_key):
+        default = aggregate_count_items[hash_key]["detail_rel_count"]
     return default
 
 
@@ -481,30 +402,27 @@ def get_detail_downloadfailnum(**kwargs):
     return count
 
 
-def main():
+def start_monitor():
     summary_queue = []
-    crawlers = get_crawler_basic_information()
-    for crawler in crawlers:
-        join_data = {**crawler, "is_valid": False}  # 创建爬虫基础数据
-
-        site = crawler["site"]
-        channel = crawler["channel"]
-        spidercode = crawler["code"]
+    spider_lst = get_spider_lst()
+    for spider in spider_lst:
+        site = spider["site"]
+        channel = spider["channel"]
+        spidercode = spider["code"]
 
+        join_data = {**spider, "is_valid": False}  # 创建爬虫基础数据
         hash_key = get_md5(site, channel, spidercode)
-        query_result = (aggregate_results.get(hash_key)
-                        or aggregate_query_py_spiders.get(hash_key))
+        query_result = aggregate_count_items.get(hash_key)
         if query_result:
-            # crawlab平台
-            crawlab = get_node_and_taskid(spidercode)
+            crawlab = get_node_and_taskid(spidercode)  # crawlab 运行详情
             if crawlab:
                 join_data["py_taskid"] = crawlab["crawlab_taskid"] or ""
                 join_data["py_nodename"] = crawlab["node_ip"] or ""
 
-            # 聚合查询心跳统计结果
+            # 聚合查询 - 统计数据采集详情信息(来自采集心跳)
             result = query_result
             join_data["frame"] = result["frame"]  # 采集框架
-            join_data["spider_id"] = result["spider_id"]
+            join_data["batch_no"] = result["batch_no"]
             join_data["business_type"] = result["business_type"]
             join_data["list_count"] = result["list_count"]
             join_data["list_rel_count"] = result["list_rel_count"]
@@ -512,12 +430,12 @@ def main():
             join_data["detail_rel_count"] = result["detail_rel_count"]
             join_data["spidercode_at_site_num"] = result["spidercode_at_site_num"]
 
-        # 列表采集汇总数据
+        # 采集汇总 - 列表数据
         join_data["list_isgetdata"] = get_list_isgetdata(hash_key)
         join_data["list_allintimes"] = get_list_allintimes(hash_key)
         join_data["list_runtimes"] = get_list_runtimes(hash_key)
         join_data["list_nodatatimes"] = get_list_nodatatimes(hash_key)
-        # 详情采集汇总数据
+        # 采集汇总 - 详情数据
         join_data["detail_downloadnum"] = get_detail_downloadnum(hash_key)
         join_data["detail_downloadsuccessnum"] = get_detail_downloadsuccessnum(hash_key)
         join_data["detail_downloadfailnum"] = get_detail_downloadfailnum(**join_data)
@@ -527,10 +445,11 @@ def main():
             join_data["is_valid"] = True
 
         summary_queue.append(join_data)
-        logger.info(f"[Monitor]{crawler['site']}-{crawler['channel']}-{spidercode}--完成统计")
+        logger.info(f"{site} {channel} {spidercode} --统计完成")
     # 上传数据库
     save(summary_queue, spider_monitor)
+    logger.info("爬虫监控 - 统计完成")
 
 
 if __name__ == '__main__':
-    main()
+    start_monitor()
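
For reference, a minimal sketch of the order-independent hash key the monitor derives from site, channel and spidercode so that one spidercode running on several sites keeps separate statistics buckets (make_hash_key is an illustrative stand-in; the real get_md5 above also validates that all three fields are present before hashing):

import hashlib


def make_hash_key(site, channel, spidercode):
    """Order-independent MD5 over the three identifying fields."""
    parts = sorted([site, channel, spidercode])
    return hashlib.md5("".join(parts).encode("utf-8")).hexdigest()


# Two sites sharing one spidercode yield different keys,
# so their counts land in separate aggregation buckets.
assert make_hash_key("siteA", "channel1", "sp001") != make_hash_key("siteB", "channel1", "sp001")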