Parcourir la source

删除py_spiders相关统计与汇总代码;更新心跳表名

dongzhaorui il y a 1 an
Parent
commit
61cbe03efe
1 fichier modifié avec 1 ajout et 60 suppressions
  1. 1 60
      A数据处理/sync_data/summary.py

+ 1 - 60
A数据处理/sync_data/summary.py

@@ -25,10 +25,7 @@ mongodb = client[MONGO_DB]
 data_bak = mongodb["data_bak"]
 
 # 心跳表
-spider_heartbeat = mongodb["spider_heartbeat"]
-
-# py_spiders列表
-py_spiders_crawl_list = mongodb["crawl_data"]
+spider_heartbeat = mongodb["pyspider_heartbeat"]
 
 # 竞品列表
 ybw_list = mongodb["ybw_list"]
@@ -133,60 +130,6 @@ def feapder_crawl_aggregate_of_list_pages(datestr=None):
         logger.info("[Summary]feapder数据汇总结束")
 
 
-def py_spiders_crawl_aggregate_of_list_pages(datestr=None):
-    """py_spiders采集列表页数据汇总(前一天的数据)"""
-    if datestr is not None:
-        today = datetime.fromisoformat(datestr).date()
-    else:
-        today = datetime.now().date()
-    yesterday = today + timedelta(days=-1)
-
-    runtime = yesterday.strftime("%Y-%m-%d")
-    start_time = int(datetime.combine(yesterday, time()).timestamp())
-    end_time = int(datetime.combine(today, time()).timestamp())
-
-    pipeline = [
-        {
-            "$addFields": {
-                "rel_count": {
-                    "$cond": {
-                        "if": {"$ne": ["$finished", True]},
-                        "then": 1,
-                        "else": 0
-                    }
-                }
-            }
-        },
-        {"$match": {"comeintime": {"$gte": start_time, "$lt": end_time}}},
-        {
-            "$group": {
-                "_id": "$spidercode",
-                "count": {"$sum": 1},  # 当天采集总数
-                "rel_count": {"$sum": 1},  # 当天采集总数
-                # "rel_count": {"$sum": "$rel_count"},  # 当天采集详情总数(仅成功)
-                "spider_item": {
-                    "$addToSet": {
-                        "site": "$site",
-                        "channel": "$channel",
-                        "spidercode": "$spidercode",
-                        "business_type": "List"
-                    }
-                }
-            }
-        },
-        {"$sort": SON([("rel_count", -1)])}
-    ]
-    cursor = py_spiders_crawl_list.aggregate(pipeline, allowDiskUse=True)
-    try:
-        results = []
-        for doc in cursor:
-            results.extend(summary_data(doc, runtime))
-        save(results, summary_table_of_list_pages)
-    finally:
-        client.close()
-        logger.info("[Summary]py_spiders数据汇总结束")
-
-
 def competing_products_crawl_aggregate(collection, datestr=None):
     """竞品采集聚合查询"""
     if datestr is not None:
@@ -285,7 +228,5 @@ def zgzb_crawl_aggregate_of_list_pages(datestr=None):
 
 if __name__ == '__main__':
     feapder_crawl_aggregate_of_list_pages()
-    py_spiders_crawl_aggregate_of_list_pages()
     competing_products_crawl_aggregate_of_list_pages()
     zgzb_crawl_aggregate_of_list_pages()
-