9 hónapja · a78dcaf17a
--- a/tools/mongo断流监控/mongo_monitor.py
+++ b/tools/mongo断流监控/mongo_monitor.py
@@ -1,14 +1,21 @@
 # -*- coding: utf-8 -*-
 
-import pymongo
+from pymongo import MongoClient
+from urllib.parse import quote_plus
 from datetime import datetime, timedelta
 import pandas as pd
 import os
 
-# MongoDB连接配置
-client = pymongo.MongoClient("mongodb://192.168.3.149:27180/")
-db = client["data_quality"]
-collection = db["bidding_qb1031"]
+# MongoDB 连接配置
+username = quote_plus("viewdata")
+password = quote_plus("viewdata")
+source_client = MongoClient(
+    f'mongodb://{username}:{password}@127.0.0.1:27088/',
+    unicode_decode_error_handler="ignore",
+    directConnection=True
+)
+source_db = source_client['qfw_ai']
+source_collection = source_db['bidding']
 
 # 定义一周的时间范围，转换为Unix时间戳格式
 end_date = int(datetime.now().timestamp())
@@ -21,31 +28,39 @@ tags = [
     "情报_招标代理",
     "情报_管理咨询",
     "情报_保险",
-    "情报_工程设计咨询"
+    "情报_工程设计咨询",
+    "情报_安防",
+    "情报_印务商机",
+    "情报_环境采购",
+    "情报_家具招投标"
 ]
 
-# 查询条件，注意 comeintime 使用时间戳范围
+# 查询条件
 query = {
     "comeintime": {"$gte": start_date, "$lt": end_date},
     "tag_topinformation": {"$in": tags}
 }
 
+# 初始化字典，将所有标签的计数设置为0
+data = {tag: 0 for tag in tags}
+
 # 统计每个标签的数量
-results = collection.aggregate([
+results = source_collection.aggregate([
     {"$match": query},
     {"$unwind": "$tag_topinformation"},  # 展开数组元素
     {"$match": {"tag_topinformation": {"$in": tags}}},  # 再次匹配展开后的标签值
     {"$group": {"_id": "$tag_topinformation", "count": {"$sum": 1}}}
 ])
 
-# 初始化字典，将所有标签的计数设置为0
-data = {tag: 0 for tag in tags}
-
 # 更新字典中有数据的标签的数量
 for result in results:
+    print(f"标签: {result['_id']}, 计数: {result['count']}")  # 调试信息
     data[result["_id"]] = result["count"]
 
-# 创建DataFrame，以当前时间范围为索引
+# 检查数据字典以确保所有标签都被更新
+print("数据字典内容:", data)  # 打印整个数据字典
+
+# 创建DataFrame
 date_range = f"{datetime.fromtimestamp(start_date).strftime('%Y/%m/%d')}-{datetime.fromtimestamp(end_date).strftime('%Y/%m/%d')}"
 df = pd.DataFrame([data], index=[date_range])
 
@@ -54,19 +69,14 @@ output_file = "weekly_data_statistics.xlsx"
 
 # 检查文件是否存在，不存在则创建
 if not os.path.exists(output_file):
-    # 创建一个包含所有标签列的DataFrame并写入Excel文件
     df.to_excel(output_file, sheet_name="Weekly Statistics", index_label="日期")
 else:
-    # 如果文件已存在，以追加模式写入数据，不覆盖已有数据
     with pd.ExcelWriter(output_file, mode="a", engine="openpyxl", if_sheet_exists="overlay") as writer:
-        # 载入现有数据，检查是否需要覆盖日期索引
         existing_df = pd.read_excel(output_file, sheet_name="Weekly Statistics", index_col=0)
         if date_range in existing_df.index:
-            # 更新相同日期范围的数据
-            existing_df.update(df)
-            existing_df.to_excel(writer, sheet_name="Weekly Statistics", index_label="日期")
+            existing_df.update(df)  # 更新已有行
         else:
-            # 追加新数据行
-            df.to_excel(writer, sheet_name="Weekly Statistics", index_label="日期")
+            existing_df = pd.concat([existing_df, df])  # 添加新行
+        existing_df.to_excel(writer, sheet_name="Weekly Statistics", index_label="日期")
 
 print(f"统计结果已保存到 {output_file}")