|
@@ -1,14 +1,21 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
-import pymongo
|
|
|
+from pymongo import MongoClient
|
|
|
+from urllib.parse import quote_plus
|
|
|
from datetime import datetime, timedelta
|
|
|
import pandas as pd
|
|
|
import os
|
|
|
|
|
|
-# MongoDB连接配置
|
|
|
-client = pymongo.MongoClient("mongodb://192.168.3.149:27180/")
|
|
|
-db = client["data_quality"]
|
|
|
-collection = db["bidding_qb1031"]
|
|
|
+# MongoDB 连接配置
|
|
|
+username = quote_plus("viewdata")
|
|
|
+password = quote_plus("viewdata")
|
|
|
+source_client = MongoClient(
|
|
|
+ f'mongodb://{username}:{password}@127.0.0.1:27088/',
|
|
|
+ unicode_decode_error_handler="ignore",
|
|
|
+ directConnection=True
|
|
|
+)
|
|
|
+source_db = source_client['qfw_ai']
|
|
|
+source_collection = source_db['bidding']
|
|
|
|
|
|
# 定义一周的时间范围,转换为Unix时间戳格式
|
|
|
end_date = int(datetime.now().timestamp())
|
|
@@ -21,31 +28,39 @@ tags = [
|
|
|
"情报_招标代理",
|
|
|
"情报_管理咨询",
|
|
|
"情报_保险",
|
|
|
- "情报_工程设计咨询"
|
|
|
+ "情报_工程设计咨询",
|
|
|
+ "情报_安防",
|
|
|
+ "情报_印务商机",
|
|
|
+ "情报_环境采购",
|
|
|
+ "情报_家具招投标"
|
|
|
]
|
|
|
|
|
|
-# 查询条件,注意 comeintime 使用时间戳范围
|
|
|
+# 查询条件
|
|
|
query = {
|
|
|
"comeintime": {"$gte": start_date, "$lt": end_date},
|
|
|
"tag_topinformation": {"$in": tags}
|
|
|
}
|
|
|
|
|
|
+# 初始化字典,将所有标签的计数设置为0
|
|
|
+data = {tag: 0 for tag in tags}
|
|
|
+
|
|
|
# 统计每个标签的数量
|
|
|
-results = collection.aggregate([
|
|
|
+results = source_collection.aggregate([
|
|
|
{"$match": query},
|
|
|
{"$unwind": "$tag_topinformation"}, # 展开数组元素
|
|
|
{"$match": {"tag_topinformation": {"$in": tags}}}, # 再次匹配展开后的标签值
|
|
|
{"$group": {"_id": "$tag_topinformation", "count": {"$sum": 1}}}
|
|
|
])
|
|
|
|
|
|
-# 初始化字典,将所有标签的计数设置为0
|
|
|
-data = {tag: 0 for tag in tags}
|
|
|
-
|
|
|
# 更新字典中有数据的标签的数量
|
|
|
for result in results:
|
|
|
+ print(f"标签: {result['_id']}, 计数: {result['count']}") # 调试信息
|
|
|
data[result["_id"]] = result["count"]
|
|
|
|
|
|
-# 创建DataFrame,以当前时间范围为索引
|
|
|
+# Print the full dict for manual inspection; tags with no matching records keep their initial count of 0
|
|
|
+print("数据字典内容:", data) # 打印整个数据字典
|
|
|
+
|
|
|
+# 创建DataFrame
|
|
|
date_range = f"{datetime.fromtimestamp(start_date).strftime('%Y/%m/%d')}-{datetime.fromtimestamp(end_date).strftime('%Y/%m/%d')}"
|
|
|
df = pd.DataFrame([data], index=[date_range])
|
|
|
|
|
@@ -54,19 +69,14 @@ output_file = "weekly_data_statistics.xlsx"
|
|
|
|
|
|
# 检查文件是否存在,不存在则创建
|
|
|
if not os.path.exists(output_file):
|
|
|
- # 创建一个包含所有标签列的DataFrame并写入Excel文件
|
|
|
df.to_excel(output_file, sheet_name="Weekly Statistics", index_label="日期")
|
|
|
else:
|
|
|
- # 如果文件已存在,以追加模式写入数据,不覆盖已有数据
|
|
|
with pd.ExcelWriter(output_file, mode="a", engine="openpyxl", if_sheet_exists="overlay") as writer:
|
|
|
- # 载入现有数据,检查是否需要覆盖日期索引
|
|
|
existing_df = pd.read_excel(output_file, sheet_name="Weekly Statistics", index_col=0)
|
|
|
if date_range in existing_df.index:
|
|
|
- # 更新相同日期范围的数据
|
|
|
- existing_df.update(df)
|
|
|
- existing_df.to_excel(writer, sheet_name="Weekly Statistics", index_label="日期")
|
|
|
+ existing_df.update(df) # 更新已有行
|
|
|
else:
|
|
|
- # 追加新数据行
|
|
|
- df.to_excel(writer, sheet_name="Weekly Statistics", index_label="日期")
|
|
|
+ existing_df = pd.concat([existing_df, df]) # 添加新行
|
|
|
+ existing_df.to_excel(writer, sheet_name="Weekly Statistics", index_label="日期")
|
|
|
|
|
|
print(f"统计结果已保存到 {output_file}")
|