liumiaomiao 5 months ago
parent
commit
f74e464b24

+ 6 - 0
lib/learn

@@ -0,0 +1,6 @@
+from pymongo import MongoClient
+#连接正式环境mongo
+collection_bid = MongoClient(f'mongodb://{"viewdata"}:{"viewdata"}@{"127.0.0.1:27088"}/',unicode_decode_error_handler="ignore", directConnection=True)["qfw"]["bidding"]
+#连接测试环境mongo
+db = MongoClient('192.168.3.149', 27180, unicode_decode_error_handler="ignore").data_quality
+coll_user = db["standard_sample_data_all"]

+ 19 - 19
tools/mongo断流监控/mongo_monitor.py

@@ -6,25 +6,25 @@ from lib.monitor_tools import monitor
 
 #标准库-bidding
 bidding_count=monitor.bidding()
-#存库
-monitor.save_to_mongo("bidding",bidding_count)
-print("bidding入库成功")
-#高质量库-mongo
-bidding_ai_count=monitor.bidding_ai()
-monitor.save_to_mongo("bidding_ai",bidding_ai_count)
-print("bidding_ai入库成功")
-# nzj-mysql
-nzj_count=monitor.nzj()
-monitor.save_to_mongo("nzj",nzj_count)
-print("nzj入库成功")
-# bidding_碎片化
-# bidding_fragment_count=monitor.bidding_fragment()
-# monitor.save_to_mongo("bidding_fragment",bidding_fragment_count)
-# print("bidding_fragment入库成功")
-# 人脉clickhouse数据
-connections_count=monitor.connections()
-monitor.save_to_mongo('connections',connections_count)
-print("connections入库成功")
+# #存库
+# monitor.save_to_mongo("bidding",bidding_count)
+# print("bidding入库成功")
+# #高质量库-mongo
+# bidding_ai_count=monitor.bidding_ai()
+# monitor.save_to_mongo("bidding_ai",bidding_ai_count)
+# print("bidding_ai入库成功")
+# # nzj-mysql
+# nzj_count=monitor.nzj()
+# monitor.save_to_mongo("nzj",nzj_count)
+# print("nzj入库成功")
+#bidding_碎片化
+bidding_fragment_count=monitor.bidding_fragment()
+monitor.save_to_mongo("bidding_fragment",bidding_fragment_count)
+print("bidding_fragment入库成功")
+# #人脉clickhouse数据
+# connections_count=monitor.connections()
+# monitor.save_to_mongo('connections',connections_count)
+# print("connections入库成功")
 
 
 

+ 2 - 2
tools/从mongo库导出数据execl/mongo_to_execl.py

@@ -21,7 +21,7 @@ def export_to_excel(db_name, collection_name, fields, output_file):
     projection = {field: 1 for field in fields}
 
     # 查询数据
-    data = collection.find({"flag":1},projection).sort("_id", 1)
+    data = collection.find({"flag":3},projection).sort("_id", 1)
 
     # 将数据转换为DataFrame
     df = pd.DataFrame(list(data))
@@ -34,7 +34,7 @@ if __name__ == "__main__":
 
     db_name = 'data_quality'  # 替换为你的数据库名称
     # collection_name = 'standard_sample_data_all_ai' # 替换为你的集合名称
-    collection_name = 'customer_data'  # 替换为你的集合名称
+    collection_name = 'bidding_20250123'  # 替换为你的集合名称
     # 定义参数
 
     fields = ['id', 'site','toptype','subtype','area','city','buyer','projectname','projectcode','budget','s_winner','bidamount','multipackage_ai','href','jyhref']  # 替换为你需要导出的字段

BIN
tools/从mongo库导出数据execl/output.xlsx


+ 1 - 1
tools/周报表格导出/DataExport_forTesting.py

@@ -201,7 +201,7 @@ columns_timeliness = ['日期'] + list(timeliness_data.keys())
 data_row_timeliness = [date_range] + list(timeliness_data.values())
 
 # 创建DataFrame并写入Excel
-excel_file = 'mongo_data_statistics_combined1.xlsx'
+excel_file = '../周报表格导出/mongo_data_statistics_combined1.xlsx'
 
 with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
     # 写入第一个sheet(断流监控_mongo库)

BIN
tools/周报表格导出/mongo_data_statistics_combined1.xlsx


+ 85 - 0
tools/标准样本数据入库/样本分析数据入mysql库.py

@@ -0,0 +1,85 @@
+from pymongo import MongoClient
+from bson import ObjectId  # 导入 ObjectId
+import pymysql
+from mongodb_helper import MongoDBInterface
+
+# MongoDB 配置信息
+MongodbConfigLocal = {
+    "ip_port": "127.0.0.1:27088",
+    "user": "viewdata",
+    "password": "viewdata",
+    "db": "qfw",
+    "col": "bidding"  # 替换为实际集合名称
+}
+
+# MySQL 配置信息
+mysql_config = {
+    "host": "192.168.3.217",
+    "user": "root",
+    "password": "=PDT49#80Z!RVv52_z",
+    "database": "quality",
+    "port": 4000
+}
+
+# 字段映射
+field_mapping = {
+    "toptype": "toptype_ai",
+    "subtype": "subtype_ai",
+    "area": "area_ai",
+    "city": "city_ai",
+    "buyer": "buyer_ai",
+    "projectname": "projectname_ai",
+    "projectcode": "projectcode_ai",
+    "budget": "budget_ai",
+    "s_winner": "s_winner_ai",
+    "bidamount": "bidamount_ai",
+    "multipackage": "multipackage_ai"
+}
+
+def main():
+    # 实例化 MongoDBInterface
+    mongo_db_interface = MongoDBInterface(MongodbConfigLocal)
+
+    # 使用 MySQL 的 with 语句管理连接
+    with pymysql.connect(
+            host=mysql_config["host"],
+            port=mysql_config["port"],
+            user=mysql_config["user"],
+            password=mysql_config["password"],
+            database=mysql_config["database"]
+    ) as mysql_conn:
+        with mysql_conn.cursor() as mysql_cursor:
+            # 从 MySQL 中读取 _id 列表
+            mysql_cursor.execute("SELECT _id FROM bid_llizhikun")
+            ids = mysql_cursor.fetchall()
+
+            for (_id,) in ids:
+                # 将 _id 转换为 ObjectId 类型
+                try:
+                    object_id = ObjectId(_id)
+                except Exception as e:
+                    print(f"Invalid ObjectId: {_id}, skipping. Error: {e}")
+                    continue
+
+                # 使用 MongoDBInterface 的 find_by_id 方法从 MongoDB 查询数据
+                mongo_data = mongo_db_interface.find_by_id(MongodbConfigLocal["col"], object_id)
+                if not mongo_data:
+                    continue
+
+                # 构造更新数据
+                update_fields = {field_mapping[key]: mongo_data.get(key, None) for key in field_mapping}
+
+                # 构造更新 SQL
+                update_sql = f"""
+                UPDATE bid_llizhikun
+                SET {", ".join([f"{field} = %s" for field in update_fields.keys()])}
+                WHERE _id = %s
+                """
+                update_values = list(update_fields.values()) + [_id]
+
+                # 执行更新操作
+                mysql_cursor.execute(update_sql, update_values)
+                mysql_conn.commit()
+
+if __name__ == "__main__":
+    main()

+ 2 - 2
tools/样本数据导出/ai_exchange_to_multipacket.py

@@ -3,8 +3,8 @@ from pymongo import MongoClient
 #将高质量库的多包标识 转换成 multipackage_ai 字段表示,便于后续数据导出
 
 db = MongoClient('192.168.3.149', 27180, unicode_decode_error_handler="ignore").data_quality
-collection = db["customer_data"]
-for info in collection.find({"flag":1}).sort("_id", 1):
+collection = db["bidding_20250123"]
+for info in collection.find({"flag":3}).sort("_id", 1):
         com_package=info.get("com_package", "")
         if len(com_package) > 1:
                 collection.update_one({"_id": info["_id"]}, {"$set": {"multipackage_ai": 1}})

+ 7 - 4
tools/样本数据导出/fix_site_data_export.py

@@ -4,9 +4,12 @@ from pymongo import MongoClient
 def sample_data(N,M):
     # 连接MongoDB数据库
     db = MongoClient('192.168.3.149', 27180, unicode_decode_error_handler="ignore").data_quality
-    collection = db["bidding_20241205_ai"]
+    collection = db["bidding_20250123"]
     # 把符合条件的站点名称存起来
-    site_list = {"陕西采购与招标网","北京市投资项目在线审批监管平台","浙江省投资项目在线审批监管平台"}
+    site_list = {"中粮贸易阳光采购平台","航天电子采购平台","青海省政府采购电子卖场","金正大集团电子采购平台",
+"建华建材电子采购平台",
+"淄博市政府采购网上商城",
+"福建省宁德市政府采购网"}
     # 初始化已标记的文档数量
     marked_count = 0
     marked_site_count = 0
@@ -44,7 +47,7 @@ def sample_data(N,M):
                 for info in collection.find({"site": site}).sort("_id", 1).skip(i*jiange).limit(1):
                     print(f"Updating document with _id: {info['_id']}")
                     # 更新文档,设置标记
-                    update_result = collection.update_one({"_id": info["_id"]}, {"$set": {"flag": 2}})
+                    update_result = collection.update_one({"_id": info["_id"]}, {"$set": {"flag": 3}})
                     if update_result.modified_count == 0:
                         print("No document updated for _id:", info["_id"])
                     else:
@@ -57,4 +60,4 @@ def sample_data(N,M):
 
     print(f"Total marked documents: {marked_count}")
 
-sample_data(35,10)
+sample_data(200,10)

+ 44 - 7
tools/波动率计算/一份数据基于时间段计算波动率.py

@@ -21,7 +21,8 @@ pipeline = [
         "$project": {
             "spidercode": 1,
             "create_time": 1,
-            "date": {"$toDate": {"$multiply": ["$create_time", 1000]}}  # 转换为毫秒级日期
+            "date": {"$toDate": {"$multiply": ["$create_time", 1000]}},   # 转换为毫秒级日期
+            "_id": 1
         }
     },
     {
@@ -51,7 +52,8 @@ pipeline = [
                         0
                     ]
                 }
-            }
+            },
+            "original_id": {"$first": "$_id"}  # 保存原始的 _id
         }
     },
     {
@@ -71,17 +73,52 @@ pipeline = [
                         ]
                     }
                 ]
-            }
+            },
+            "original_id": 1  # 保证返回原始的 _id 字段
         }
     }
 ]
 
 # 执行聚合查询
-result = collection.aggregate(pipeline)
+try:
+    result = collection.aggregate(pipeline)
+
+    # 检查结果是否为空
+    result_list = list(result)  # 将游标转换为列表,以便调试
+    if not result_list:
+        print("查询没有返回结果!")
+    else:
+        # 存储结果到数据库
+        for doc in result_list:
+            print(doc)
+            spidercode = doc['spidercode']
+            volatility = doc['volatility']
+            original_id = doc['original_id']  # 获取 MongoDB 的原始 _id 字段
+
+            # 保留波动率两位小数
+            if volatility is not None:
+                volatility = round(volatility, 2)
+
+                # 使用原始的 _id 进行唯一匹配更新
+                update_query = {
+                    "_id": original_id  # 使用原始 _id 字段来更新文档
+                }
+
+                # 更新查询
+                update_data = {
+                    "$set": {
+                        "volatility": volatility
+                    }
+                }
+
+                # 更新数据库中的文档
+                collection.update_one(update_query, update_data)
+
+        # 打印成功更新的信息
+        print("波动率数据更新成功。")
 
-# 打印结果
-for doc in result:
-    print(doc)
+except Exception as e:
+    print(f"发生错误: {e}")
 
 '''
 示例输出: