liumiaomiao 1 月之前
父节点
当前提交
9a50b5c0fc
共有 66 个文件被更改,包括 5371 次插入563 次删除
  1. 21 1
      tools/test.py
  2. 46 4
      tools/从es导出数据/es.py
  3. 23 40
      tools/从mongo导出数据/mongo_into_mongo.py
  4. 22 6
      tools/从mongo库导出数据execl/mongo_to_execl.py
  5. 二进制
      tools/从mongo库导出数据execl/output.xlsx
  6. 二进制
      tools/从mongo库导出数据execl/联通1-2月份交叉验证后免送洗-抽样数据2000条.xlsx
  7. 1 1
      tools/分数字段结果分析/score_ai.py
  8. 1 1
      tools/周报/mongo,es断流监控/es_monitor.py
  9. 4 4
      tools/周报/mongo,es断流监控/mongo_monitor.py
  10. 209 0
      tools/周报/mongo,es断流监控/monitor_all.py
  11. 11 21
      tools/周报/周报表格导出/DataExport_forTesting.py
  12. 二进制
      tools/周报/周报表格导出/mongo_data_statistics_combined1.xlsx
  13. 258 224
      tools/周报/周报表格导出/weekly_data_store.py
  14. 124 0
      tools/基于抽取表ai和规则对比/ai抽取和规则抽取对比.py
  15. 111 0
      tools/基于抽取表ai和规则对比/new.py
  16. 201 0
      tools/基于抽取表ai和规则对比/一致性对比.py
  17. 7 12
      tools/数据抽样/sample_data_export_new.py
  18. 79 0
      tools/数据抽样/sample_data_export_online.py
  19. 108 0
      tools/数据抽样/抽样方法最新.py
  20. 54 0
      tools/数据质量监控平台/kb-数据问题统计/execl_kb.py
  21. 160 0
      tools/数据质量监控平台/kb-数据问题统计/task_kb.py
  22. 212 0
      tools/数据质量监控平台/基于标准数据的字段分析结果.py
  23. 259 0
      tools/数据质量监控平台/标讯基础信息分析结果入库.py
  24. 2 2
      tools/标准样本数据入库/File_import_mysql.py
  25. 1 1
      tools/标准样本数据入库/execl_into_mongo.py
  26. 0 0
      tools/标准样本数据入库/insert_errors.log
  27. 二进制
      tools/标准样本数据入库/标准样本数据汇总.xlsx
  28. 2 2
      tools/标准样本数据入库/样本分析数据入mongo库.py
  29. 89 0
      tools/标讯数据附件为空数量统计/统计.py
  30. 68 0
      tools/生成标准样本库的分析数据/test.py
  31. 77 0
      tools/生成标准样本库的分析数据/test2.py
  32. 92 0
      tools/生成标准样本库的分析数据/test3.py
  33. 二进制
      tools/生成标准样本库的分析数据/数据分析结果.xlsx
  34. 34 52
      tools/生成标准样本库的分析数据/根据样本数据拉取正式数据生成分析表mongo.py
  35. 210 0
      tools/生成标准样本库的分析数据/生成统计结果.py
  36. 6 79
      tools/生成标准样本库的分析数据/生成统计结果_入库.py
  37. 2297 51
      tools/高质量站点第一版/_id.csv
  38. 二进制
      tools/高质量站点第一版/export_result_信用.xlsx
  39. 二进制
      tools/高质量站点第一版/export_result_招标.xlsx
  40. 二进制
      tools/高质量站点第一版/export_result_招标新.xlsx
  41. 二进制
      tools/高质量站点第一版/export_result_结果.xlsx
  42. 二进制
      tools/高质量站点第一版/export_result_采购意向.xlsx
  43. 二进制
      tools/高质量站点第一版/export_result_采购意向新.xlsx
  44. 二进制
      tools/高质量站点第一版/export_result_预告.xlsx
  45. 二进制
      tools/高质量站点第一版/export_result_预告新.xlsx
  46. 二进制
      tools/高质量站点第一版/flag1_unique_spidercodes.xlsx
  47. 51 0
      tools/高质量站点第一版/spidercodes.csv
  48. 二进制
      tools/高质量站点第一版/transformed_data.xlsx
  49. 39 29
      tools/高质量站点第一版/根据id找出爬虫代码.py
  50. 17 23
      tools/高质量站点第一版/统计标讯数量.py
  51. 16 5
      tools/高质量站点第一版/高质量站点-脚本41.py
  52. 二进制
      tools/高质量站点第二版/.~spidercode_stats.xlsx
  53. 91 0
      tools/高质量站点第二版/ai抽取和规则抽取对比结果.py
  54. 二进制
      tools/高质量站点第二版/spidercode_analysis.xlsx
  55. 二进制
      tools/高质量站点第二版/spidercode_stats.xlsx
  56. 二进制
      tools/高质量站点第二版/spidercode_stats1.xlsx
  57. 二进制
      tools/高质量站点第二版/spidercode_stats_信用_err.xlsx
  58. 二进制
      tools/高质量站点第二版/spidercode_stats_招标_err.xlsx
  59. 二进制
      tools/高质量站点第二版/spidercode_stats_结果_err.xlsx
  60. 二进制
      tools/高质量站点第二版/spidercode_stats_采购意向.xlsx
  61. 二进制
      tools/高质量站点第二版/spidercode_stats_采购意向_err.xlsx
  62. 二进制
      tools/高质量站点第二版/spidercode_stats_预告_err.xlsx
  63. 201 0
      tools/高质量站点第二版/增加一致性对比-智昆.py
  64. 27 5
      tools/高质量站点第二版/找出爬虫比例.py
  65. 73 0
      tools/高质量站点第二版/找出爬虫比例2.py
  66. 67 0
      tools/高质量站点第二版/统计三个大模型和规则一致性的比例.py

+ 21 - 1
tools/test.py

@@ -1 +1,21 @@
-main_ids=[]
+from pymongo import MongoClient
+
+client = MongoClient('mongodb://172.20.45.129:27002/')
+db = client['data_quality']
+collection = db['bidding_202505']
+
+# 删除 comeintime 在 [1747152000, 1747238400] 范围内的文档
+# result = collection.delete_many({
+#     "comeintime": {
+#         "$gte": 1747152000,  # 大于等于 from
+#         "$lte": 1747238400   # 小于等于 to
+#     }
+# })
+count = collection.count_documents({
+    "comeintime": {
+        "$gte": 1747152000,
+        "$lte": 1747238400
+    }
+})
+print(f"符合条件的数据量: {count}")
+# print(f"删除了 {result.deleted_count} 条文档")

+ 46 - 4
tools/从es导出数据/es.py

@@ -18,10 +18,10 @@ def ES_bidding(es_query):
         'index': "bidding",
         'size': 1000,
         # mongo存的数据库表
-        'mg_host': '192.168.3.149',
-        'mg_port': 27180,
+        'mg_host': '172.20.45.129',
+        'mg_port': 27002,
         'database': 'data_quality',
-        'collection': 'bidding_20250109'
+        'collection': 'bidding_20250515_fujian'
     }
     query = es_query
     # 传入查询语句query 以及配置信息
@@ -29,8 +29,50 @@ def ES_bidding(es_query):
 
 def run():
     # 根据ES语句查找bidding
+
     es_query = {"track_total_hits": True,
-                "query": {"bool": {"must": [{"range": {"publishtime": {"from": "1736352000", "to": "1736398799"}}}]}}}
+                "query": {
+                    "bool": {
+                        "must": [{
+                            "range": {
+                                "publishtime": {
+                                    "gte": 1747238400,
+                                    "lte": 1747324800
+                                }
+                            }
+                        }, {
+                            "bool": {
+                                "must": [{
+                                    "multi_match": {
+                                        "query": "详见附件",
+                                        "type": "phrase",
+                                        "fields": ["detail"]
+                                    }
+                                }]
+                            }
+                        }],
+                        "must_not": []
+                    }
+                },
+                "highlight": {
+                    "pre_tags": [""],
+                    "post_tags": [""],
+                    "fields": {
+                        "detail": {
+                            "fragment_size": 115,
+                            "number_of_fragments": 1
+                        }
+                    }
+                },
+                "sort": [{
+                    "dataweight": "desc"
+                }, {
+                    "publishtime": "desc"
+                }],
+                "from": 0
+            }
+    # es_query = {"track_total_hits": True,
+    #             "query": {"bool": {"must": [{"range": {"comeintime": {"from": "1747324800", "to": "1747411200"}}}]}}}
     # es_query = {"track_total_hits": True,
     #             "query": {"bool": {"must": [{"range": {"publishtime": {"from": "1691337600", "to": "1691424000"}}},
     #                                         {"terms": {"subtype": ["中标", "合同","成交"]}}]}}}

+ 23 - 40
tools/从mongo导出数据/mongo_into_mongo.py

@@ -5,61 +5,44 @@ from lib.mogodb_helper import MongoDBInterface
 #复制mongo数据源 从一个源1到另一个源3
 # 从源1中导入到源3 ,和源2 一样的数据
 #mongo库 源1
+# MongodbConfig = {
+#     "ip_port": "127.0.0.1:27088",
+#     "user": "viewdata",
+#     "password": "viewdata",
+#     "db": "qfw",
+# }
 MongodbConfig = {
-    "ip_port": "127.0.0.1:27088",
-    "user": "viewdata",
-    "password": "viewdata",
-    "db": "qfw",
+    "ip_port": "172.20.45.129:27002",
+    "db": "data_quality",
 }
-
 mdb = MongoDBInterface(MongodbConfig)
 #mongo库 源3
-MongodbConfigInsert = {
-    "ip_port": "192.168.3.149:27180",
-    "db": "data_quality",
-}
-insertdb = MongoDBInterface(MongodbConfigInsert)
+# MongodbConfigInsert = {
+#     "ip_port": "192.168.3.149:27180",
+#     "db": "data_quality",
+# }
+# insertdb = MongoDBInterface(MongodbConfigInsert)
 
-max_id = ObjectId("0" * 24)
-# max_id = ObjectId("655ec5609aed6eb2ffa654ca")
+# max_id = ObjectId("0" * 24)
+max_id = ObjectId("6826ab875463fc3ceb767fe5")
 
 # 连接MongoDB数据库
-with MongoClient('192.168.3.149', 27180) as client:
+with MongoClient('172.20.45.129', 27002) as client:
     #源2
     db = client.data_quality
-    coll = db.standard_sample_data
-    for row in coll.find().sort("_id", 1):
-    # for item in coll_user.find({"_id": {"$gte": max_id}}).sort("_id", 1):
+    coll = db.bidding_202505_main
+    # for row in coll.find().sort("_id", 1):
+    for row in coll.find({"_id": {"$gt": max_id}}).sort("_id", 1):
         _id = row.get("_id", "")
         # print(row["_id"])
         if _id:
-            m_id = ObjectId(_id)
-            result=mdb.find_by_id("bidding",m_id)
+            # m_id = ObjectId(_id)
+            result=mdb.find_by_id("bidding_202505",_id)
             if result==None:
-                print(row["_id"]+"在大库没找到")
+                print(f"{_id}+在大库没找到")
             if result:
                 result["_id"] = ObjectId(row["_id"])
-                insertdb.insert2db("standard_sample_data_all",result)
+                mdb.insert2db("bidding_202505_chouqu",result)
 
 # 关闭数据库连接
 client.close()
-
-
-"""
-
-http://172.17.162.35:8880/search
-
-# 深圳分公司安检信息系统维护服务 0.0 南方航空物流股份有限公司 广东 深圳市 ['安检']  []
# 运输事业发展中心信息系统运维-交通委北区机房、视频会议及运输中心终端运维管理技术服务项目 825250.0 北京市运输事业发展中心 北京 北京市 ['运输事业发展中心信息系统运维 交通委北区机房 视频会议及运输中心终端运维管理技术服务项目', '运输事业发展中心信息系统运维 交通委北区机房 视频会议及运输中心终端运维管理技术服务项目'] 北京航天星桥科技股份有限公司 []
-# 软件开发项目(软件开发-拓信智防) 0.0 指定集成公司与河北拓信智防电子科技有限公司 河北 保定市 ['车辆管理平台']  []
-# 新乡高新技术产业开发区社会治理委员会“雪亮工程”维保项目 3410000.0 新乡高新技术产业开发区社会治理委员会 河南 新乡市 ['维保', '雪亮工程']  []
-# 山西省大同市第三人民医院新建医技、急诊楼配套项目(暂估价)医用箱式中型物流传输系统采购项目 20028500.0 山西省大同市第三人民医院 山西 大同市 ['箱式中型物流']  []
-# 山西省大同市第三人民医院新建医技、急诊楼配套项目(暂估价)医用箱式中型物流传输系统采购项目 20028500.0 山西省大同市第三人民医院 山西 大同市 ['物流系统']  []
-
-
-广州市白云区三元里群英大街13号加装电梯工程 0.0 广州市白云区三元里群英大街13号加装电梯工程 广东 广州市 ['电梯']  []
-软件开发服务(二次)软件开发服务 6500000.0 呼伦贝尔市住房和城乡建设局 内蒙古 呼伦贝尔市 ['软件', '软件开发服务 二次 软件开发服务']  []
-富顺县救灾物资储备库建设工程项目 2346407.0 富顺县应急管理指挥中心 四川 自贡市 ['备库建设', '救灾物资']  []
-中共成都市委党校明志楼多媒体教室LED屏采购项目 650000.0 中共成都市委党校 四川 成都市 ['LED一体机', '触控一体机'] 成都香草山信息技术有限责任公司 []
-吉木萨尔县大有镇村庄规划(2021-2035年)编制项目 150000.0 吉木萨尔县大有镇人民政府 新疆 昌吉回族自治州 ['建材']  []
-"""

+ 22 - 6
tools/从mongo库导出数据execl/mongo_to_execl.py

@@ -1,7 +1,7 @@
 # 导入必要的库
 from pymongo import MongoClient
 import pandas as pd
-
+import pytz  # 用于处理时区
 def export_to_excel(db_name, collection_name, fields, output_file):
     """
     从MongoDB导出特定字段到Excel文件。
@@ -13,7 +13,9 @@ def export_to_excel(db_name, collection_name, fields, output_file):
     - output_file: 输出的Excel文件名
     """
     # 连接到MongoDB
-    client = MongoClient('mongodb://192.168.3.149:27180/')
+    client = MongoClient('mongodb://172.20.45.129:27002/')
+    # client = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True)  # 清洗库
+
     db = client[db_name]
     collection = db[collection_name]
 
@@ -21,23 +23,37 @@ def export_to_excel(db_name, collection_name, fields, output_file):
     projection = {field: 1 for field in fields}
 
     # 查询数据
-    data = collection.find({"flag":3},projection).sort("_id", 1)
+    data = collection.find({"mark_1": 1},projection).sort("_id", -1)
 
     # 将数据转换为DataFrame
     df = pd.DataFrame(list(data))
 
+    # 转换时间戳字段
+    time_fields = ['bidopentime', 'bidendtime']
+    for field in time_fields:
+        if field in df.columns:
+            # 1. 转换为 datetime 对象(秒级时间戳)
+            df[field] = pd.to_datetime(df[field], unit='s', errors='coerce')
+            # 2. 转换为北京时间(UTC+8)
+            beijing_tz = pytz.timezone('Asia/Shanghai')
+            df[field] = df[field].dt.tz_localize('UTC').dt.tz_convert(beijing_tz)
+            # 3. 去掉时区信息,保留纯时间(否则Excel可能显示异常)
+            df[field] = df[field].dt.tz_localize(None)
+
     # 导出到Excel文件
     df.to_excel(output_file, index=False)
 
 if __name__ == "__main__":
     # 连接到 MongoDB
 
-    db_name = 'data_quality'  # 替换为你的数据库名称
+    db_name = 'wjh'  # 替换为你的数据库名称
+    # db_name = 'jyqyfw'  # 替换为你的数据库名称
     # collection_name = 'standard_sample_data_all_ai' # 替换为你的集合名称
-    collection_name = 'bidding_20250123'  # 替换为你的集合名称
+    collection_name = 'unicom_1_2'  # 替换为你的集合名称
     # 定义参数
 
-    fields = ['id', 'site','toptype','subtype','area','city','buyer','projectname','projectcode','budget','s_winner','bidamount','multipackage_ai','href','jyhref']  # 替换为你需要导出的字段
+    fields = ['_id','site','toptype','subtype','area','city','buyer','projectname','projectcode','budget','s_winner','bidamount','bidopentime','bidendtime','label','href','jybxhref','spidercode']  # 替换为你需要导出的字段
+    # fields = ['_id','id','site','href','jybxhref','spidercode','projectinfo']  # 替换为你需要导出的字段
     output_file = 'output.xlsx'
 
     # 调用函数导出数据

二进制
tools/从mongo库导出数据execl/output.xlsx


二进制
tools/从mongo库导出数据execl/联通1-2月份交叉验证后免送洗-抽样数据2000条.xlsx


+ 1 - 1
tools/分数字段结果分析/score_ai.py

@@ -42,7 +42,7 @@ if "score" in data.columns:
     # 对分数进行倒序排序
     score_distribution_df = score_distribution_df.sort_values(by='标讯总分数', ascending=False)
 
-    # 输出到 Excel 文件
+    # 输出到 Excel 记录
     with pd.ExcelWriter('标讯总分数统计.xlsx', engine='openpyxl') as writer:
         score_distribution_df.to_excel(writer, sheet_name='标讯总分数分布', index=False)
 

+ 1 - 1
tools/周报/mongo,es断流监控/es_monitor.py

@@ -22,6 +22,6 @@ print("es_bidding_fragment入库成功")
 
 #医械通
 medical_count = monitor.medical_es()
-monitor.save_to_mongo("medical_es",medical_count)
+monitor.save_to_mongo("es_medical",medical_count)
 print(medical_count)
 print("医械通数据入库成功")

+ 4 - 4
tools/周报/mongo,es断流监控/mongo_monitor.py

@@ -27,10 +27,10 @@ print(medical_count)
 monitor.save_to_mongo('medical',medical_count)
 print("医械通数据入库成功")
 
-# #bidding_碎片化
-# bidding_fragment_count=monitor.bidding_fragment()
-# monitor.save_to_mongo("bidding_fragment",bidding_fragment_count)
-# print("bidding_fragment入库成功")
+#bidding_碎片化
+bidding_fragment_count=monitor.bidding_fragment()
+monitor.save_to_mongo("bidding_fragment",bidding_fragment_count)
+print("bidding_fragment入库成功")
 
 
 

+ 209 - 0
tools/周报/mongo,es断流监控/monitor_all.py

@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : liumiaomiao
+from datetime import datetime, timedelta
+from lib.monitor_tools_online import monitor,MongoUtil
+
+#es-bidding
+es_bidding_count=monitor.es_bidding()
+#存库
+monitor.save_to_mongo("es_bidding",es_bidding_count)
+print("es_bidding入库成功")
+
+#es-拟在建
+es_nzj_count=monitor.es_nzj()
+monitor.save_to_mongo("es_nzj",es_nzj_count)
+print("es_nzj入库成功")
+
+#es-bidding_碎片化
+es_bidding_fragment_count=monitor.es_bidding_fragment()
+monitor.save_to_mongo("es_bidding_fragment",es_bidding_fragment_count)
+print("es_bidding_fragment入库成功")
+
+#医械通
+medical_count = monitor.medical_es()
+monitor.save_to_mongo("es_medical",medical_count)
+print(medical_count)
+print("医械通数据入库成功")
+
+#标准库-bidding
+bidding_count=monitor.bidding()
+#存库
+monitor.save_to_mongo("bidding",bidding_count)
+print("bidding入库成功")
+
+#nzj-mysql,需要查正式环境tidb库,需要
+nzj_count=monitor.nzj()
+print(nzj_count)
+monitor.save_to_mongo("nzj",nzj_count)
+print("nzj入库成功")
+
+#人脉clickhouse数据
+connections_count=monitor.connections()
+monitor.save_to_mongo('connections',connections_count)
+print("connections入库成功")
+
+#医械通
+medical_count=monitor.medical()
+print(medical_count)
+monitor.save_to_mongo('medical',medical_count)
+print("医械通数据入库成功")
+
+#bidding_碎片化
+bidding_fragment_count=monitor.bidding_fragment()
+monitor.save_to_mongo("bidding_fragment",bidding_fragment_count)
+print("bidding_fragment入库成功")
+
+#数据时效性 每周统计
+def data_timeliness():
+    try:
+        now = datetime.now()
+        end_date = now.strftime("%Y-%m-%d")
+        start_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
+
+        collection = MongoUtil.get_coon(host='172.31.31.202:27081',database='qfw', collection='bidding_zhibiao', authuser='dataFx', authpass='data@fenxi')
+
+        query = {"日期": {"$gte": start_date, "$lt": end_date}}
+        a1s = 0
+        a2s = 0
+        a3s = 0
+        a4s = 0
+        a5s = 0
+        a6s = 0
+        a7s = 0
+        a8s = 0
+        a9s = 0
+        a10s = 0
+        a11s = 0
+
+        for item in collection.find(query).sort("_id",-1):
+            a1=item.get("数据时效指标").get("数据时效分位数统计").get("a1")
+            number_str1 = a1.rstrip('%')
+            # 转换为浮点数
+            number_float1 = float(number_str1)
+            a1s+=number_float1
+
+            a2 = item.get("数据时效指标").get("数据时效分位数统计").get("a2")
+            number_str2 = a2.rstrip('%')
+            # 转换为浮点数
+            number_float2 = float(number_str2)
+            a2s+=number_float2
+
+            a3 = item.get("数据时效指标").get("数据时效分位数统计").get("a3")
+            number_str3 = a3.rstrip('%')
+            # 转换为浮点数
+            number_float3 = float(number_str3)
+            a3s += number_float3
+
+            a4 = item.get("数据时效指标").get("数据时效分位数统计").get("a4")
+            number_str4 = a4.rstrip('%')
+            # 转换为浮点数
+            number_float4 = float(number_str4)
+            a4s += number_float4
+
+            a5 = item.get("数据时效指标").get("数据时效分位数统计").get("a5")
+            number_str5 = a5.rstrip('%')
+            # 转换为浮点数
+            number_float5 = float(number_str5)
+            a5s += number_float5
+
+            a6 = item.get("数据时效指标").get("数据时效分位数统计").get("a6")
+            number_str6 = a6.rstrip('%')
+            # 转换为浮点数
+            number_float6 = float(number_str6)
+            a6s += number_float6
+
+            a7 = item.get("数据时效指标").get("数据时效分位数统计").get("a7")
+            number_str7 = a7.rstrip('%')
+            # 转换为浮点数
+            number_float7 = float(number_str7)
+            a7s += number_float7
+
+            a8 = item.get("数据时效指标").get("数据时效分位数统计").get("a8")
+            number_str8 = a8.rstrip('%')
+            # 转换为浮点数
+            number_float8 = float(number_str8)
+            a8s += number_float8
+
+            a9 = item.get("数据时效指标").get("数据时效分位数统计").get("a9")
+            number_str9 = a9.rstrip('%')
+            # 转换为浮点数
+            number_float9 = float(number_str9)
+            a9s += number_float9
+
+            a10 = item.get("数据时效指标").get("数据时效分位数统计").get("a10")
+            number_str10 = a10.rstrip('%')
+            # 转换为浮点数
+            number_float10 = float(number_str10)
+            a10s += number_float10
+
+            a11 = item.get("数据时效指标").get("数据时效分位数统计").get("a11")
+            number_str11 = a11.rstrip('%')
+            # 转换为浮点数
+            number_float11 = float(number_str11)
+            a11s += number_float11
+
+        a11=a1s/7
+        a11 = round(a11, 2)
+        a11=f"{a11}%"
+
+        a22=a2s/7
+        a22 = round(a22, 2)
+        a22 = f"{a22}%"
+
+        a33=a3s/7
+        a33 = round(a33, 2)
+        a33 = f"{a33}%"
+
+        a44=a4s/7
+        a44 = round(a44, 2)
+        a44 = f"{a44}%"
+
+        a55=a5s/7
+        a55 = round(a55, 2)
+        a55 = f"{a55}%"
+
+        a66=a6s/7
+        a66 = round(a66, 2)
+        a66 = f"{a66}%"
+
+        a77=a7s/7
+        a77 = round(a77, 2)
+        a77 = f"{a77}%"
+
+        a88=a8s/7
+        a88 = round(a88, 2)
+        a88 = f"{a88}%"
+
+        a99 = a9s / 7
+        a99 = round(a99, 2)
+        a99 = f"{a99}%"
+
+        a100 = a10s / 7
+        a100 = round(a100, 2)
+        a100 = f"{a100}%"
+
+        a111 = a11s / 7
+        a111 = round(a111, 2)
+        a111 = f"{a111}%"
+
+        times={}
+
+        times["a1"]=  a11
+        times["a2"] = a22
+        times["a3"] = a33
+        times["a4"] = a44
+        times["a5"] = a55
+        times["a6"] = a66
+        times["a7"] = a77
+        times["a8"] = a88
+        times["a9"] = a99
+        times["a10"] = a100
+        times["a11"] = a111
+        return times
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+times=data_timeliness()
+print(times)
+monitor.save_to_mongo('data_timeliness',times)

+ 11 - 21
tools/周报/周报表格导出/DataExport_forTesting.py

@@ -15,9 +15,10 @@ db = client[dbname]
 collection = db[collection_name]
 
 # 获取当前时间和一周前的时间
-end_time = datetime.now()
-start_time = end_time - timedelta(weeks=1)
-
+# end_time = datetime.now().replace(hour=23, minute=59, second=59, microsecond=999999)
+# start_time = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+end_time = (datetime.now() - timedelta(days=1)).replace(hour=23, minute=59, second=59, microsecond=999999)
+start_time =(datetime.now() - timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
 # 将datetime转换为Unix时间戳(整数类型,去掉小数部分)
 start_timestamp = int(start_time.timestamp())
 end_timestamp = int(end_time.timestamp())
@@ -36,7 +37,6 @@ pipeline_mongo = [
         "$match": {
             "$or": [
                 {"bidding.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"bidding_ai.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
                 {"connections.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
                 {"nzj.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
                 {"medical.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
@@ -54,7 +54,6 @@ data_mongo = list(collection.aggregate(pipeline_mongo))
 
 # 初始化MongoDB字段统计数据
 bidding_count = 0
-bidding_ai_count = 0
 connections_count = 0
 nzj_count = 0
 medical_count = 0
@@ -75,8 +74,6 @@ bidding_fragment_data = {
 for doc in data_mongo:
     if 'bidding' in doc:
         bidding_count += doc['bidding'].get('count', 0)
-    if 'bidding_ai' in doc:
-        bidding_ai_count += doc['bidding_ai'].get('count', 0)
     if 'connections' in doc:
         connections_count += doc['connections'].get('count', 0)
     if 'nzj' in doc:
@@ -96,15 +93,11 @@ pipeline_es = [
         "$match": {
             "$or": [
                 {"es_bidding.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"es_bidding_ai.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
                 {"es_nzj.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"medical_es.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                {"es_medical.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
                 {"es_bidding_fragment.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}}
             ]
         }
-    },
-    {
-        "$limit": 5  # 限制查询返回的结果为前5条数据,便于调试
     }
 ]
 
@@ -113,7 +106,6 @@ data_es = list(collection.aggregate(pipeline_es))
 
 # 初始化ES字段统计数据
 es_bidding_count = 0
-es_bidding_ai_count = 0
 es_nzj_count = 0
 es_medical_count = 0
 es_bidding_fragment_data = {
@@ -133,11 +125,9 @@ es_bidding_fragment_data = {
 for doc in data_es:
     if 'es_bidding' in doc:
         es_bidding_count += doc['es_bidding'].get('count', 0)
-    if 'es_bidding_ai' in doc:
-        es_bidding_ai_count += doc['es_bidding_ai'].get('count', 0)
     if 'es_nzj' in doc:
         es_nzj_count += doc['es_nzj'].get('count', 0)
-    if 'es_medical_count' in doc:
+    if 'es_medical' in doc:
         es_medical_count += doc['es_medical'].get('count', 0)
     if 'es_bidding_fragment' in doc:
         for key, value in doc['es_bidding_fragment'].get('count', {}).items():
@@ -199,17 +189,17 @@ for doc in data_timeliness:
 date_range = f"{start_time.strftime('%Y/%m/%d')}-{end_time.strftime('%Y/%m/%d')}"
 
 # 构建Excel数据
-columns = ['日期', '标讯每周入库数据量', '高质量库每周入库数据量', '人脉管理数据', '拟在建数据量(全国)','医械通'] + list(bidding_fragment_data.keys())
-data_row_mongo = [date_range, bidding_count, bidding_ai_count, connections_count, nzj_count,medical_count] + list(bidding_fragment_data.values())
+columns = ['日期', '标讯每周入库数据量', '人脉管理数据', '拟在建数据量(全国)','医械通'] + list(bidding_fragment_data.keys())
+data_row_mongo = [date_range, bidding_count, connections_count, nzj_count,medical_count] + list(bidding_fragment_data.values())
 
-columns_es = ['日期', '标讯每周入库数据量', '高质量库每周数据入库量', '拟在建数据量(全国)','医械通'] + list(es_bidding_fragment_data.keys())
-data_row_es = [date_range, es_bidding_count, es_bidding_ai_count, es_nzj_count,es_medical_count] + list(es_bidding_fragment_data.values())
+columns_es = ['日期', '标讯每周入库数据量', '拟在建数据量(全国)','医械通'] + list(es_bidding_fragment_data.keys())
+data_row_es = [date_range, es_bidding_count,  es_nzj_count,es_medical_count] + list(es_bidding_fragment_data.values())
 
 columns_timeliness = ['日期'] + list(timeliness_data.keys())
 data_row_timeliness = [date_range] + list(timeliness_data.values())
 
 # 创建DataFrame并写入Excel
-excel_file = '/mongo_data_statistics_combined1.xlsx'
+excel_file = 'mongo_data_statistics_combined1.xlsx'
 
 with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
     # 写入第一个sheet(断流监控_mongo库)

二进制
tools/周报/周报表格导出/mongo_data_statistics_combined1.xlsx


+ 258 - 224
tools/周报/周报表格导出/weekly_data_store.py

@@ -1,8 +1,7 @@
 from pymongo import MongoClient
 from datetime import datetime, timedelta
-import pandas as pd
 import pymysql
-# 数据入库量及数据监控时效 导出execl
+# 数据入库量及数据监控时效 入mysql
 # MongoDB连接配置
 host = '172.20.45.129'
 port = 27002
@@ -14,222 +13,254 @@ client = MongoClient(host, port)
 db = client[dbname]
 collection = db[collection_name]
 
-# 获取当前时间和一周前的时间
-end_time = datetime.now().replace(hour=23, minute=59, second=59, microsecond=999999)
-start_time = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
-
-# 将datetime转换为Unix时间戳(整数类型,去掉小数部分)
-start_timestamp = int(start_time.timestamp())
-end_timestamp = int(end_time.timestamp())
-
-# 输出调试信息:检查开始时间和结束时间
-print("Start time:", start_time)
-print("End time:", end_time)
-print("Start timestamp:", start_timestamp)
-print("End timestamp:", end_timestamp)
-
-# ----------------- 第一个Sheet: 断流监控_mongo库 -------------------
-
-# 查询过去一周的数据(断流监控_mongo库)
-pipeline_mongo = [
-    {
-        "$match": {
-            "$or": [
-                {"bidding.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"connections.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"nzj.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"medical.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"bidding_fragment.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}}
-            ]
+# # 获取当前时间和一周前的时间
+# end_time = datetime.now().replace(hour=23, minute=59, second=59, microsecond=999999)
+# start_time = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+def statistical_basic_data():
+
+    # # 计算上一周周一到周五的时间段
+    # start_time = datetime.now() - timedelta(days=datetime.now().weekday() + 7)
+    # end_time = datetime.now()
+    # 获取当前时间
+    end_time = datetime.now()
+    # 计算7天前的时间
+    start_time = end_time - timedelta(days=7)
+
+    # 将datetime转换为Unix时间戳(整数类型,去掉小数部分)
+    start_timestamp = int(start_time.timestamp())
+    end_timestamp = int(end_time.timestamp())
+
+    # 输出调试信息:检查开始时间和结束时间
+    print("Start time:", start_time)
+    print("End time:", end_time)
+    print("Start timestamp:", start_timestamp)
+    print("End timestamp:", end_timestamp)
+
+    # ----------------- 第一个Sheet: 断流监控_mongo库 -------------------
+
+    # 查询过去一周的数据(断流监控_mongo库)
+    pipeline_mongo = [
+        {
+            "$match": {
+                "$or": [
+                    {"bidding.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"connections.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"nzj.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"medical.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"bidding_fragment.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}}
+                ]
+            }
+        },
+        {
+            "$limit": 5  # 限制查询返回的结果为前5条数据,便于调试
         }
-    },
-    {
-        "$limit": 5  # 限制查询返回的结果为前5条数据,便于调试
+    ]
+
+    # 获取符合条件的数据
+    data_mongo = list(collection.aggregate(pipeline_mongo))
+
+    # 初始化MongoDB字段统计数据
+    bidding_count = 0
+    connections_count = 0
+    nzj_count = 0
+    medical_count = 0
+    bidding_fragment_data = {
+        "情报_法务": 0,
+        "情报_财务审计": 0,
+        "情报_招标代理": 0,
+        "情报_管理咨询": 0,
+        "情报_保险": 0,
+        "情报_工程设计咨询": 0,
+        "情报_安防": 0,
+        "情报_印务商机": 0,
+        "情报_环境采购": 0,
+        "情报_家具招投标": 0
     }
-]
-
-# 获取符合条件的数据
-data_mongo = list(collection.aggregate(pipeline_mongo))
-
-# 初始化MongoDB字段统计数据
-bidding_count = 0
-connections_count = 0
-nzj_count = 0
-medical_count = 0
-bidding_fragment_data = {
-    "情报_法务": 0,
-    "情报_财务审计": 0,
-    "情报_招标代理": 0,
-    "情报_管理咨询": 0,
-    "情报_保险": 0,
-    "情报_工程设计咨询": 0,
-    "情报_安防": 0,
-    "情报_印务商机": 0,
-    "情报_环境采购": 0,
-    "情报_家具招投标": 0
-}
-
-# 统计MongoDB数据
-for doc in data_mongo:
-    if 'bidding' in doc:
-        bidding_count += doc['bidding'].get('count', 0)
-    if 'connections' in doc:
-        connections_count += doc['connections'].get('count', 0)
-    if 'nzj' in doc:
-        nzj_count += doc['nzj'].get('count', 0)
-    if 'medical' in doc :
-        medical_count += doc['medical'].get('count', 0)
-    if 'bidding_fragment' in doc:
-        for key, value in doc['bidding_fragment'].get('count', {}).items():
-            if key in bidding_fragment_data:
-                bidding_fragment_data[key] += value
-
-# ----------------- 第二个Sheet: 断流监控—es -------------------
-
-# 查询过去一周的数据(断流监控—es)
-pipeline_es = [
-    {
-        "$match": {
-            "$or": [
-                {"es_bidding.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"es_nzj.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"es_medical.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
-                {"es_bidding_fragment.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}}
-            ]
+
+    # 统计MongoDB数据
+    for doc in data_mongo:
+        if 'bidding' in doc:
+            bidding_count += doc['bidding'].get('count', 0)
+        if 'connections' in doc:
+            connections_count += doc['connections'].get('count', 0)
+        if 'nzj' in doc:
+            nzj_count += doc['nzj'].get('count', 0)
+        if 'medical' in doc :
+            medical_count += doc['medical'].get('count', 0)
+        if 'bidding_fragment' in doc:
+            for key, value in doc['bidding_fragment'].get('count', {}).items():
+                if key in bidding_fragment_data:
+                    bidding_fragment_data[key] += value
+
+    # ----------------- 第二个Sheet: 断流监控—es -------------------
+
+    # 查询过去一周的数据(断流监控—es)
+    pipeline_es = [
+        {
+            "$match": {
+                "$or": [
+                    {"es_bidding.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"es_nzj.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"es_medical.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}},
+                    {"es_bidding_fragment.timestamp": {"$gte": start_timestamp, "$lt": end_timestamp}}
+                ]
+            }
         }
+    ]
+
+    # 获取符合条件的数据
+    data_es = list(collection.aggregate(pipeline_es))
+
+    # 初始化ES字段统计数据
+    es_bidding_count = 0
+    es_nzj_count = 0
+    es_medical_count = 0
+    es_bidding_fragment_data = {
+        "情报_法务": 0,
+        "情报_财务审计": 0,
+        "情报_招标代理": 0,
+        "情报_管理咨询": 0,
+        "情报_保险": 0,
+        "情报_工程设计咨询": 0,
+        "情报_安防": 0,
+        "情报_印务商机": 0,
+        "情报_环境采购": 0,
+        "情报_家具招投标": 0
     }
-]
-
-# 获取符合条件的数据
-data_es = list(collection.aggregate(pipeline_es))
-
-# 初始化ES字段统计数据
-es_bidding_count = 0
-es_nzj_count = 0
-es_medical_count = 0
-es_bidding_fragment_data = {
-    "情报_法务": 0,
-    "情报_财务审计": 0,
-    "情报_招标代理": 0,
-    "情报_管理咨询": 0,
-    "情报_保险": 0,
-    "情报_工程设计咨询": 0,
-    "情报_安防": 0,
-    "情报_印务商机": 0,
-    "情报_环境采购": 0,
-    "情报_家具招投标": 0
-}
-
-# 统计ES数据
-for doc in data_es:
-    if 'es_bidding' in doc:
-        es_bidding_count += doc['es_bidding'].get('count', 0)
-    if 'es_nzj' in doc:
-        es_nzj_count += doc['es_nzj'].get('count', 0)
-    if 'es_medical' in doc:
-        es_medical_count += doc['es_medical'].get('count', 0)
-    if 'es_bidding_fragment' in doc:
-        for key, value in doc['es_bidding_fragment'].get('count', {}).items():
-            if key in es_bidding_fragment_data:
-                es_bidding_fragment_data[key] += value
-
-# ----------------- 第三个Sheet: 数据时效监控 -------------------
-
-# 查询过去一周的数据(数据时效监控)
-pipeline_timeliness = [
-    {
-        "$match": {
-            "data_timeliness.timestamp": {
-                "$gte": start_timestamp,  # 使用整数Unix时间戳
-                "$lt": end_timestamp  # 使用整数Unix时间戳
+
+    # 统计ES数据
+    for doc in data_es:
+        if 'es_bidding' in doc:
+            es_bidding_count += doc['es_bidding'].get('count', 0)
+        if 'es_nzj' in doc:
+            es_nzj_count += doc['es_nzj'].get('count', 0)
+        if 'es_medical' in doc:
+            es_medical_count += doc['es_medical'].get('count', 0)
+        if 'es_bidding_fragment' in doc:
+            for key, value in doc['es_bidding_fragment'].get('count', {}).items():
+                if key in es_bidding_fragment_data:
+                    es_bidding_fragment_data[key] += value
+
+    # ----------------- 第三个Sheet: 数据时效监控 -------------------
+
+    # 查询过去一周的数据(数据时效监控)
+    pipeline_timeliness = [
+        {
+            "$match": {
+                "data_timeliness.timestamp": {
+                    "$gte": start_timestamp,  # 使用整数Unix时间戳
+                    "$lt": end_timestamp  # 使用整数Unix时间戳
+                }
             }
+        },
+        {
+            "$limit": 5  # 限制查询返回的结果为前5条数据,便于调试
         }
-    },
-    {
-        "$limit": 5  # 限制查询返回的结果为前5条数据,便于调试
+    ]
+
+    # 获取符合条件的数据
+    data_timeliness = list(collection.aggregate(pipeline_timeliness))
+
+    # 初始化字段统计数据
+    timeliness_data = {
+        "[0,5)分钟": 0,
+        "[5,15)分钟": 0,
+        "[15,30)分钟": 0,
+        "[30,60)分钟": 0,
+        "[1,3)小时": 0,
+        "[3,7)小时": 0,
+        "[7,15)小时": 0,
+        "[15,24)小时": 0,
+        "[1,2)天": 0,
+        "[2,3)天": 0,
+        "3天+": 0
     }
-]
-
-# 获取符合条件的数据
-data_timeliness = list(collection.aggregate(pipeline_timeliness))
-
-# 初始化字段统计数据
-timeliness_data = {
-    "[0,5)分钟": 0,
-    "[5,15)分钟": 0,
-    "[15,30)分钟": 0,
-    "[30,60)分钟": 0,
-    "[1,3)小时": 0,
-    "[3,7)小时": 0,
-    "[7,15)小时": 0,
-    "[15,24)小时": 0,
-    "[1,2)天": 0,
-    "[2,3)天": 0,
-    "3天+": 0
-}
-
-# 统计数据
-for doc in data_timeliness:
-    if 'data_timeliness' in doc:
-        count_data = doc['data_timeliness'].get('count', {})
-        timeliness_data["[0,5)分钟"] += float(count_data.get("a1", "0%").replace('%', ''))
-        timeliness_data["[5,15)分钟"] += float(count_data.get("a2", "0%").replace('%', ''))
-        timeliness_data["[15,30)分钟"] += float(count_data.get("a3", "0%").replace('%', ''))
-        timeliness_data["[30,60)分钟"] += float(count_data.get("a4", "0%").replace('%', ''))
-        timeliness_data["[1,3)小时"] += float(count_data.get("a5", "0%").replace('%', ''))
-        timeliness_data["[3,7)小时"] += float(count_data.get("a6", "0%").replace('%', ''))
-        timeliness_data["[7,15)小时"] += float(count_data.get("a7", "0%").replace('%', ''))
-        timeliness_data["[15,24)小时"] += float(count_data.get("a8", "0%").replace('%', ''))
-        timeliness_data["[1,2)天"] += float(count_data.get("a9", "0%").replace('%', ''))
-        timeliness_data["[2,3)天"] += float(count_data.get("a10", "0%").replace('%', ''))
-        timeliness_data["3天+"] += float(count_data.get("a11", "0%").replace('%', ''))
-
-# 获取当前时间的一周时间范围字符串
-date_range = f"{start_time.strftime('%Y/%m/%d')}-{end_time.strftime('%Y/%m/%d')}"
-
-# 构建Excel数据
-columns = ['日期', '标讯每周入库数据量', '人脉管理数据', '拟在建数据量(全国)','医械通'] + list(bidding_fragment_data.keys())
-data_row_mongo = [date_range, bidding_count, connections_count, nzj_count,medical_count] + list(bidding_fragment_data.values())
-
-columns_es = ['日期', '标讯每周入库数据量', '拟在建数据量(全国)','医械通'] + list(es_bidding_fragment_data.keys())
-data_row_es = [date_range, es_bidding_count,  es_nzj_count,es_medical_count] + list(es_bidding_fragment_data.values())
-
-columns_timeliness = ['日期'] + list(timeliness_data.keys())
-data_row_timeliness = [date_range] + list(timeliness_data.values())
-
-def insert_mysql():
-    # MySQL 连接
-    conn = pymysql.connect(host='172.20.45.129', port='4000', user='root', password='=PDT49#80Z!RVv52_z',database='quality')
+
+    # 统计数据
+    for doc in data_timeliness:
+        if 'data_timeliness' in doc:
+            count_data = doc['data_timeliness'].get('count', {})
+            timeliness_data["[0,5)分钟"] += float(count_data.get("a1", "0%").replace('%', ''))
+            timeliness_data["[5,15)分钟"] += float(count_data.get("a2", "0%").replace('%', ''))
+            timeliness_data["[15,30)分钟"] += float(count_data.get("a3", "0%").replace('%', ''))
+            timeliness_data["[30,60)分钟"] += float(count_data.get("a4", "0%").replace('%', ''))
+            timeliness_data["[1,3)小时"] += float(count_data.get("a5", "0%").replace('%', ''))
+            timeliness_data["[3,7)小时"] += float(count_data.get("a6", "0%").replace('%', ''))
+            timeliness_data["[7,15)小时"] += float(count_data.get("a7", "0%").replace('%', ''))
+            timeliness_data["[15,24)小时"] += float(count_data.get("a8", "0%").replace('%', ''))
+            timeliness_data["[1,2)天"] += float(count_data.get("a9", "0%").replace('%', ''))
+            timeliness_data["[2,3)天"] += float(count_data.get("a10", "0%").replace('%', ''))
+            timeliness_data["3天+"] += float(count_data.get("a11", "0%").replace('%', ''))
+
+    # 获取当前时间的一周时间范围字符串
+    date_range = f"{start_time.strftime('%Y/%m/%d')}-{end_time.strftime('%Y/%m/%d')}"
+
+    # 构建Excel数据
+    # columns = ['日期', '标讯每周入库数据量', '人脉管理数据', '拟在建数据量(全国)','医械通'] + list(bidding_fragment_data.keys())
+    data_row_mongo = [date_range, bidding_count, connections_count, nzj_count,medical_count] + list(bidding_fragment_data.values())
+
+    # columns_es = ['日期', '标讯每周入库数据量', '拟在建数据量(全国)','医械通'] + list(es_bidding_fragment_data.keys())
+    data_row_es = [date_range, es_bidding_count,  es_nzj_count,es_medical_count] + list(es_bidding_fragment_data.values())
+
+    # columns_timeliness = ['日期'] + list(timeliness_data.keys())
+    data_row_timeliness = [date_range] + list(timeliness_data.values())
+    return data_row_mongo,data_row_es,data_row_timeliness
+
+
def insert_mysql(data_row_mongo, data_row_es, data_row_timeliness):
    """Persist one week's monitoring statistics into MySQL/TiDB.

    Parameters
    ----------
    data_row_mongo : list
        ``[date_range, bidding, renmaitong, nzj, medical, *10 fragment counts]``.
    data_row_es : list
        ``[date_range, bidding, nzj, medical, *10 fragment counts]`` — note
        there is no renmaitong column on the ES side.
    data_row_timeliness : list
        ``[date_range, *11 latency-bucket percentages]``.

    The leading ``date_range`` string (``"YYYY/MM/DD-YYYY/MM/DD"``) is split
    into separate start_time / end_time columns.  One 'mongo' row and one
    'es' row go into ``weekly_data``; one row goes into
    ``response_time_distribution``.

    Raises
    ------
    ValueError
        If ``date_range`` does not match the expected ``%Y/%m/%d-%Y/%m/%d``
        format (validated via ``strptime`` before touching the database).
    """
    conn = pymysql.connect(
        host='172.20.45.129',
        port=4000,
        user='root',
        password='=PDT49#80Z!RVv52_z',
        database='quality',
        charset='utf8mb4'
    )
    try:
        # Split "start-end" once; strptime validates the expected format and
        # fails early on malformed input instead of inserting garbage.
        start_str, end_str = data_row_mongo[0].split('-', 1)
        start_time = datetime.strptime(start_str, "%Y/%m/%d").strftime('%Y/%m/%d')
        end_time = datetime.strptime(end_str, "%Y/%m/%d").strftime('%Y/%m/%d')

        # Mongo row: 3 header columns + 14 metric columns = 17 values.
        data_mongo = [start_time, end_time, 'mongo'] + data_row_mongo[1:]
        # ES rows carry no renmaitong count -> pad that column with NULL.
        data_es = [start_time, end_time, 'es'] + data_row_es[1:2] + [None] + data_row_es[2:]
        # Timeliness row: 2 header columns + 11 bucket columns = 13 values.
        row_timeliness = [start_time, end_time] + data_row_timeliness[1:]

        print("data_mongo:", data_mongo)
        print("data_es:", data_es)
        print("data_row_timeliness", row_timeliness)

        sql_weekly = """
        INSERT INTO weekly_data (
            start_time, end_time, type, total_weekly_entries, renmaitong_data, planning_projects_data, medical_device_data,
            legal_intelligence, financial_audit_intelligence, bidding_agency_intelligence,
            management_consulting_intelligence, insurance_intelligence, engineering_consulting_intelligence,
            security_intelligence, printing_business_intelligence, environmental_procurement_intelligence,
            furniture_bidding_intelligence
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """

        sql_timeliness = """
        INSERT INTO response_time_distribution (
            start_time, end_time, range_0_5_min, range_5_15_min, range_15_30_min, range_30_60_min,
            range_1_3_hour, range_3_7_hour, range_7_15_hour, range_15_24_hour,
            range_1_2_day, range_2_3_day, range_3_plus_day
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """

        # Context-managed cursor is closed even if executemany raises.
        with conn.cursor() as cursor:
            cursor.executemany(sql_weekly, [tuple(data_mongo), tuple(data_es)])
            cursor.executemany(sql_timeliness, [tuple(row_timeliness)])
        conn.commit()
    finally:
        # Always release the connection, even when the insert fails.
        conn.close()
 
 
# The Excel export that used to live here was superseded by the MySQL
# pipeline above; the dead commented-out block has been removed.
# Collect the weekly rows and persist them.
data_row_mongo, data_row_es, data_row_timeliness = statistical_basic_data()
insert_mysql(data_row_mongo, data_row_es, data_row_timeliness)
 
-print(f"统计结果已写入Excel文件: {excel_file}")

+ 124 - 0
tools/基于抽取表ai和规则对比/ai抽取和规则抽取对比.py

@@ -0,0 +1,124 @@
+from pymongo import MongoClient
+from bson import ObjectId
+from pymongo import UpdateOne
+
+# 定义字段映射关系(ai_zhipu字段名 -> ext_ai_record字段名)
# Field mapping between the two extraction records
# (ai_zhipu field name -> ext_ai_record field name).
FIELD_MAPPING = {
    "s_budget": "budget",
    "s_city": "city",
    "s_toptype": "toptype",
    "s_winner": "s_winner",
    "s_projectname": "projectname",
    "s_area": "area",
    "s_buyer": "buyer",
    "s_agency": "agency",
    "s_subtype": "subtype",
    "s_bidamount": "bidamount",
    "s_projectcode": "projectcode"
}


def compare_fields(ai_zhipu, ext_ai_record):
    """Compare the mapped fields of two extraction records.

    Returns a dict of ``"<ext_field>_flag" -> 1/0`` where 1 means the two
    values agree.  A missing value on either side yields 0; project names
    agree when either string contains the other; all other fields agree
    on exact (whitespace-stripped) string equality.
    """
    flags = {}
    for ai_field, ext_field in FIELD_MAPPING.items():
        flag_key = f"{ext_field}_flag"
        ai_value = ai_zhipu.get(ai_field)
        ext_value = ext_ai_record.get(ext_field)

        # Any None on either side counts as a mismatch.
        if ai_value is None or ext_value is None:
            flags[flag_key] = 0
            continue

        left = str(ai_value).strip()
        right = str(ext_value).strip()

        if ai_field == "s_projectname":
            # Project names match on mutual containment, not strict equality.
            flags[flag_key] = 1 if (left in right or right in left) else 0
        else:
            flags[flag_key] = 1 if left == right else 0

    return flags
+
+
def main():
    """For every document in final_results, compare the AI extraction
    (ai_zhipu) against the rule-based extraction (ext_ai_record) stored in
    result_20220218 and bulk-write per-field match flags back onto
    final_results."""
    # Connect to MongoDB: client1 holds the comparison results,
    # client2 holds the raw extraction records.
    client1 = MongoClient('mongodb://172.20.45.129:27002/', unicode_decode_error_handler="ignore",
                          directConnection=True)
    client2 = MongoClient('mongodb://127.0.0.1:27086/', unicode_decode_error_handler="ignore", directConnection=True)

    # Database handles.
    db1 = client1['data_quality']
    db2 = client2['qfw']

    # Result collection (flags written here) and source collection (read-only).
    final_results = db1['final_results']
    collection2 = db2['result_20220218']

    # Bulk-write settings: flush every 1000 pending UpdateOne operations.
    batch_size = 1000
    bulk_operations = []
    processed_count = 0

    # Iterate in ascending _id order so interrupted runs are reproducible.
    for doc in final_results.find().sort('_id', 1):
        doc_id = ObjectId(doc['_id'])

        # Look up the matching record in the extraction database.
        doc2 = collection2.find_one({"_id": doc_id})

        if not doc2:
            print(f"result_20220218找不到该数据{doc_id}")
            continue

        # Pull both extraction payloads; skip docs missing either one.
        ai_zhipu = doc2.get('ai_zhipu', {})
        ext_ai_record = doc2.get('ext_ai_record', {})

        if not ai_zhipu or not ext_ai_record:
            print(f"文档{doc_id}缺少ai_zhipu或ext_ai_record字段")
            continue

        # Compute per-field match flags.
        comparison_result = compare_fields(ai_zhipu, ext_ai_record)

        # Queue the flag update for this document.
        bulk_operations.append(
            UpdateOne(
                {"_id": doc_id},
                {"$set": comparison_result}
            )
        )

        # Flush once the batch is full.
        if len(bulk_operations) >= batch_size:
            result = final_results.bulk_write(bulk_operations)
            processed_count += len(bulk_operations)
            print(f"已批量更新{len(bulk_operations)}个文档,共处理{processed_count}个文档")
            bulk_operations = []  # reset the pending batch

    # Flush any remaining queued operations.
    if bulk_operations:
        result = final_results.bulk_write(bulk_operations)
        processed_count += len(bulk_operations)
        print(f"最后批量更新{len(bulk_operations)}个文档,共处理{processed_count}个文档")

    print(f"处理完成,总共处理了{processed_count}个文档")


if __name__ == "__main__":
    main()

+ 111 - 0
tools/基于抽取表ai和规则对比/new.py

@@ -0,0 +1,111 @@
+from pymongo import MongoClient
+from bson import ObjectId
+
+# 定义字段映射关系(ai_zhipu字段名 -> ext_ai_record字段名)
# Field mapping between the two extraction records
# (ai_zhipu field name -> ext_ai_record field name).
FIELD_MAPPING = {
    "s_city": "city",
    "s_toptype": "toptype",
    "s_winner": "s_winner",
    "s_projectname": "projectname",
    "s_area": "area",
    "s_buyer": "buyer",
    "s_agency": "agency",
    "s_subtype": "subtype",
    "s_bidamount": "bidamount",
    "s_projectcode": "projectcode"
}


def compare_fields(ai_zhipu, ext_ai_record):
    """Compare the mapped fields of two extraction records.

    Returns a dict of ``"<ext_field>_flag" -> 1/0``.  Both sides missing
    counts as agreement (1); exactly one side missing is a mismatch (0).
    Project names agree on mutual containment; every other field agrees
    on exact (whitespace-stripped) string equality.
    """
    flags = {}
    for ai_field, ext_field in FIELD_MAPPING.items():
        flag_key = f"{ext_field}_flag"
        ai_value = ai_zhipu.get(ai_field)
        ext_value = ext_ai_record.get(ext_field)

        if ai_value is None and ext_value is None:
            # Both missing: treated as consistent.
            flags[flag_key] = 1
        elif ai_value is None or ext_value is None:
            # Only one side missing: inconsistent.
            flags[flag_key] = 0
        else:
            left = str(ai_value).strip()
            right = str(ext_value).strip()
            if ai_field == "s_projectname":
                # Project names match on mutual containment.
                flags[flag_key] = int(left in right or right in left)
            else:
                flags[flag_key] = int(left == right)

    return flags
+
+
def main():
    """Compare ai_zhipu vs ext_ai_record for every sampled document in
    bidding_20250515 and write per-field match flags back one by one."""
    # Connect to MongoDB: client1 holds the sample set, client2 the
    # extraction records.
    client1 = MongoClient('mongodb://172.20.45.129:27002/',unicode_decode_error_handler="ignore", directConnection=True)
    client2 = MongoClient('mongodb://127.0.0.1:27086/',unicode_decode_error_handler="ignore", directConnection=True)


    # Database handles.
    db1 = client1['data_quality']
    db2 = client2['qfw']

    # Sample collection (flags written here) and extraction collection.
    final_results = db1['bidding_20250515']
    collection2 = db2['result_20220219']
    # Iterate the whole sample set in ascending _id order.
    for doc in final_results.find().sort('_id',1):

        _id = doc['_id']
        # NOTE(review): assumes every sample doc carries an 'id' field that
        # references the extraction record — a missing key raises KeyError;
        # confirm against how bidding_20250515 was populated.
        doc_id = ObjectId(doc['id'])

        # Look up the matching extraction record by that referenced id.
        doc2 = collection2.find_one({"_id": doc_id})

        if not doc2:
            # Silently skip samples with no extraction record.
            continue

        # Pull both extraction payloads; skip docs missing either one.
        ai_zhipu = doc2.get('ai_zhipu', {})
        ext_ai_record = doc2.get('ext_ai_record', {})

        if not ai_zhipu or not ext_ai_record:
            print(f"文档{doc_id}缺少ai_zhipu或ext_ai_record字段")
            continue

        # Compute per-field match flags.
        comparison_result = compare_fields(ai_zhipu, ext_ai_record)

        # Write the flags back onto the sample document.
        update_result = final_results.update_one(
            {"_id": _id},
            {"$set": comparison_result},
        )

        print(f"更新文档{doc_id}: 匹配{update_result.modified_count}个字段")


if __name__ == "__main__":
    main()

+ 201 - 0
tools/基于抽取表ai和规则对比/一致性对比.py

@@ -0,0 +1,201 @@
+import os
+import time
+from pymongo import MongoClient
+import requests
+import json
+from bson import ObjectId
+
+# MongoDB 配置
# Source collection: the raw bidding documents to be judged.
MongodbConfigSource = {
    "ip_port": "127.0.0.1:27088",
    "user": "viewdata",
    "password": "viewdata",
    "db": "qfw",
    "col": "bidding"
}

# Comparison collection: holds ext_ai_record / ai_zhipu extraction results.
MongodbConfigCompare = {
    "ip_port": "127.0.0.1:27098",
    "db": "qfw",
    "col": "result_20220218"
}

# Result collection: consistency scores and flags are inserted here.
MongodbConfigResult = {
    "ip_port": "172.20.45.129:27002",
    "db": "data_quality",
    "col": "final_results"
}

# MongoDB connections (module-level, shared by the script below).
source_client = MongoClient(f"mongodb://{MongodbConfigSource['user']}:{MongodbConfigSource['password']}@{MongodbConfigSource['ip_port']}/")
source_collection = source_client[MongodbConfigSource['db']][MongodbConfigSource['col']]

compare_client = MongoClient(f"mongodb://{MongodbConfigCompare['ip_port']}?directConnection=true")
compare_collection = compare_client[MongodbConfigCompare['db']][MongodbConfigCompare['col']]

result_client = MongoClient(f"mongodb://{MongodbConfigResult['ip_port']}/")
result_collection = result_client[MongodbConfigResult['db']][MongodbConfigResult['col']]

# Zhipu (BigModel) chat-completions endpoint and auth header used by
# call_llm_api below.
API_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
HEADERS = {
    "Authorization": "Bearer ba336a9ea90e4cbd973ca9a06d197193.UMwtFTgevHTAkFZz",
    "Content-Type": "application/json"
}
+
def call_llm_api(text):
    """Ask the LLM whether *text* is a well-formed ("规整") announcement.

    Sends the document body to the glm-4-flash chat endpoint with a fixed
    system prompt and returns the model's verdict string.  Fails closed:
    any network/HTTP/parsing error is logged and reported as "不规整" so a
    flaky API never promotes an unchecked document.
    """
    system_prompt = (
        "请根据以下标准判断文章是否规整,仅返回“规整”或“不规整”,不需要任何解释或多余输出:\n\n"
        "1. **格式规范**:文章是否具有完整的结构,例如标准标题、分段清晰,无明显表格堆砌或连续罗列(如长列表、标号堆叠等)。如果存在表格,直接判定为“不规整”。\n"
        "2. **语言质量**:语言是否流畅,无语法、拼写错误或明显语义错误?\n"
        "3. **逻辑清晰度**:内容是否条理清晰、逻辑连贯,段落之间衔接自然?\n"
        "4. **排除分包标讯**:如文章涉及分包项目(关键词包括:“分包”“多包”“多个包”“包一”“包二”“多标段”“标段划分”“本项目分为”),则直接判定为“不规整”。\n\n"
        "要求:每次调用前清除上下文记忆,确保判断基于本次输入内容。\n"
        "输出限定:仅回答“规整”或“不规整”。"
    )
    payload = {
        "model": "glm-4-flash",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"正文: {text}\n请判定该标讯是否符合要求?"}
        ]
    }
    try:
        # BUG FIX: a timeout is mandatory — without it a stalled connection
        # hangs the whole batch run indefinitely.
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print(f"API调用失败: {e}")
        return "不规整"
+
def is_multi_package(ext_package, ai_com_package):
    """Return True when the two extractions agree on single- vs multi-package.

    - ext_package empty/None: agree only if the AI found exactly one package.
    - Otherwise: agree when both sides are (or are not) multi-package, where
      "multi" means a dict/list with more than one entry.
    """
    if not ext_package:
        return isinstance(ai_com_package, list) and len(ai_com_package) == 1
    ext_is_multi = isinstance(ext_package, dict) and len(ext_package) > 1
    ai_is_multi = isinstance(ai_com_package, list) and len(ai_com_package) > 1
    return ext_is_multi == ai_is_multi

def compare_and_score(ext_ai_record, ai_zhipu):
    """Score agreement between rule-based and AI extraction.

    Starts at 100 and deducts 10 per mismatching field (both-empty counts
    as agreement) plus 10 when the package structure disagrees.
    """
    score = 100
    deduct = 10
    # BUG FIX: the original list contained ("projectname", "s_projectname")
    # twice, double-penalizing a single project-name mismatch; deduplicated.
    fields = [
        ("area", "s_area"),
        ("subtype", "s_subtype"),
        ("projectname", "s_projectname"),
        ("toptype", "s_toptype"),
        ("city", "s_city"),
        ("buyer", "s_buyer"),
        ("s_winner", "s_winner"),
        ("budget", "s_budget"),
        ("projectcode", "s_projectcode"),
        ("bidamount", "s_bidamount"),
    ]
    for f_ext, f_ai in fields:
        v_ext = ext_ai_record.get(f_ext)
        v_ai = ai_zhipu.get(f_ai)
        if not v_ext and not v_ai:
            continue  # both empty: treated as agreement
        if v_ext != v_ai:
            score -= deduct
    # Package-structure agreement check.
    ext_package = ext_ai_record.get("package")
    ai_com_package = ai_zhipu.get("s_pkg", {}).get("com_package")
    if not is_multi_package(ext_package, ai_com_package):
        score -= deduct
    return score
+
+# 时间与起始文档ID(可根据需要修改)
# Time window and resume point for the scan.
# NOTE(review): the bounds appear to be Unix-second timestamps compared
# against `comeintime` — confirm the field's unit before changing them.
start_time = 1744905600
end_time = 1744991999
last_processed_id = '6802797a5f834436f09e3aaf'
# Resume after the last processed _id; only extracttype == 1 documents.
query_filter = {
    "comeintime": {"$gte": start_time, "$lte": end_time},
    "_id": {"$gt": ObjectId(last_processed_id)},
    "extracttype": 1
}

print(f"查询条件: {query_filter}")
print(f"查询结果数量: {source_collection.count_documents(query_filter)}")

# Projection: only the fields the scoring/result-building code needs.
fields = {"_id": 1, "detail": 1, "area": 1, "site": 1, "subtype": 1, "title": 1, "href": 1,
          "toptype": 1, "city": 1, "buyer": 1, "s_winner": 1, "budget": 1,
          "projectcode": 1, "projectname": 1,"bidamount":1}

# Stream in batches to avoid pulling the whole result set at once;
# no_cursor_timeout keeps the long-running cursor alive server-side.
batch_size = 1000
docs_cursor = source_collection.find(query_filter, fields, no_cursor_timeout=True).batch_size(batch_size)
+
+# 处理文档时的修改
# Main processing loop: LLM-filter each document, score rule-vs-AI
# extraction agreement, and insert the flags into the result collection.
# NOTE(review): docs_cursor is opened with no_cursor_timeout=True but is
# never explicitly closed — consider a finally: docs_cursor.close().
try:
    for doc in docs_cursor:
        print(f"处理文档: {doc['_id']}")
        # Skip documents with no body text.
        if not doc.get("detail"):
            print(f"文档 {doc['_id']} 无正文,跳过。")
            continue

        # Only well-formed ("规整") documents are scored.
        if call_llm_api(doc["detail"]) == "规整":
            compare_record = compare_collection.find_one({"_id": doc["_id"]})
            if not compare_record:
                print(f"文档 {doc['_id']} 在对比库中未找到,跳过。")
                continue

            ext_ai_record = compare_record.get("ext_ai_record")
            ai_zhipu = compare_record.get("ai_zhipu")
            if not ext_ai_record or not ai_zhipu:
                print(f"文档 {doc['_id']} 缺少 ext_ai_record 或 ai_zhipu 字段,跳过。")
                continue

            # Flag the document as multi-package when EITHER extraction
            # reports more than one package.
            ext_package = ext_ai_record.get("package")
            ai_com_package = ai_zhipu.get("s_pkg", {}).get("com_package")
            multi_package_flag = 1 if (
                (ext_package and isinstance(ext_package, dict) and len(ext_package) > 1) or
                (ai_com_package and isinstance(ai_com_package, list) and len(ai_com_package) > 1)
            ) else 0

            score = compare_and_score(ext_ai_record, ai_zhipu)

            # Fetch spidercode/channel from the full bidding record (the
            # projected cursor above does not include them).
            bidding_record = source_collection.find_one({"_id": doc["_id"]})
            spidercode = bidding_record.get("spidercode", "") if bidding_record else ""
            channel = bidding_record.get("channel", "") if bidding_record else ""
            print(f"spidercode: {spidercode}, channel: {channel}")

            # Fields that get an individual consistency flag.
            fields_to_check = [
                "area", "multipackage", "projectname", "projectcode", "budget",
                "s_winner", "buyer", "city", "toptype", "subtype","bidamount"
            ]

            # Copy the projected fields (except the body) into the result doc.
            # NOTE(review): `fields` here is the module-level projection dict,
            # so this also copies _id — the result doc reuses the source _id.
            result_data = {k: doc.get(k) for k in fields if k != "detail"}
            result_data["panduan"] = "规整"
            result_data["score"] = score
            result_data["multipackage"] = multi_package_flag
            result_data["spidercode"] = spidercode
            result_data["channel"] = channel

            # Per-field consistency flags: 1 when both sides agree,
            # 0 on mismatch or when both are empty.
            for field in fields_to_check:
                ext_value = ext_ai_record.get(field)
                ai_value = ai_zhipu.get(f"s_{field}")

                if not ext_value and not ai_value:
                    field_flag = 0
                else:
                    field_flag = 1 if ext_value == ai_value else 0

                result_data[f"{field}_flag"] = field_flag

            # Persist and remember the resume point.
            result_collection.insert_one(result_data)
            print(f"文档 {doc['_id']} 得分 {score},已写入结果库。")
            last_processed_id = doc["_id"]
        else:
            print(f"文档 {doc['_id']} 判定为不规整,跳过。")

except KeyboardInterrupt:
    # On Ctrl-C, report the resume point so the scan can be restarted.
    print(f"程序被中断,最后处理的文档 _id 为: {last_processed_id}")
    time.sleep(1)
+ 7 - 12
tools/数据抽样/sample_data_export_new.py

@@ -1,17 +1,11 @@
 from pymongo import MongoClient
 def sample_data(N):
-    # db = MongoClient('172.20.45.129', 27002, unicode_decode_error_handler="ignore").data_quality
-    db = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True).jyqyfw  # 清洗库
+    db = MongoClient('172.20.45.129', 27002, unicode_decode_error_handler="ignore").wjh
+    # db = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True).jyqyfw  # 清洗库
 
-    coll_user = db["usermail_Unicom_1_2"]
-
-    filter_condition = {
-        "$or": [
-            {"tag": 1},
-            {"tag": 2}
-        ]
-    }
+    coll_user = db["unicom_1_2"]
 
+    filter_condition = {"tag_1": 1}
     # 获取所有站点及其文档数
     site_list = {}
     site_count = coll_user.aggregate([
@@ -19,6 +13,7 @@ def sample_data(N):
         {"$group": {"_id": "$site", "count": {"$sum": 1}}},
         {"$sort": {"count": -1}}
     ])
+
     for item in site_count:
         site_list[item["_id"]] = item["count"]
 
@@ -49,10 +44,10 @@ def sample_data(N):
 
         update_result = coll_user.update_many(
             {"_id": {"$in": sampled_ids}},
-            {"$set": {"mark": 1}}
+            {"$set": {"mark_1": 1}}
         )
         marked_count += update_result.modified_count
         remaining -= update_result.modified_count
 
     print(f"Total marked documents: {marked_count}")
-sample_data(2000)
+sample_data(500)

+ 79 - 0
tools/数据抽样/sample_data_export_online.py

@@ -0,0 +1,79 @@
+from pymongo import MongoClient
+from urllib.parse import quote_plus
+
def sample_data(N):
    """Proportionally sample and mark N documents across sites.

    Targets documents in qfw.bidding_master_20250530 that were re-deduped
    (`old_id` present, `_id` != `old_id`) and whose preference score changed.
    Each site receives a share of N proportional to its document count
    (at least 1); sampled documents are marked with ``mark: 1``.

    Parameters
    ----------
    N : int
        Total number of documents to mark.
    """
    username = "liumiaomiao"
    password = "Lmm@80923"
    host = "127.0.0.1"
    port = "27088"
    # URL-escape credentials so special characters survive the URI.
    escaped_username = quote_plus(username)
    escaped_password = quote_plus(password)
    mongo_uri = f"mongodb://{escaped_username}:{escaped_password}@{host}:{port}/"

    db = MongoClient(mongo_uri, unicode_decode_error_handler="ignore", directConnection=True).qfw
    coll_user = db["bidding_master_20250530"]

    # Re-deduplicated docs whose preference score changed.
    filter_condition = {
        "$and": [
            {"old_id": {"$exists": True}},
            {"$expr": {"$ne": ["$_id", "$old_id"]}},
            {"$expr": {"$ne": ["$prefer_score", "$old_prefer_score"]}}
        ]
    }

    # Per-site document counts, largest sites first.
    site_list = {}
    site_count = coll_user.aggregate([
        {"$match": filter_condition},
        {"$group": {"_id": "$site", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}}
    ])
    for item in site_count:
        site_list[item["_id"]] = item["count"]

    total_docs = sum(site_list.values())
    remaining = N
    marked_count = 0

    for site, count in site_list.items():
        if remaining <= 0:
            break

        # Proportional allocation, at least 1, capped by what is left.
        num = max(1, round(N * count / total_docs))
        num = min(num, remaining)

        print(f"Processing site: {site} - Allocating {num} samples")

        # BUG FIX: removed a redundant second {"$match": {"site": site}}
        # stage — the first $match already constrains the site.
        pipeline = [
            {"$match": {"site": site, **filter_condition}},
            {"$sample": {"size": num}},
            {"$project": {"_id": 1}}
        ]

        sampled_ids = [doc["_id"] for doc in coll_user.aggregate(pipeline)]
        if not sampled_ids:
            continue

        update_result = coll_user.update_many(
            {"_id": {"$in": sampled_ids}},
            {"$set": {"mark": 1}}
        )
        marked_count += update_result.modified_count
        remaining -= update_result.modified_count

    print(f"Total marked documents: {marked_count}")

sample_data(100)

+ 108 - 0
tools/数据抽样/抽样方法最新.py

@@ -0,0 +1,108 @@
+from pymongo import MongoClient
+
+
+def sample_data(N):
+    """
+    Randomly sample and mark N documents in MongoDB, allocated per site in proportion to site size.
+    Improvements over the naive version:
+    1. Allocation is corrected so the grand total is exactly N
+    2. Error handling and progress logging are added
+    3. A more uniform random-sampling method is used ($rand sort instead of $sample)
+    """
+    # Connect to MongoDB
+    client = MongoClient(
+        'mongodb://127.0.0.1:27017/',
+        unicode_decode_error_handler="ignore",
+        directConnection=True
+    )
+    db = client.jyqyfw  # database name
+    coll_user = db["usermail_Unicom_1_2"]  # collection name
+
+    # Filter: only documents with tag == 1 or tag == 2 are eligible
+    filter_condition = {"$or": [{"tag": 1}, {"tag": 2}]}
+
+    print(f"开始抽样,目标样本量: {N}")
+
+    try:
+        # 1. Per-site document distribution (descending by document count)
+        site_counts = list(coll_user.aggregate([
+            {"$match": filter_condition},
+            {"$group": {"_id": "$site", "count": {"$sum": 1}}},
+            {"$sort": {"count": -1}}  # largest sites first
+        ]))
+
+        if not site_counts:
+            print("错误:没有符合条件的文档")
+            return
+
+        total_docs = sum(item["count"] for item in site_counts)
+        print(f"符合条件的总文档数: {total_docs}")
+
+        # 2. Pre-allocate sample counts per site (core algorithm)
+        allocations = []
+        total_allocated = 0
+
+        # First pass: proportional allocation, truncated toward zero
+        for item in site_counts:
+            site, count = item["_id"], item["count"]
+            num = max(1, int(N * count / total_docs))  # at least 1 per site
+            allocations.append({"site": site, "allocated": num})
+            total_allocated += num
+
+        # Rounding correction: push the remainder onto the largest site
+        remaining = N - total_allocated
+        if remaining != 0:
+            allocations[0]["allocated"] += remaining  # list is sorted desc, so [0] is the largest site
+            print(f"调整分配: 给站点[{allocations[0]['site']}]追加{remaining}个样本")
+
+        # 3. Sample and mark, site by site
+        marked_count = 0
+        for alloc in allocations:
+            if marked_count >= N:
+                break
+
+            site = alloc["site"]
+            num = alloc["allocated"]
+
+            print(f"处理站点: {site} | 计划分配: {num}")
+
+            # Random sampling via a $rand sort key (more uniform than $sample)
+            pipeline = [
+                {"$match": {"site": site, **filter_condition}},
+                {"$addFields": {"rand_sort": {"$rand": {}}}},  # attach a random key
+                {"$sort": {"rand_sort": 1}},  # shuffle by that key
+                {"$limit": num},  # keep the first num documents
+                {"$project": {"_id": 1}}  # only _id is needed downstream
+            ]
+
+            try:
+                # Collect the sampled _id values
+                sampled_ids = [doc["_id"] for doc in coll_user.aggregate(pipeline)]
+                if not sampled_ids:
+                    print(f"警告:站点[{site}]没有抽到文档")
+                    continue
+
+                # Mark the sampled documents in one bulk update
+                update_result = coll_user.update_many(
+                    {"_id": {"$in": sampled_ids}},
+                    {"$set": {"mark": 1}}  # the mark flag identifies sampled docs
+                )
+                marked_count += update_result.modified_count
+                print(f"成功标记: {update_result.modified_count}条 (实际/计划: {update_result.modified_count}/{num})")
+
+            except Exception as e:
+                print(f"处理站点[{site}]时出错: {str(e)}")
+                continue
+
+        print(f"抽样完成 | 实际标记: {marked_count}/{N}")
+        if marked_count != N:
+            print("警告:实际标记数量与目标不一致")
+
+    except Exception as e:
+        print(f"抽样过程发生严重错误: {str(e)}")
+    finally:
+        client.close()
+
+
+# Example: sample and mark 2000 documents
+sample_data(2000)

+ 54 - 0
tools/数据质量监控平台/kb-数据问题统计/execl_kb.py

@@ -0,0 +1,54 @@
+import pandas as pd
+import pymysql
+import re
+
+# Remote MySQL connection settings (quality DB)
+db_connection_remote = pymysql.connect(
+    host="172.20.45.129",
+    user="root",
+    password="=PDT49#80Z!RVv52_z",
+    database="quality",
+    port=4000
+)
+
+# Read the Excel workbook
+file_path = "KB问题统计汇总.xlsx"  # replace with the actual file path
+df = pd.read_excel(file_path, sheet_name="kB问题数据源")  # read the named sheet
+
+# Keep only the columns we insert
+df = df[['年份', '月份', '描述', '站点', '链接', '爬虫', '备注','问题分类一级','问题分类二级']]  # adjust to actual column names
+
+# Normalize the month column: strip non-digits, keep the number
+df['月份'] = df['月份'].apply(lambda x: int(re.sub(r'\D', '', str(x))) if pd.notnull(x) else None)
+
+# Replace NaN with None (inserted as SQL NULL)
+df = df.where(pd.notnull(df), None)
+
+# Insert every row into the database
+with db_connection_remote.cursor() as cursor:
+    for _, row in df.iterrows():
+        # Parameterized INSERT statement
+        sql = """
+        INSERT INTO problem_analysis (year, month, problem_description, site, link, spider, notes,category_level_1,category_level_2)
+        VALUES (%s, %s, %s, %s, %s, %s, %s,%s,%s)
+        """
+        values = (
+            row['年份'],
+            row['月份'],
+            row['描述'],
+            row['站点'],
+            row['链接'],
+            row['爬虫'],
+            row['备注'],
+            row['问题分类一级'],
+            row['问题分类二级']
+        )
+
+        # Execute the insert
+        cursor.execute(sql, values)
+
+    # Commit once after all rows
+    db_connection_remote.commit()
+
+# Close the connection
+db_connection_remote.close()

+ 160 - 0
tools/数据质量监控平台/kb-数据问题统计/task_kb.py

@@ -0,0 +1,160 @@
+import pymysql
+from pymysql import MySQLError
+from datetime import datetime, timedelta
+
+
+def get_last_week_monday_friday():
+    """Return Unix timestamps for last week's Monday 00:00:00 and Friday 23:59:59."""
+    today = datetime.utcnow()
+    last_monday = today - timedelta(days=today.weekday() + 7)  # Monday of the previous week
+    last_friday = last_monday + timedelta(days=4)  # Friday of that same week
+
+    last_monday_timestamp = int(last_monday.replace(hour=0, minute=0, second=0).timestamp())  # NOTE(review): .timestamp() on a naive datetime uses local tz, not UTC — confirm
+    last_friday_timestamp = int(last_friday.replace(hour=23, minute=59, second=59).timestamp())
+
+    return last_monday_timestamp, last_friday_timestamp
+
+
+def close_connection(cursor, connection):
+    """Close a DB cursor and connection if they were opened (None-safe)."""
+    if cursor:
+        cursor.close()
+    if connection:
+        connection.close()
+
+
+def test_connection():
+    # Pull last week's tagged Kanboard tasks (project 261) from the local DB
+    # and mirror the ones whose tags match kb_error_dict categories into the remote quality DB.
+    cursor_local = None
+    cursor_remote = None
+    db_connection_local = None
+    db_connection_remote = None
+
+    try:
+        print("开始连接本地数据库...")
+        db_connection_local = pymysql.connect(
+            host="127.0.0.1",
+            user="kanboard",
+            password="K99b3e9qa9d",
+            database="kanboard",
+            port=19988
+        )
+        cursor_local = db_connection_local.cursor()
+
+        print("开始连接远程数据库...")
+        db_connection_remote = pymysql.connect(
+            host="172.20.45.129",
+            user="root",
+            password="=PDT49#80Z!RVv52_z",
+            database="quality",
+            port=4000
+        )
+        cursor_remote = db_connection_remote.cursor()
+
+        # Time window: last Monday through last Friday
+        start_timestamp, end_timestamp = get_last_week_monday_friday()
+        print(
+            f"查询时间范围: {datetime.utcfromtimestamp(start_timestamp)} - {datetime.utcfromtimestamp(end_timestamp)}")
+
+        print("开始执行查询...")
+        query = """
+        SELECT t.id AS task_id, 
+               t.date_started, 
+               t.date_moved, 
+               t.external_uri, 
+               t.title,          
+               t.owner_id, 
+               t.creator_id, 
+               t.column_id
+        FROM tasks t
+        JOIN task_has_tags tht ON t.id = tht.task_id
+        JOIN tags tg ON tht.tag_id = tg.id
+        WHERE t.project_id = 261
+          AND t.date_started >= %s
+          AND t.date_started <= %s
+          AND tg.project_id = 261
+        """
+        cursor_local.execute(query, (start_timestamp, end_timestamp))
+        results = cursor_local.fetchall()
+        if not results:
+            print("没有找到符合条件的数据")
+            return
+
+        print(f"查询到 {len(results)} 条数据")
+
+        # Build a user_id -> name map for owner/creator lookups
+        cursor_local.execute("SELECT id, name FROM users")
+        users = cursor_local.fetchall()
+        user_dict = {user[0]: user[1] for user in users}
+
+        # Fetch the category column of kb_error_dict from the remote DB
+        cursor_remote.execute("SELECT category FROM kb_error_dict")
+        categories = cursor_remote.fetchall()
+        category_list = [category[0] for category in categories]  # all known category values
+
+        # Today's date, used as created_time on inserted rows
+        current_date = datetime.utcnow().strftime('%Y-%m-%d')  # format: YYYY-MM-DD
+
+        print("开始插入数据...")
+        for task in results:
+            task_id, date_started, date_moved, external_uri, title, owner_id, creator_id, column_id = task
+
+            # Map column_id to a human-readable task status
+            if column_id == 1057:
+                task_status = '已创建'
+            elif column_id in [1058, 1060, 1065]:
+                task_status = '处理中'
+            elif column_id == 1064:
+                task_status = '已完成'
+            else:
+                task_status = '未定义'  # column_id outside the known set
+
+            # Fetch all tag names attached to this task
+            cursor_local.execute(""" 
+                SELECT tg.name 
+                FROM task_has_tags tht 
+                JOIN tags tg ON tht.tag_id = tg.id 
+                WHERE tht.task_id = %s AND tg.project_id = 261 
+            """, (task_id,))
+            tags = cursor_local.fetchall()
+            tags_list = [tag[0] for tag in tags]  # tag names only
+
+            # Keep only tags that contain one of the kb_error_dict categories (substring match)
+            matching_tags = [tag for tag in tags_list if any(category in tag for category in category_list)]
+
+            # Store matching tags as a comma-separated string, or None when nothing matched
+            tags_str = ', '.join(matching_tags) if matching_tags else None
+
+            creation_date = datetime.utcfromtimestamp(date_started).strftime('%Y-%m-%d')
+            year = datetime.utcfromtimestamp(date_started).year
+            month = datetime.utcfromtimestamp(date_started).month
+            modification_date = datetime.utcfromtimestamp(date_moved).strftime('%Y-%m-%d') if date_moved else None
+
+            assignee_name = user_dict.get(owner_id, None)
+            creator_name = user_dict.get(creator_id, None)
+            work_order_link = f"https://jykb.jydev.jianyu360.com/task/{task_id}"
+
+            # Insert into the remote table (includes created_time); INSERT IGNORE skips duplicates
+            if tags_str:  # only insert tasks that have at least one matching tag
+                cursor_remote.execute(""" 
+                    INSERT IGNORE INTO kb_tasks 
+                    (task_id, creation_date, year, month, modification_date, work_order_link,  
+                     issue_description, assignee_name, creator_name, tags, task_status, created_time) 
+                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
+                """, (
+                    task_id, creation_date, year, month, modification_date, work_order_link, title, assignee_name,
+                    creator_name, tags_str, task_status, current_date
+                ))
+
+        db_connection_remote.commit()
+        print("数据处理完成!")
+
+    except MySQLError as e:
+        print(f"发生错误: {e}")
+
+    finally:
+        close_connection(cursor_local, db_connection_local)
+        close_connection(cursor_remote, db_connection_remote)
+
+
+if __name__ == "__main__":
+    test_connection()

+ 212 - 0
tools/数据质量监控平台/基于标准数据的字段分析结果.py

@@ -0,0 +1,212 @@
+import pymysql
+import pymongo
+import pandas as pd
+
+# MongoDB settings (source: standard sample data)
+MONGO_CONFIG = {
+    "host": "172.20.45.129",
+    "port": 27002,
+    "db": "data_quality",
+    "col": "standard_sample_data_new",
+}
+
+# MySQL settings (destination: analysis result tables)
+MYSQL_CONFIG = {
+    "host": "172.20.45.129",
+    "user": "root",
+    "password": "=PDT49#80Z!RVv52_z",
+    "database": "quality",
+    "port": 4000
+}
+
+
+def fetch_data():
+    """Load all source documents from MongoDB into a DataFrame (excluding _id)."""
+    client = pymongo.MongoClient(f"mongodb://{MONGO_CONFIG['host']}:{MONGO_CONFIG['port']}")
+    db = client[MONGO_CONFIG["db"]]
+    collection = db[MONGO_CONFIG["col"]]
+    data = list(collection.find({}, {"_id": 0}))
+    df = pd.DataFrame(data)
+    client.close()
+    return df
+
+
+def is_contained(str1, str2):
+    """Return True when either string contains the other; False if either is NA."""
+    if pd.isna(str1) or pd.isna(str2):
+        return False
+    return str1 in str2 or str2 in str1
+
+
+def calculate_metrics(df, category):
+    """Core metrics: per-field stats plus whole-row accuracy for one data category."""
+    # Restrict rows and field set to the requested category
+    if category == "中标类":
+        bid_types = ["成交", "单一", "废标", "合同", "结果变更", "流标", "验收", "中标", "其它"]
+        df = df[df["subtype"].isin(bid_types)]
+        fields = ["toptype", "subtype", "area", "city", "buyer", "projectname",
+                  "projectcode", "budget", "s_winner", "bidamount"]
+    else:
+        bid_types = ["成交", "单一", "废标", "合同", "结果变更", "流标", "验收", "中标", "其它", "拟建"]
+        df = df[~df["subtype"].isin(bid_types)]
+        fields = ["toptype", "subtype", "area", "city", "buyer", "projectname",
+                  "projectcode", "budget"]
+
+    df = df.replace({None: pd.NA, '': pd.NA}).fillna(pd.NA)
+    results = []
+    total_count = len(df)
+
+    # Whole-row accuracy: a row counts as correct only if every field matches its *_ai value
+    correct_rows = 0
+    for _, row in df.iterrows():
+        row_correct = True
+        for field in fields:
+            original = row.get(field, pd.NA)
+            ai = row.get(f"{field}_ai", pd.NA)
+
+            if field == "projectname":
+                if not is_contained(original, ai):
+                    row_correct = False
+                    break
+            else:
+                # NA-safe comparison: both NA -> match; one NA -> mismatch; else compare values
+                if (pd.isna(original) and pd.isna(ai)):
+                    continue
+                elif pd.isna(original) ^ pd.isna(ai):
+                    row_correct = False
+                    break
+                elif original != ai:
+                    row_correct = False
+                    break
+        if row_correct:
+            correct_rows += 1
+
+    # Per-field metrics
+    for field in fields:
+        # Base counts: how many rows have a value in the original field
+        null_count = df[field].isna().sum()
+        valid_count = total_count - null_count
+
+        # Error counts (projectname uses containment instead of equality)
+        if field == "projectname":
+            correct_count = df.apply(lambda r: is_contained(r[field], r[f"{field}_ai"]), axis=1).sum()
+            error_count = total_count - correct_count
+            error_no_null = error_count
+        else:
+            # NA-safe mismatch mask ('__NA__' sentinel makes NA == NA after fillna)
+            error_mask = (
+                    (df[field].isna() & df[f"{field}_ai"].notna()) |
+                    (df[field].notna() & df[f"{field}_ai"].isna()) |
+                    (df[field].fillna('__NA__') != df[f"{field}_ai"].fillna('__NA__'))
+            )
+            error_count = error_mask.sum()
+
+            # Errors restricted to rows where the original field has a value
+            error_no_null = (
+                    df[field].notna() &
+                    (df[f"{field}_ai"].isna() | (df[field] != df[f"{field}_ai"]))
+            ).sum()
+
+        # Rates (guard against division by zero)
+        recognition_rate = valid_count / total_count if total_count else 0
+        correct_rate = (valid_count - error_no_null) / valid_count if valid_count else 0
+
+        results.append({
+            "field_name": field,
+            "sample_total": total_count,
+            "original_null": null_count,
+            "original_exist": valid_count,
+            "extract_error_total": error_count,
+            "extract_correct_total": total_count - error_count,
+            "extract_error_exist": error_no_null,
+            "extract_correct_exist": valid_count - error_no_null,
+            "recognition_rate": f"{recognition_rate:.2%}",
+            "correct_recognition_rate": f"{(total_count - error_count) / total_count:.2%}" if total_count else "0.00%",
+            "accuracy_rate": f"{correct_rate:.2%}",
+            "data_type": category
+        })
+
+    # Overall (whole-row) summary for this category
+    overall_data = {
+        "total_data_count": total_count,
+        "correct_rows_count": correct_rows,
+        "row_accuracy": f"{correct_rows / total_count:.2%}" if total_count else "0.00%",
+        "data_type": category
+    }
+
+    return pd.DataFrame(results), pd.DataFrame([overall_data])
+
+def save_to_database(df_fields, df_overall):
+    """Persist per-field and overall stats into the two MySQL result tables."""
+    conn = pymysql.connect(**MYSQL_CONFIG)
+    cursor = conn.cursor()
+
+    try:
+        # Insert the per-field statistics
+        for _, row in df_fields.iterrows():
+            sql = """
+            INSERT INTO tendering_data_analysis (
+                field_name, sample_total, original_null, original_exist,
+                extract_error_total, extract_correct_total, extract_error_exist,
+                extract_correct_exist, recognition_rate, correct_recognition_rate,
+                accuracy_rate, data_type
+            ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+            """
+            cursor.execute(sql, (
+                row['field_name'], row['sample_total'], row['original_null'],
+                row['original_exist'], row['extract_error_total'],
+                row['extract_correct_total'], row['extract_error_exist'],
+                row['extract_correct_exist'], row['recognition_rate'],
+                row['correct_recognition_rate'], row['accuracy_rate'],
+                row['data_type']
+            ))
+
+        # Insert the overall (whole-row) statistics
+        for _, row in df_overall.iterrows():
+            sql = """
+            INSERT INTO data_quality_analysis 
+            (total_data_count, correct_rows_count, row_accuracy, data_type)
+            VALUES (%s,%s,%s,%s)
+            """
+            cursor.execute(sql, (
+                row['total_data_count'], row['correct_rows_count'],
+                row['row_accuracy'], row['data_type']
+            ))
+
+        conn.commit()
+        print(f"成功插入 {len(df_fields)} 条字段记录和 {len(df_overall)} 条整体记录")
+    except Exception as e:
+        conn.rollback()
+        print(f"数据库操作失败: {str(e)}")
+        raise  # re-raise so the failure is visible to the caller / debugger
+    finally:
+        cursor.close()
+        conn.close()
+
+
+def main():
+    # Load the source data
+    df = fetch_data()
+
+    # Analyze the winning-bid ("中标类") category
+    bid_fields, bid_overall = calculate_metrics(df, "中标类")
+
+    # Analyze the tendering ("招标类") category
+    tender_fields, tender_overall = calculate_metrics(df, "招标类")
+
+    # Merge both categories' results
+    all_fields = pd.concat([bid_fields, tender_fields])
+    all_overall = pd.concat([bid_overall, tender_overall])
+
+    # Persist to MySQL
+    save_to_database(all_fields, all_overall)
+
+    # Print a small sample for a quick sanity check
+    print("\n字段统计示例:")
+    print(bid_fields.head(3))
+    print("\n整体统计示例:")
+    print(bid_overall)
+
+
+if __name__ == "__main__":
+    main()

+ 259 - 0
tools/数据质量监控平台/标讯基础信息分析结果入库.py

@@ -0,0 +1,259 @@
+from datetime import date, timedelta,datetime
+import pandas as pd
+import pymysql
+from sqlalchemy import create_engine
+import json
+
+# MySQL connection settings
+mysql_config = {
+    "host": "172.20.45.129",
+    "port": 4000,
+    "user": "root",
+    "password": "=PDT49#80Z!RVv52_z",
+    "database": "quality"
+}
+
+# SQLAlchemy connection string
+engine = create_engine(
+    f'mysql+pymysql://{mysql_config["user"]}:{mysql_config["password"]}@{mysql_config["host"]}:{mysql_config["port"]}/{mysql_config["database"]}')
+
+def get_this_week_thursday():
+    today = date.today()
+    delta = (3 - today.weekday()) % 7  # days until this week's Thursday (wraps across weeks)
+    return today + timedelta(days=delta)
+
+# Dynamic Thursday lookup kept for reference; the batch is currently keyed by yesterday's date
+# query_create_time = get_this_week_thursday().strftime("%Y-%m-%d")  # dynamic variant
+# query_create_time = '2025-04-02'
+now_date = (datetime.now()).strftime("%Y-%m-%d")
+yesterday_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
+batch_id = yesterday_date
+
+
+# Fetch rows from MySQL into a DataFrame
+def fetch_data_from_mysql(query):
+    df = pd.read_sql(query, engine)
+    return df
+
+
+# Batch filter; NOTE(review): batch_id is interpolated into the SQL — acceptable only because it is a locally generated date string
+query = f"SELECT * FROM bid_analysis WHERE create_time = '{batch_id}'"
+data = fetch_data_from_mysql(query)
+print(data.columns, "第一轮数据检索成功")
+
+# Field mapping: error_type keys (English) -> output column names (Chinese)
+column_name_mapping = {
+    "area_qa": "省份",
+    "bidamount_qa": "中标金额",
+    "budget_qa": "预算",
+    "buyer_qa": "采购单位",
+    "com_package_qa": "分包",
+    "projectcode_qa": "项目编号",
+    "projectname_qa": "项目名称",
+    "title_qa": "标题",
+    "winner_qa": "中标单位",
+    "score": "标讯总分数",
+    "bidopentime_qa": "开标时间",
+    "publishtime_qa": "发布时间",
+    "toptype_qa": "信息一级分类",
+    "subtype_qa": "信息二级分类",
+    "city_qa": "城市"
+}
+
+
+# Extract mapped fields only from the error_type column
+def extract_errors(df):
+    # Only the fields declared in column_name_mapping are pulled out of error_type
+    qa_fields = list(column_name_mapping.values())
+
+    if "error_type" in df.columns:
+        print("✅ 'error_type' 字段存在,开始提取数据")
+
+        # error_type may arrive as a JSON string; parse it into a dict
+        df['error_type'] = df['error_type'].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
+
+        # Show a few rows to confirm the parsed type
+        print("error_type 字段数据示例:")
+        print(df['error_type'].head())
+
+        for qa_field in qa_fields:
+            english_field = next((k for k, v in column_name_mapping.items() if v == qa_field), None)
+            if english_field:
+                print(f"正在提取字段: {qa_field}, 对应的英文字段: {english_field}")
+                # Empty-dict values are treated as "no error" -> None
+                df[qa_field] = df['error_type'].apply(
+                    lambda x: x.get(english_field) if isinstance(x, dict) and x.get(english_field) != {} else None
+                )
+
+        # Pull out the overall score and coerce it to a number
+        df['标讯总分数'] = df['error_type'].apply(lambda x: x.get("score") if isinstance(x, dict) else None)
+        df['标讯总分数'] = pd.to_numeric(df['标讯总分数'], errors='coerce')  # non-numeric -> NaN
+
+    print(f"✅ 提取后的数据:{df.head()}")
+    return df
+
+
+# Run the extraction
+data = extract_errors(data)
+
+# Inspect the extracted data
+print("提取后的数据:")
+print(data.head())  # quick visual check of the first rows
+
+# Check the type distribution of error_type
+print("error_type 字段的类型分布:")
+print(data['error_type'].apply(type).value_counts())
+
+# Split into tendering data and winning-bid data by subtype
+bid_types = {"成交", "单一", "废标", "合同", "结果变更", "流标", "验收", "中标", "其它"}
+exclude_types = {"拟建", "采购意向", "成交", "单一", "废标", "合同", "结果变更", "流标", "验收", "中标"}
+data_bid = data[~data["subtype"].isin(exclude_types)].copy()  # tendering rows
+data_win = data[data["subtype"].isin(bid_types)].copy()  # winning-bid rows
+
+# Report the size of each split
+print("筛选后的招标数据行数:", len(data_bid))
+print("筛选后的中标数据行数:", len(data_win))
+
+
+# Per-column analysis: a non-null cell means a recorded error for that field
+def analyze_column(dataframe, column_name, data_type, batch_id):
+    if column_name not in dataframe.columns:
+        return None
+
+    total = len(dataframe[column_name])
+    correct = dataframe[column_name].isna().sum()
+    error = total - correct
+
+    # accuracy and error rate, formatted with a percentage symbol
+    accuracy = f'{(correct / total * 100):.2f}%' if total > 0 else "0%"
+    error_rate = f'{(error / total * 100):.2f}%' if total > 0 else "0%"
+
+    # Tally each individual error reason (cells hold {error_code: description} dicts)
+    error_code_counts = {}
+    for item in dataframe[column_name].dropna():
+        if isinstance(item, dict):
+            for error_code, error_desc in item.items():
+                error_code_counts.setdefault(error_code, {'description': error_desc, 'count': 0})['count'] += 1
+
+    result = []
+    for error_code, data in error_code_counts.items():  # NOTE(review): local 'data' shadows the module-level DataFrame
+        # share of this single reason among all errors
+        single_error_rate = (data['count'] / error) * 100 if error > 0 else 0
+        single_error_rate_percent = f'{single_error_rate:.2f}%'  # formatted with percentage symbol
+
+        result.append({
+            'record_type': '字段分析',
+            'field_name': column_name,
+            'data_type': data_type,
+            'batch_id': batch_id,
+            'total_count': total,
+            'correct_count': correct,
+            'error_count': error,
+            'accuracy': accuracy,
+            'error_rate': error_rate,
+            'error_code': error_code,
+            'error_description': data['description'],
+            'field_count': data['count'],
+            'single_error_rate': single_error_rate_percent
+        })
+
+    return result if result else None
+
+
+
+
+# Run the per-column analysis over a list of fields and collect the rows
+def analyze_data(df, selected_fields, data_type,batch_id):
+    results = []
+    for field in selected_fields:
+        analysis_result = analyze_column(df, field, data_type, batch_id)
+        if analysis_result:
+            results.extend(analysis_result)
+    return pd.DataFrame(results)
+
+
+# Fields to analyze per data category (winning bids also check winner/amount)
+win_selected_fields = ["信息一级分类", "信息二级分类", "省份", "城市", "采购单位", "项目名称", "项目编号", "预算",
+                       "中标单位", "中标金额", "分包"]
+bid_selected_fields = ["信息一级分类", "信息二级分类", "省份", "城市", "采购单位", "项目名称", "项目编号", "预算",
+                       "分包"]
+
+# Run the analyses
+win_analysis_df = analyze_data(data_win, win_selected_fields, "结果",batch_id)
+bid_analysis_df = analyze_data(data_bid, bid_selected_fields, "招标",batch_id)
+
+# Preview the results
+print("结果数据分析结果:", win_analysis_df.head())
+print("招标数据分析结果:", bid_analysis_df.head())
+
+
+# Score distribution analysis over the combined data
+def analyze_scores(df):
+    if '标讯总分数' not in df.columns:
+        return []
+
+    total_count = len(df['标讯总分数'])
+    score_counts = df['标讯总分数'].value_counts().sort_index()
+    score_percentage = (score_counts / total_count * 100).round(2).astype(str) + '%'
+
+    score_analysis = []
+    for score, count in score_counts.items():
+        score_analysis.append({
+            "record_type": "分数分析",
+            "score": str(score),
+            "count": int(count),
+            "total": total_count,
+            "percentage": score_percentage[score],
+            "batch_id": batch_id
+        })
+    return score_analysis
+
+
+# Run the score analysis on tendering + winning-bid rows together
+score_analysis_data = analyze_scores(pd.concat([data_bid, data_win]))
+print("分数分析结果:", score_analysis_data)
+
+
+# Persist an analysis DataFrame into the matching MySQL table
+def store_analysis_to_mysql(df, table_name):
+    if df.empty:
+        print(f"⚠️ {table_name} 数据为空,跳过存储")
+        return
+
+    connection = pymysql.connect(**mysql_config)
+    cursor = connection.cursor()
+
+    if table_name == "analysis_results":
+        sql = """
+        INSERT INTO analysis_results 
+        (record_type, field_name, data_type, batch_id, total_count, correct_count, error_count, accuracy, error_rate, error_code, error_description, field_count, single_error_rate)
+        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+        """
+
+    elif table_name == "score_analysis":
+        sql = """
+        INSERT INTO analysis_score 
+        (record_type, score, count, total, percentage, batch_id)
+        VALUES (%s, %s, %s, %s, %s, %s)
+        """
+
+    for _, row in df.iterrows():  # NOTE(review): an unknown table_name leaves sql unbound — confirm callers only pass the two names
+        values = tuple(row.fillna("").values)
+
+        if len(values) != sql.count('%s'):
+            print(f"❌ 参数数量不匹配: 预期 {sql.count('%s')},实际 {len(values)},跳过此行")
+            continue
+
+        cursor.execute(sql, values)
+
+    connection.commit()
+    cursor.close()
+    connection.close()
+    print(f"✅ {table_name} 数据已存储到 MySQL")
+
+
+store_analysis_to_mysql(win_analysis_df, "analysis_results")
+store_analysis_to_mysql(bid_analysis_df, "analysis_results")
+store_analysis_to_mysql(pd.DataFrame(score_analysis_data), "score_analysis")
+
+print("✅ 分析完成,数据已存入 MySQL!")

+ 2 - 2
tools/标准样本数据入库/File_import_mysql.py

@@ -14,7 +14,7 @@ data = data.replace({pd.NA: None, pd.NaT: None, float('nan'): None})
 # 连接数据库
 try:
     connection = pymysql.connect(
-        host='192.168.3.217',
+        host='172.20.45.129',
         user='root',
         password='=PDT49#80Z!RVv52_z',
         database='quality',
@@ -24,7 +24,7 @@ try:
     cursor = connection.cursor()
 
     sql = """
-    INSERT INTO bid_llizhikun 
+    INSERT INTO sample_bid_analysis_tmp 
     (_id, site, toptype, subtype, area, city, buyer, projectname, projectcode, budget, 
     s_winner, bidamount, multipackage, label, href, jyhref) 
     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)

+ 1 - 1
tools/标准样本数据入库/execl_into_mongo.py

@@ -13,7 +13,7 @@ collection = db['standard_sample_data_all']  # 替换为你的集合名称
 file_path = '/Users/miaobao/Downloads/标准样本数据汇总.xlsx'
 sheet_name = '标准样本数据汇总'
 
-# 使用 pandas 读取 Excel 文件
+# 使用 pandas 读取 Excel 记录
 df = pd.read_excel(file_path, sheet_name=sheet_name, na_values=['', 'NA', 'N/A'], dtype={'budget': float, 'bidamount': float})
 
 

+ 0 - 0
tools/标准样本数据入库/insert_errors.log


二进制
tools/标准样本数据入库/标准样本数据汇总.xlsx


+ 2 - 2
tools/标准样本数据入库/样本分析数据入mongo库.py

@@ -5,8 +5,8 @@ from pymongo import MongoClient
 collection_bid = MongoClient(f'mongodb://{"viewdata"}:{"viewdata"}@{"127.0.0.1:27088"}/',
                          unicode_decode_error_handler="ignore", directConnection=True)["qfw"]["bidding"]
 #连接测试环境mongo
-db = MongoClient('192.168.3.149', 27180, unicode_decode_error_handler="ignore").data_quality
-coll_user = db["standard_sample_data_all"]
+db = MongoClient('172.20.45.129', 27002, unicode_decode_error_handler="ignore").data_quality
+coll_user = db["standard_sample_data_copy"]
 
 # 字段映射
 field_mapping = {

+ 89 - 0
tools/标讯数据附件为空数量统计/统计.py

@@ -0,0 +1,89 @@
+from pymongo import MongoClient
+
+def count_documents():
+    # Connect to MongoDB
+    client = MongoClient('172.20.45.129', 27002, unicode_decode_error_handler="ignore")
+    db = client['data_quality']  # replace with your database name
+    collection = db['bidding_20250515']  # replace with your collection name
+
+    # 1. attachments stored as an object: count docs with at least one non-empty org_url
+    pipeline = [
+        {
+            "$match": {
+                "projectinfo.attachments": {"$exists": True, "$ne": None}  # attachments must exist
+            }
+        },
+        {
+            "$addFields": {
+                "has_valid_org_url": {
+                    "$gt": [
+                        {
+                            "$size": {
+                                "$filter": {
+                                    "input": {"$objectToArray": "$projectinfo.attachments"},
+                                    "as": "item",
+                                    "cond": {"$ne": ["$$item.v.org_url", ""]}  # org_url not empty
+                                }
+                            }
+                        },
+                        0
+                    ]
+                }
+            }
+        },
+        {
+            "$match": {
+                "has_valid_org_url": True  # keep only qualifying documents
+            }
+        },
+        {
+            "$count": "count"
+        }
+    ]
+
+    result = list(collection.aggregate(pipeline))
+    org_url_not_empty = result[0]["count"] if result else 0
+    # 2. Count docs where org_url is non-empty but fid is empty (aggregation avoids the $size error)
+    pipeline = [
+        {
+            "$match": {
+                "projectinfo.attachments": {"$exists": True, "$ne": None}  # attachments must exist and be non-null
+            }
+        },
+        {
+            "$addFields": {
+                "filtered_attachments": {
+                    "$filter": {
+                        "input": {"$objectToArray": "$projectinfo.attachments"},
+                        "as": "item",
+                        "cond": {
+                            "$and": [
+                                {"$ne": ["$$item.v.org_url", ""]},  # org_url not empty
+                                {"$in": ["$$item.v.fid", [None, ""]]}  # fid empty or missing
+                            ]
+                        }
+                    }
+                }
+            }
+        },
+        {
+            "$match": {
+                "filtered_attachments": {"$ne": []}  # at least one matching attachment
+            }
+        },
+        {
+            "$count": "count"
+        }
+    ]
+
+    result = list(collection.aggregate(pipeline))
+    org_url_not_empty_and_fid_empty = result[0]["count"] if result else 0
+
+    print(f"org_url不为空的数量: {org_url_not_empty}")
+    print(f"fid为空的数量: {org_url_not_empty_and_fid_empty}")
+
+    # Close the connection
+    client.close()
+
+if __name__ == "__main__":
+    count_documents()

+ 68 - 0
tools/生成标准样本库的分析数据/test.py

@@ -0,0 +1,68 @@
+import pymongo
+import mysql.connector
+from decimal import Decimal
+
+def convert_decimal_to_float(data):
+    """ 遍历字典,把 Decimal 类型转换为 float """
+    for key, value in data.items():
+        if isinstance(value, Decimal):
+            data[key] = float(value)
+    return data
+
+
+
+# MongoDB 连接配置
+mongodb_config = {
+    "host": "172.20.45.129",
+    "port": 27002,
+    "db": "data_quality",
+    "collection_a": "temp_bidding",
+    "collection_b": "standard_sample_data_all"
+}
+
+# MySQL 连接配置
+mysql_config = {
+    "host": "172.20.45.129",
+    "user": "root",
+    "password": "=PDT49#80Z!RVv52_z",
+    "database": "quality",
+    "port": 4000
+}
+
+# 连接 MongoDB
+mongo_client = pymongo.MongoClient(f"mongodb://{mongodb_config['host']}:{mongodb_config['port']}")
+mongo_db = mongo_client[mongodb_config["db"]]
+mongo_collection_a = mongo_db[mongodb_config["collection_a"]]
+mongo_collection_b = mongo_db[mongodb_config["collection_b"]]
+
+# 获取 MongoDB `a` 表中的 `_id` 列表
+mongo_ids = mongo_collection_a.distinct("_id")
+
+# 连接 MySQL
+mysql_conn = mysql.connector.connect(
+    host=mysql_config["host"],
+    user=mysql_config["user"],
+    port=mysql_config["port"],
+    password=mysql_config["password"],
+    database=mysql_config["database"]
+)
+mysql_cursor = mysql_conn.cursor(dictionary=True)  # 以字典格式返回数据
+
+# 查询 MySQL 数据库中匹配的 `id` 数据
+if mongo_ids:
+    format_strings = ','.join(['%s'] * len(mongo_ids))  # 生成SQL占位符
+    mysql_cursor.execute(f"SELECT * FROM customer_data_ttzl WHERE id IN ({format_strings})", tuple(mongo_ids))
+    matched_rows = mysql_cursor.fetchall()
+    # 转换 MySQL 查询结果
+    matched_rows = [convert_decimal_to_float(row) for row in matched_rows]
+
+    # 如果有匹配的数据,将其插入到 MongoDB `b` 表中
+    if matched_rows:
+        for row in matched_rows:
+            row["_id"] = row.pop("id")  # 把 id 设为 MongoDB 的 _id
+            mongo_collection_b.update_one({"_id": row["_id"]}, {"$set": row}, upsert=True)
+
+# 关闭数据库连接
+mysql_cursor.close()
+mysql_conn.close()
+mongo_client.close()

+ 77 - 0
tools/生成标准样本库的分析数据/test2.py

@@ -0,0 +1,77 @@
+import pymongo
+import pymysql
+from decimal import Decimal
+
+
+def convert_decimal_to_float(data):
+    """ 遍历字典,把 Decimal 类型转换为 float,避免 MySQL 插入错误 """
+    for key, value in data.items():
+        if isinstance(value, Decimal):
+            data[key] = float(value)
+    return data
+
+
+# **MongoDB 连接配置**
+mongodb_config = {
+    "host": "172.20.45.129",
+    "port": 27002,
+    "db": "data_quality",
+    "collection_a": "temp_bidding"  # MongoDB 的 A 表
+}
+
+# **MySQL 连接配置**
+mysql_config = {
+    "host": "172.20.45.129",
+    "user": "root",
+    "password": "=PDT49#80Z!RVv52_z",
+    "database": "quality",
+    "port": 4000
+}
+
+# **连接 MongoDB**
+mongo_client = pymongo.MongoClient(f"mongodb://{mongodb_config['host']}:{mongodb_config['port']}")
+mongo_db = mongo_client[mongodb_config["db"]]
+mongo_collection_a = mongo_db[mongodb_config["collection_a"]]
+
+# **获取 MongoDB `a` 表中的 `_id` 列表**
+mongo_ids = mongo_collection_a.distinct("_id")
+
+# **连接 MySQL**
+mysql_conn = pymysql.connect(
+    host=mysql_config["host"],
+    user=mysql_config["user"],
+    port=mysql_config["port"],
+    password=mysql_config["password"],
+    database=mysql_config["database"]
+)
+mysql_cursor = mysql_conn.cursor(pymysql.cursors.DictCursor)  # 查询结果返回字典格式
+
+# **查询 MySQL `a` 表(customer_data_ttzl)中匹配的 `id`**
+if mongo_ids:
+    format_strings = ','.join(['%s'] * len(mongo_ids))  # SQL 参数占位符
+    query = f"SELECT * FROM customer_data_ttzl WHERE id IN ({format_strings})"
+
+    mysql_cursor.execute(query, tuple(mongo_ids))
+    matched_rows = mysql_cursor.fetchall()
+
+    # **转换 Decimal 类型**
+    matched_rows = [convert_decimal_to_float(row) for row in matched_rows]
+
+    # **如果有匹配的数据,将其插入到 `b` 表**
+    if matched_rows:
+        for row in matched_rows:
+            columns = ', '.join(row.keys())  # 获取所有列名
+            placeholders = ', '.join(['%s'] * len(row))  # 生成占位符
+            insert_sql = f"INSERT INTO customer_data_b ({columns}) VALUES ({placeholders})"
+
+            try:
+                mysql_cursor.execute(insert_sql, tuple(row.values()))
+            except pymysql.err.IntegrityError as e:
+                print(f"插入失败,可能是主键冲突: {e}")
+
+    mysql_conn.commit()  # 提交事务
+
+# **关闭数据库连接**
+mysql_cursor.close()
+mysql_conn.close()
+mongo_client.close()

+ 92 - 0
tools/生成标准样本库的分析数据/test3.py

@@ -0,0 +1,92 @@
+import pymongo
+import pymysql
+from decimal import Decimal
+
+
+def convert_decimal_to_float(data):
+    """ 遍历字典,把 Decimal 类型转换为 float,避免 MySQL 插入错误 """
+    for key, value in data.items():
+        if isinstance(value, Decimal):
+            data[key] = float(value)
+    return data
+
+
+# **MongoDB 连接配置**
+mongodb_config = {
+    "host": "172.20.45.129",
+    "port": 27002,
+    "db": "data_quality",
+    "collection_a": "temp_bidding"  # MongoDB 的 A 表
+}
+
+# **MySQL 连接配置**
+mysql_config = {
+    "host": "172.20.45.129",
+    "user": "root",
+    "password": "=PDT49#80Z!RVv52_z",
+    "database": "quality",
+    "port": 4000
+}
+
+# **连接 MongoDB**
+mongo_client = pymongo.MongoClient(
+    f"mongodb://{mongodb_config['host']}:{mongodb_config['port']}",
+    serverSelectionTimeoutMS=5000  # 5 秒超时,避免卡住
+)
+mongo_db = mongo_client[mongodb_config["db"]]
+mongo_collection_a = mongo_db[mongodb_config["collection_a"]]
+
+# **获取 MongoDB `a` 表中的 `_id` 列表(最多10万条,防止查询过载)**
+mongo_ids = [
+    doc["_id"] for doc in mongo_collection_a.find({}, {"_id": 1}).limit(100000)
+]
+
+# **连接 MySQL**
+mysql_conn = pymysql.connect(
+    host=mysql_config["host"],
+    user=mysql_config["user"],
+    port=mysql_config["port"],
+    password=mysql_config["password"],
+    database=mysql_config["database"],
+    connect_timeout=10  # 10 秒超时,防止连接卡住
+)
+mysql_cursor = mysql_conn.cursor(pymysql.cursors.DictCursor)  # 查询结果返回字典格式
+
+# **分批查询 MySQL `a` 表(customer_data_ttzl)中的匹配数据**
+batch_size = 1000  # 每次查询 1000 条
+matched_rows = []
+
+for i in range(0, len(mongo_ids), batch_size):
+    batch_ids = mongo_ids[i:i + batch_size]
+    format_strings = ','.join(['%s'] * len(batch_ids))  # SQL 参数占位符
+    query = f"SELECT * FROM customer_data_ttzl WHERE id IN ({format_strings})"
+
+    mysql_cursor.execute(query, tuple(batch_ids))
+    rows = mysql_cursor.fetchall()
+
+    matched_rows.extend(rows)
+
+# **转换 Decimal 类型**
+matched_rows = [convert_decimal_to_float(row) for row in matched_rows]
+
+# **如果有匹配的数据,将其插入到 `b` 表**
+if matched_rows:
+    for row in matched_rows:
+        columns = ', '.join(row.keys())  # 获取所有列名
+        placeholders = ', '.join(['%s'] * len(row))  # 生成占位符
+        insert_sql = f"""
+        INSERT INTO customer_data_b ({columns}) VALUES ({placeholders})
+        ON DUPLICATE KEY UPDATE {", ".join([f"{col}=VALUES({col})" for col in row.keys()])}
+        """
+
+        try:
+            mysql_cursor.execute(insert_sql, tuple(row.values()))
+        except pymysql.err.IntegrityError as e:
+            print(f"插入失败,可能是主键冲突: {e}")
+
+    mysql_conn.commit()  # 提交事务
+
+# **关闭数据库连接**
+mysql_cursor.close()
+mysql_conn.close()
+mongo_client.close()

二进制
tools/生成标准样本库的分析数据/数据分析结果.xlsx


+ 34 - 52
tools/生成标准样本库的分析数据/根据样本数据拉取正式数据生成分析表mongo.py

@@ -1,24 +1,20 @@
 from pymongo import MongoClient
 from bson import ObjectId  # 导入 ObjectId
-import pymysql
 from lib.mogodb_helper import MongoDBInterface
 
 # MongoDB 配置信息
 MongodbConfigLocal = {
-    "ip_port": "127.0.0.1:27088",
-    "user": "viewdata",
-    "password": "viewdata",
-    "db": "qfw",
-    "col": "bidding"  # 替换为实际集合名称
+    "ip_port": "172.20.45.129",
+    "port": 27002,
+    "db": "data_quality",
+    "collection_a": "temp_bidding_copy",
 }
 
-# MySQL 配置信息
-mysql_config = {
+MongodbConfigAnalysis = {
     "host": "172.20.45.129",
-    "user": "root",
-    "password": "=PDT49#80Z!RVv52_z",
-    "database": "quality",
-    "port": 4000
+    "port": 27002,
+    "db": "data_quality",
+    "collection_a": "standard_sample_data_new",
 }
 
 # 字段映射
@@ -36,52 +32,38 @@ field_mapping = {
 }
 
 def main():
-    # 实例化 MongoDBInterface
-    mongo_db_interface = MongoDBInterface(MongodbConfigLocal)
+    # 连接 MongoDB
+    mongo_interface_src = MongoDBInterface(MongodbConfigLocal)  # 源数据库
+    mongo_interface_dst = MongoDBInterface(MongodbConfigAnalysis)  # 目标数据库
 
-    # 使用 MySQL 的 with 语句管理连接
-    with pymysql.connect(
-            host=mysql_config["host"],
-            port=mysql_config["port"],
-            user=mysql_config["user"],
-            password=mysql_config["password"],
-            database=mysql_config["database"]
-    ) as mysql_conn:
-        with mysql_conn.cursor() as mysql_cursor:
-            # 从 MySQL 中读取 _id 列表
-            mysql_cursor.execute("SELECT _id FROM sample_bid_analysis")
-            ids = mysql_cursor.fetchall()
 
-            for (_id,) in ids:
-                # 将 _id 转换为 ObjectId 类型
-                try:
-                    object_id = ObjectId(_id)
-                except Exception as e:
-                    print(f"Invalid ObjectId: {_id}, skipping. Error: {e}")
-                    continue
+    # 获取 `sample_bid_analysis` 表中的 `_id` 列表
+    sample_analysis_collection = mongo_interface_dst.get_collection()
+    analysis_ids = sample_analysis_collection.distinct("_id")  # 获取所有 _id
 
-                # 使用 MongoDBInterface 的 find_by_id 方法从 MongoDB 查询数据
-                mongo_data = mongo_db_interface.find_by_id(MongodbConfigLocal["col"], object_id)
-                if not mongo_data:
-                    continue
+    for _id in analysis_ids:
+        try:
+            object_id = ObjectId(_id)
+        except Exception as e:
+            print(f"Invalid ObjectId: {_id}, skipping. Error: {e}")
+            continue
 
-                # 构造更新数据,若值为 None 或 "",则填充为 None
-                update_fields = {
-                    field_mapping[key]: None if not mongo_data.get(key) else mongo_data[key]
-                    for key in field_mapping
-                }
+        # 查询 `bidding` 表数据
+        bidding_data = mongo_interface_src.find_by_id(MongodbConfigLocal["col"], object_id)
+        if not bidding_data:
+            continue
 
-                # 构造更新 SQL
-                update_sql = f"""
-                UPDATE sample_bid_analysis
-                SET {", ".join([f"{field} = %s" for field in update_fields.keys()])}
-                WHERE _id = %s
-                """
-                update_values = list(update_fields.values()) + [_id]
+        # 生成更新字段
+        update_fields = {
+            field_mapping[key]: None if not bidding_data.get(key) else bidding_data[key]
+            for key in field_mapping
+        }
 
-                # 执行更新操作
-                mysql_cursor.execute(update_sql, update_values)
-                mysql_conn.commit()
+        # 更新 `sample_bid_analysis` 表
+        sample_analysis_collection.update_one(
+            {"_id": object_id},  # 依据 _id 匹配
+            {"$set": update_fields}  # 更新字段
+        )
 
 if __name__ == "__main__":
     main()

+ 210 - 0
tools/生成标准样本库的分析数据/生成统计结果.py

@@ -0,0 +1,210 @@
+import pymysql
+import pymongo
+import pandas as pd
+from openpyxl import Workbook
+from openpyxl.styles import Font, Alignment
+
+# # MySQL 配置信息
+# MYSQL_CONFIG = {
+#     "host": "172.20.45.129",
+#     "user": "root",
+#     "password": "=PDT49#80Z!RVv52_z",
+#     "database": "quality",
+#     "port": 4000
+# }
+# # 连接 MySQL 并读取数据
+# def fetch_data():
+#     conn = pymysql.connect(**MYSQL_CONFIG)
+#     query = "SELECT * FROM sample_bid_analysis;"
+#     df = pd.read_sql(query, conn)
+#     conn.close()
+#     return df
+# MongoDB 连接配置
+MONGO_CONFIG = {
+    "host": "172.20.45.129",
+    "port": 27002,
+    "db": "data_quality",
+    "col": "standard_sample_data_new",
+}
+
+
+# 连接 MongoDB 并读取数据
+def fetch_data():
+    client = pymongo.MongoClient(f"mongodb://{MONGO_CONFIG['host']}:{MONGO_CONFIG['port']}")
+    db = client[MONGO_CONFIG["db"]]
+    collection = db[MONGO_CONFIG["col"]]
+
+    # 读取数据并转换为 DataFrame
+    data = list(collection.find({}, {"_id": 0}))  # 去掉 `_id` 字段
+    df = pd.DataFrame(data)
+
+    client.close()
+    return df
+
+# 判断 projectname 是否互为包含关系
+def is_contained(str1, str2):
+    """ 判断 str1 和 str2 是否互相包含(非空值情况下) """
+    if pd.isna(str1) or pd.isna(str2):  # 如果有 NaN 值,直接返回 False
+        return False
+    return str1 in str2 or str2 in str1  # 互为包含
+
+# 计算统计数据
+def calculate_metrics_and_accuracy(df, category):
+    """ 计算表格所需数据 """
+    # 确定数据类别:中标类 or 招标类
+    if category == "中标类":
+        bid_types = ["成交", "单一", "废标", "合同", "结果变更", "流标", "验收", "中标", "其它"]
+        df = df[df["subtype"].isin(bid_types)]
+        fields = ["toptype", "subtype", "area", "city", "buyer", "projectname", "projectcode", "budget", "s_winner", "bidamount"]
+
+    else:  # 招标类
+        bid_types = ["成交", "单一", "废标", "合同", "结果变更", "流标", "验收", "中标", "其它", "拟建"]
+        df = df[~df["subtype"].isin(bid_types)]
+        fields = ["toptype", "subtype", "area", "city", "buyer", "projectname", "projectcode", "budget"]
+
+
+    results = []
+    # 统一将 None、<NA> 和空字符串都转为 pd.NA
+    df = df.replace({None: pd.NA, '': pd.NA})  # 替换 None 和空字符串为 pd.NA
+    df = df.fillna(pd.NA)  # 确保所有空值都转为 pd.NA
+    correct_rows = 0  # 整行正确的计数
+    total_count = len(df)  # 样本总量
+
+    for _, row in df.iterrows():
+        row_correct = True  # 假设整行正确
+
+        for field in fields:
+            original_value = row.get(field, pd.NA)
+            ai_value = row.get(f"{field}_ai", pd.NA)
+
+            if field == "projectname":  # 特殊处理 projectname
+                is_correct = is_contained(original_value, ai_value)
+            else:
+                # 这里避免 pd.NA 直接比较导致错误
+                if pd.isna(original_value) or pd.isna(ai_value):
+                    is_correct = pd.isna(original_value) and pd.isna(ai_value)  # 如果都为空,算正确
+                else:
+                    is_correct = original_value == ai_value  # 正常比较
+
+            if not is_correct:
+                row_correct = False  # 只要有一个字段错误,整行就是错误的
+
+        if row_correct:
+            correct_rows += 1  # 统计整行正确的数量
+
+    # 计算整行正确率
+    single_row_accuracy = correct_rows / total_count if total_count else 0
+
+    for field in fields:
+        total_count = len(df)  # 样本数据总量
+        null_count = df[field].isna().sum()  # 原文无值
+        valid_count = total_count - null_count  # 原文有值的数量
+
+        if field == "projectname":  # 特殊处理 projectname
+            extract_correct_count = df.apply(lambda row: is_contained(row["projectname"], row["projectname_ai"]),axis=1).sum()
+            extract_error_count = valid_count - extract_correct_count
+            extract_correct_no_null = extract_correct_count  # 互为包含的都算正确
+            extract_error_no_null = extract_error_count
+        else:  # 其他字段的正常处理逻辑
+            extract_error_count = ((df[field].isna() & df[f"{field}_ai"].notna()) |
+                                   (df[field].notna() & df[f"{field}_ai"].isna()) |
+                                   (df[field].notna() & df[f"{field}_ai"].notna() & (
+                                               df[field] != df[f"{field}_ai"]))).sum()
+
+            # 抽取错误的数量(含原文无)
+            extract_correct_count = total_count - extract_error_count  # 抽取正确的数量(含原文无)
+            extract_error_no_null = (df[field].notna() & (df[field] != df.get(f"{field}_ai", df[field]))).sum()  # 抽取错误的数量(不含原文无)
+            extract_correct_no_null = valid_count - extract_error_no_null  # 抽取有值且正确数量(不含原文无)
+
+        # 计算比率
+        recognition_rate = valid_count / total_count if total_count else 0  # 识别率
+        recognition_correct_rate = extract_correct_count / total_count if total_count else 0  # 识别正确率
+        correct_rate = extract_correct_no_null / valid_count if valid_count else 0  # 正确率(原文存在情况下)
+
+        results.append([
+            field, total_count, null_count, valid_count, extract_error_count,
+            extract_correct_count, extract_error_no_null, extract_correct_no_null,
+            f"{recognition_rate:.2%}", f"{recognition_correct_rate:.2%}", f"{correct_rate:.2%}"
+        ])
+
+    columns = ["字段", "样本数据总量", "原文无值", "原文有值的数量", "抽取错误的数量(含原文无)",
+               "抽取正确的数量(含原文无)", "抽取错误的数量(不含原文无)",
+               "抽取有值且正确数量(不含原文无)", "识别率", "识别正确率", "正确率(原文存在情况下)"]
+    df_fields = pd.DataFrame(results, columns=columns)
+
+    # 整行统计数据
+    df_overall = pd.DataFrame([["数据总量", total_count],
+                               ["整行都正确的数量", correct_rows],
+                               ["单行正确率", f"{single_row_accuracy:.2%}"]],
+                              columns=["指标", "数值"])
+    return df_fields,df_overall
+
+
+# # 计算整体正确率
+# def calculate_overall_accuracy(df, fields):
+#     """ 计算整行正确的数量及单行正确率 """
+#     total_count = len(df)  # 样本总量
+#
+#     # 判断每行所有字段是否都正确(projectname 需使用互为包含逻辑)
+#     def is_row_correct(row):
+#         for field in fields:
+#             if pd.isna(row[field]) and pd.isna(row[f"{field}_ai"]):  # 如果原值和 AI 值都为空,算正确
+#                 continue
+#             if field == "projectname":
+#                 if not is_contained(row["projectname"], row["projectname_ai"]):  # projectname 互为包含
+#                     return False
+#             else:
+#                 if row[field] != row.get(f"{field}_ai", row[field]):  # 其他字段直接对比
+#                     return False
+#         return True
+#
+#     correct_rows = df.apply(is_row_correct, axis=1).sum()  # 统计整行正确的数量
+#     single_row_accuracy = correct_rows / total_count if total_count else 0  # 计算单行正确率
+#
+#     return pd.DataFrame([["数据总量", total_count],
+#                          ["整行都正确的数量", correct_rows],
+#                          ["单行正确率", f"{single_row_accuracy:.2%}"]],
+#                         columns=["指标", "数值"])
+
+# 导出 Excel
+def export_to_excel(df_bid_fields, df_bid_overall,df_tender_fields,df_tender_overall):
+    file_path = "数据分析结果.xlsx"
+    with pd.ExcelWriter(file_path, engine="openpyxl") as writer:
+        df_bid_fields.to_excel(writer, sheet_name="字段统计-中标类", index=False)
+        df_bid_overall.to_excel(writer, sheet_name="整体正确率-中标类", index=False)
+        df_tender_fields.to_excel(writer, sheet_name="字段统计-招标类", index=False)
+        df_tender_overall.to_excel(writer, sheet_name="整体正确率-招标类", index=False)
+
+        # Excel 格式优化
+        workbook = writer.book
+        for sheet in workbook.sheetnames:
+            ws = workbook[sheet]
+            for col in ws.columns:
+                max_length = 0
+                col_letter = col[0].column_letter
+                for cell in col:
+                    try:
+                        if cell.value:
+                            max_length = max(max_length, len(str(cell.value)))
+                    except:
+                        pass
+                ws.column_dimensions[col_letter].width = max_length + 2  # 调整列宽
+
+            # 加粗第一行
+            for cell in ws[1]:
+                cell.font = Font(bold=True)
+                cell.alignment = Alignment(horizontal="center", vertical="center")
+
+    print(f"Excel 文件已保存:{file_path}")
+
+
+# 主函数
+def main():
+    df = fetch_data()
+    df_bid_fields, df_bid_overall = calculate_metrics_and_accuracy(df, "中标类")
+    df_tender_fields, df_tender_overall = calculate_metrics_and_accuracy(df, "招标类")
+    export_to_excel(df_bid_fields, df_bid_overall,df_tender_fields,df_tender_overall)
+
+
+if __name__ == "__main__":
+    main()

+ 6 - 79
tools/生成标准样本库的分析数据/生成统计结果_入库.py

@@ -127,12 +127,6 @@ def calculate_metrics_and_accuracy(df, category):
         recognition_rate = valid_count / total_count if total_count else 0  # 识别率
         recognition_correct_rate = extract_correct_count / total_count if total_count else 0  # 识别正确率
         correct_rate = extract_correct_no_null / valid_count if valid_count else 0  # 正确率(原文存在情况下)
-
-        results.append([
-            field, total_count, null_count, valid_count, extract_error_count,
-            extract_correct_count, extract_error_no_null, extract_correct_no_null,
-            f"{recognition_rate:.2%}", f"{recognition_correct_rate:.2%}", f"{correct_rate:.2%}"
-        ])
         results.append({
             "field_name": field,
             "sample_total": total_count,
@@ -148,83 +142,18 @@ def calculate_metrics_and_accuracy(df, category):
             "data_type": category
         })
 
-    columns = ["字段", "样本数据总量", "原文无值", "原文有值的数量", "抽取错误的数量(含原文无)",
-               "抽取正确的数量(含原文无)", "抽取错误的数量(不含原文无)",
-               "抽取有值且正确数量(不含原文无)", "识别率", "识别正确率", "正确率(原文存在情况下)"]
-    df_fields = pd.DataFrame(results, columns=columns)
-
-    # 整行统计数据
-    df_overall = pd.DataFrame([["数据总量", total_count],
-                               ["整行都正确的数量", correct_rows],
-                               ["单行正确率", f"{single_row_accuracy:.2%}"]],
-                              columns=["指标", "数值"])
+    # columns = ["字段", "样本数据总量", "原文无值", "原文有值的数量", "抽取错误的数量(含原文无)",
+    #            "抽取正确的数量(含原文无)", "抽取错误的数量(不含原文无)",
+    #            "抽取有值且正确数量(不含原文无)", "识别率", "识别正确率", "正确率(原文存在情况下)"]
 
     # 构建整体统计
     overall_data = {
         "total_data_count": total_count,
         "correct_rows_count": correct_rows,
-        "row_accuracy": f"{correct_rows / total_count:.2%}" if total_count else "0.00%",
+        "row_accuracy": f"{single_row_accuracy:.2%}" ,
         "data_type": category
     }
-    return df_fields,df_overall,overall_data
-
-
-# # 计算整体正确率
-# def calculate_overall_accuracy(df, fields):
-#     """ 计算整行正确的数量及单行正确率 """
-#     total_count = len(df)  # 样本总量
-#
-#     # 判断每行所有字段是否都正确(projectname 需使用互为包含逻辑)
-#     def is_row_correct(row):
-#         for field in fields:
-#             if pd.isna(row[field]) and pd.isna(row[f"{field}_ai"]):  # 如果原值和 AI 值都为空,算正确
-#                 continue
-#             if field == "projectname":
-#                 if not is_contained(row["projectname"], row["projectname_ai"]):  # projectname 互为包含
-#                     return False
-#             else:
-#                 if row[field] != row.get(f"{field}_ai", row[field]):  # 其他字段直接对比
-#                     return False
-#         return True
-#
-#     correct_rows = df.apply(is_row_correct, axis=1).sum()  # 统计整行正确的数量
-#     single_row_accuracy = correct_rows / total_count if total_count else 0  # 计算单行正确率
-#
-#     return pd.DataFrame([["数据总量", total_count],
-#                          ["整行都正确的数量", correct_rows],
-#                          ["单行正确率", f"{single_row_accuracy:.2%}"]],
-#                         columns=["指标", "数值"])
-
-# 导出 Excel
-def export_to_excel(df_bid_fields, df_bid_overall,df_tender_fields,df_tender_overall):
-    file_path = "数据分析结果.xlsx"
-    with pd.ExcelWriter(file_path, engine="openpyxl") as writer:
-        df_bid_fields.to_excel(writer, sheet_name="字段统计-中标类", index=False)
-        df_bid_overall.to_excel(writer, sheet_name="整体正确率-中标类", index=False)
-        df_tender_fields.to_excel(writer, sheet_name="字段统计-招标类", index=False)
-        df_tender_overall.to_excel(writer, sheet_name="整体正确率-招标类", index=False)
-
-        # Excel 格式优化
-        workbook = writer.book
-        for sheet in workbook.sheetnames:
-            ws = workbook[sheet]
-            for col in ws.columns:
-                max_length = 0
-                col_letter = col[0].column_letter
-                for cell in col:
-                    try:
-                        if cell.value:
-                            max_length = max(max_length, len(str(cell.value)))
-                    except:
-                        pass
-                ws.column_dimensions[col_letter].width = max_length + 2  # 调整列宽
-
-            # 加粗第一行
-            for cell in ws[1]:
-                cell.font = Font(bold=True)
-                cell.alignment = Alignment(horizontal="center", vertical="center")
-
-    print(f"Excel 文件已保存:{file_path}")
+    return pd.DataFrame(results), pd.DataFrame([overall_data])
 
 def save_to_database(df_fields, df_overall):
     """保存到优化后的数据库结构"""
@@ -250,11 +179,10 @@ def save_to_database(df_fields, df_overall):
                 row['correct_recognition_rate'], row['accuracy_rate'],
                 row['data_type']
             ))
-
         # 插入整体统计
         for _, row in df_overall.iterrows():
             sql = """
-            INSERT INTO data_quality_analysis 
+            INSERT INTO sample_data_per_line_analysis 
             (total_data_count, correct_rows_count, row_accuracy, data_type)
             VALUES (%s,%s,%s,%s)
             """
@@ -278,7 +206,6 @@ def main():
     df = fetch_data()
     df_bid_fields, df_bid_overall = calculate_metrics_and_accuracy(df, "中标类")
     df_tender_fields, df_tender_overall = calculate_metrics_and_accuracy(df, "招标类")
-    export_to_excel(df_bid_fields, df_bid_overall,df_tender_fields,df_tender_overall)
     # 合并结果
     all_fields = pd.concat([df_bid_fields, df_tender_fields])
     all_overall = pd.concat([df_bid_overall, df_tender_overall])

+ 2297 - 51
tools/高质量站点第一版/_id.csv

@@ -1,51 +1,2297 @@
-spidercode
-a_qgzbgggsssyq_qbgg
-a_zgzfcgw_zydwzfcgyxgk_gjjs_01
-sc_gzzwsjjcgxt_jjgg
-gd_gdszfcgw_syss_cggg
-jx_jxszfcgdzmc_ggdt_htgg
-a_zgzfcgw_zfcghtgg_new
-a_zgjcjtcgpt_fzbgg_cggg
-a_zgjcjtcgpt_fzbgg_jggg
-hn_hnszfcgdzmc_hnsbj_ggdt
-xj_xjwwezzqzfcgw_dzmcgg_cgcg
-a_zgzfcgw_zydwzfcgyxgk_gjjs_new_01
-a_zgzbtbggfwpt_zbgg2
-js_ntszfcgwssc_xjgg_xqgg
-gx_gxzzzzqzfcg_dzmchtgg
-zj_zjzfcgw_cggg_sylx
-a_oycg_gkcggg
-ah_ahzfcgypt_cjgg
-a_zgjcjtcgpt_fzbgg_bggg
-a_jdcgwxwz_cgdtzxcgxx
-js_ntszfcgwssc_xjgg_cjgg
-gd_gdswszjfwcs_cggg
-a_zgzbtbggfwpt_zhbjggs2
-ah_ahzfcgypt_htgg
-sd_zgsdzfcgw_xxgk_sxhtgk
-gd_gdszfcgw_syss_dzmc
-a_jsxmhjyxdjbbaxt_gg_nipc
-a_zgzbtbggfwpt_wasjgf_zbgg
-gz_gzszfcgdzmc_gzsbj_ggdt_01
-ah_ahzfcgypt_ysgg
-ha_hnstzxmzxspjgptbsdt_xmbljggs_njpc
-hb_hbzwfww_bacx_njpc
-a_zgzbycgw_zbxx_zbxx
-a_gtcgpt_cgjg
-a_zgjcjtcgpt_zbzq_zhbgg
-xj_xjwwezzqzfcgw_dzmcgg_wscs
-gd_gdswszjfwcs_zxgs
-sd_zgsdzfcgw_sxzhbgg_new
-a_syjtyxgs_zh
-jx_jxswszjfwcs_cggg
-a_gjggzyjypt_gcjs_kbjl
-nm_nmgzzqzfcgw_dzmc_htgs
-a_zgzbtbggfwpt_wasjgf_kbjl
-a_gtcgpt_cggg
-jx_jxszfcgdzmc_htgg
-js_jsstzxmzxspjgpt_gsxx_bazcx_njpc
-a_zgzbtbggfwpt_zhbhxrgs2
-a_bjgc_jggs
-a_zgzbycgw_zbxx_zb
-a_zgzfcgw_dfgg_new
-a_zgzfcgw_zydwzfcgyxgk_gjjs
+_id
+6699380c66cf0db42a55858b
+6699387b66cf0db42a5588ad
+6699381666cf0db42a5585c1
+6699385366cf0db42a5587b5
+66993be966cf0db42a559ee7
+66993be966cf0db42a559ee4
+66993be966cf0db42a559ee8
+66993be966cf0db42a559ee3
+6698ec0066cf0db42a547712
+6698ef0866cf0db42a549002
+669939b266cf0db42a55914c
+669937b266cf0db42a5581f8
+66992d4e66cf0db42a5562c4
+669939f266cf0db42a55932d
+6699381966cf0db42a5585ed
+66992ec166cf0db42a5565c0
+669939a866cf0db42a5590f5
+669939ee66cf0db42a5592fc
+6699391b66cf0db42a558d03
+66993b6166cf0db42a559b40
+66993aa366cf0db42a5597ce
+66993b0166cf0db42a5598c8
+66993aad66cf0db42a5597d9
+6699399e66cf0db42a559075
+66993b8a66cf0db42a559ce3
+669934e766cf0db42a556fa2
+669934dd66cf0db42a556f85
+669936b666cf0db42a557830
+669936b666cf0db42a557828
+66993be966cf0db42a559ea8
+66993be966cf0db42a559ecb
+66993be966cf0db42a559ef2
+66993be966cf0db42a559eef
+66993be966cf0db42a559ef0
+66993be966cf0db42a559eee
+66993b4366cf0db42a559ae6
+6699380c66cf0db42a558584
+66993a5c66cf0db42a5595fc
+66993a3e66cf0db42a5594f1
+66993a0666cf0db42a559354
+66993a1066cf0db42a55939d
+66993a1066cf0db42a55939f
+66993a0666cf0db42a559352
+6699331966cf0db42a556c5e
+66992e0266cf0db42a556444
+6699331966cf0db42a556c60
+669936ad66cf0db42a5577d1
+66993ad566cf0db42a5597ea
+66993aa366cf0db42a5597d1
+6699399466cf0db42a559073
+669939f266cf0db42a559310
+669933f666cf0db42a556ddc
+669933d766cf0db42a556da3
+6699387166cf0db42a5587f4
+6699382066cf0db42a558657
+66992d4e66cf0db42a5562b8
+6699250366cf0db42a5550cb
+669939dd66cf0db42a559230
+6699393366cf0db42a558d8a
+6699301566cf0db42a5567b6
+66992f0f66cf0db42a55662a
+6699343d66cf0db42a556ea4
+6699342866cf0db42a556e50
+66992ead66cf0db42a55655e
+66992ead66cf0db42a55655f
+66992ead66cf0db42a55655d
+66992ebf66cf0db42a55659b
+669934b566cf0db42a556f56
+669934a166cf0db42a556f29
+669937c666cf0db42a558302
+6699348c66cf0db42a556ee8
+66993b9466cf0db42a559ce6
+6699391166cf0db42a558c7b
+66993b6166cf0db42a559b41
+66993a0666cf0db42a559387
+669936e066cf0db42a557a45
+6699371266cf0db42a557c82
+6699387366cf0db42a5587ff
+669938ad66cf0db42a5589e1
+66992d5666cf0db42a5562d0
+66992e6f66cf0db42a556504
+6699337d66cf0db42a556d55
+66993b5766cf0db42a559b36
+66993a9d66cf0db42a5597a5
+66993a9d66cf0db42a5597af
+66993a9d66cf0db42a5597ae
+66993a9d66cf0db42a5597a4
+669936a266cf0db42a5576d7
+66993a6666cf0db42a55960a
+6699354166cf0db42a557027
+669929b166cf0db42a555ceb
+66992a4a66cf0db42a555db0
+66992a4a66cf0db42a555dae
+669932b566cf0db42a556c06
+669936cc66cf0db42a557977
+6699367066cf0db42a5574a0
+6699399466cf0db42a559070
+6699382366cf0db42a558670
+66992f2466cf0db42a556658
+66992f4266cf0db42a556672
+6699292566cf0db42a555c57
+66992ca166cf0db42a5561b1
+6699342866cf0db42a556e4d
+66993b7066cf0db42a559bbb
+6699bfff5e4bd86befa0dcd3
+6699bd015e4bd86befa0dcd1
+6672975166cf0db42ac5b0e1
+66729ae666cf0db42ac5d271
+666bf96166cf0db42ab1f0da
+666a763866cf0db42aab55f4
+6683bd3266cf0db42a0455a2
+667a7ad766cf0db42ae09aa6
+667bcc5e66cf0db42ae6e584
+666fd66366cf0db42ab9508a
+6669aea066cf0db42aa8bbf9
+661e683f66cf0db42aa3c455
+664aabf06a6f15434a5ae762
+65d851d866cf0db42acc8911
+65d8526566cf0db42acc8cef
+658ef9e866cf0db42a478287
+668d416766cf0db42a29cc1e
+666ffbe5a396ec22bd8af15c
+666ffacda396ec22bd8af155
+666ffa24a396ec22bd8af151
+666ffb5ea396ec22bd8af158
+668287e866cf0db42aff0c9a
+665d6ee466cf0db42a81c74a
+668e498d66cf0db42a2daf1e
+667a7ca666cf0db42ae0abf1
+6646356066cf0db42a2ea629
+6673812ba396ec22bd8af1eb
+668e05495e4bd86befa0db81
+66738012a396ec22bd8af1e8
+658ef93e66cf0db42a478159
+668eac6b66cf0db42a2fb693
+668eac6b66cf0db42a2fb696
+668eac6b66cf0db42a2fb691
+668eac6b66cf0db42a2fb692
+668eac6b66cf0db42a2fb697
+668eac6b66cf0db42a2fb698
+668eac6b66cf0db42a2fb694
+668e76c266cf0db42a2f2cbc
+668e805f66cf0db42a2f5447
+668e806966cf0db42a2f54d2
+668e7f9f66cf0db42a2f520e
+668e7ff066cf0db42a2f52b5
+668ead4766cf0db42a2fb80b
+668eaacf66cf0db42a2fb4fa
+668eaacf66cf0db42a2fb4fb
+668eaf6666cf0db42a2fba36
+668eaf6666cf0db42a2fba41
+668eaf6666cf0db42a2fba37
+668eaf6666cf0db42a2fba3b
+668eaf6666cf0db42a2fba3a
+668eaf6666cf0db42a2fba39
+668eaf6666cf0db42a2fba3c
+668eaf8466cf0db42a2fba51
+668eafbf66cf0db42a2fba7d
+668eafbf66cf0db42a2fba7f
+668eafca66cf0db42a2fbab2
+668eafc066cf0db42a2fba84
+668eafc966cf0db42a2fba96
+668eafbf66cf0db42a2fba7c
+668eafca66cf0db42a2fbab1
+668eafc066cf0db42a2fba86
+668ead3e66cf0db42a2fb7eb
+668eabd366cf0db42a2fb5e8
+668eae6a66cf0db42a2fb935
+668eae7466cf0db42a2fb943
+668eae7466cf0db42a2fb942
+668e6bf766cf0db42a2ef978
+668e65e166cf0db42a2ed54b
+668e880466cf0db42a2f6b29
+668e7d7766cf0db42a2f4745
+668e880466cf0db42a2f6b2a
+668e7d8b66cf0db42a2f478a
+668e6f9566cf0db42a2f0bff
+668e627d66cf0db42a2ebd66
+668e627d66cf0db42a2ebd6a
+668e628766cf0db42a2ebdda
+668e627d66cf0db42a2ebd5b
+668e623766cf0db42a2ebbd9
+668e627d66cf0db42a2ebd6d
+668e627d66cf0db42a2ebd60
+668e6f8b66cf0db42a2f0bef
+668e999166cf0db42a2f936b
+668e640666cf0db42a2ec790
+668e709a66cf0db42a2f12b0
+668e654066cf0db42a2ed0f3
+668e640666cf0db42a2ec78c
+668e647f66cf0db42a2ecada
+668e658666cf0db42a2ed314
+668e709a66cf0db42a2f12b4
+668e627d66cf0db42a2ebd64
+668eab8d66cf0db42a2fb598
+668eab8d66cf0db42a2fb58d
+668eab8d66cf0db42a2fb58f
+668eaf1e66cf0db42a2fb9ca
+668eabde66cf0db42a2fb5f1
+668ea23166cf0db42a2fa768
+668ea23566cf0db42a2fa771
+668ea68766cf0db42a2faf76
+668eae1066cf0db42a2fb8f0
+668ea10e66cf0db42a2fa571
+668ea10e66cf0db42a2fa55f
+668ea1c366cf0db42a2fa669
+668ea05a66cf0db42a2fa46c
+668ea10e66cf0db42a2fa572
+668ea10e66cf0db42a2fa573
+668ea10e66cf0db42a2fa56b
+668ea0c266cf0db42a2fa506
+668ea05a66cf0db42a2fa476
+668ea10e66cf0db42a2fa551
+668ea11866cf0db42a2fa587
+668ea10e66cf0db42a2fa570
+668ea0c266cf0db42a2fa507
+668ea05a66cf0db42a2fa46e
+668ea10e66cf0db42a2fa553
+668ea10e66cf0db42a2fa554
+668ea10e66cf0db42a2fa56f
+668ea10e66cf0db42a2fa552
+668ea10e66cf0db42a2fa56c
+668eac7466cf0db42a2fb69e
+668ea7c166cf0db42a2fb1de
+668eac7466cf0db42a2fb69d
+668ea61266cf0db42a2faf02
+668ea90c66cf0db42a2fb336
+668ea9fc66cf0db42a2fb3ef
+668ea9ca66cf0db42a2fb3bd
+668e8e1766cf0db42a2f79b9
+668e8e1766cf0db42a2f79b8
+668e8e6b66cf0db42a2f7a29
+668eafc066cf0db42a2fba85
+668ead3466cf0db42a2fb7e1
+668ead3466cf0db42a2fb7e8
+668eae6a66cf0db42a2fb91f
+668eae7466cf0db42a2fb93a
+668e9baf66cf0db42a2f99e7
+668e9baf66cf0db42a2f99e8
+668eae5666cf0db42a2fb917
+668ead1566cf0db42a2fb7ac
+668eaf5066cf0db42a2fba11
+668eae1066cf0db42a2fb8f1
+6524bac352c083c4e07bebbf
+6669254366cf0db42aa53a7a
+663b567266cf0db42afdf7b9
+6638b32466cf0db42af2c526
+6638b15466cf0db42af2bd8c
+6638b0dc66cf0db42af2bc0f
+6662dce466cf0db42a9954d9
+667bf58c66cf0db42ae875d1
+662de3c56a6f15434a5ae64b
+6613501266cf0db42a79480b
+659b850966cf0db42a5d4feb
+659f653a66cf0db42a688359
+662df8bd66cf0db42ad8a2d0
+66599e2266cf0db42a7a5616
+666c345866cf0db42ab3e633
+668bb0da66cf0db42a21ac00
+6613b19766cf0db42a7c095c
+667e6456a396ec22bd8af309
+662330348c5f5f15c79b3eee
+65669856edb1ed45a91448a7
+6566982dedb1ed45a91448a4
+655dca6eedb1ed45a914489e
+66727d1a66cf0db42ac4d526
+650c0a2149768f129a991219
+662e139c6a6f15434a5ae65c
+663211d166cf0db42ae8fd10
+6656a5cc6a6f15434a5ae7ff
+66262b73b4d6306d94b44e01
+6647240566cf0db42a3388bc
+650ac53a513e65bb3f23f0c9
+6628e10666cf0db42ac4a72e
+669eda5166cf0db42a65ab8b
+669eddff66cf0db42a65afa3
+669ebb8166cf0db42a658144
+669f1bd766cf0db42a66b8e0
+669f175b66cf0db42a6692b9
+669f11f066cf0db42a66651e
+669f12a566cf0db42a666a40
+669e827766cf0db42a651cc0
+669e82d166cf0db42a651dc9
+669f176f66cf0db42a66938e
+669e826d66cf0db42a651c88
+669f06d866cf0db42a661994
+669e82dc66cf0db42a651e00
+669ef1dc66cf0db42a65cb3c
+669ee59466cf0db42a65b8aa
+669e879866cf0db42a65296a
+669e8bf166cf0db42a653219
+669ef7b066cf0db42a65d887
+669e833466cf0db42a651e40
+669efd5666cf0db42a65e6ff
+669f0b3266cf0db42a6637eb
+669f01c466cf0db42a65fa0e
+669e82a866cf0db42a651d70
+669e82a866cf0db42a651d74
+669e911566cf0db42a653f46
+669e827f66cf0db42a651cea
+669e93e566cf0db42a654aaf
+669e854966cf0db42a65235d
+669e892066cf0db42a652c9b
+669eff0f66cf0db42a65ed8e
+669efff266cf0db42a65f16e
+669ee97d66cf0db42a65be2b
+669ef1fd66cf0db42a65cb8e
+669f147c66cf0db42a667bdb
+669f010566cf0db42a65f66a
+669eefa166cf0db42a65c5fa
+669eef9766cf0db42a65c5e3
+669eefa166cf0db42a65c60c
+669e827f66cf0db42a651cd2
+669e87b766cf0db42a652a4b
+669e828b66cf0db42a651d2e
+669e827f66cf0db42a651cd5
+669e907166cf0db42a653c52
+669f054666cf0db42a660f94
+669f055066cf0db42a660fda
+669ee3c766cf0db42a65b671
+669ee0bc66cf0db42a65b2e1
+669f0b1e66cf0db42a6636d3
+669e906766cf0db42a653c0e
+669e90b866cf0db42a653d7e
+669e908666cf0db42a653c98
+669ee1df66cf0db42a65b439
+669ef64766cf0db42a65d572
+669ef64766cf0db42a65d57a
+669e8d9666cf0db42a6536af
+669effb066cf0db42a65f051
+669e849e66cf0db42a6521f6
+669e909a66cf0db42a653cfe
+669efff266cf0db42a65f14e
+669e83c166cf0db42a651fd0
+669e917066cf0db42a6540fc
+669e9e6266cf0db42a655a9a
+669e9e6c66cf0db42a655aa9
+669e848b66cf0db42a6521bb
+669effa266cf0db42a65eff0
+669eeaf266cf0db42a65bf8f
+669f02f666cf0db42a660130
+669e860766cf0db42a65257a
+669f083766cf0db42a662382
+669f084866cf0db42a66245b
+669e855d66cf0db42a6523b6
+669e905d66cf0db42a653bbe
+669e906766cf0db42a653bea
+669e910866cf0db42a653eea
+669e918c66cf0db42a6541fe
+669e918166cf0db42a654177
+669e826d66cf0db42a651c80
+669e826d66cf0db42a651c81
+669e826d66cf0db42a651c7d
+669e827f66cf0db42a651ce7
+669e829d66cf0db42a651d5c
+669e90d666cf0db42a653e0a
+669e914f66cf0db42a65404f
+669e82d166cf0db42a651d9e
+669e86b266cf0db42a6526ac
+669e848b66cf0db42a6521a1
+669efb0366cf0db42a65dfab
+669f08ba66cf0db42a6626bc
+669ef10966cf0db42a65c88b
+669eedca66cf0db42a65c3df
+669f013366cf0db42a65f7aa
+669ee50866cf0db42a65b83e
+669e92c666cf0db42a6548da
+669f0dfa66cf0db42a664986
+669f0b6666cf0db42a66388e
+669ef17866cf0db42a65ca15
+669f177966cf0db42a6693e6
+669ef93666cf0db42a65dbcf
+6592248066cf0db42a4a9a60
+659836bc66cf0db42a5a3a38
+65a0f26e66cf0db42a6cfc4a
+65aa038d66cf0db42a7f338f
+65b39c9f66cf0db42a92f6f4
+65c0b02f66cf0db42ab18ae7
+65d80eba66cf0db42acb7bd2
+65e042e866cf0db42adb9564
+65e9461266cf0db42aef75e7
+65f1564e66cf0db42a05040b
+65f9366166cf0db42a1f4483
+65fd65cd66cf0db42a3109f0
+660436d966cf0db42a4a2314
+660a9ed566cf0db42a60f67e
+66129b4a66cf0db42a77def3
+6617934766cf0db42a8c5a4c
+661e262966cf0db42aa11e9a
+6622689966cf0db42ab42c83
+6628f42566cf0db42ac4fd2d
+662e0ee166cf0db42ad97a69
+6631458066cf0db42ae79755
+6638df2c66cf0db42af3419f
+663c3b6e66cf0db42a00752a
+663e244b66cf0db42a0a564b
+6643106866cf0db42a1f7307
+6646305266cf0db42a2e8470
+664b4da466cf0db42a3f8407
+664db0f866cf0db42a4c494b
+66500d9066cf0db42a585705
+6654514066cf0db42a647d49
+6655d44c66cf0db42a6b54d7
+66595c4f66cf0db42a787625
+665b8e5a66cf0db42a7e6941
+665efc3e66cf0db42a89b2d4
+66625b8566cf0db42a95a488
+6668080e66cf0db42aa071a6
+666a69e866cf0db42aaafcac
+666cadc766cf0db42ab51858
+667148f966cf0db42abfb697
+6673db2e66cf0db42acb2c9f
+6678c4ce66cf0db42ad6dd07
+667a8b7b66cf0db42ae14cf0
+667cda7b66cf0db42aeb5bf9
+667e9c1266cf0db42af4e188
+6683b65e66cf0db42a040f34
+66861fe466cf0db42a0f4c05
+6687e75966cf0db42a19168b
+668ce8d266cf0db42a26e09c
+668f41cd66cf0db42a30dd42
+6690f8bf66cf0db42a3954df
+6695d07f66cf0db42a4478f8
+6698b5ff66cf0db42a52b693
+669e3acb66cf0db42a6429a2
+66a083d766cf0db42a6d9531
+66a231c766cf0db42a75f7f3
+66a7046e66cf0db42a80fa08
+66a8fcbe66cf0db42a8a8482
+66ab829f66cf0db42a94e2bf
+66b080da66cf0db42aa0c871
+66b2ddef66cf0db42aa9b62e
+66b4d0bd66cf0db42ab27d2b
+66b9db5766cf0db42abfbee1
+66bed06466cf0db42ad5c3de
+66bef62266cf0db42ad65f11
+66bf0d0466cf0db42ad6fcba
+66bf1f7c66cf0db42ad7a264
+66bf369166cf0db42ad84558
+66bf57f466cf0db42ad8bc81
+6585e7816977356f559ed3b8
+6596308f66cf0db42a53eaa9
+659bb07666cf0db42a5e0a88
+659e7a0566cf0db42a6630be
+65a4a3fd66cf0db42a705c76
+65a79fb366cf0db42a792c41
+65ab0ff666cf0db42a81ecb5
+65b0832c66cf0db42a8a61ff
+65b37a6f66cf0db42a925eeb
+65b8d35c66cf0db42a9d8653
+65bc7eec66cf0db42aa87842
+65c0a37f66cf0db42ab1415a
+65c491f066cf0db42ab8f5dd
+65d5938f66cf0db42ac6a117
+65dc37b466cf0db42ad0ebd2
+65df6cb166cf0db42ad95443
+65e554df66cf0db42ae2f7a2
+65e8b3c666cf0db42aee215b
+65eee78666cf0db42afcb749
+65f4e32366cf0db42a14017c
+65f99f4766cf0db42a21c7af
+65fe81e666cf0db42a32d63e
+6603cd2d66cf0db42a472ede
+6606944b66cf0db42a56807e
+660bd53f66cf0db42a6700db
+661268d866cf0db42a76ee76
+6615175566cf0db42a82611e
+6618ad3666cf0db42a91024b
+661ded9f66cf0db42a9fc340
+6620f10f66cf0db42aadd77f
+6627319e66cf0db42abd3e0f
+662e941366cf0db42adb54c8
+663f73dd66cf0db42a1115b5
+66459bc566cf0db42a2b8f8c
+664b421666cf0db42a3f6665
+664f082566cf0db42a536172
+66553e5066cf0db42a671b9b
+66584b6966cf0db42a74f8c8
+665d987066cf0db42a8341fd
+665f3a1f66cf0db42a8a4912
+6661fe5b66cf0db42a951040
+6668261d66cf0db42aa181ff
+666ac0c866cf0db42aad6396
+666fa37466cf0db42ab85e3e
+667198e966cf0db42ac17ce8
+6674f3b266cf0db42acfac89
+667b9d6866cf0db42ae5a546
+6682819066cf0db42afec3b5
+668d2bec66cf0db42a2976bf
+66953d7866cf0db42a433ea4
+6699dfe866cf0db42a57d884
+66a0f7b566cf0db42a708bf7
+66a749c166cf0db42a829b9c
+66ab69ed66cf0db42a947e2c
+66b5b1ab66cf0db42ab5c55b
+66bec91366cf0db42ad593a1
+66bef0c466cf0db42ad64716
+66bf0eaa66cf0db42ad70caa
+66bf24ad66cf0db42ad7d081
+66bf347366cf0db42ad83955
+66bf50ee66cf0db42ad8aac0
+6592b13166cf0db42a4ab08a
+65960b5666cf0db42a532a8f
+659b585666cf0db42a5c7eef
+659d07d366cf0db42a61c522
+659fa55b66cf0db42a6990c3
+65a8e0e966cf0db42a7c5a8d
+65b114b366cf0db42a8c5342
+65b3d9f966cf0db42a933ab9
+65b8ad3f66cf0db42a9c5a87
+65bb468666cf0db42aa546f4
+65bf572a66cf0db42aade40e
+65d1c11f66cf0db42abfbdbe
+65dc3a4166cf0db42ad0f8fc
+65e1223966cf0db42add0d9d
+65e6e49866cf0db42ae98284
+65ee61a566cf0db42af9407d
+65f139eb66cf0db42a047ba2
+65f79ecc66cf0db42a1713a1
+65fa80a466cf0db42a247fbe
+660144d966cf0db42a37859b
+6603c9cf66cf0db42a471410
+6606821466cf0db42a55c377
+660bbbea66cf0db42a65f613
+6612534f66cf0db42a762aee
+6614e85866cf0db42a80b2d6
+66179a4b66cf0db42a8c9025
+661cda6166cf0db42a9af164
+661f821e66cf0db42aa78a3a
+6623339066cf0db42ab53c0e
+66277c8966cf0db42abf166a
+662a20b166cf0db42ac91d34
+662e348e66cf0db42ada849f
+66383cc366cf0db42aefb890
+6639b82266cf0db42af5d309
+663b35f966cf0db42afce4da
+663d830266cf0db42a0620bb
+6640082366cf0db42a12b0eb
+6642d77366cf0db42a1e3363
+66447ee066cf0db42a2772df
+6646e8d866cf0db42a31e565
+664b14a566cf0db42a3e5a7b
+664d548466cf0db42a4940e3
+66504ef666cf0db42a59dc03
+66728f5266cf0db42ac56160
+6674ee1466cf0db42acf7001
+66793e5666cf0db42adb23ec
+667b9dd466cf0db42ae5a793
+66822f3a66cf0db42afc342f
+6683ddd166cf0db42a05bd14
+668670b466cf0db42a11957f
+668bc68666cf0db42a2278e5
+668e4ede66cf0db42a2de79d
+6690ef4866cf0db42a38ed3c
+6696210166cf0db42a46ec29
+6698d43c66cf0db42a538fe0
+669e1ef766cf0db42a62f970
+66a198c166cf0db42a71950b
+66a7231366cf0db42a81d027
+66acaa8566cf0db42a998a2e
+66b2f69266cf0db42aaa63c3
+66baa7b266cf0db42ac1da39
+659214ad66cf0db42a4a985c
+65968a6566cf0db42a5591e2
+659d06b066cf0db42a61bc03
+65a1705066cf0db42a6e7b7a
+65a809de66cf0db42a7a3a1e
+65af2b7d66cf0db42a87599f
+65b8aae666cf0db42a9c4be2
+65c1fd9966cf0db42ab47574
+65d8014a66cf0db42acb3993
+65e1aa4366cf0db42adfa12b
+65ee559e66cf0db42af9022b
+65f3c3d066cf0db42a0fff28
+65fbe31266cf0db42a2a2a12
+6603b35a66cf0db42a46905c
+660cc81a66cf0db42a6acc07
+6616581766cf0db42a87f623
+661eb2b566cf0db42aa47a60
+6627ea0766cf0db42ac076ea
+662f6a7966cf0db42adf85e3
+663b3f7a66cf0db42afd3cc6
+6642bd2266cf0db42a1d5028
+664aec9d66cf0db42a3cfeac
+66505cbb66cf0db42a5a6986
+66596b5c66cf0db42a78adb7
+666167b766cf0db42a926959
+666a9f9b66cf0db42aac1ad7
+6671bdc966cf0db42ac1f2e4
+66792e6866cf0db42ada658d
+667e78c566cf0db42af3b2ac
+668742fb66cf0db42a14656d
+668f8a9366cf0db42a328f0d
+66977f7866cf0db42a4d9455
+66a0582c66cf0db42a6c388e
+66a8bbcf66cf0db42a8988fa
+66b2c8f066cf0db42aa92da8
+659248d866cf0db42a4aa04a
+65ae4b7d66cf0db42a862a4e
+65d2df9c66cf0db42ac153fb
+65e54f6266cf0db42ae2e086
+65f27d8466cf0db42a09c40b
+65fd545e66cf0db42a309787
+6606b8a566cf0db42a5728e3
+6613c14666cf0db42a7cb4df
+661e105b66cf0db42aa09fe2
+66277ea666cf0db42abf2daf
+6630566766cf0db42ae28bfc
+663c632b66cf0db42a017fac
+66434d0966cf0db42a21dc2c
+664c345266cf0db42a42a9cd
+66546b4d66cf0db42a655154
+665d4c7566cf0db42a812da2
+6662812a66cf0db42a96c360
+666bb7c966cf0db42ab059fd
+6678f9ab66cf0db42ad878a2
+667e663366cf0db42af308d9
+6687856066cf0db42a16505e
+66909c5166cf0db42a36ba01
+6699ccbb66cf0db42a5734ca
+66a35c1366cf0db42a7a3e00
+66adbeb166cf0db42a9c4b27
+66b99cd966cf0db42abe078e
+66bed92a66cf0db42ad5f0f5
+66bf150e66cf0db42ad7464d
+66bf3cec66cf0db42ad86430
+659530ab66cf0db42a519b83
+65a5dbfa66cf0db42a731cf2
+65b9d3a666cf0db42aa0898c
+65d2c72e66cf0db42ac107bf
+65e02ca266cf0db42adac8b2
+65eb463066cf0db42af57b40
+65fb816266cf0db42a27d3bf
+6604ee6e66cf0db42a4cd727
+6612512466cf0db42a761bd5
+661d049566cf0db42a9c974b
+66285bcc66cf0db42ac10860
+66306aa566cf0db42ae341de
+663c8c1566cf0db42a02af64
+66459ab566cf0db42a2b8afe
+664ee2d266cf0db42a522f9a
+66571afd66cf0db42a70c256
+665ee4db66cf0db42a891168
+6668fde966cf0db42aa3e55b
+66741efb66cf0db42acd7ffc
+667e687066cf0db42af318b2
+668e45d966cf0db42a2d8706
+669ccbba66cf0db42a5e8c05
+66ac58c866cf0db42a9747f8
+66becc3366cf0db42ad5a81b
+66bef7b366cf0db42ad666c6
+66bf200966cf0db42ad7a6f4
+66bf3eb666cf0db42ad86902
+6592700b66cf0db42a4aa65a
+65a7926266cf0db42a78d9e5
+65bc9a1066cf0db42aa8dc35
+65dd4cb166cf0db42ad34548
+65ee668b66cf0db42af95ca2
+65fbcdb166cf0db42a29be89
+660b7a0966cf0db42a64312c
+66190cba66cf0db42a93bcda
+66286e2666cf0db42ac18b80
+6638abdc66cf0db42af29c0c
+66432ba766cf0db42a209acd
+664ed72e66cf0db42a51f7ac
+6659969c66cf0db42a7a0359
+6667d9c866cf0db42a9f6443
+667152e166cf0db42ac01291
+667ac25066cf0db42ae2acfc
+6684bc2b66cf0db42a08a472
+668f3ea866cf0db42a30ca30
+6698d44166cf0db42a53905d
+66a351d866cf0db42a79df10
+66b082cc66cf0db42aa0d717
+66beca8e66cf0db42ad59d50
+6584a89d6977356f55996e2a
+65ad3dc966cf0db42a8397b0
+65fc3a3866cf0db42a2bfe9e
+661fe73d66cf0db42aa9e099
+667f74ac66cf0db42af698aa
+6685336d66cf0db42a0c56d3
+668b723e66cf0db42a1fd5b6
+668e6fdf66cf0db42a2f0de3
+6694e25d66cf0db42a413ce8
+6698ec1066cf0db42a547827
+669f8bd166cf0db42a6a3eee
+66a3b63a66cf0db42a7c5bb9
+66ab98fd66cf0db42a951887
+66b2313166cf0db42aa819fe
+66b6069166cf0db42ab82a10
+66bec6c266cf0db42ad583c1
+66bf2c8966cf0db42ad80d02
+66bf44ec66cf0db42ad8803f
+66bf5ddd66cf0db42ad8cafc
+6592b1d566cf0db42a4ab443
+659f92e866cf0db42a692681
+65ae002f66cf0db42a84e541
+65ba1d1c66cf0db42aa21f5c
+65d4930466cf0db42ac55aee
+65e589bf66cf0db42ae467e9
+65f3f39466cf0db42a10e38e
+6603fc9a66cf0db42a4929dc
+661504ae66cf0db42a81a681
+6622257866cf0db42ab20ea1
+662f1cda66cf0db42add5948
+6645c0e366cf0db42a2c757d
+6659a37266cf0db42a7a9358
+66700df266cf0db42abb2018
+667cd24866cf0db42aeb1348
+668b606b66cf0db42a1f551d
+6697a9bc66cf0db42a4f0409
+66a4c11766cf0db42a7de7bc
+66b08da666cf0db42aa129e2
+66bec68e66cf0db42ad581ab
+66bf0c6e66cf0db42ad6f79c
+6595bf4f66cf0db42a52c944
+65ab88d166cf0db42a82723c
+65bfb50166cf0db42aaf1ae6
+65dd1e5e66cf0db42ad2b6d8
+65ed101e66cf0db42af6f194
+65fdf26466cf0db42a321a3d
+66104ab066cf0db42a7248fb
+6623eee766cf0db42ab5f4a4
+66348fc466cf0db42aec08ac
+6647b23266cf0db42a360ce2
+6655cb8f66cf0db42a6b3563
+666329c866cf0db42a9a6dbf
+667175b166cf0db42ac0ff2d
+66807cd466cf0db42af84118
+66a088c766cf0db42a6db0ed
+66b5ed5366cf0db42ab79a7a
+66bf090e66cf0db42ad6da84
+659f931a66cf0db42a692875
+65bcb78e66cf0db42aa98cac
+65ddac3366cf0db42ad5328f
+65f2548c66cf0db42a08ae1a
+6603f50d66cf0db42a48e917
+66165a9266cf0db42a881645
+66260d5e66cf0db42ab92ce3
+663ad41166cf0db42afa492e
+664af2af66cf0db42a3d217e
+6659207b66cf0db42a76e5b9
+66696d6366cf0db42aa71dff
+667547d066cf0db42ad20388
+6685f99966cf0db42a0e10fa
+66973c2566cf0db42a4be9de
+66a8bc3466cf0db42a898c0e
+6592b56166cf0db42a4ac2a5
+65a48e6166cf0db42a6ff7ba
+65b7183566cf0db42a963e1e
+65d6c5a966cf0db42ac8d58f
+65f117ee66cf0db42a03d269
+6606a54b66cf0db42a56e1a7
+661fe7a466cf0db42aa9e0fd
+663898a466cf0db42af1f6cb
+664b02cc66cf0db42a3da29f
+6659ffd666cf0db42a7c3c42
+6670ec1166cf0db42abd586b
+6683724e66cf0db42a02375c
+669a05c066cf0db42a58a0d4
+6592353566cf0db42a4a9d91
+65b356fb66cf0db42a919208
+65e17c8c66cf0db42ade75be
+65fbf0fa66cf0db42a2a83e0
+66825d9fa396ec22bd8af332
+6747e16ab25c3e1deba0baf0
+6747d284b25c3e1deba0101e
+6747cdd8b25c3e1deb9fe149
+6747c5deb25c3e1deb9f833f
+6747e5b6b25c3e1deba0ebff
+6747e7a0b25c3e1deba107a8
+6747de4eb25c3e1deba09369
+6747dd5db25c3e1deba084fb
+6747dfaeb25c3e1deba0a43d
+6747c1c8b25c3e1deb9f5c3c
+67474432b25c3e1deb9db8d7
+6747ca46b25c3e1deb9fb8e9
+6747d1cfb25c3e1deba00a81
+6747e690b25c3e1deba0fabd
+6746baf7b25c3e1deb999973
+6747cbafb25c3e1deb9fcb00
+6747c935b25c3e1deb9faa66
+6747d193b25c3e1deba0073e
+6747d9e1b25c3e1deba05dc1
+6747e91db25c3e1deba114e5
+6747e3b1b25c3e1deba0d65d
+6747dbdeb25c3e1deba0725e
+6747c5c0b25c3e1deb9f819a
+6746b948b25c3e1deb998aac
+6747e580b25c3e1deba0e8e3
+6747f301b25c3e1deba18f00
+67469f48b25c3e1deb98cd0d
+6747da88b25c3e1deba06394
+6747d23db25c3e1deba00d9e
+6747d200b25c3e1deba00b84
+6747dff5b25c3e1deba0a8bf
+6747baeab25c3e1deb9f2eee
+6746b857b25c3e1deb99831e
+6747c3e8b25c3e1deb9f6d78
+6747d844b25c3e1deba04c35
+6747dc25b25c3e1deba0759b
+6747437eb25c3e1deb9db6d9
+6747cd55b25c3e1deb9fdd25
+6746bd2cb25c3e1deb99ae61
+6747c84cb25c3e1deb9fa108
+6746bc08b25c3e1deb99a1d3
+6747bddcb25c3e1deb9f42f7
+6747e1edb25c3e1deba0c129
+6747d8ffb25c3e1deba05511
+6747d5efb25c3e1deba036c4
+6747ed47b25c3e1deba141a8
+6747dc8ab25c3e1deba07abd
+6747c680b25c3e1deb9f8a2f
+6747d344b25c3e1deba01ae0
+6747eac1b25c3e1deba1251d
+6747e7beb25c3e1deba107f8
+6747d87cb25c3e1deba04e47
+6747c551b25c3e1deb9f7ad2
+6747d234b25c3e1deba00d78
+67474d07b25c3e1deb9ded55
+6747db0bb25c3e1deba0689c
+6747d528b25c3e1deba02fe1
+6747cd7eb25c3e1deb9fde0b
+6747c9f5b25c3e1deb9fb372
+6747ec4cb25c3e1deba13865
+6747b313b25c3e1deb9f065c
+67468ceeb25c3e1deb97f68e
+6747dd72b25c3e1deba086ee
+6747e1ceb25c3e1deba0bfbd
+6747e562b25c3e1deba0e75e
+6747e484b25c3e1deba0df97
+6747e3e3b25c3e1deba0d954
+6747cd0fb25c3e1deb9fda12
+6746ce56b25c3e1deb9a6917
+6747d029b25c3e1deb9ff94a
+6747bf59b25c3e1deb9f4903
+6747bd59b25c3e1deb9f4007
+6747e08cb25c3e1deba0b079
+67476f16b25c3e1deb9e516d
+6747e9ecb25c3e1deba11d19
+6747cb73b25c3e1deb9fc7eb
+6747d7efb25c3e1deba04864
+6746ba05b25c3e1deb9990be
+6747e08cb25c3e1deba0b016
+6747dff0b25c3e1deba0a874
+6747e691b25c3e1deba0fb18
+6747e557b25c3e1deba0e6d2
+6746b8a7b25c3e1deb998572
+6747cb23b25c3e1deb9fc4d4
+6747c1b4b25c3e1deb9f5b08
+6747d232b25c3e1deba00d53
+6747e927b25c3e1deba11587
+6746b810b25c3e1deb99806a
+6747eb79b25c3e1deba12dbd
+6746ba7eb25c3e1deb999586
+6747d4d6b25c3e1deba02c53
+6747c598b25c3e1deb9f7fe0
+6747ce83b25c3e1deb9fe900
+6747d8a4b25c3e1deba05030
+6746bd68b25c3e1deb99afe8
+6747c75db25c3e1deb9f9407
+6747d5b3b25c3e1deba03507
+6747ca13b25c3e1deb9fb560
+6746b770b25c3e1deb997ca0
+6747c4a6b25c3e1deb9f72e6
+67510322b7d67b9e15506fb0
+67511aaeb7d67b9e1551785e
+6750ef37b7d67b9e154fc5b4
+675105f7b7d67b9e15509201
+675074a5b7d67b9e154e4ded
+67511c7eb7d67b9e15518e1d
+6750fea4b7d67b9e1550348f
+67512618b7d67b9e1551fdf9
+67510d09b7d67b9e1550d902
+6750fb7fb7d67b9e15501022
+67511de0b7d67b9e1551a333
+67511f6ab7d67b9e1551b884
+6750e2fbb7d67b9e154f8883
+6751040ab7d67b9e15507d3a
+67510465b7d67b9e155081d4
+67512855b7d67b9e155216cb
+6751262bb7d67b9e1551fe94
+6751155fb7d67b9e155138f4
+6750f98ab7d67b9e15500233
+6750f988b7d67b9e155001df
+67511aaeb7d67b9e15517891
+675123dab7d67b9e1551e7eb
+67507618b7d67b9e154e5176
+67510f00b7d67b9e1550ecac
+67510cf5b7d67b9e1550d83d
+675113aeb7d67b9e1551269a
+675103f2b7d67b9e15507c9f
+67512f61b7d67b9e15527122
+67511439b7d67b9e15512c55
+6750fa01b7d67b9e155004cd
+675100a7b7d67b9e15504e8b
+675113aeb7d67b9e1551261d
+675116d3b7d67b9e1551498e
+6750ff3cb7d67b9e15503bdc
+67510a85b7d67b9e1550bd02
+67512220b7d67b9e1551d6dd
+6750fe72b7d67b9e155030a0
+67511ac2b7d67b9e155179f9
+675126e0b7d67b9e155205d9
+67510a8fb7d67b9e1550bde2
+67510f28b7d67b9e1550eef4
+67511ac2b7d67b9e1551799e
+67508015b7d67b9e154e777b
+6750da86b7d67b9e154f68d3
+6750c087b7d67b9e154f1c9b
+6750f19db7d67b9e154fd524
+67510d96b7d67b9e1550de56
+67510e4bb7d67b9e1550e51d
+675113e1b7d67b9e155127e0
+67511b13b7d67b9e15517d1b
+6751254eb7d67b9e1551f5fd
+67511541b7d67b9e155137ac
+6751057fb7d67b9e15508d2b
+67510465b7d67b9e1550817b
+67511905b7d67b9e15516467
+67511785b7d67b9e15515287
+675124e9b7d67b9e1551f1de
+67511476b7d67b9e15512e2d
+67510a67b7d67b9e1550b9ba
+675077c3b7d67b9e154e5723
+6750fef4b7d67b9e1550387c
+67511f4eb7d67b9e1551b64b
+67512397b7d67b9e1551e5cb
+6750cb53b7d67b9e154f34ab
+67510f3cb7d67b9e1550f0f7
+67507d26b7d67b9e154e65ba
+67510903b7d67b9e1550aced
+67510852b7d67b9e1550a6b9
+675100f7b7d67b9e155052b7
+67510ccdb7d67b9e1550d65e
+67511495b7d67b9e1551308e
+67511b13b7d67b9e15517ced
+6750aa9bb7d67b9e154ef84b
+675122a5b7d67b9e1551dd70
+67512932b7d67b9e1552218e
+6750e259b7d67b9e154f865a
+67512033b7d67b9e1551c1e7
+675074ebb7d67b9e154e4eae
+67510fcab7d67b9e1550f756
+6751291eb7d67b9e15521fe4
+67510c3fb7d67b9e1550cfa2
+67510a3eb7d67b9e1550b8f7
+6751204db7d67b9e1551c34d
+67503facb7d67b9e154d6f90
+67508f92b7d67b9e154ebd10
+67511c37b7d67b9e15518a77
+6750ff3cb7d67b9e15503bc4
+6750b1d0b7d67b9e154f0594
+67511785b7d67b9e15515252
+675108bcb7d67b9e1550a9ee
+67512272b7d67b9e1551daf5
+6750fff2b7d67b9e155046b0
+6751014ab7d67b9e155057d5
+67512061b7d67b9e1551c3e5
+6750edebb7d67b9e154fbf32
+67510299b7d67b9e155065eb
+6750eda5b7d67b9e154fbcf2
+67510518b7d67b9e155088d0
+6750f1dab7d67b9e154fd5a4
+6750edffb7d67b9e154fbfb1
+6750fe8eb7d67b9e155032c9
+6751277bb7d67b9e15520e09
+675104dcb7d67b9e155086ec
+6750edb9b7d67b9e154fbde3
+6750ed91b7d67b9e154fbc82
+6750ee4fb7d67b9e154fc0c9
+6750ed91b7d67b9e154fbc47
+67510de4b7d67b9e1550e095
+67510649b7d67b9e15509535
+6750fbfcb7d67b9e155014c6
+675103cab7d67b9e15507a38
+675102d6b7d67b9e15506974
+67511a49b7d67b9e15517479
+675107bfb7d67b9e1550a120
+6751081bb7d67b9e1550a45c
+6750edb9b7d67b9e154fbda8
+6750d913b7d67b9e154f6400
+6750ee82b7d67b9e154fc1ed
+6771e71f3309c0998b944ebe
+6771e8153309c0998b945018
+6771dd903309c0998b9445c6
+6771e7573309c0998b944f39
+6771e9e23309c0998b94549b
+6771e44a3309c0998b944b5b
+6771e8343309c0998b945065
+6771e92e3309c0998b945332
+6771e4043309c0998b944b32
+6771e3a03309c0998b944ab2
+6771e82a3309c0998b945044
+6771ea0b3309c0998b945505
+6771ea273309c0998b945513
+6771de9e3309c0998b94469d
+67717a1d3309c0998b93e33c
+6771e72f3309c0998b944eda
+6771e83e3309c0998b945069
+6771e79d3309c0998b944f61
+6771e8293309c0998b945021
+6771e2a53309c0998b944936
+6771e86c3309c0998b945125
+6771ef043309c0998b94641a
+6771f3733309c0998b94766c
+6771f26d3309c0998b947209
+6771f2fa3309c0998b94741d
+6771f2273309c0998b9470ef
+6771ed693309c0998b945e36
+6771f45b3309c0998b947e0d
+6771f5cf3309c0998b9485e6
+6771ebb03309c0998b94598e
+6771f1183309c0998b946bb5
+6771f3b03309c0998b9477dc
+6771f0a03309c0998b946a0f
+6771f45b3309c0998b947e04
+6771edc83309c0998b94604a
+6771edd83309c0998b9460d0
+6771f3a53309c0998b9477a8
+6771f6f23309c0998b948c54
+6771f2cb3309c0998b94734b
+6771f6c03309c0998b948ae3
+6771eafb3309c0998b94573e
+6771f7893309c0998b948fd2
+6771f2fa3309c0998b94741c
+6771f4513309c0998b947d9a
+6771f60b3309c0998b948686
+6771f76b3309c0998b948f1c
+6771f47e3309c0998b947f7f
+6771f6983309c0998b948a5b
+6771eb193309c0998b9457c1
+6771f2633309c0998b9471eb
+6771f3583309c0998b9475c7
+6771f72e3309c0998b948d0f
+6771ea653309c0998b945574
+6771f40a3309c0998b947a3b
+6771f2e63309c0998b947396
+6771f3913309c0998b947752
+6771f60b3309c0998b948693
+6771f1a53309c0998b946db2
+6771f4653309c0998b947efb
+6771f4653309c0998b947eaa
+6771f49c3309c0998b947fa8
+6771ed873309c0998b945eb5
+6771f54c3309c0998b94836d
+6771f4513309c0998b947c6a
+6771ee0a3309c0998b9461f5
+6771f66f3309c0998b94884c
+6771f35f3309c0998b9475e7
+6771f2cb3309c0998b947350
+6771f0a03309c0998b946a6d
+6771f3a53309c0998b9477a6
+6771f76c3309c0998b948f2e
+6771f4b63309c0998b9480a4
+6771f68e3309c0998b9489b5
+6771f54c3309c0998b94836a
+6771edd83309c0998b946080
+6771f7953309c0998b949008
+6771f1683309c0998b946cae
+6771f7933309c0998b948ff7
+6771f03c3309c0998b94690a
+6771efdf3309c0998b94678f
+6771f4473309c0998b947c1e
+6771f2aa3309c0998b9472c4
+6771f1c43309c0998b946ece
+6771f54c3309c0998b948372
+6771f1a53309c0998b946d8e
+6771f45b3309c0998b947e48
+6771f5cf3309c0998b9485c8
+6771f5423309c0998b948261
+6771eeb43309c0998b94639c
+6771f40a3309c0998b947a55
+6771f6153309c0998b948720
+6771f4b63309c0998b948055
+6771f4b63309c0998b948079
+6771f6153309c0998b94873a
+6771f4c53309c0998b9480f0
+6771efe13309c0998b9467cd
+6771f2703309c0998b947211
+6771f4653309c0998b947f1a
+6771ea513309c0998b94552a
+6771ed2d3309c0998b945d5e
+6771ed733309c0998b945e61
+6771f6793309c0998b9488a3
+6771efe13309c0998b9467ae
+6771f5d93309c0998b94866a
+6771f61f3309c0998b948789
+6771f4b63309c0998b94803f
+6771f4513309c0998b947c76
+6771f3043309c0998b947440
+6771f4513309c0998b947d75
+6771f0c83309c0998b946b62
+6771f0073309c0998b94682b
+6771f3d83309c0998b9478df
+6771f3a53309c0998b94779b
+6771f66f3309c0998b9487f5
+6771f45b3309c0998b947e3e
+6771ef043309c0998b94640b
+6771f7243309c0998b948cc1
+6771ed913309c0998b945f3f
+6771ec6e3309c0998b945aef
+6771eda53309c0998b945f88
+6771edd83309c0998b9460e8
+6771ef873309c0998b94667d
+6771f23b3309c0998b947147
+6771f6de3309c0998b948bf8
+6771ecf13309c0998b945c9f
+6771eef73309c0998b9463e4
+6771f5c53309c0998b948566
+6771f5063309c0998b948193
+6771ea793309c0998b9455b0
+6771f34e3309c0998b947581
+6771f7563309c0998b948ddf
+6771f4f23309c0998b94812c
+6771eab53309c0998b9456aa
+6771f2b43309c0998b9472f2
+6771f2333309c0998b947145
+6771f68e3309c0998b94899f
+6771ee003309c0998b9461a9
+6771f4513309c0998b947c42
+6771f6153309c0998b94872b
+6771f4653309c0998b947e92
+6771f5103309c0998b9481ce
+6771f1b93309c0998b946e78
+6771efe13309c0998b9467dc
+6771f3773309c0998b9476a8
+6771f2313309c0998b94712c
+6771f4a23309c0998b947fba
+6771edf03309c0998b946115
+6771f1ba3309c0998b946e90
+6771eeb43309c0998b946364
+6771ef413309c0998b94650e
+6771f3183309c0998b9474df
+6771eb233309c0998b9457dc
+6771f4f23309c0998b948135
+6772022d3309c0998b94d62e
+677204103309c0998b94e3a5
+6771fc7c3309c0998b94b539
+677203ac3309c0998b94e10b
+6772049d3309c0998b94e77c
+6771fc193309c0998b94b317
+677201973309c0998b94d3aa
+677201143309c0998b94d003
+6771fefe3309c0998b94c17e
+677201143309c0998b94cfed
+6771f9ef3309c0998b94a391
+6771f9023309c0998b949af4
+6771f91b3309c0998b949c1d
+6771fb303309c0998b94ab72
+6771f92b3309c0998b949c70
+677200193309c0998b94c9f7
+677201f73309c0998b94d4b7
+677202cd3309c0998b94d97c
+6771fb583309c0998b94ac7f
+677202163309c0998b94d56b
+6771ff463309c0998b94c3fc
+6771fc0e3309c0998b94b2f4
+6771f9443309c0998b949d4c
+677200ec3309c0998b94cf0e
+677200d13309c0998b94cebd
+677204253309c0998b94e418
+677203513309c0998b94dd36
+6771fbaa3309c0998b94b0a9
+6771faa23309c0998b94a7f2
+6771fd273309c0998b94b8b8
+677203ac3309c0998b94e10f
+677204d23309c0998b94e85e
+6771f8fd3309c0998b949a2e
+6771fe843309c0998b94be94
+6772014b3309c0998b94d0fa
+677200103309c0998b94c998
+6772022a3309c0998b94d625
+677204393309c0998b94e4be
+6772002e3309c0998b94ca95
+677204253309c0998b94e428
+6771fc903309c0998b94b5b9
+6771fca83309c0998b94b612
+6771faa43309c0998b94a841
+6771f8fd3309c0998b949aba
+6771fb443309c0998b94abeb
+6771ffb43309c0998b94c658
+677201ab3309c0998b94d439
+6772046d3309c0998b94e6a4
+6771fcf53309c0998b94b7cf
+6771fe4a3309c0998b94bdf6
+677203ac3309c0998b94e111
+677200373309c0998b94caaf
+6771ff823309c0998b94c5ad
+6771fd0d3309c0998b94b823
+677201a13309c0998b94d405
+6771f8203309c0998b9492f1
+6771f88e3309c0998b949608
+6771fb1c3309c0998b94ab15
+6771fb4e3309c0998b94ac25
+6771ffdd3309c0998b94c851
+6771fc683309c0998b94b434
+6771fe223309c0998b94bd35
+6771fb8b3309c0998b94b082
+6771fc7f3309c0998b94b592
+677203c03309c0998b94e173
+6771f8e93309c0998b949862
+677203153309c0998b94db9c
+6771f93a3309c0998b949d20
+677200b03309c0998b94cd92
+677200ba3309c0998b94ce09
+6772002d3309c0998b94ca64
+6771fa983309c0998b94a6f3
+6771ff3b3309c0998b94c3a6
+6771f9113309c0998b949b78
+6771f9d13309c0998b94a25c
+6771fac03309c0998b94a911
+6771f96c3309c0998b949f1f
+6772036f3309c0998b94de57
+6771f8e33309c0998b9497b1
+6771fac03309c0998b94a909
+677203b63309c0998b94e14f
+6771fa493309c0998b94a594
+6771fa993309c0998b94a786
+6771fb113309c0998b94aac9
+6771f8d43309c0998b94970e
+6772049d3309c0998b94e7cd
+6771fa8f3309c0998b94a699
+6771fd7c3309c0998b94ba39
+6771fab83309c0998b94a8ca
+6771f8fd3309c0998b9499fb
+6772006b3309c0998b94cbd2
+6771f9a83309c0998b94a04c
+6771f93a3309c0998b949d09
+6771fa1e3309c0998b94a4c1
+6771fc383309c0998b94b3cf
+677203153309c0998b94dbb5
+6772031f3309c0998b94dbdc
+6771f87d3309c0998b94957e
+6771ffe73309c0998b94c8fa
+6771f80b3309c0998b94923b
+6771fd593309c0998b94b981
+6771f8e93309c0998b94992d
+6771fb633309c0998b94ad79
+6771fc2c3309c0998b94b345
+677201ed3309c0998b94d4ab
+677203843309c0998b94df50
+677210933309c0998b95398d
+67720e843309c0998b952b23
+67720c6f3309c0998b951d58
+677210663309c0998b953781
+677208123309c0998b94fe5a
+677211ee3309c0998b9541c2
+67720fc83309c0998b953331
+677208bd3309c0998b95027b
+6772058f3309c0998b94ee0e
+677209c33309c0998b950b0c
+677210233309c0998b9535b7
+67720c5b3309c0998b951c79
+67720d4c3309c0998b9521a1
+67720e273309c0998b9527f1
+67720ac83309c0998b9511b6
+6772057a3309c0998b94ec81
+6772081c3309c0998b94fec3
+67720db13309c0998b952472
+6772094d3309c0998b95075d
+67720fc83309c0998b95333f
+67720a893309c0998b95105e
+6772099a3309c0998b950935
+677206253309c0998b94f0f3
+67720b413309c0998b95153c
+677205c63309c0998b94eece
+67720b7d3309c0998b951643
+67720d323309c0998b9520bc
+677209e63309c0998b950c3d
+677209d73309c0998b950b9a
+677208633309c0998b950033
+677209723309c0998b950808
+67720a633309c0998b950eff
+67720cc03309c0998b951ecc
+67720eac3309c0998b952c53
+67720d1e3309c0998b952069
+67720e643309c0998b9529e5
+6772107a3309c0998b953863
+67720c1f3309c0998b951b25
+677210343309c0998b953666
+677206613309c0998b94f281
+677210053309c0998b9534fd
+6772103e3309c0998b953694
+677207ea3309c0998b94fd44
+6772083b3309c0998b94ff14
+6772052a3309c0998b94eb86
+677205e93309c0998b94eff5
+67720da23309c0998b9523aa
+677208593309c0998b94ffda
+67720bd83309c0998b95187a
+676ca9a87ee35ef267116d1e
+677210c13309c0998b953b41
+67720f573309c0998b953095
+677213573309c0998b954b96
+677212c23309c0998b95480d
+677206393309c0998b94f198
+677208d13309c0998b9502d3
+67720f933309c0998b95319d
+677209403309c0998b9506b2
+67720f113309c0998b952ed4
+677207a43309c0998b94fb40
+677210343309c0998b95365c
+677210d53309c0998b953bc4
+6772120c3309c0998b95428d
+677208313309c0998b94ff06
+677210193309c0998b953588
+67720be23309c0998b951926
+67720be23309c0998b951927
+677208e53309c0998b950411
+677206873309c0998b94f34f
+677211223309c0998b953d1d
+67720d563309c0998b9521f7
+67720f933309c0998b9531bd
+67720c333309c0998b951b9b
+677212b73309c0998b954771
+677207493309c0998b94f829
+6772104c3309c0998b95370c
+677205343309c0998b94ec09
+67720b9b3309c0998b95174c
+67720c1f3309c0998b951b1e
+67720e523309c0998b952930
+677210df3309c0998b953bf7
+6772057a3309c0998b94ec94
+6772061b3309c0998b94f05f
+677210993309c0998b9539b3
+67720cfc3309c0998b951fe7
+67720ed43309c0998b952d31
+67720d603309c0998b9522fc
+6772069e3309c0998b94f41e
+677208593309c0998b94ffa9
+677213143309c0998b9549f3
+67720c473309c0998b951bfe
+6772111b3309c0998b953ce4
+67720f393309c0998b952faa
+677211da3309c0998b954104
+677206c63309c0998b94f567
+67720abe3309c0998b951188
+677206c63309c0998b94f576
+67720b733309c0998b95161c
+67720bd83309c0998b9518d1
+67720a783309c0998b950fc9
+67720bc43309c0998b9517bd
+67720fa93309c0998b9532b6
+6772100c3309c0998b953520
+677211da3309c0998b9540e1
+67720c833309c0998b951db4
+677209363309c0998b950608
+677209723309c0998b9507d3
+677212f33309c0998b95489b
+67720be23309c0998b95194d
+677207e03309c0998b94fcb6
+677210d13309c0998b953ba1
+677207e03309c0998b94fcca
+6772131e3309c0998b954a3f
+67720f613309c0998b9530c4
+67720e313309c0998b952833
+67720f4d3309c0998b952ff9
+677208123309c0998b94fe5f
+6772147b3309c0998b95530a
+677215563309c0998b9559d4
+677219073309c0998b9570cf
+67721f783309c0998b9597fb
+67721e873309c0998b95921d
+677215423309c0998b955951
+67721a603309c0998b957937
+6772160d3309c0998b955da0
+677218903309c0998b956d8a
+677216d83309c0998b9562b3
+67721ce13309c0998b958815
+677218cd3309c0998b956f0a
+67721c373309c0998b9582cc
+677214fe3309c0998b955620
+677217db3309c0998b956896
+67721f8c3309c0998b9598a9
+677215db3309c0998b955cfb
+67721dc83309c0998b958e25
+677213b23309c0998b954e35
+6772171f3309c0998b956391
+67721a5e3309c0998b957924
+67721fd33309c0998b959bb1
+67721c0a3309c0998b9581b3
+67721fb53309c0998b959aa9
+677214993309c0998b9553c7
+67721f8c3309c0998b9598ab
+67721e193309c0998b958fea
+67721e7d3309c0998b95919b
+67721e4e3309c0998b959059
+67721d323309c0998b9589df
+67721c043309c0998b95817f
+67721c143309c0998b958200
+67721c7a3309c0998b958549
+67721b3b3309c0998b957c66
+67721f5a3309c0998b9596bd
+677217263309c0998b9563b3
+67721fa13309c0998b9599c0
+6772159d3309c0998b955b13
+677214623309c0998b95528e
+67721ce13309c0998b958842
+67721a183309c0998b9576f9
+677217453309c0998b95651f
+677215233309c0998b95578c
+67721aa73309c0998b957a64
+67721f5a3309c0998b9596cc
+677216863309c0998b956122
+67721e733309c0998b95912e
+67721a933309c0998b957a08
+6772139e3309c0998b954d8b
+67721d973309c0998b958c3c
+6772202d3309c0998b959dad
+67721eb93309c0998b9593b9
+67721e693309c0998b9590db
+677217523309c0998b9565da
+677218363309c0998b956ae9
+67721fe73309c0998b959c43
+677220603309c0998b959f02
+67721bdc3309c0998b958091
+677218fd3309c0998b95705e
+6772145d3309c0998b955284
+6772191d3309c0998b957191
+67721f5a3309c0998b9596bf
+677214ad3309c0998b955467
+677216a43309c0998b9561a6
+67721d973309c0998b958c34
+677214493309c0998b9551a8
+677214f43309c0998b9555d4
+6772191d3309c0998b9571bb
+6772202d3309c0998b959dfe
+67721d093309c0998b958951
+677219313309c0998b95723d
+67721ccb3309c0998b9587a1
+6772174f3309c0998b9565a8
+67721f1e3309c0998b9595c0
+6772154c3309c0998b95599b
+6772202d3309c0998b959dd2
+677219d23309c0998b957569
+6772146d3309c0998b9552ac
+677218fd3309c0998b957050
+67721fa13309c0998b9599aa
+67721d503309c0998b958b02
+677218d73309c0998b956f42
+67721c0e3309c0998b9581ce
+67721ba03309c0998b957eec
+677215da3309c0998b955ca6
+67721fbc3309c0998b959ac7
+677214e63309c0998b955535
+67721e7d3309c0998b95916f
+67721baa3309c0998b957f6f
+6772159d3309c0998b955b2b
+67721ba03309c0998b957eda
+677215813309c0998b955ab6
+6772196d3309c0998b957305
+6772191d3309c0998b9571d2
+6772289c3309c0998b95d3c6
+677227353309c0998b95caa7
+6772297c3309c0998b95d92c
+67722c4a3309c0998b95e7c1
+677220ed3309c0998b95a25f
+67722af93309c0998b95e0e0
+677228263309c0998b95d10d
+67722f2d3309c0998b95f1cf
+677221153309c0998b95a3c2
+6772275d3309c0998b95cca8
+677222d03309c0998b95b060
+6772292b3309c0998b95d754
+67722a3b3309c0998b95dcf6
+67722fa63309c0998b95f369
+6772241b3309c0998b95b81d
+6772269e3309c0998b95c81f
+677228bd3309c0998b95d44a
+677224803309c0998b95bae4
+677225c13309c0998b95c193
+677225f33309c0998b95c2fd
+677227533309c0998b95cbe3
+67722c8b3309c0998b95e89f
+6772255c3309c0998b95be29
+677221293309c0998b95a4b1
+677224c73309c0998b95bb59
+67722bd73309c0998b95e5d4
+677228263309c0998b95d0c8
+677228d13309c0998b95d492
+677227ad3309c0998b95cddb
+67722e113309c0998b95ee42
+677220ed3309c0998b95a25a
+6772269e3309c0998b95c814
+6772242f3309c0998b95b954
+677227c13309c0998b95ce5c
+67722abd3309c0998b95df7c
+67722efa3309c0998b95f0fb
+677220ed3309c0998b95a2a2
+677226763309c0998b95c619
+67722b723309c0998b95e3da
+677229a43309c0998b95d9eb
+677226763309c0998b95c5b1
+677225ad3309c0998b95c039
+677224e43309c0998b95bc65
+6772263a3309c0998b95c49b
+67722e1d3309c0998b95ee88
+677225ad3309c0998b95bf58
+677228f93309c0998b95d680
+67722ae53309c0998b95e0bd
+6772217a3309c0998b95a7ed
+677222a73309c0998b95afb8
+6772242f3309c0998b95b9df
+677220d83309c0998b95a20a
+677227493309c0998b95cb4f
+67722b403309c0998b95e26c
+67722a6d3309c0998b95dda5
+6772302a3309c0998b95f52f
+677224ee3309c0998b95bc76
+677221673309c0998b95a75b
+677228e33309c0998b95d4e8
+677228303309c0998b95d1c8
+677229353309c0998b95d801
+677228603309c0998b95d29d
+67722b4a3309c0998b95e304
+677223523309c0998b95b27e
+677227a33309c0998b95cd73
+677225983309c0998b95bf0c
+677234ff3309c0998b96052f
+677239f33309c0998b961830
+677232303309c0998b95fbb0
+67723c773309c0998b9623c3
+67723dd33309c0998b9629bb
+67723dc33309c0998b962985
+67723b743309c0998b9620e2
+6772323b3309c0998b95fbba
+677237cb3309c0998b960ef7
+67723c203309c0998b9622e6
+6772328b3309c0998b95fc85
+677230d13309c0998b95f74c
+6772397c3309c0998b961704
+67723d183309c0998b96264f
+677239673309c0998b9616bb
+6772384d3309c0998b961136
+677238073309c0998b961000
+677232773309c0998b95fc47
+6772304f3309c0998b95f5a8
+67723c8f3309c0998b962440
+677238113309c0998b961079
+67723bc23309c0998b96217f
+67723c133309c0998b962275
+677239c23309c0998b9617bc
+677238803309c0998b96121a
+67723c9f3309c0998b96248b
+677237363309c0998b960d33
+67723abd3309c0998b961d15
+6772362f3309c0998b9608c1
+677237673309c0998b960d84
+677233ea3309c0998b9600de
+67723d7c3309c0998b96282a
+677233f43309c0998b9600fc
+67723d7c3309c0998b962831
+67723c813309c0998b9623f8
+677236583309c0998b9609fc
+677236bc3309c0998b960b94
+677235bf3309c0998b960776
+67723b5f3309c0998b96204a
+677232303309c0998b95fb9e
+677237693309c0998b960dad
+677239513309c0998b961636
+67723a2f3309c0998b9619e5
+677232ef3309c0998b95fd7f
+677233173309c0998b95fe51
+677235ad3309c0998b9606f3
+677239fd3309c0998b961851
+677238253309c0998b9610c1
+67723c2a3309c0998b962322
+67723b7c3309c0998b9620e5
+67723d7c3309c0998b962834
+6772494d3309c0998b96676e
+677246463309c0998b965027
+67724a0c3309c0998b966d15
+677249063309c0998b96639c
+677246c93309c0998b9652c1
+677244e93309c0998b96492b
+677248fd3309c0998b966383
+677247f53309c0998b965b16
+67724ad53309c0998b9671fd
+67723de73309c0998b962a46
+67724aad3309c0998b967106
+6772443c3309c0998b96466d
+67724ad53309c0998b9671b5
+6772496c3309c0998b966893
+67724b263309c0998b9674f7
+677246973309c0998b9651a0
+677243413309c0998b9640ea
+677248463309c0998b965d47
+677248503309c0998b965d8d
+677247923309c0998b9657fa
+677248ac3309c0998b966160
+677247eb3309c0998b965ab7
+67723f5d3309c0998b962fcf
+677248843309c0998b965f88
+677247a63309c0998b965863
+677243013309c0998b963fcd
+677241803309c0998b96379d
+67724b213309c0998b9674d4
+67724a0c3309c0998b966d1d
+677240783309c0998b96344d
+67724a0f3309c0998b966db2
+677242b43309c0998b963df3
+67723edc3309c0998b962d9d
+67723ed23309c0998b962d79
+67724aad3309c0998b9670f8
+67724a993309c0998b9670a1
+67723f873309c0998b963044
+67724b553309c0998b967690
+677244de3309c0998b9648dd
+67724b263309c0998b9674f9
+677247713309c0998b965725
+67724a243309c0998b966e03
+677244fb3309c0998b96497e
+67724af33309c0998b967378
+67724b8b3309c0998b967844
+677249e33309c0998b966b3b
+67724b633309c0998b96770b
+6772438f3309c0998b9642e9
+677247e33309c0998b965a96
+677246f73309c0998b9653f7
+6772477b3309c0998b96573d
+67723faf3309c0998b96316c
+67724b8b3309c0998b967823
+677244963309c0998b964819
+677244003309c0998b96459b
+67724a0c3309c0998b966cf1
+677243ec3309c0998b9644a7
+6772415e3309c0998b96373e
+677240b43309c0998b96352e
+677244ca3309c0998b9648b7
+677241ff3309c0998b96395b
+677241043309c0998b963618
+677241803309c0998b963797
+677243153309c0998b964032
+677243f63309c0998b96452d
+677246323309c0998b964f8c
+677247ff3309c0998b965b8d
+677242b03309c0998b963db3
+67724a0c3309c0998b966d6f
+67724aa73309c0998b9670d2
+677241043309c0998b9635f0
+6772425a3309c0998b963b94
+677247ce3309c0998b9659f9
+6772414a3309c0998b96370e
+677248f33309c0998b966353
+677243f63309c0998b964516
+677246193309c0998b964eb0
+677240643309c0998b9633f9
+67724bbd3309c0998b9679da
+677247ce3309c0998b965a11
+677248a23309c0998b966141
+677248843309c0998b966006
+67724f8e3309c0998b96a0ae
+67724c043309c0998b967c79
+67724c913309c0998b9681f4
+677251f13309c0998b96bb19
+67724eef3309c0998b969a29
+677252153309c0998b96be1b
+677259913309c0998b970352
+6772579f3309c0998b96f126
+67724bd23309c0998b967aae
+677254673309c0998b96d22b
+6772584b3309c0998b96f59d
+677253a73309c0998b96cc6b
+677252563309c0998b96c02c
+677257393309c0998b96ed7c
+67724c7c3309c0998b96814e
+677251a63309c0998b96b936
+677257573309c0998b96ee58
+67724d733309c0998b968b29
+677250a93309c0998b96ac24
+6772514b3309c0998b96b622
+677253923309c0998b96caa5
+6772510e3309c0998b96b126
+677257dc3309c0998b96f2b7
+67724ccd3309c0998b9683cf
+6772548f3309c0998b96d476
+67724dfc3309c0998b968f9b
+6772574d3309c0998b96edf8
+677251773309c0998b96b732
+677256033309c0998b96e01e
+677252e83309c0998b96c4bf
+6772509f3309c0998b96aba0
+67724e043309c0998b968fb6
+6772528d3309c0998b96c1d8
+6772591f3309c0998b96fe52
+67724d283309c0998b9686d2
+677250f03309c0998b96afc5
+677256273309c0998b96e1db
+677259a23309c0998b9703f7
+67724d733309c0998b968b2a
+67724f843309c0998b969ff5
+67724d3c3309c0998b96877b
+67724f703309c0998b969e59
+677256863309c0998b96e5e5
+67724bcf3309c0998b967a6e
+677254b83309c0998b96d6e9
+677256033309c0998b96e055
+67724d973309c0998b968bb4
+677253613309c0998b96c9ab
+677251243309c0998b96b277
+677258ce3309c0998b96fa07
+677258933309c0998b96f8d7
+67724c733309c0998b9680b7
+67724c873309c0998b9681d3
+677252c93309c0998b96c334
+677258603309c0998b96f70c
+677252473309c0998b96bfe4
+6772564f3309c0998b96e35f
+6772523d3309c0998b96bf93
+677253553309c0998b96c8fe
+677252e83309c0998b96c5a0
+6772521f3309c0998b96be6a
+67724cd73309c0998b968431
+677255ad3309c0998b96ddc0
+677255c73309c0998b96dea2
+677253ed3309c0998b96cf3f
+677259143309c0998b96fd3e
+67724c363309c0998b967e51
+67724efa3309c0998b969a61
+67724e563309c0998b9692fd
+677250613309c0998b96a874
+6772535f3309c0998b96c92c
+67724fb73309c0998b96a292
+677251a63309c0998b96b925
+677256823309c0998b96e583
+67724f6a3309c0998b969e33
+677258d83309c0998b96fab5
+6772591f3309c0998b96fe1c
+677251183309c0998b96b1ce
+67724eb13309c0998b96956a
+677254673309c0998b96d22a
+6772597a3309c0998b970236
+67724ee53309c0998b96999e
+6772548f3309c0998b96d46b
+677256683309c0998b96e4c2
+677253613309c0998b96c9be
+677259363309c0998b96ff87
+677252563309c0998b96c025
+6772515f3309c0998b96b6bb
+677258c63309c0998b96f9fb
+677255d13309c0998b96dedf
+677256823309c0998b96e56d
+677256cc3309c0998b96e845
+67724ccd3309c0998b9683e2
+677254483309c0998b96d146
+677257963309c0998b96f0be
+677250953309c0998b96ab53
+677256593309c0998b96e3f2
+6772563f3309c0998b96e2b7
+677251553309c0998b96b66d
+67724ec63309c0998b96972f
+677258733309c0998b96f803
+677251183309c0998b96b1c2
+67724fad3309c0998b96a23d
+67724c4b3309c0998b967f56
+677253933309c0998b96caba
+677252d43309c0998b96c3b9
+67724ffa3309c0998b96a57a
+6772574f3309c0998b96ee24
+6772561c3309c0998b96e1b2
+67724dd03309c0998b968d40
+67724ee53309c0998b9699b3
+67724da13309c0998b968be1
+677255ea3309c0998b96df39
+67724e423309c0998b9691d1
+677252e83309c0998b96c5df
+677257313309c0998b96ed4c
+677250953309c0998b96ab1d
+6772506c3309c0998b96a9dc
+67724c723309c0998b968059
+677253063309c0998b96c69a
+67724d973309c0998b968bbb
+6772591e3309c0998b96fdd1
+677254853309c0998b96d3fb
+67724c873309c0998b96819e
+677253b23309c0998b96cdcd
+677252973309c0998b96c26f
+67724f753309c0998b969eee
+67724fd13309c0998b96a400
+677257133309c0998b96ebe8
+67724f8e3309c0998b96a01c
+677253ed3309c0998b96cf44
+67724c873309c0998b9681ce
+677259293309c0998b96fe65
+67724c403309c0998b967e9e
+677255993309c0998b96dd57
+67724ffa3309c0998b96a591
+677253a73309c0998b96cc3a
+6772502d3309c0998b96a6ef
+6772528d3309c0998b96c224
+677258233309c0998b96f433
+677252a73309c0998b96c2ed
+67724c4b3309c0998b967ef2
+67724ca53309c0998b9682fc
+677259a63309c0998b970411
+6772528d3309c0998b96c1b0
+677259a23309c0998b9703ed
+67725f973309c0998b973b44
+67725fed3309c0998b973e16
+67725ed53309c0998b9734a3
+677262af3309c0998b975d93
+67725efc3309c0998b9735a4
+67725b533309c0998b9712c5
+677259e83309c0998b97064d
+67725f883309c0998b973a7a
+67725d4f3309c0998b97265f
+67725beb3309c0998b971b4d
+677265d23309c0998b9781d9
+677261c33309c0998b9751aa
+67725cf03309c0998b972313
+67725bb83309c0998b97193a
+67725c6d3309c0998b971eaa
+67725cac3309c0998b972092
+67725a203309c0998b9707f4
+67725b5d3309c0998b9713bd
+67725e003309c0998b972e09
+677264c63309c0998b9775b8
+67725a5d3309c0998b97092c
+67725dce3309c0998b972ba1
+677265353309c0998b977a9a
+67725d4f3309c0998b97264f
+67725b343309c0998b9711da
+6772618b3309c0998b974f7c
+67725d693309c0998b97270e
+67725cb43309c0998b9720c3
+67725e003309c0998b972ddf
+67725a4d3309c0998b970885
+677264933309c0998b97729b
+677259b03309c0998b970458
+677264603309c0998b977014
+677265053309c0998b977880
+67725dd43309c0998b972bd7
+67725cb43309c0998b9720a6
+677261f73309c0998b975358
+67725f883309c0998b973a8d
+677264223309c0998b976d83
+67725fb13309c0998b973bdb
+677265913309c0998b977eae
+67725fe33309c0998b973df8
+67725e123309c0998b972ee8
+677263de3309c0998b9769e3
+6772616d3309c0998b974de2
+67725f4c3309c0998b9738c7
+677260f63309c0998b9749a3
+67725e793309c0998b9731ee
+677263543309c0998b976448
+67725b023309c0998b970f90
+67725d453309c0998b9725f5
+67725b333309c0998b97119f
+677262f63309c0998b975fe2
+677264943309c0998b9772d3
+67725ef43309c0998b97354f
+67725a903309c0998b970b14
+677261123309c0998b974aee
+67725be03309c0998b971b15
+67725d263309c0998b972525
+677259b03309c0998b97045d
+677262403309c0998b9756d8
+67725abc3309c0998b970d56
+67725b513309c0998b971279
+67725f973309c0998b973b2e
+677266333309c0998b978621
+677262ce3309c0998b975ed8
+6772601e3309c0998b974076
+67725c813309c0998b971f8a
+677266283309c0998b978592
+677264743309c0998b977123
+6772647f3309c0998b9771b4
+677260523309c0998b97431f
+6772615d3309c0998b974da0
+677260cb3309c0998b9747ff
+67725ed53309c0998b9734bd
+67725ce63309c0998b972296
+6772659b3309c0998b977f1e
+67725bb83309c0998b971901
+677260cb3309c0998b974874
+6772633d3309c0998b9763b7
+677265be3309c0998b9780d3
+6772614e3309c0998b974ccb
+677265f73309c0998b978382
+67725dc43309c0998b972b1c
+67725b9a3309c0998b97175a
+677264893309c0998b977276
+67725a903309c0998b970b19
+6772600b3309c0998b973f94
+677263f23309c0998b976b2f
+677261123309c0998b974ac8
+677262f63309c0998b97601c
+677264183309c0998b976ce5
+677265de3309c0998b978291
+67725a673309c0998b97095a
+67725c1d3309c0998b971d32
+67725b673309c0998b971458
+67725f883309c0998b973a47
+67725bae3309c0998b971847
+67725dce3309c0998b972bc3
+67725e6f3309c0998b97319c
+67725f4c3309c0998b9738ba
+677262e33309c0998b975f60
+67725fe33309c0998b973de0
+677260e13309c0998b97495a
+677263d43309c0998b976925
+6772635f3309c0998b9764db
+677264e43309c0998b9776c8
+67725ba43309c0998b9717c2
+67725aa43309c0998b970c40
+67725a9d3309c0998b970c0b
+677264223309c0998b976d8f
+677264183309c0998b976ce8
+67725dd83309c0998b972bf0
+677262913309c0998b975c62
+67725e833309c0998b973262
+677265ad3309c0998b977fc6
+67725d8c3309c0998b9727fb
+677265ad3309c0998b977fcb
+677265d23309c0998b9781b1
+677265a33309c0998b977f5c
+677261583309c0998b974d84
+677260013309c0998b973f1f
+677263793309c0998b9765e5
+6772658e3309c0998b977e69
+677261813309c0998b974f24
+677264893309c0998b9771e2
+677264e63309c0998b977738
+67725cdf3309c0998b972245
+67725dba3309c0998b972a1c
+677264603309c0998b977011
+67725dce3309c0998b972b77
+67725e263309c0998b972f4b
+677261443309c0998b974c24
+67725c133309c0998b971cc4
+677266473309c0998b978748
+677272443309c0998b97ff64
+67726b893309c0998b97c258
+67726c7b3309c0998b97cc17
+67726a3c3309c0998b97b27e
+67726c5c3309c0998b97cacf
+67726be33309c0998b97c62c
+677268de3309c0998b97a41e
+677268153309c0998b979ca8
+6772701a3309c0998b97ed8c
+677273243309c0998b98067d
+67726ac73309c0998b97b958
+67726cfd3309c0998b97d081
+67726c203309c0998b97c7e2
+67726fb03309c0998b97e8d2
+677272263309c0998b97fdf5
+6772706a3309c0998b97efea
+677270ee3309c0998b97f4a6
+67726a283309c0998b97b1bd
+67726c523309c0998b97ca7e
+67726f873309c0998b97e76f
+677269c03309c0998b97acc9
+67726ccb3309c0998b97cf93
+677267c43309c0998b979947
+677266763309c0998b9788fe
+677273eb3309c0998b980c7d
+677266483309c0998b978775
+6772668c3309c0998b978a3c
+67726c2a3309c0998b97c81b
+677269973309c0998b97ab37
+67726ec33309c0998b97e1b2
+6772685a3309c0998b979f52
+677271993309c0998b97fa30
+677267943309c0998b979646
+67726ccb3309c0998b97cf5f
+67726a443309c0998b97b2c5
+677268843309c0998b97a158
+67726a6e3309c0998b97b4a1
+67726c3d3309c0998b97c8eb
+677268e13309c0998b97a466
+67726b253309c0998b97bd25
+677267043309c0998b978fd7
+677267043309c0998b978fa8
+67726afb3309c0998b97bbdf
+67726f873309c0998b97e76e
+67726a963309c0998b97b6e6
+6772729f3309c0998b980243
+67726de33309c0998b97d9cc
+67726b383309c0998b97be4b
+67726fa63309c0998b97e8a4
+677272ef3309c0998b9803b7
+679197783309c0998bf5dc6b
+679189903309c0998bf5b735
+679198293309c0998bf5dfff
+679189ae3309c0998bf5b772
+67919df33309c0998bf6050b
+679197783309c0998bf5dc19
+6791b39d3309c0998bf67e56
+679195bd3309c0998bf5d43f
+679184d43309c0998bf5b02c
+679189363309c0998bf5b680
+6791aef13309c0998bf66032
+6791b4cb3309c0998bf68540
+6791b4523309c0998bf6821a
+6791c43c3309c0998bf6e12f
+67919f853309c0998bf60ba7
+6791a6073309c0998bf62e62
+6791af7e3309c0998bf662cc
+6791b4f33309c0998bf685db
+679197dc3309c0998bf5de80
+6791a6113309c0998bf62e99
+6791b08d3309c0998bf66ad9
+6791b3933309c0998bf67e0f
+6791a5c23309c0998bf62d8c
+6801a22e5f834436f098d3f0
+6801a0665f834436f098ccb9
+6801a0295f834436f098cbaf
+6801a0265f834436f098cba4
+6801a0015f834436f098cb5d
+68019ffd5f834436f098cb45
+68019fec5f834436f098caf0
+68019fca5f834436f098ca75
+68019fc05f834436f098ca73
+68019fb65f834436f098ca63
+68019fac5f834436f098ca4e
+68019fa55f834436f098ca3a
+68019fa55f834436f098ca36
+68019f915f834436f098c9f6
+68019f795f834436f098c9b1
+68019f5b5f834436f098c99d
+68019f515f834436f098c994
+68019f515f834436f098c993
+68019f405f834436f098c98e
+68019f405f834436f098c98a
+68019f365f834436f098c945
+68019f365f834436f098c92f
+68019f2b5f834436f098c8ae
+68019eff5f834436f098c85a
+68019ed75f834436f098c7b4
+68019ed75f834436f098c7ad
+68019e905f834436f098c716
+68019e3e5f834436f098c5f9
+68019e245f834436f098c5ce
+68019e1a5f834436f098c56d
+68019dce5f834436f098c4be
+68019dc85f834436f098c49f
+68019d6d5f834436f098c368
+68019d685f834436f098c330
+68019d545f834436f098c298
+68019cf95f834436f098c236
+68019cee5f834436f098c1ed
+68019cb75f834436f098c190
+68019c9d5f834436f098c119
+68019c935f834436f098c10e
+68019c7e5f834436f098c0da
+68019c6a5f834436f098c089
+68019c605f834436f098c046
+68019c5f5f834436f098c03b
+68019c415f834436f098bfe7
+68019c2d5f834436f098bfa7
+68019c185f834436f098bf45
+68019bb25f834436f098be55
+68019bb25f834436f098be53
+68019b905f834436f098bddc
+68019b895f834436f098bdbc
+68019b755f834436f098bd8d
+68019b6a5f834436f098bd6d
+68019b605f834436f098bd46
+68019b355f834436f098bc8d
+68019af05f834436f098bbfa
+68019ad95f834436f098bb6e
+68019a955f834436f098bae2
+68019a8b5f834436f098bad8
+68019a745f834436f098ba78
+68019a395f834436f098ba03
+68019a385f834436f098b9f6
+68019a105f834436f098b977
+680199e65f834436f098b904
+6801996d5f834436f098b7c4
+680199305f834436f098b786
+6801991c5f834436f098b743
+680198cc5f834436f098b6b2
+680198cb5f834436f098b6a6
+680198cb5f834436f098b6a3
+680198715f834436f098b5e6
+6801985c5f834436f098b580
+6801985c5f834436f098b57f
+680198515f834436f098b556
+6801980a5f834436f098b40b
+680198005f834436f098b3f1
+680198005f834436f098b3f0
+680197f55f834436f098b3e5
+680197f55f834436f098b3e4
+680197d85f834436f098b3b7
+680197d75f834436f098b3af
+680197d75f834436f098b3ab
+680197cd5f834436f098b38a
+680197c25f834436f098b374
+680197c25f834436f098b373
+680197b95f834436f098b34c
+680197215f834436f098b191
+680196d75f834436f098b0a5
+680196cd5f834436f098b091
+680196b15f834436f098b059
+680196865f834436f098afe9
+680195fb5f834436f098aee7
+680195e35f834436f098aec3
+680195d95f834436f098aeb6
+680195c55f834436f098ae8f
+680195a65f834436f098ae71
+680194ef5f834436f098ace0
+6801949e5f834436f098ac6c
+680194385f834436f098ab6e
+680194245f834436f098ab41
+680193b25f834436f098aa01
+680193125f834436f098a8a1
+6801925b5f834436f098a699
+680192505f834436f098a65c
+6801923c5f834436f098a5cd
+680192275f834436f098a569
+680191e05f834436f098a43d
+680191d45f834436f098a3fa
+680191475f834436f098a2e7
+680190e15f834436f098a1e2
+680190e15f834436f098a1d3
+680190d75f834436f098a1c5
+680190d75f834436f098a1b1
+680190ce5f834436f098a18f
+680190c35f834436f098a14c
+680190535f834436f098a072
+680190345f834436f098a03d
+6801902a5f834436f098a021
+6801901f5f834436f0989ff8
+68018fa55f834436f0989ee8
+68018f9b5f834436f0989ed1
+68018f725f834436f0989e88
+68018f535f834436f0989e1c
+68018f495f834436f0989e08
+68018f2a5f834436f0989ddb
+68018e935f834436f0989d30
+68018e935f834436f0989d20
+68018e885f834436f0989cf9
+68018e5d5f834436f0989cce
+68018d265f834436f0989b68
+68018c965f834436f09899ce
+68018c785f834436f0989935
+68018c595f834436f09898b3
+68018c115f834436f0989815
+68018bac5f834436f0989784
+68018b8e5f834436f0989779
+68018b515f834436f0989740
+68018a905f834436f098965f
+68018a675f834436f098960f
+68018a5d5f834436f0989600
+68018a5d5f834436f09895fe
+680189a75f834436f098950d
+680189225f834436f09893fe
+680189045f834436f09893e9
+680188e55f834436f09893a5
+680188e55f834436f098939d
+680188db5f834436f0989387
+680188d05f834436f0989358
+680188a85f834436f0989345
+6801886c5f834436f09892e3
+680188435f834436f09892ba
+6801881a5f834436f0989231
+680187fc5f834436f0989226
+680187b45f834436f09891ba
+680187345f834436f09890fd
+680186ac5f834436f098901b
+680186295f834436f0988fce
+680186005f834436f0988f7b
+680184c55f834436f0988dbf
+680184a65f834436f0988d98
+680183e55f834436f0988cde
+680183d15f834436f0988cc4
+680183395f834436f0988c1d
+6801832f5f834436f0988c0c
+680183115f834436f0988bd7
+680182ab5f834436f0988b59
+6801828c5f834436f0988b31
+680181e05f834436f0988a28
+680181d65f834436f0988a1a
+680180e25f834436f09887e0
+680180915f834436f0988766
+6801804a5f834436f0988704
+680180355f834436f0988686
+68017fee5f834436f098865d
+68017fc55f834436f0988634
+68017f9d5f834436f0988606
+68017f0e5f834436f0988482
+68017eef5f834436f098840e
+68017ee55f834436f09883dc
+68017edb5f834436f09883c0
+68017e615f834436f098828d
+68017e425f834436f0988244
+68017d965f834436f09880cb
+68017cb65f834436f0987ebb
+68017cac5f834436f0987ea2
+68017c285f834436f0987d39
+68017c145f834436f0987d1f
+68017c0a5f834436f0987d05
+68017be15f834436f0987cae
+68017b725f834436f0987bba
+68017af85f834436f0987b01
+68016fb85f834436f0986b53
+68016cc65f834436f0986806
+68016a135f834436f09865cd
+680167b45f834436f098637d
+6801679f5f834436f0986369
+6801673a5f834436f0986304
+6801668e5f834436f09861e6
+6801667a5f834436f098618c
+680166155f834436f0986152
+6801655f5f834436f09860cf
+680164315f834436f0985f51
+680163ad5f834436f0985e9f
+680163ad5f834436f0985e9e
+6801608f5f834436f0985b98
+68015b815f834436f09854e5
+68015b585f834436f09854a0
+68015a295f834436f0985316
+680158c75f834436f09850bd
+680157145f834436f0984e24
+680151f45f834436f0984623
+680150995f834436f0984341
+68014c255f834436f0983c16
+68014be85f834436f0983ba9
+680145925f834436f0982f6e
+6801455f5f834436f0982f21
+680144945f834436f0982d4c
+6801448a5f834436f0982d40
+680144805f834436f0982d2c
+680144805f834436f0982d26
+680144805f834436f0982d1d
+680144805f834436f0982d0a
+680144765f834436f0982cf2
+680144765f834436f0982cd6
+680144755f834436f0982ccb
+680144425f834436f0982c51
+6801442d5f834436f0982bc7
+680140d75f834436f098242e
+6801409a5f834436f09823ea
+68013e265f834436f0981d50
+68013e1b5f834436f0981cae
+68013e075f834436f0981c61
+68013df35f834436f0981c15
+68013dd35f834436f0981b1b
+68013dd35f834436f0981b18
+68013dbf5f834436f0981abe
+68013dbf5f834436f0981abd
+68013dbf5f834436f0981abc
+68013db45f834436f0981a71
+68013d3d5f834436f09818d7
+68013b515f834436f09813e6
+68013af55f834436f09812c9
+680139225f834436f0980dd3
+680138a85f834436f0980d1e
+680137085f834436f0980903
+680136d65f834436f09808b2
+680131fb5f834436f097fa49
+680131b45f834436f097f972
+680131b45f834436f097f970
+6801312f5f834436f097f825
+6801312f5f834436f097f81b
+680130cb5f834436f097f665
+680130335f834436f097f59a
+680130285f834436f097f56f
+68012fa65f834436f097f46e
+68012cb25f834436f097eb66
+68012b0f5f834436f097e64e
+68012afd5f834436f097e628
+68012a455f834436f097e3ea
+68012a3a5f834436f097e3a2
+680129c15f834436f097e25c
+680129b65f834436f097e23f
+680129b65f834436f097e232
+680129ac5f834436f097e22b
+6801298e5f834436f097e20c
+6801295b5f834436f097e162
+680128e15f834436f097df7f
+680128985f834436f097ddd9
+680128795f834436f097dd65
+6801286f5f834436f097dd3b
+680128505f834436f097dc5e
+680127d85f834436f097dadd
+680127a55f834436f097d90c
+6801276e5f834436f097d816
+6801276e5f834436f097d815
+6801276d5f834436f097d7f9
+680127305f834436f097d67b
+680127255f834436f097d5b2
+680127245f834436f097d576
+680126e85f834436f097d4de
+680126ce5f834436f097d3e9
+680126575f834436f097d0f6
+6801263d5f834436f097cf28
+6801260f5f834436f097ce75
+680125fa5f834436f097cd63
+680125c55f834436f097ca71
+680125c55f834436f097ca6f
+680125c45f834436f097ca5a
+6800e3065f834436f0967275
+6800d6895f834436f095e3a0

二进制
tools/高质量站点第一版/export_result_信用.xlsx


二进制
tools/高质量站点第一版/export_result_招标.xlsx


二进制
tools/高质量站点第一版/export_result_招标新.xlsx


二进制
tools/高质量站点第一版/export_result_结果.xlsx


二进制
tools/高质量站点第一版/export_result_采购意向.xlsx


二进制
tools/高质量站点第一版/export_result_采购意向新.xlsx


二进制
tools/高质量站点第一版/export_result_预告.xlsx


二进制
tools/高质量站点第一版/export_result_预告新.xlsx


二进制
tools/高质量站点第一版/flag1_unique_spidercodes.xlsx


+ 51 - 0
tools/高质量站点第一版/spidercodes.csv

@@ -0,0 +1,51 @@
+spidercode
+a_qgzbgggsssyq_qbgg
+a_zgzfcgw_zydwzfcgyxgk_gjjs_01
+sc_gzzwsjjcgxt_jjgg
+gd_gdszfcgw_syss_cggg
+jx_jxszfcgdzmc_ggdt_htgg
+a_zgzfcgw_zfcghtgg_new
+a_zgjcjtcgpt_fzbgg_cggg
+a_zgjcjtcgpt_fzbgg_jggg
+hn_hnszfcgdzmc_hnsbj_ggdt
+xj_xjwwezzqzfcgw_dzmcgg_cgcg
+a_zgzfcgw_zydwzfcgyxgk_gjjs_new_01
+a_zgzbtbggfwpt_zbgg2
+js_ntszfcgwssc_xjgg_xqgg
+gx_gxzzzzqzfcg_dzmchtgg
+zj_zjzfcgw_cggg_sylx
+a_oycg_gkcggg
+ah_ahzfcgypt_cjgg
+a_zgjcjtcgpt_fzbgg_bggg
+a_jdcgwxwz_cgdtzxcgxx
+js_ntszfcgwssc_xjgg_cjgg
+gd_gdswszjfwcs_cggg
+a_zgzbtbggfwpt_zhbjggs2
+ah_ahzfcgypt_htgg
+sd_zgsdzfcgw_xxgk_sxhtgk
+gd_gdszfcgw_syss_dzmc
+a_jsxmhjyxdjbbaxt_gg_nipc
+a_zgzbtbggfwpt_wasjgf_zbgg
+gz_gzszfcgdzmc_gzsbj_ggdt_01
+ah_ahzfcgypt_ysgg
+ha_hnstzxmzxspjgptbsdt_xmbljggs_njpc
+hb_hbzwfww_bacx_njpc
+a_zgzbycgw_zbxx_zbxx
+a_gtcgpt_cgjg
+a_zgjcjtcgpt_zbzq_zhbgg
+xj_xjwwezzqzfcgw_dzmcgg_wscs
+gd_gdswszjfwcs_zxgs
+sd_zgsdzfcgw_sxzhbgg_new
+a_syjtyxgs_zh
+jx_jxswszjfwcs_cggg
+a_gjggzyjypt_gcjs_kbjl
+nm_nmgzzqzfcgw_dzmc_htgs
+a_zgzbtbggfwpt_wasjgf_kbjl
+a_gtcgpt_cggg
+jx_jxszfcgdzmc_htgg
+js_jsstzxmzxspjgpt_gsxx_bazcx_njpc
+a_zgzbtbggfwpt_zhbhxrgs2
+a_bjgc_jggs
+a_zgzbycgw_zbxx_zb
+a_zgzfcgw_dfgg_new
+a_zgzfcgw_zydwzfcgyxgk_gjjs

二进制
tools/高质量站点第一版/transformed_data.xlsx


+ 39 - 29
tools/高质量站点第一版/根据id找出爬虫代码.py

@@ -1,37 +1,47 @@
-import csv
+from bson import ObjectId
 from pymongo import MongoClient
+import csv
 
+def process_tagged_documents():
+    # 直接在URI中包含用户名和密码
+    username = "viewdata"
+    password = "viewdata"
+    host = "127.0.0.1"  # 例如: localhost 或 192.168.1.100
+    port = "27088"  # 默认MongoDB端口
 
-def mark_spidercodes_in_mongo(csv_file_path, mongo_uri, db_name, collection_name):
-    # 连接到MongoDB
-    client = MongoClient(mongo_uri)
-    db = client[db_name]
-    collection = db[collection_name]
-
-    # 读取 CSV 文件中的 spidercodes
-    with open(csv_file_path, mode='r', encoding='utf-8') as csv_file:
-        csv_reader = csv.reader(csv_file)
-        next(csv_reader)  # 跳过标题行
-        spidercodes = [row[0] for row in csv_reader]  # 假设 spidercode 是第一列
-
-    # 去除重复的 spidercodes
-    unique_spidercodes = list(set(spidercodes))
+    # 构建连接URI
+    mongo_uri = f"mongodb://{username}:{password}@{host}:{port}/"
 
-    # 批量更新符合条件的文档,设置 flag=1
-    result = collection.update_many(
-        {"spidercode": {"$in": unique_spidercodes}},
-        {"$set": {"tag": 1}}
-    )
 
-    print(f"成功更新了 {result.modified_count} 条文档")
+    # 连接MongoDB
+    client2 = MongoClient(mongo_uri,unicode_decode_error_handler="ignore", directConnection=True)  #bidding库
 
+    # 定义数据库和集合
+    db2 = client2['qfw']  # 替换为实际的数据库2名称
 
-# 使用示例
-if __name__ == "__main__":
-    # 配置参数
-    csv_file_path = 'spidercodes.csv'  # 替换为你的CSV文件路径
-    mongo_uri = 'mongodb://172.20.45.129:27002/'  # MongoDB连接字符串
-    db_name = 'data_quality'  # 数据库名称
-    collection_name = 'bidding_202505'  # 集合名称
+    collection2 = db2['bidding']  # 替换为实际的bidding集合名称
 
-    mark_spidercodes_in_mongo(csv_file_path, mongo_uri, db_name, collection_name)
+    # 读取 CSV 文件中的 spidercodes
+    with open('_id.csv', mode='r', encoding='utf-8') as csv_file:
+        csv_reader = csv.reader(csv_file)
+        next(csv_reader)  # 跳过标题行
+        for row in csv_reader:
+            _id = row[0]
+            doc_id = ObjectId(_id)
+
+            # 在库2的bidding集合中查找该id
+            bidding_doc = collection2.find_one({'_id': doc_id})
+            if bidding_doc:
+                # 提取需要的字段
+                result = {
+                    '_id': doc_id,
+                    'site': bidding_doc.get('site'),
+                    'channel': bidding_doc.get('channel'),
+                    'spidercode': bidding_doc.get('spidercode')
+                }
+
+            print(result)
+
+
+if __name__ == '__main__':
+    process_tagged_documents()

+ 17 - 23
tools/高质量站点第一版/统计标讯数量.py

@@ -1,36 +1,30 @@
 import csv
 from pymongo import MongoClient
-from collections import defaultdict
-def count_spidercodes_in_mongo(csv_file_path, mongo_uri, db_name, collection_name):
+
+
+def mark_spidercodes_in_mongo(csv_file_path, mongo_uri, db_name, collection_name):
     # 连接到MongoDB
     client = MongoClient(mongo_uri)
     db = client[db_name]
     collection = db[collection_name]
 
-    # 读取 CSV 文件(严格按行顺序)
+    # 读取 CSV 文件中的 spidercodes
     with open(csv_file_path, mode='r', encoding='utf-8') as csv_file:
         csv_reader = csv.reader(csv_file)
         next(csv_reader)  # 跳过标题行
         spidercodes = [row[0] for row in csv_reader]  # 假设 spidercode 是第一列
 
-        # 一次性查询所有符合条件的数据(避免多次查询)
-        # query = {"toptype": "招标", "spidercode": {"$in": list(set(spidercodes))}}
-        # 查询条件:subtype 是 "中标" 或 "成交"
-        query = {
-            "subtype": {"$in": ["中标", "成交"]},
-            "spidercode": {"$in": list(set(spidercodes))}
-        }
-        cursor = collection.find(query, {"spidercode": 1})
-
-        # 统计每个 spidercode 的数量
-        code_counts = {}
-        for doc in cursor:
-            code = doc["spidercode"]
-            code_counts[code] = code_counts.get(code, 0) + 1
-
-        # 按 CSV 顺序输出
-        for code in spidercodes:
-            print(f"{code}: {code_counts.get(code, 0)}")
+    # 去除重复的 spidercodes
+    unique_spidercodes = list(set(spidercodes))
+
+    # 批量更新符合条件的文档,设置 flag=1
+    result = collection.update_many(
+        {"spidercode": {"$in": unique_spidercodes}},
+        {"$set": {"tag": 1}}
+    )
+
+    print(f"成功更新了 {result.modified_count} 条文档")
+
 
 # 使用示例
 if __name__ == "__main__":
@@ -38,6 +32,6 @@ if __name__ == "__main__":
     csv_file_path = 'spidercodes.csv'  # 替换为你的CSV文件路径
     mongo_uri = 'mongodb://172.20.45.129:27002/'  # MongoDB连接字符串
     db_name = 'data_quality'  # 数据库名称
-    collection_name = 'result_new'  # 集合名称
+    collection_name = 'bidding_202505'  # 集合名称
 
-    count_spidercodes_in_mongo(csv_file_path, mongo_uri, db_name, collection_name)
+    mark_spidercodes_in_mongo(csv_file_path, mongo_uri, db_name, collection_name)

+ 16 - 5
tools/高质量站点第一版/高质量站点-脚本41.py

@@ -7,25 +7,35 @@ def process_tagged_documents_batch():
     # 连接MongoDB
     client = MongoClient('mongodb://172.20.45.129:27002/')  # 测试库
     db = client['data_quality']
-    collection = db['result_new']
+    collection = db['final_results']
 
     # 定义要检测的字段列表(可自定义)
     # fields_to_check = ['projectname', 'area', 'city', 'budget', 'buyer']
     # fields_to_check = ['projectname', 'projectcode','area', 'city', 'buyer']
     # fields_to_check = ['projectname', 'projectcode','area', 'city','budget', 'buyer','toptype','subtype']
+    # fields_to_check = ['projectcode','area', 'city','budget', 'buyer']
+
     fields_to_check = ['projectname', 'projectcode', 'area', 'city', 'bidamount', 's_winner', 'toptype', 'subtype']
     sort_order = [('_id', 1)]
 
     # 构建查询条件:toptype为"采购意向"且所有指定字段在v_taginfo中都为1
+    # query = {
+    #     "$and": [
+    #         {"$or": [{"subtype": "合同"}, {"subtype": "验收"}]},
+    #         *[{f"v_taginfo.{field}": 1} for field in fields_to_check]
+    #     ]
+    # }
+
     query = {
         "$and": [
-            {"$or": [{"subtype": "合同"}, {"subtype": "验收"}]},
-            *[{f"v_taginfo.{field}": 1} for field in fields_to_check]
+            # {"toptype": "招标"},
+            {"$or": [{"subtype": "中标"}, {"subtype": "成交"}]},
+            *[{f"{field}_flag": 1} for field in fields_to_check]
         ]
     }
 
     # 批量处理参数
-    batch_size = 500  # 每批处理500个文档
+    batch_size = 1000  # 每批处理500个文档
     operations = []
     processed_count = 0
 
@@ -36,13 +46,14 @@ def process_tagged_documents_batch():
         for doc in cursor:
             try:
                 doc_id = doc['_id']
+
                 # 输出当前正在处理的ID
                 print(f"正在处理文档 _id: {doc_id}")
                 # 准备批量操作
                 operations.append(
                     UpdateOne(
                         {'_id': doc_id},
-                        {'$set': {'flag5': 1}},
+                        {'$set': {'flag4': 1}},
                         upsert=True
                     )
                 )

二进制
tools/高质量站点第二版/.~spidercode_stats.xlsx


+ 91 - 0
tools/高质量站点第二版/ai抽取和规则抽取对比结果.py

@@ -0,0 +1,91 @@
+from pymongo import MongoClient
+from bson import ObjectId
+
+# 定义字段映射关系(ai_zhipu字段名 -> ext_ai_record字段名)
+FIELD_MAPPING = {
+    "s_city": "city",
+    "s_toptype": "toptype",
+    "s_winner": "s_winner",
+    "s_projectname": "projectname",
+    "s_agencytel": "agencytel",
+    "s_area": "area",
+    "s_buyer": "buyer",
+    "s_agency": "agency",
+    "s_subtype": "subtype"
+}
+
+
+def compare_fields(ai_zhipu, ext_ai_record):
+    """比较两个文档的字段,返回比较结果"""
+    comparison_result = {}
+
+    for ai_field, ext_field in FIELD_MAPPING.items():
+        ai_value = ai_zhipu.get(ai_field)
+        ext_value = ext_ai_record.get(ext_field)
+
+        # 处理None/null的情况
+        if ai_value is None or ext_value is None:
+            # 两个都为空 → 1
+            if ai_value is None and ext_value is None:
+                comparison_result[f"{ai_field}_flag"] = 1
+            else:
+                # 仅一个为空 → 0
+                comparison_result[f"{ai_field}_flag"] = 0
+            continue
+
+        # 特殊处理projectname字段(互相包含关系)
+        if ai_field == "s_projectname":
+            ai_str = str(ai_value).strip()
+            ext_str = str(ext_value).strip()
+
+            if ai_str in ext_str or ext_str in ai_str:
+                comparison_result[f"{ai_field}_flag"] = 1
+            else:
+                comparison_result[f"{ai_field}_flag"] = 0
+            continue
+
+        # 其他字段正常比较(精确匹配)
+        if str(ai_value).strip() == str(ext_value).strip():
+            comparison_result[f"{ai_field}_flag"] = 1
+        else:
+            comparison_result[f"{ai_field}_flag"] = 0
+
+    return comparison_result
+
+
+def main():
+    client = MongoClient('mongodb://172.20.45.129:27002/')
+    client1 = MongoClient('mongodb://127.0.0.1:27087/',unicode_decode_error_handler="ignore", directConnection=True)  #清洗库
+
+    db1 = client['data_quality']
+    db2 = client['库2名称']
+    final_results = db1['bidding_20250515']
+
+    for doc in final_results.find():
+        doc_id = doc['_id']
+        collection2 = db2[doc_id.collection.name]
+        doc2 = collection2.find_one({"_id": doc_id})
+
+        if not doc2:
+            print(f"未在库2中找到_id为{doc_id}的文档")
+            continue
+
+        ai_zhipu = doc2.get('ai_zhipu', {})
+        ext_ai_record = doc2.get('ext_ai_record', {})
+
+        if not ai_zhipu or not ext_ai_record:
+            print(f"文档{doc_id}缺少ai_zhipu或ext_ai_record字段")
+            continue
+
+        comparison_result = compare_fields(ai_zhipu, ext_ai_record)
+
+        update_result = final_results.update_one(
+            {"_id": doc_id},
+            {"$set": comparison_result}
+        )
+
+        print(f"更新文档{doc_id}: 匹配{update_result.modified_count}个字段")
+
+
+if __name__ == "__main__":
+    main()

二进制
tools/高质量站点第二版/spidercode_analysis.xlsx


二进制
tools/高质量站点第二版/spidercode_stats.xlsx


二进制
tools/高质量站点第二版/spidercode_stats1.xlsx


二进制
tools/高质量站点第二版/spidercode_stats_信用_err.xlsx


二进制
tools/高质量站点第二版/spidercode_stats_招标_err.xlsx


二进制
tools/高质量站点第二版/spidercode_stats_结果_err.xlsx


二进制
tools/高质量站点第二版/spidercode_stats_采购意向.xlsx


二进制
tools/高质量站点第二版/spidercode_stats_采购意向_err.xlsx


二进制
tools/高质量站点第二版/spidercode_stats_预告_err.xlsx


+ 201 - 0
tools/高质量站点第二版/增加一致性对比-智昆.py

@@ -0,0 +1,201 @@
+import os
+import time
+from pymongo import MongoClient
+import requests
+import json
+from bson import ObjectId
+
+# MongoDB 配置
+MongodbConfigSource = {
+    "ip_port": "127.0.0.1:27088",
+    "user": "viewdata",
+    "password": "viewdata",
+    "db": "qfw",
+    "col": "bidding"
+}
+
+MongodbConfigCompare = {
+    "ip_port": "127.0.0.1:27098",
+    "db": "qfw",
+    "col": "result_20220218"
+}
+
+MongodbConfigResult = {
+    "ip_port": "172.20.45.129:27002",
+    "db": "data_quality",
+    "col": "final_results"
+}
+
+# MongoDB 连接
+source_client = MongoClient(f"mongodb://{MongodbConfigSource['user']}:{MongodbConfigSource['password']}@{MongodbConfigSource['ip_port']}/")
+source_collection = source_client[MongodbConfigSource['db']][MongodbConfigSource['col']]
+
+compare_client = MongoClient(f"mongodb://{MongodbConfigCompare['ip_port']}?directConnection=true")
+compare_collection = compare_client[MongodbConfigCompare['db']][MongodbConfigCompare['col']]
+
+result_client = MongoClient(f"mongodb://{MongodbConfigResult['ip_port']}/")
+result_collection = result_client[MongodbConfigResult['db']][MongodbConfigResult['col']]
+
+API_URL = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
+HEADERS = {
+    "Authorization": "Bearer ba336a9ea90e4cbd973ca9a06d197193.UMwtFTgevHTAkFZz",
+    "Content-Type": "application/json"
+}
+
+def call_llm_api(text):
+    system_prompt = (
+        "请根据以下标准判断文章是否规整,仅返回“规整”或“不规整”,不需要任何解释或多余输出:\n\n"
+        "1. **格式规范**:文章是否具有完整的结构,例如标准标题、分段清晰,无明显表格堆砌或连续罗列(如长列表、标号堆叠等)。如果存在表格,直接判定为“不规整”。\n"
+        "2. **语言质量**:语言是否流畅,无语法、拼写错误或明显语义错误?\n"
+        "3. **逻辑清晰度**:内容是否条理清晰、逻辑连贯,段落之间衔接自然?\n"
+        "4. **排除分包标讯**:如文章涉及分包项目(关键词包括:“分包”“多包”“多个包”“包一”“包二”“多标段”“标段划分”“本项目分为”),则直接判定为“不规整”。\n\n"
+        "要求:每次调用前清除上下文记忆,确保判断基于本次输入内容。\n"
+        "输出限定:仅回答“规整”或“不规整”。"
+    )
+    payload = {
+        "model": "glm-4-flash",
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": f"正文: {text}\n请判定该标讯是否符合要求?"}
+        ]
+    }
+    try:
+        response = requests.post(API_URL, headers=HEADERS, json=payload)
+        response.raise_for_status()
+        result = response.json()
+        return result["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print(f"API调用失败: {e}")
+        return "不规整"
+
+def is_multi_package(ext_package, ai_com_package):
+    if not ext_package:
+        if isinstance(ai_com_package, list) and len(ai_com_package) == 1:
+            return True
+        return False
+    ext_is_multi = isinstance(ext_package, dict) and len(ext_package) > 1
+    ai_is_multi = isinstance(ai_com_package, list) and len(ai_com_package) > 1
+    return ext_is_multi == ai_is_multi
+
+def compare_and_score(ext_ai_record, ai_zhipu):
+    score = 100
+    deduct = 10
+    fields = [
+        ("area", "s_area"),
+        ("subtype", "s_subtype"),
+        ("projectname", "s_projectname"),
+        ("toptype", "s_toptype"),
+        ("city", "s_city"),
+        ("buyer", "s_buyer"),
+        ("s_winner", "s_s_winner"),
+        ("budget", "s_budget"),
+        ("projectcode", "s_projectcode"),
+        ("projectname", "s_projectname"),
+        ("bidamount", "s_bidamount"),
+    ]
+    for f_ext, f_ai in fields:
+        v_ext = ext_ai_record.get(f_ext)
+        v_ai = ai_zhipu.get(f_ai)
+        if not v_ext and not v_ai:
+            continue
+        if v_ext != v_ai:
+            score -= deduct
+    ext_package = ext_ai_record.get("package")
+    ai_com_package = ai_zhipu.get("s_pkg", {}).get("com_package")
+    if not is_multi_package(ext_package, ai_com_package):
+        score -= deduct
+    return score
+
+# 时间与起始文档ID(可根据需要修改)
+start_time = 1744905600
+end_time = 1744991999
+last_processed_id = '6802797a5f834436f09e3aaf'
+query_filter = {
+    "comeintime": {"$gte": start_time, "$lte": end_time},
+    "_id": {"$gt": ObjectId(last_processed_id)},
+    "extracttype": 1
+}
+
+print(f"查询条件: {query_filter}")
+print(f"查询结果数量: {source_collection.count_documents(query_filter)}")
+
+fields = {"_id": 1, "detail": 1, "area": 1, "site": 1, "subtype": 1, "title": 1, "href": 1,
+          "toptype": 1, "city": 1, "buyer": 1, "s_winner": 1, "budget": 1,
+          "projectcode": 1, "projectname": 1,"bidamount":1}
+
+# 分页查询,避免一次性查询过多数据
+batch_size = 1000  # 每次查询1000条数据
+docs_cursor = source_collection.find(query_filter, fields, no_cursor_timeout=True).batch_size(batch_size)
+
+# 处理文档时的修改
+try:
+    for doc in docs_cursor:
+        print(f"处理文档: {doc['_id']}")
+        if not doc.get("detail"):
+            print(f"文档 {doc['_id']} 无正文,跳过。")
+            continue
+
+        if call_llm_api(doc["detail"]) == "规整":
+            compare_record = compare_collection.find_one({"_id": doc["_id"]})
+            if not compare_record:
+                print(f"文档 {doc['_id']} 在对比库中未找到,跳过。")
+                continue
+
+            ext_ai_record = compare_record.get("ext_ai_record")
+            ai_zhipu = compare_record.get("ai_zhipu")
+            if not ext_ai_record or not ai_zhipu:
+                print(f"文档 {doc['_id']} 缺少 ext_ai_record 或 ai_zhipu 字段,跳过。")
+                continue
+
+            ext_package = ext_ai_record.get("package")
+            ai_com_package = ai_zhipu.get("s_pkg", {}).get("com_package")
+            multi_package_flag = 1 if (
+                (ext_package and isinstance(ext_package, dict) and len(ext_package) > 1) or
+                (ai_com_package and isinstance(ai_com_package, list) and len(ai_com_package) > 1)
+            ) else 0
+
+            score = compare_and_score(ext_ai_record, ai_zhipu)
+
+            # 获取 spidercode 和 channel 字段,从 bidding 集合中获取
+            bidding_record = source_collection.find_one({"_id": doc["_id"]})
+            spidercode = bidding_record.get("spidercode", "") if bidding_record else ""
+            channel = bidding_record.get("channel", "") if bidding_record else ""
+            print(f"spidercode: {spidercode}, channel: {channel}")
+
+            # 一致性判断的字段和标签
+            fields_to_check = [
+                "area", "multipackage", "projectname", "projectcode", "budget",
+                "s_winner", "buyer", "city", "toptype", "subtype","bidamount"
+            ]
+
+            result_data = {k: doc.get(k) for k in fields if k != "detail"}
+            result_data["panduan"] = "规整"
+            result_data["score"] = score
+            result_data["multipackage"] = multi_package_flag
+            result_data["spidercode"] = spidercode
+            result_data["channel"] = channel
+
+            # 为每个字段进行一致性判断并添加标签字段
+            for field in fields_to_check:
+                ext_value = ext_ai_record.get(field)
+                ai_value = ai_zhipu.get(f"s_{field}")
+
+                # 如果两个字段都为空,则标签为 0
+                if not ext_value and not ai_value:
+                    field_flag = 0
+                else:
+                    # 如果字段一致,则标签为 1,不一致为 0
+                    field_flag = 1 if ext_value == ai_value else 0
+
+                result_data[f"{field}_flag"] = field_flag
+
+            # 插入结果库
+            result_collection.insert_one(result_data)
+            print(f"文档 {doc['_id']} 得分 {score},已写入结果库。")
+            last_processed_id = doc["_id"]
+        else:
+            print(f"文档 {doc['_id']} 判定为不规整,跳过。")
+
+except KeyboardInterrupt:
+    print(f"程序被中断,最后处理的文档 _id 为: {last_processed_id}")
+    time.sleep(1)

+ 27 - 5
tools/高质量站点第二版/找出爬虫比例.py

@@ -10,10 +10,29 @@ def count_spidercode_stats(db_name='your_db_name', collection_name='your_collect
 
     # 聚合查询统计每个spidercode的数量
     pipeline = [
-        {"$group": {
-            "_id": "$spidercode",
-            "count": {"$sum": 1}
-        }},
+        # {
+        #     "$match": {
+        #         "$and": [
+        #             {"toptype": "招标","err3":1},
+        #         ]
+        #     }
+        # },
+        # {
+        #     "$match": {
+        #          "$and": [
+        #         {"subtype": {"$in": ["成交", "中标"]}},
+        #         {"err4": 1}
+        #     ]
+        #     }
+        # },
+        {
+            "$group": {
+                "_id": "$spidercode",
+                "count": {"$sum": 1},
+                "site": {"$first": "$site"},  # 获取每个spidercode的第一个site值
+                "channel": {"$first": "$channel"}  # 获取每个spidercode的第一个channel值
+            }
+        },
         {"$sort": {"count": -1}}
     ]
 
@@ -32,6 +51,9 @@ def count_spidercode_stats(db_name='your_db_name', collection_name='your_collect
     total_count = df['count'].sum()
     df['percentage'] = (df['count'] / total_count * 100).round(2)
 
+    # 重新排列列顺序
+    df = df[['spidercode', 'site', 'channel', 'count', 'percentage']]
+
     # 打印结果
     print(f"总记录数: {total_count}")
     print("\n每个spidercode的数量及占比:")
@@ -44,4 +66,4 @@ def count_spidercode_stats(db_name='your_db_name', collection_name='your_collect
 
 
 # 使用示例
-count_spidercode_stats(db_name='data_quality', collection_name='bidding_20250515')
+count_spidercode_stats(db_name='data_quality', collection_name='bidding_202505_chouqu')

+ 73 - 0
tools/高质量站点第二版/找出爬虫比例2.py

@@ -0,0 +1,73 @@
+from pymongo import MongoClient
+import pandas as pd
+
+
+def analyze_spidercode_stats():
+    # 连接MongoDB
+    client = MongoClient('mongodb://172.20.45.129:27002/')
+    db = client['data_quality']  # 替换为你的数据库名
+    collection = db['result_new']  # 替换为你的集合名
+
+    # 聚合查询1:统计每个spidercode的总标讯数量
+    pipeline_total = [
+        {
+            "$match": {
+                "$and": [
+                    {"toptype": "采购意向"},
+                    # {"toptype": {"$exists": True}}
+                    # 如果需要特定toptype值: {"toptype": "your_value"}
+                ]
+            }
+        },
+        {"$group": {
+            "_id": "$spidercode",
+            "total_count": {"$sum": 1}
+        }},
+        {"$sort": {"total_count": -1}}
+    ]
+
+    # 聚合查询2:统计每个spidercode中err1=1的标讯数量(结果列仍命名为flag3_count)
+    pipeline_flag3 = [
+        {"$match": {"err1": 1}},
+        {"$group": {
+            "_id": "$spidercode",
+            "flag3_count": {"$sum": 1}
+        }},
+        {"$sort": {"flag3_count": -1}}
+    ]
+
+    # 执行聚合查询
+    total_results = list(collection.aggregate(pipeline_total))
+    flag3_results = list(collection.aggregate(pipeline_flag3))
+
+    # 转换为DataFrame
+    df_total = pd.DataFrame(total_results).rename(columns={'_id': 'spidercode'})
+    df_flag3 = pd.DataFrame(flag3_results).rename(columns={'_id': 'spidercode'})
+
+    # 合并两个结果
+    df_merged = pd.merge(df_total, df_flag3, on='spidercode', how='left')
+    df_merged['flag3_count'] = df_merged['flag3_count'].fillna(0).astype(int)
+
+    # 计算flag3占比
+    df_merged['flag3_percentage'] = (df_merged['flag3_count'] / df_merged['total_count'] * 100).round(2)
+
+    # 找出flag3=1最多的spidercode
+    max_flag3 = df_merged.loc[df_merged['flag3_count'].idxmax()]
+
+    # 打印结果
+    print("标讯数量统计(按spidercode分组):")
+    print(df_merged.sort_values('total_count', ascending=False).to_string(index=False))
+
+    print("\nflag3=1最多的爬虫代码:")
+    print(f"Spidercode: {max_flag3['spidercode']}")
+    print(f"flag3=1记录数: {max_flag3['flag3_count']}")
+    print(f"占总标讯数比例: {max_flag3['flag3_percentage']}%")
+
+    # 保存结果到Excel
+    output_file = 'spidercode_analysis.xlsx'
+    df_merged.to_excel(output_file, index=False)
+    print(f"\n结果已保存到 {output_file}")
+
+
+# 执行分析
+analyze_spidercode_stats()

+ 67 - 0
tools/高质量站点第二版/统计三个大模型和规则一致性的比例.py

@@ -0,0 +1,67 @@
+from pymongo import MongoClient
+from collections import defaultdict
+
+# 连接MongoDB
+# client = MongoClient('mongodb://172.20.45.129:27002/')
+client = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True)  # 清洗库
+
+db = client['jyqyfw']  # 替换为您的数据库名
+collection = db['temp_0512']  # 替换为您的集合名
+
+# 初始化统计结果字典
+result = {
+    "deepseek": defaultdict(int),
+    "doubao": defaultdict(int),
+    "qwen": defaultdict(int)
+}
+
+# 遍历集合中的所有文档
+for doc in collection.find():
+    compare_data = doc.get("compare", {})
+
+    # 检查每个字段的result值
+    deepseek_all = True
+    doubao_all = True
+    qwen_all = True
+
+    for field, value_dict in compare_data.items():
+        result_value = value_dict.get("result", "")
+
+        # 检查result值是否以A开头
+        if result_value.startswith("A") and len(result_value) > 1:
+            first_char = result_value[0]
+
+            # 检查第二位
+            if len(result_value) > 1 and result_value[1] == first_char:
+                result["deepseek"][field] += 1
+            else:
+                deepseek_all = False
+
+            # 检查第三位
+            if len(result_value) > 2 and result_value[2] == first_char:
+                result["doubao"][field] += 1
+            else:
+                doubao_all = False
+
+            # 检查第四位
+            if len(result_value) > 3 and result_value[3] == first_char:
+                result["qwen"][field] += 1
+            else:
+                qwen_all = False
+
+    # 更新all计数器
+    if deepseek_all:
+        result["deepseek"]["all"] += 1
+    if doubao_all:
+        result["doubao"]["all"] += 1
+    if qwen_all:
+        result["qwen"]["all"] += 1
+
+# 将defaultdict转换为普通dict
+final_result = {
+    "deepseek": dict(result["deepseek"]),
+    "doubao": dict(result["doubao"]),
+    "qwen": dict(result["qwen"])
+}
+
+print(final_result)