@@ -0,0 +1,263 @@
+# coding:utf-8
+import time
+from a2s.tools import json_serialize, json_deserialize
+from a2s.a2s_client import a2s_execute
+from docs.config import ReluMongodb
+from util.mogodb_helper import MongoDBInterface
+from pymongo import MongoClient
+from util.mysql_tool import MysqlUtil
+import json
+from datetime import datetime, timedelta
+from elasticsearch import Elasticsearch
+
+
+ReluClient = MongoDBInterface(ReluMongodb)
+
+# Quality-evaluation service configuration
+a2s_ip = "192.168.3.240:9090"
+# a2s_ip = "172.17.0.11:9090"
+topic = "quality_bid"
+# Topic used for local testing
+# topic = "test_quality_bid"
+timeout = 180
+
+# Current time
+now = datetime.now()
+current_datetime = now.strftime("%Y-%m-%d %H:%M:%S")
+# Today's date
+today = datetime.today()
+# Yesterday's date
+yesterday = today - timedelta(days=1)
+# Yesterday at midnight (00:00)
+yesterday_midnight = datetime(yesterday.year, yesterday.month, yesterday.day)
+# Today at midnight (00:00)
+today_midnight = datetime(today.year, today.month, today.day)
+# Convert to Unix timestamps: the ES query filters comeintime in [yesterday 00:00, today 00:00)
+start_date = int(yesterday_midnight.timestamp())
+end_date = int(today_midnight.timestamp())
+
+# ES connection configuration
+es_host = "http://127.0.0.1:19800"
+es_username = "jianyuGr"
+es_password = "we3g8glKfe#"
+
+# Initialize the Elasticsearch client with basic authentication
+es_client = Elasticsearch(es_host, http_auth=(es_username, es_password), retry_on_timeout=True)
+
+# Run the quality evaluation for a single document
+def start_quality(data: dict, rules_id: int, a2s_ip, topic, timeout, retry=3):
+    # SSL is not used for this call, so the channel is insecure
+    row = {"data": data, "rules_id": rules_id}
+    bytes_data = json_serialize(row)
+    for t in range(retry):
+        print("topic", topic)
+        try:
+            resp_data = a2s_execute(a2s_ip, topic, timeout, bytes_data)
+            if resp_data is None:
+                continue
+            result = json_deserialize(resp_data)
+            return result
+        except Exception as e:
+            print(e)
+    # All retries failed: return an empty result
+    return {}
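+
+# Minimal usage sketch (illustrative only): the batch loop below expects the
+# service to reply with a dict shaped like {"code": 200, "data": {...}}, where
+# each "<field>_qa" entry in "data" maps to a dict of error descriptions for
+# that field.
+#   result = start_quality(doc, rules_id, a2s_ip, topic, timeout)
+#   if result.get("code") == 200:
+#       field_errors = result.get("data", {})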
+
+# Look up the rule ID by company (user ID) and version
+def get_rule(company, version):
+    rule_id = ReluClient.find_rule_by_company(ReluMongodb["col"], company, version)
+    return rule_id
+
+def find_error_id(conn, cleaned_key, sub_value):
+    """
+    Look up the id in error_dict for a given field and error description
+    """
+    query = """SELECT id FROM error_dict WHERE fields = %s AND error = %s"""
+    params = (cleaned_key, sub_value)
+    result = MysqlUtil.query_data(conn, query, params)
+    # result looks like [(10,)]
+    # Check whether the query returned anything
+    if not result:
+        print(f"Error: No matching record found for fields={cleaned_key}, error={sub_value}")
+        return None  # or return a default value, depending on requirements
+
+    record = result[0][0]
+    return record
+
+def insert_batch_data(conn, params):
+    """
+    Insert one analysis row (INSERT IGNORE skips duplicates on the unique key)
+    """
+    query = """INSERT IGNORE INTO bid_analysis (mongoid, site, spidercode, comeintime, area, city, district, score, error_type, spider_modified_time)
+               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
+    MysqlUtil.insert_data(conn, query, params)
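+
+# Illustrative target schema (an assumption for reference only; the real
+# bid_analysis DDL lives in the quality database and may differ):
+#   CREATE TABLE bid_analysis (
+#       mongoid VARCHAR(64) PRIMARY KEY,
+#       site VARCHAR(255), spidercode VARCHAR(255), comeintime DATETIME,
+#       area VARCHAR(64), city VARCHAR(64), district VARCHAR(64),
+#       score VARCHAR(16), error_type TEXT, spider_modified_time DATETIME,
+#       title_error VARCHAR(255), ...  -- one "<field>_error" column per checked field
+#   );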
+
+def insert_dynamic_error_field(conn, cleaned_key, error_ids, mongoid):
+    """
+    Write the collected error_ids into the matching <cleaned_key>_error column
+    """
+    # Build the UPDATE statement dynamically for the <cleaned_key>_error column.
+    # Note: cleaned_key is interpolated into the SQL text, so it must come from
+    # the known quality-check field names, never from untrusted input.
+    query = f"""
+    UPDATE bid_analysis
+    SET {cleaned_key}_error = %s
+    WHERE mongoid = %s
+    """
+    # Join the error ids into a comma-separated string
+    error_ids_str = ','.join(map(str, error_ids))
+    params = (error_ids_str, mongoid)
+
+    MysqlUtil.update_data(conn, query, params)
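+
+# For example, with a hypothetical cleaned_key="title", error_ids=[3, 10] and
+# mongoid="abc123", this issues (parameter values shown inline for clarity):
+#   UPDATE bid_analysis SET title_error = '3,10' WHERE mongoid = 'abc123'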
+
+def get_last_processed_id():
+    """
+    Read the last processed ID (e.g. from a database or a file) so a run can resume
+    """
+    # Here the checkpoint ID is read from a file; it could also come from a
+    # database, Redis, or similar storage.
+    try:
+        with open('docs/last_processed_id.txt', 'r') as f:
+            last_id = f.read().strip()
+            if last_id:
+                return last_id
+            else:
+                return None
+    except FileNotFoundError:
+        return None
+
+def save_last_processed_id(last_id):
+    """
+    Persist the largest ID processed so far, used to resume after an interruption
+    """
+    with open('docs/last_processed_id.txt', 'w') as f:
+        f.write(str(last_id))
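+
+# The checkpoint stored here is the Elasticsearch _id of the last document in
+# the most recently completed batch; batch_load_data feeds it back through
+# "search_after" to resume pagination. Deleting docs/last_processed_id.txt
+# restarts processing from the beginning of the time window.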
+
+def batch_load_data():
+    """
+    Batch data-quality check over yesterday's bidding documents
+    """
+    # Look up the rule set by the required keys: company name (user ID) and version
+    rules_id = get_rule("北京剑鱼信息技术有限公司", "v1.2")
+    print(rules_id)
+
+    # Initialize MySQL
+    # conn = MysqlUtil.connect_to_mysql(host='192.168.3.14', port='4000', user='DataScBi', password='DT#Sc20221123Ht',database='quality')
+    conn = MysqlUtil.connect_to_mysql(host='192.168.3.217', port='4000', user='root', password='=PDT49#80Z!RVv52_z', database='quality')
+    # Initialize the spider code library (crawler audit log collection)
+    collection = MongoClient('mongodb://127.0.0.1:27089/', unicode_decode_error_handler="ignore", directConnection=True)["editor"]["lua_logs_auditor"]
+
+    # Read the last processed ID; if there is none, start from the beginning
+    last_processed_id = get_last_processed_id()
+    print(f"Last processed ID: {last_processed_id}")
+
+    # Query ES for yesterday's documents
+    es_query = {
+        "query": {
+            "bool": {
+                "filter": [
+                    {
+                        "range": {
+                            "comeintime": {
+                                "gte": start_date,
+                                "lt": end_date
+                            }
+                        }
+                    }
+                ]
+            }
+        },
+        "sort": [
+            {"_id": {"order": "asc"}}  # sort by _id so search_after pagination is deterministic
+        ],
+        "size": 100  # number of documents per batch
+    }
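+
+    # How the pagination works: each page is sorted by _id, and the sort value
+    # of the last hit is fed back via "search_after" to request the next page.
+    # For a hypothetical page ending with _id "64f0...9a", the next request
+    # carries es_query["search_after"] = ["64f0...9a"] and returns the 100
+    # documents that follow it.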
+
+    # If there is a checkpoint ID, resume from it with search_after
+    if last_processed_id:
+        es_query["search_after"] = [last_processed_id]  # must match the _id sort value type (string)
+
+    try:
+        # Fetch the first batch; pagination uses search_after, not the scroll API
+        response = es_client.search(index="bidding", body=es_query)
+        hits = response['hits']['hits']
+
+        while hits:
+            print("---- batch start ----")
+            max_id = None
+            for hit in hits:
+                item = hit["_source"]
+                print("------ document start ------")
+                max_id = hit["_id"]
+                print(f"Processing document: {max_id}")
+                item["_id"] = str(hit["_id"])
+
+                # Run the quality check
+                result = start_quality(item, rules_id, a2s_ip, topic, timeout)
+                print(result)
+
+                code = result.get("code")
+                if code != 200:
+                    # The document failed evaluation; skip it
+                    continue
+
+                data = result.get("data", {})
+
+                # Collect the fields to insert into MySQL
+                site = item.get("site", "")
+                spidercode = item.get("spidercode", "")
+                comeintime = item.get("comeintime", 0)
+                comeintime = datetime.fromtimestamp(comeintime) if comeintime else None
+                area = item.get("area", "")
+                city = item.get("city", "")
+                district = item.get("district", "")
+                score = item.get("score", "")
+                error_type_data = json.dumps(data)
+                spider_modified_time = current_datetime
+
+                # Look up when the spider code was last modified
+                info = collection.find_one({"code": spidercode})
+                if info and info.get("modifytime"):
+                    spider_modified_time = datetime.fromtimestamp(info["modifytime"])
+
+                params = (item["_id"], site, spidercode, comeintime, area, city, district, score, error_type_data, spider_modified_time)
+                insert_batch_data(conn, params)
+
+                # Walk the error dictionary and collect ids for every non-empty field entry
+                for key, value in data.items():
+                    error_ids = []
+                    if isinstance(value, dict) and value:
+                        cleaned_key = key[:-3] if key.endswith('_qa') else key  # strip the '_qa' suffix
+                        for sub_key, sub_value in value.items():
+                            error_id = find_error_id(conn, cleaned_key, sub_value)
+                            if error_id:
+                                error_ids.append(error_id)
+                            print(f" {sub_key}: {sub_value}")
+
+                        # Write the error ids into the <cleaned_key>_error column
+                        insert_dynamic_error_field(conn, cleaned_key, error_ids, item["_id"])
+                print("------ document end ------")
+            # Save the largest ID processed in this batch
+            if max_id:
+                save_last_processed_id(max_id)
+                print(f"Saved last processed ID: {max_id}")
+            # End-of-batch message
+            print("---- current batch processed ----")
+
+            # Fetch the next batch
+            search_after = hits[-1]["_id"]  # the last _id of this batch is the starting point for the next
+            es_query["search_after"] = [search_after]  # keep the _id type consistent
+            response = es_client.search(index="bidding", body=es_query)
+            hits = response['hits']['hits']
+
+            # No more data: stop batch processing
+            if not hits:
+                print("No more data, ending batch processing")
+                break
+        print("Data processing finished")
+    except Exception as e:
+        print(f"Error: {e}")
+        time.sleep(10)
+    finally:
+        if conn.is_connected():
+            conn.close()  # make sure the connection is closed
+            print("MySQL connection closed")
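+
+# The comeintime window above covers yesterday only, so the script is presumably
+# run once per day (e.g. from cron shortly after midnight). This is an assumption
+# based on the date window; no scheduler configuration is included here.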
+
+if __name__ == '__main__':
+    batch_load_data()
+