liumiaomiao 2 روز پیش
والد
کامیت
e7ee738710

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 44 - 11
app.py


+ 26 - 13
client_mongo_mysql_zxz_liantong.py

@@ -46,8 +46,10 @@ def insert_batch_data(conn, params):
     """
     执行批量插入数据
     """
-    query = """INSERT IGNORE INTO yusuan_analysis_liantong (mongoid, area, city,district, projectname, publish_org, procure_content,kpi,budget,institution,score, error_type, create_time) 
-               VALUES (%s, %s, %s, %s, %s, %s, %s, %s,%s,%s,%s,%s,%s )"""
+    query = """INSERT IGNORE INTO zxz_analysis_liantong (mongoid, area, city,district, project_name, total_investment, project_domain,project_owner,start_date,end_date,
+                                operation_start_date,operation_end_date,source_income,construction_content,remarks,cost_income_percent,coverage_multiple,competent_department,
+                                score, error_type, create_time) 
+               VALUES (%s, %s, %s, %s, %s, %s, %s, %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s )"""
     MysqlUtil.insert_data(conn, query, params)
 
 
@@ -65,7 +67,7 @@ def has_non_empty_qa(data):
 
 def insert_bid_statistics (col,conn,query,batch_id):
     #定义来源  1标讯简版2拟在建3新闻4预算5专项债
-    data_source =4
+    data_source =5
     # 使用聚合管道进行多条件统计
     pipeline = [
         {"$match": query},
@@ -79,7 +81,7 @@ def insert_bid_statistics (col,conn,query,batch_id):
     # 提取统计结果
     count_total = result["总量"][0]["count"] if result["总量"] else 0
 
-    sql_query = """INSERT IGNORE INTO bid_statistics_liantong (yusuan_count, batch_id,data_source) 
+    sql_query = """INSERT IGNORE INTO bid_statistics_liantong (zxz_count, batch_id,data_source) 
                    VALUES ( %s, %s ,%s)"""
     params = (count_total, batch_id, data_source)
     MysqlUtil.insert_data(conn, sql_query, params)
@@ -98,7 +100,7 @@ def batch_load_data():
     end_date = int(datetime.strptime(f"{today_date} 00:00:00", "%Y-%m-%d %H:%M:%S").timestamp())
     # print("end_date", end_date)
     # 规则查询,根据必要条件 公司名称(用户ID)、版本号
-    rules_id = get_rule("中国联通-预算", "v1.4.3")
+    rules_id = get_rule("中国联通-专项债", "v1.4.2")
     print(rules_id)
     # 初始化mysql
     conn = MysqlUtil.connect_to_mysql(host='172.20.45.129', port='4000', user='root', password='=PDT49#80Z!RVv52_z',database='quality')
@@ -109,7 +111,7 @@ def batch_load_data():
     query = {
         "_id": {"$gte": max_id},
         # "_id": max_id,
-        "createtime": {"$gte": start_date, "$lte": end_date}
+        # "createtime": {"$gte": start_date, "$lte": end_date}
     }
 
     mongo_client = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True)  # 修改为你的连接地址
@@ -140,17 +142,28 @@ def batch_load_data():
                     area = item.get("area", "")
                     city = item.get("city", "")
                     district = item.get("district", "")
-                    projectname = item.get("projectname", "")
-                    publish_org = item.get("publish_org", "")
-                    procure_content = item.get("procure_content", "")
-                    kpi = item.get("kpi", "")
-                    budget = item.get("budget", "")
-                    institution = item.get("institution", "")
+                    project_name = item.get("project_name", "")
+                    total_investment = item.get("total_investment", "")
+                    project_domain = item.get("project_domain", "")
+                    project_owner = item.get("project_owner", "")
+                    start_date = item.get("start_date", "")
+                    end_date = item.get("end_date", "")
+                    operation_start_date = item.get("operation_start_date", "")
+                    operation_end_date = item.get("operation_end_date", "")
+                    source_income = item.get("source_income", "")
+                    construction_content = item.get("construction_content", "")
+                    remarks = item.get("remarks", "")
+                    cost_income_percent = item.get("cost_income_percent", "")
+                    coverage_multiple = item.get("coverage_multiple", "")
+                    competent_department = item.get("competent_department", "")
+
                     score = data.get("score", "")
                     error_type_data = json.dumps(data)
                     create_time = today_date
 
-                    params = (item["_id"],  area, city, district,projectname, publish_org, procure_content,kpi,budget,institution,score, error_type_data,create_time)
+                    params = (item["_id"],  area, city, district,project_name, total_investment, project_domain,project_owner,start_date,end_date,
+                              operation_start_date,operation_end_date,source_income,construction_content,remarks,cost_income_percent,coverage_multiple,competent_department,
+                              score, error_type_data,create_time)
                     insert_batch_data(conn, params)
 
             print("---- 数据处理完成 ----")

+ 121 - 1
tables/fields/NoField.py

@@ -58,8 +58,128 @@ class NoFieldChecker(object):
             "procure_content":self.check_procure_content,
             "kpi": self.check_kpi,
             "institution": self.check_institution,
-        }
+            "project_domain": self.check_project_domain,
+            "project_owner": self.check_project_owner,
+            "start_date": self.check_start_date,
+            "end_date": self.check_end_date,
+            "operation_start_date": self.check_operation_start_date,
+            "operation_end_date": self.check_operation_end_date,
+            "source_income": self.check_source_income,
+            "construction_content": self.check_construction_content,
+            "remarks": self.check_remarks,
+            "cost_income_percent": self.check_cost_income_percent,
+            "coverage_multiple": self.check_coverage_multiple,
+            "competent_department": self.check_competent_department,
 
+        }
+    def check_competent_department(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``competent_department`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("competent_department", "")
+    def check_coverage_multiple(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``coverage_multiple`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy (a numeric 0 counts as falsy), otherwise False
+        """
+        return not obj.get("coverage_multiple", "")
+    def check_cost_income_percent(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``cost_income_percent`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("cost_income_percent", "")
+    def check_remarks(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``remarks`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("remarks", "")
+    def check_construction_content(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``construction_content`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("construction_content", "")
+    def check_source_income(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``source_income`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("source_income", "")
+    def check_operation_end_date(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``operation_end_date`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("operation_end_date", "")
+    def check_operation_start_date(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``operation_start_date`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("operation_start_date", "")
+    def check_end_date(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``end_date`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("end_date", "")
+    def check_start_date(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``start_date`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("start_date", "")
+    def check_project_owner(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``project_owner`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("project_owner", "")
+    def check_project_domain(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Missing-value check for the ``project_domain`` field.
+
+        :param obj: a single item, read with ``obj.get``
+        :param catch_content: not used by this check
+        :return: True (abnormal) when the key is absent or its value is
+                 falsy, otherwise False
+        """
+        return not obj.get("project_domain", "")
     def check_bidamount(self,obj,catch_content: CatchContentObject) -> bool:
         """
         中标金额为空检测

+ 30 - 0
tables/fields/competent_department.py

@@ -0,0 +1,30 @@
+"""
+   competent_department字段检查
+"""
+
+
+class Competent_departmentChecker(object):
+    """
+    Validation rules for the ``competent_department`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "主管部门数据长度<3",
+                "parent_name": "名称长度异常错误",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            }
+        }
+
+    @staticmethod
+    def check0101(competent_department: str) -> bool:
+        """
+        Flag a department name shorter than 3 characters.
+
+        :param competent_department: the value to check
+        :return: True means abnormal; an empty or None value returns False
+        """
+        # Empty values are skipped by this rule; return an explicit False
+        # instead of falling off the end of the function with None.
+        if not competent_department:
+            return False
+        return len(competent_department) < 3

+ 57 - 0
tables/fields/cost_income_percent.py

@@ -0,0 +1,57 @@
+"""
+   cost_income_percent字段检查
+"""
+
+
+class Cost_income_percentChecker(object):
+    """
+    Validation rules for the ``cost_income_percent`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "成本/收入小数点位数超过2位",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            },
+            "0102": {
+                "name": "成本/收入<0",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0102
+            }
+        }
+
+    @staticmethod
+    def _strip_percent(cost_income_percent) -> str:
+        """Return the value as text without surrounding blanks or ``%`` signs."""
+        if cost_income_percent is None:
+            return ""
+        # str() first so numeric inputs do not fail on the .strip() calls.
+        return str(cost_income_percent).strip().strip('%').strip()
+
+    @staticmethod
+    def check0101(cost_income_percent: str) -> bool:
+        """
+        Flag a value written with more than 2 digits after the decimal point.
+
+        :param cost_income_percent: the value to check, e.g. ``"12.5%"``
+        :return: True means abnormal; values without a decimal point
+                 (including empty/None) return False
+        """
+        number_str = Cost_income_percentChecker._strip_percent(cost_income_percent)
+        # partition() never raises, unlike unpacking split('.') when the
+        # text contains more than one dot.
+        _, _, decimal_part = number_str.partition('.')
+        return len(decimal_part) > 2
+
+    @staticmethod
+    def check0102(cost_income_percent: str) -> bool:
+        """
+        Flag a negative value.
+
+        :param cost_income_percent: the value to check, e.g. ``"-3%"``
+        :return: True means abnormal; empty or non-numeric input cannot be
+                 called negative and returns False
+        """
+        number_str = Cost_income_percentChecker._strip_percent(cost_income_percent)
+        try:
+            return float(number_str) < 0
+        except ValueError:
+            # Empty or non-numeric text: not this rule's error.
+            return False

+ 57 - 0
tables/fields/coverage_multiple.py

@@ -0,0 +1,57 @@
+"""
+   coverage_multiple字段检查
+"""
+
+
+class Coverage_multipleChecker(object):
+    """
+    Validation rules for the ``coverage_multiple`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "覆盖倍数小数点位数超过2位",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            },
+            "0102": {
+                "name": "覆盖倍数<0",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0102
+            }
+        }
+
+    @staticmethod
+    def check0101(coverage_multiple: float) -> bool:
+        """
+        Flag a value written with more than 2 digits after the decimal point.
+
+        The digits are counted on ``str(coverage_multiple)``, so a float is
+        judged by its printed representation.
+
+        :param coverage_multiple: the value to check
+        :return: True means abnormal; values without a decimal point
+                 return False
+        """
+        number_str = str(coverage_multiple).strip()
+        # partition() never raises, unlike unpacking split('.') when the
+        # text contains more than one dot.
+        _, _, decimal_part = number_str.partition('.')
+        return len(decimal_part) > 2
+
+    @staticmethod
+    def check0102(coverage_multiple: float) -> bool:
+        """
+        Flag a negative value.
+
+        :param coverage_multiple: the value to check
+        :return: True means abnormal; None, empty or non-numeric input
+                 cannot be called negative and returns False
+        """
+        try:
+            return float(coverage_multiple) < 0
+        except (TypeError, ValueError):
+            # None raises TypeError, empty/non-numeric text raises ValueError.
+            return False

+ 21 - 0
tables/fields/end_date.py

@@ -0,0 +1,21 @@
+
+# Construction-period end date
+class End_dateChecker(object):
+    """
+    Validation rules for the ``end_date`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "建设期结束时间<建设期开始时间",
+                "parent_name": "时间有效性异常",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            }
+        }
+
+    def check0101(self, start_date: str, end_date: str) -> bool:
+        """
+        Flag an end date that sorts before the start date.
+
+        :param start_date: construction-period start
+        :param end_date: construction-period end
+        :return: True means abnormal; returns False when either value is
+                 empty, because there is nothing to compare
+        """
+        if not (start_date and end_date):
+            return False
+        # Plain ``<`` on the raw values; for strings this is lexicographic.
+        # NOTE(review): presumably both dates share one sortable format
+        # such as YYYY-MM-DD - confirm against the data source.
+        return end_date < start_date

+ 21 - 0
tables/fields/operation_end_date.py

@@ -0,0 +1,21 @@
+
+# Operation-period end date
+class Operation_end_dateChecker(object):
+    """
+    Validation rules for the ``operation_end_date`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        # NOTE(review): the "name" label below mentions the construction
+        # start date, while check0101 compares against the operation start
+        # date - confirm which one is intended before changing either.
+        self.errors_tables = {
+            "0101": {
+                "name": "运营期结束时间<建设期开始时间",
+                "parent_name": "时间有效性异常",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            }
+        }
+
+    def check0101(self, operation_start_date: str, operation_end_date: str) -> bool:
+        """
+        Flag an operation end date that sorts before the operation start date.
+
+        :param operation_start_date: operation-period start
+        :param operation_end_date: operation-period end
+        :return: True means abnormal; returns False when either value is
+                 empty, because there is nothing to compare
+        """
+        if not (operation_end_date and operation_start_date):
+            return False
+        # Plain ``<`` on the raw values; for strings this is lexicographic.
+        # NOTE(review): presumably both dates share one sortable format
+        # such as YYYY-MM-DD - confirm against the data source.
+        return operation_end_date < operation_start_date

+ 21 - 0
tables/fields/operation_start_date.py

@@ -0,0 +1,21 @@
+
+# Operation-period start date
+class Operation_start_dateChecker(object):
+    """
+    Validation rules for the ``operation_start_date`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "运营期开始时间<建设期结束时间",
+                "parent_name": "时间有效性异常",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            }
+        }
+
+    def check0101(self, operation_start_date: str, end_date: str) -> bool:
+        """
+        Flag an operation start date that sorts before the construction end date.
+
+        :param operation_start_date: operation-period start
+        :param end_date: construction-period end
+        :return: True means abnormal; returns False when either value is
+                 empty, because there is nothing to compare
+        """
+        if not (operation_start_date and end_date):
+            return False
+        # Plain ``<`` on the raw values; for strings this is lexicographic.
+        # NOTE(review): presumably both dates share one sortable format
+        # such as YYYY-MM-DD - confirm against the data source.
+        return operation_start_date < end_date

+ 119 - 0
tables/fields/project_name.py

@@ -0,0 +1,119 @@
+"""
+    专项债项目名称字段检查
+"""
+import re
+from docs.config import general_config
+from util.sensitive_word import AcAutomation
+import csv
+from docs.config import abnormal_config
+
+class Project_nameChecker(object):
+    """
+    Validation rules for the special-bond ``project_name`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+    def __init__(self):
+        # Rule 0302 (check0302) is defined below but currently not registered.
+        self.errors_tables = {
+            "0101": {
+                "name": "项目名称长度小于等于5",
+                "parent_name": "长度类型",
+                "parent_code": "01",
+                "checkFn": self.lt5
+            },
+            "0102": {
+                "name": "长度大于等于100",
+                "parent_name": "长度类型",
+                "parent_code": "01",
+                "checkFn": self.gt100
+            },
+            "0201":{
+                "name": "非汉字占比>55%",
+                "parent_name": "汉字占比",
+                "parent_code": "02",
+                "checkFn": self.check0201
+            },
+            "0303": {
+                "name": "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
+                "parent_name": "语义表述不完整",
+                "parent_code": "03",
+                "checkFn": self.check0303
+            }
+        }
+
+    @staticmethod
+    def gt100(project_name: str) -> bool:
+        """
+        Flag a project name of 100 characters or more.
+
+        :param project_name: the name to check
+        :return: True means abnormal
+        """
+        return len(project_name) >= 100
+
+    @staticmethod
+    def lt5(project_name: str) -> bool:
+        """
+        Flag a project name of 5 characters or fewer.
+
+        :param project_name: the name to check
+        :return: True means abnormal
+        """
+        return len(project_name) <= 5
+
+    def check0201(self,project_name: str) -> bool:
+        """
+        Flag a name in which more than half of the characters fall outside
+        the U+4E00..U+9FFF range.
+
+        NOTE(review): the registered label says ">55%" but the threshold
+        applied here is 0.5 - confirm which value is intended.
+
+        :param project_name: the name to check
+        :return: True means abnormal; an empty name returns False
+        """
+        # Guard the division below: an empty name has no ratio to compute.
+        if not project_name:
+            return False
+        non_chinese_count = sum(
+            1 for char in project_name if not ('\u4e00' <= char <= '\u9fff')
+        )
+        return non_chinese_count / len(project_name) > 0.5
+
+    def check0302(self,project_name: str) -> bool:
+        """
+        Flag a name that contains none of the generic words listed in the
+        CSV at ``general_config["table_field_config"]["path"]``.
+
+        :param project_name: the name to check
+        :return: True means abnormal (no listed word occurs in the name)
+        """
+        with open(general_config["table_field_config"]["path"], "r") as f:
+            for row in csv.reader(f):
+                # Skip blank rows/cells; keep scanning until a word matches
+                # instead of deciding on the first row alone.
+                if row and row[0] and row[0] in project_name:
+                    return False
+        return True
+
+    def check0303(self,project_name: str) -> bool:
+        """
+        Flag a name that contains a listed abnormal word, or that ends or
+        starts with one of the characters matched by the patterns below.
+
+        The word list is the CSV at
+        ``abnormal_config["table_field_config"]["path6"]``.
+
+        :param project_name: the name to check
+        :return: True means abnormal, otherwise False
+        """
+        with open(abnormal_config["table_field_config"]["path6"], "r") as f:
+            for row in csv.reader(f):
+                # A blank cell would match every name, so skip it.
+                if row and row[0] and row[0] in project_name:
+                    return True
+        # Raw strings keep the patterns character-for-character while
+        # avoiding invalid "\." escapes in ordinary string literals.
+        # NOTE(review): this is a character class, so it matches a name
+        # ending in any single one of n, b, s, p or "." - not the literal
+        # text "nbsp". Confirm whether that is intended.
+        if re.search(r"[nbsp\..\...\.]$", project_name) is not None:
+            return True
+        # Name starts with one of: 3 6 7 8 0 . )
+        return re.search(r"^[36780\.)]", project_name) is not None

+ 56 - 0
tables/fields/total_investment.py

@@ -0,0 +1,56 @@
+"""
+    total_investment投资金额字段检查
+"""
+
+
+class Total_investmentChecker(object):
+    """
+    Validation rules for the ``total_investment`` field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the callable (``checkFn``) that detects the error.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0103": {
+                "name": "投资金额小数点位数超过4位",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0103
+            },
+            "0201": {
+                "name": "投资金额<0",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0201
+            }
+        }
+
+    @staticmethod
+    def check0103(total_investment: float) -> bool:
+        """
+        Flag an amount written with more than 4 digits after the decimal point.
+
+        The digits are counted on ``str(total_investment)``, so a float is
+        judged by its printed representation.
+
+        :param total_investment: the amount to check
+        :return: True means abnormal; values without a decimal point
+                 return False
+        """
+        number_str = str(total_investment).strip()
+        # partition() never raises, unlike unpacking split('.') when the
+        # text contains more than one dot.
+        _, _, decimal_part = number_str.partition('.')
+        return len(decimal_part) > 4
+
+    @staticmethod
+    def check0201(total_investment: float) -> bool:
+        """
+        Flag a negative amount.
+
+        :param total_investment: the amount to check
+        :return: True means abnormal; None, empty or non-numeric input
+                 cannot be called negative and returns False
+        """
+        try:
+            # float() lets numeric strings be compared instead of raising
+            # TypeError on ``str < int``.
+            return float(total_investment) < 0
+        except (TypeError, ValueError):
+            return False
+

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است