liumiaomiao 1 年之前
父节点
当前提交
798286d0cf

+ 213 - 50
app.py

@@ -16,6 +16,8 @@ from tables.fields.subpackage import SubpackageChecker
 from tables import CatchContentObject
 from tables.fields.fieldtype import FieldTypeChecker
 from tables.fields.purchasing import PurchasingChecker
+from tables.fields.bidopentime import BidopentimeChecker
+from tables.fields.publishtime import PublishtimeChecker
 
 area_checker = AreaChecker()
 winner_checker = WinnerChecker()
@@ -26,10 +28,13 @@ title_checker = TitleChecker()
 nofield_checker = NoFieldChecker()
 projectname_checker = ProjectnameChecker()
 projectcode_checker = ProjectcodeChecker()
+publishtime_checker = PublishtimeChecker()
+bidopentime_checker = BidopentimeChecker()
 multipackage_checker = SubpackageChecker()
 fieldtype_checker = FieldTypeChecker()
 purchasing_checker = PurchasingChecker()
 
+
 # 定义检查链
 check_chain = {
     "title": {
@@ -56,6 +61,12 @@ check_chain = {
     "area": {
         "checker": area_checker,
     },
+    "bidopentime":{
+        "checker":bidopentime_checker,
+    },
+    "publishtime":{
+        "checker":publishtime_checker,
+    },
     "multipackage": {
         "checker": multipackage_checker,
     },
@@ -69,7 +80,7 @@ def check_params(func, param_rows: dict, is_check_type=False) -> (bool, list):
     """
     函数传参数
     """
-    _default_params = {"attach_text": {}, "bidamount": 0, "budget": 0,"supervisorrate":0,"city":"郑州市","district":"金水区"}  # 必要参数的默认值
+    _default_params = {"attach_text": {}, "bidamount": 0, "budget": 0,"bidopentime":0,"publishtime":0,"subtype":"","supervisorrate":0,"city":"郑州市","district":"金水区"}  # 必要参数的默认值
     func_params = inspect.getfullargspec(func)  # 取出函数的参数
     args_annotations = func_params.annotations  # 参数类型dict,取出参数类型
     args_list = func_params.args  # 参数list
@@ -90,7 +101,7 @@ def check_params(func, param_rows: dict, is_check_type=False) -> (bool, list):
         return False, params
     return True, params  # 满足参数需求
 
-#根据检查结果打分
+#根据检查结果打分top
 def bid_score(error_dist,obj):
     score=100
     site = obj.get("site")
@@ -127,9 +138,8 @@ def check(obj: any, rules) -> any:
     field_qa = {}
     for field in rules:  # 获取检查字段
         qa = {}
-
         # 字段检查
-        if field not in obj:
+        if field not in obj and field in nofield_checker.errors_tables:
             func = nofield_checker.errors_tables[field]
             # 开始执行函数
             if func(obj, catch_content):
@@ -144,6 +154,8 @@ def check(obj: any, rules) -> any:
             qa["0001"] = f"{field}:类型不正确、空值"
             field_qa["%s_qa" % field] = qa
             continue
+        if field not in check_chain:
+            continue
         checker = check_chain[field]["checker"]  # 获取检测器
         for err,err_detail in rules[field].items():  # 获取检测的规则
             # 错误类型检查
@@ -175,7 +187,7 @@ if __name__ == '__main__':
     "comeintime" : int(1698739748), 
     "area" : "内蒙古",
     "purchasingsource" : "[{\"start\": 13, \"verify\": \"确定\", \"direction\": \"h\", \"type\": \"识别\", \"header\": {\"itemname\": \"产品名称\", \"model\": \"技术规格\", \"number\": \"数量\", \"unitprice\": \"单价(元)\", \"totalprice\": \"金额(元)\"}, \"file_name\": \"公告\"}]",
-    "toptype" : "结果", 
+    # "toptype" : "结果",
     "spidercode" : "nm_nmgzzqzfcgw_dzmc_zgcgcjgs", 
     "extracttype" : int(1),
     "s_sha" : "9269588cb73151d8ec09cf16347539678549f81395559589a731ca1394828582", 
@@ -183,9 +195,10 @@ if __name__ == '__main__':
     "purchasing" : "台式计算机", 
     "site" : "湖南省政府采购电子卖场",
     "title" : "二连浩特",
-    "dataging" : int(0), 
-    "publishtime" : int(1698739410), 
-    "subtype" : "成交", 
+    "dataging" : int(0),
+    "bidopentime":int(1798739414),
+    "publishtime" : int(1698739410),
+    "subtype" : "招标",
     "purchasinglist" : [
         {
             "score" : 0.8275, 
@@ -218,11 +231,11 @@ if __name__ == '__main__':
     "basicClass" : "货物", 
     "rate" : "97%", 
     "autoid" : int(249351001), 
-    "bidamount" : 15440.0, 
+    "bidamount" : None,
     "bidway" : "电子投标", 
-    "budget" : 15440.0,
+    "budget" : None,
     "supervisorrate": 0.03,
-    "buyer" : "二连浩特市人民医院", 
+    "buyer" : "哈密市伊州区花园乡人民政府",
     "buyerclass" : "医疗", 
     "dataprocess" : int(8), 
     "description" : "二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:一、项目概述采购编号:ELHTSZFCG-DD-2023-407774采购单位:二连浩特市人民医院所属区域:二连浩特市本级预算金额(元):15,440.00采购人及", 
@@ -237,7 +250,7 @@ if __name__ == '__main__':
     "purchasing_tag" : "台式计算机,计算机,摄像头,液晶显示器,DR,M9,液晶,4G", 
     "s_subscopeclass" : "信息技术_其他,行政办公_通用办公设备", 
     "s_topscopeclass" : "行政办公,信息技术", 
-    "s_winner" : "二连浩特市智慧真彩文体办公", 
+    # "s_winner" : "二连浩特市智慧真彩文体办公",
     "subscopeclass" : [
         "信息技术_其他", 
         "行政办公_通用办公设备"
@@ -247,48 +260,198 @@ if __name__ == '__main__':
         "信息技术d", 
         "信息技术t"
     ], 
-    "winner" : "二连浩特市智慧真彩文体办公",
+    # "winner" : "二连浩特市智慧真彩文体办公",
     "pici" : int(1698740066)
 }
-    result=check(row,rules={
-        "title": {
-            "0101": {
-                "name": "<=5个字",
-                "parent_name": "长度类型",
-                "parent_code": "01"
-            },
-            "0102": {
-                "name": ">=100个字",
-                "parent_name": "长度类型",
+    # result=check(row,rules={
+    #     "bidamount": {
+    #         "0101": {
+    #             "name": "互相校验(预算和中标金额的比例)",
+    #             "parent_name": "金额错误",
+    #             "parent_code": "01"
+    #         },
+    #         "0102": {
+    #             "name": "过大过小[100,10亿]",
+    #             "parent_name": "金额错误",
+    #             "parent_code": "01"
+    #         },
+    #         "0104": {
+    #             "name": "中标金额存在费率,折扣率",
+    #             "parent_name": "金额错误",
+    #             "parent_code": "01"
+    #         }
+    #     },
+    #     "budget" : {
+    #         "0101" : {
+    #             "name" : "互相校验(预算和中标金额的比例)",
+    #             "parent_name" : "金额错误",
+    #             "parent_code" : "01"
+    #         },
+    #         "0102" : {
+    #             "name" : "过大过小[100,10亿]",
+    #             "parent_name" : "金额错误",
+    #             "parent_code" : "01"
+    #         }
+    #     },
+    #     "title" : {
+    #         "0101" : {
+    #             "name" : "标题长度小于等于5",
+    #             "parent_name" : "长度类型",
+    #             "parent_code" : "01"
+    #         },
+    #         "0102" : {
+    #             "name" : "长度大于等于100",
+    #             "parent_name" : "长度类型",
+    #             "parent_code" : "01"
+    #         },
+    #         "0201" : {
+    #             "name" : "非汉字占比>55%",
+    #             "parent_name" : "汉字占比",
+    #             "parent_code" : "02"
+    #         },
+    #         "0303" : {
+    #             "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
+    #             "parent_name" : "语义表述不完整",
+    #             "parent_code" : "03"
+    #         }
+    #     },
+    #     "projectname" : {
+    #         "0101" : {
+    #             "name" : "项目名称长度小于等于5",
+    #             "parent_name" : "长度类型",
+    #             "parent_code" : "01"
+    #         },
+    #         "0102" : {
+    #             "name" : "长度大于等于100",
+    #             "parent_name" : "长度类型",
+    #             "parent_code" : "01"
+    #         },
+    #         "0201" : {
+    #             "name" : "非汉字占比>55%",
+    #             "parent_name" : "汉字占比",
+    #             "parent_code" : "02"
+    #         },
+    #         "0303" : {
+    #             "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
+    #             "parent_name" : "语义表述不完整",
+    #             "parent_code" : "03"
+    #         }
+    #     },
+    #     "winner" : {
+    #         "0103" : {
+    #             "name" : "包含叠词,异常词汇,特殊词汇",
+    #             "parent_name" : "名称错误",
+    #             "parent_code" : "01"
+    #         }
+    #     },
+    #     "projectcode" : {
+    #         "0102" : {
+    #             "name" : "长度大于2小于等于4",
+    #             "parent_name" : "长度类型",
+    #             "parent_code" : "02"
+    #         },
+    #         "0103" : {
+    #             "name" : "长度大于50",
+    #             "parent_name" : "长度类型",
+    #             "parent_code" : "03"
+    #         },
+    #         "0201" : {
+    #             "name" : "检查日期格式",
+    #             "parent_name" : "日期格式",
+    #             "parent_code" : "01"
+    #         },
+    #         "0202" : {
+    #             "name" : "包含异常关键字",
+    #             "parent_name" : "异常关键字",
+    #             "parent_code" : "02"
+    #         },
+    #         "0203" : {
+    #             "name" : "不包含数字字母",
+    #             "parent_name" : "不包含数字字母",
+    #             "parent_code" : "03"
+    #         },
+    #         "0301" : {
+    #             "name" : "汉字占比>60%且不包含中国电信",
+    #             "parent_name" : "汉字占比",
+    #             "parent_code" : "01"
+    #         },
+    #         "0302" : {
+    #             "name" : "连续汉字超过9个",
+    #             "parent_name" : "汉字占比",
+    #             "parent_code" : "03"
+    #         }
+    #     },
+    #     "buyer" : {
+    #         "0103" : {
+    #             "name" : "包含叠词,异常词汇,特殊词汇",
+    #             "parent_name" : "名称错误",
+    #             "parent_code" : "01"
+    #         },
+    #         "0104" : {
+    #             "name" : "名称不完整",
+    #             "parent_name" : "名称错误",
+    #             "parent_code" : "01"
+    #         }
+    #     },
+    #
+    #     "area" : {
+    #         "0101" : {
+    #             "name" : "全国类数据",
+    #             "parent_name" : "全国类型",
+    #             "parent_code" : "01"
+    #         },
+    #         "0301" : {
+    #             "name" : "省份不在[2,3]个字之间",
+    #             "parent_name" : "长度异常类型",
+    #             "parent_code" : "03"
+    #         },
+    #         "0302" : {
+    #             "name" : "城市不在[3,11]个字之间",
+    #             "parent_name" : "长度异常类型",
+    #             "parent_code" : "03"
+    #         },
+    #         "0303" : {
+    #             "name" : "区县不在[2,15]个字之间",
+    #             "parent_name" : "长度异常类型",
+    #             "parent_code" : "03"
+    #         }
+    #     },
+    #     "multipackage" : {
+    #         "1000" : {
+    #             "name" : "分包类数据",
+    #             "parent_name" : "分包类型",
+    #             "parent_code" : "01"
+    #         }
+    #     },
+    #     "toptype": {
+    #     },
+    #     "subtype": {
+    #     },
+    #     "publishtime": {
+    #         "0201": {
+    #             "name": "发布时间 > 开标时间  ",
+    #             "parent_name": "数据范围类型",
+    #             "parent_code": "02"
+    #         }
+    #     },
+    #     "bidopentime": {
+    #         "0201": {
+    #             "name": "发布时间 > 开标时间",
+    #             "parent_name": "数据范围类型",
+    #             "parent_code": "02"
+    #         }
+    #     }
+    # })
+    result = check(row, rules={
+        "buyer": {
+            "0103": {
+                "name": "包含叠词,异常词汇,特殊词汇",
+                "parent_name": "名称错误",
                 "parent_code": "01"
             },
-            "0201": {
-                "name": "非汉字占比>55%",
-                "parent_name": "汉字占比",
-                "parent_code": "02"
-            }
-        },
-        "projectname": {
-            "0101": {
-                "name": "<=5个字",
-                "parent_name": "长度类型",
-                "parent_code": "01"
-            },
-            "0102": {
-                "name": ">=100个字",
-                "parent_name": "长度类型",
-                "parent_code": "01"
-            },
-            "0201": {
-                "name": "非汉字占比>55%",
-                "parent_name": "汉字占比",
-                "parent_code": "02"
-            }
-        },
-        "purchasinglist": {
-            "score": {
-                "name": "标的物评估",
-                "parent_name": "标的物",
+            "0104": {
+                "name": "名称不完整",
+                "parent_name": "名称错误",
                 "parent_code": "01"
             }
         }

+ 0 - 1
docs/table_head_doc/abnormal_buyer_end.csv

@@ -12,7 +12,6 @@
 年度室
 分行银行
 人政府
-民政府
 人民矢院
 教学楼局
 笺理局

+ 30 - 2
tables/fields/NoField.py

@@ -22,6 +22,9 @@ class NoFieldChecker(object):
             "bidamount": self.check_bidamount,
             "area":self.check_region,
             "projectcode": self.check_projectcode,
+            "toptype":self.check_toptype,
+            "subtype":self.check_subtype,
+            "publishtime":self.check_publishtime,
             "multipackage":self.check_subpackage,
             "purchasinglist":self.check_purchasinglist
         }
@@ -72,7 +75,7 @@ class NoFieldChecker(object):
         :return:返回true 代表异常
         """
         subtype = obj.get("subtype", "")
-        if subtype not in ["中标", "成交", "合同", "验收"]:
+        if subtype  in ["招标", "邀标", "询价", "竞谈","单一","竞价","变更"]:
             budget = obj.get("budget")
             if not budget:
                 return True
@@ -135,4 +138,29 @@ class NoFieldChecker(object):
     def check_purchasinglist(self,obj, catch_content: CatchContentObject) -> bool:
         if not obj.get("purchasinglist"):
             return True
-        return False
+        return False
+
+    def check_toptype(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Detect a missing first-level announcement category (``toptype``).
+
+        :param obj: a single item (record) to inspect
+        :param catch_content: not used by this check
+        :return: True (abnormal) when ``toptype`` is absent or falsy,
+                 False otherwise
+        """
+        toptype = obj.get("toptype")
+        return not toptype
+
+    def check_subtype(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Detect a missing second-level announcement category (``subtype``).
+
+        :param obj: a single item (record) to inspect
+        :param catch_content: not used by this check
+        :return: True (abnormal) when ``subtype`` is absent or falsy,
+                 False otherwise
+        """
+        subtype = obj.get("subtype")
+        return not subtype
+
+    def check_publishtime(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Detect a missing publish time (``publishtime``).
+
+        :param obj: a single item (record) to inspect
+        :param catch_content: not used by this check
+        :return: True (abnormal) when ``publishtime`` is absent or falsy
+                 (note that 0 counts as falsy), False otherwise
+        """
+        publishtime = obj.get("publishtime")
+        return not publishtime

+ 1 - 1
tables/fields/bidamount.py

@@ -44,7 +44,7 @@ class BidAmountChecker(object):
             else:
                 return True
         else:
-            # 两者中有一方为空不判断
+            # 两者中有一方为空不判断,---0,None,空字符,空字符串都是空
             return False
 
     @staticmethod

+ 34 - 0
tables/fields/bidopentime.py

@@ -0,0 +1,34 @@
+"""
+    Bid-opening time (bidopentime) field checks.
+"""
+
+
+class BidopentimeChecker(object):
+    """
+    Checks for the bid-opening time (``bidopentime``) field.
+
+    ``errors_tables`` maps each error code to its descriptive metadata and
+    to the bound method (``checkFn``) that performs the check.
+    """
+
+    # Announcement subtypes for which check0201 compares the two timestamps;
+    # records with any other subtype are never flagged by that check.
+    _CHECKED_SUBTYPES = ("招标", "邀标", "询价", "竞谈", "单一", "竞价", "变更")
+
+    def __init__(self):
+        rule_0201 = {
+            "name": "发布时间 > 开标时间",
+            "parent_name": "数据范围类型",
+            "parent_code": "02",
+            "checkFn": self.check0201,
+        }
+        self.errors_tables = {"0201": rule_0201}
+
+    def check0201(self, subtype: str, bidopentime: int, publishtime: int) -> bool:
+        """
+        Flag records whose bid-opening time is earlier than the publish time.
+
+        :param subtype: second-level announcement category
+        :param bidopentime: bid-opening timestamp
+        :param publishtime: publish timestamp
+        :return: True (abnormal) when ``subtype`` is one of the checked
+                 subtypes, both timestamps are truthy and
+                 ``bidopentime < publishtime``; False in every other case
+        """
+        if subtype not in self._CHECKED_SUBTYPES:
+            return False
+        if not bidopentime or not publishtime:
+            # One of the two timestamps is empty: nothing to compare.
+            return False
+        return bidopentime < publishtime

+ 1 - 1
tables/fields/budget.py

@@ -5,7 +5,7 @@
 
 class BudgetChecker(object):
     """
-        中标字段检查
+        预算字段检查
     """
 
     def __init__(self):

+ 24 - 0
tables/fields/fieldtype.py

@@ -15,6 +15,10 @@ class FieldTypeChecker(object):
             "bidamount": self.check_bidamount_type,
             "area": self.check_region_type,
             "projectcode": self.check_projectcode_type,
+            "toptype":self.check_toptype_type,
+            "subtype":self.check_subtype_type,
+            "publishtime":self.check_publishtime_type,
+            "bidopentime":self.check_bidopentime_type,
             "multipackage": self.check_multipackage_type,
             "purchasinglist": self.check_purchasinglist_type,
         }
@@ -110,3 +114,23 @@ class FieldTypeChecker(object):
         if not isinstance(value, list):
             return True
         return value
+
+    def check_publishtime_type(self, value):
+        """
+        Type check for the ``publishtime`` value.
+
+        :param value: the field value to inspect
+        :return: False when ``value`` is an int or float (type accepted),
+                 True (abnormal) for any other type
+        """
+        return not isinstance(value, (int, float))
+
+    def check_bidopentime_type(self, value):
+        """
+        Type check for the ``bidopentime`` value.
+
+        :param value: the field value to inspect
+        :return: False when ``value`` is an int or float (type accepted),
+                 True (abnormal) for any other type
+        """
+        return not isinstance(value, (int, float))
+
+    def check_toptype_type(self, value):
+        """
+        Type check for the ``toptype`` value.
+
+        :param value: the field value to inspect
+        :return: False when ``value`` is a str (type accepted),
+                 True (abnormal) for any other type
+        """
+        return not isinstance(value, str)
+
+    def check_subtype_type(self, value):
+        """
+        Type check for the ``subtype`` value.
+
+        :param value: the field value to inspect
+        :return: False when ``value`` is a str (type accepted),
+                 True (abnormal) for any other type
+        """
+        return not isinstance(value, str)

+ 34 - 0
tables/fields/publishtime.py

@@ -0,0 +1,34 @@
+"""
+    Publish time (publishtime) field checks.
+"""
+
+
+class PublishtimeChecker(object):
+    """
+    Checks for the publish time (``publishtime``) field.
+
+    ``errors_tables`` maps each error code to its descriptive metadata and
+    to the bound method (``checkFn``) that performs the check.
+    """
+
+    # Announcement subtypes for which check0201 compares the two timestamps;
+    # records with any other subtype are never flagged by that check.
+    _CHECKED_SUBTYPES = ("招标", "邀标", "询价", "竞谈", "单一", "竞价", "变更")
+
+    def __init__(self):
+        rule_0201 = {
+            "name": "发布时间 > 开标时间",
+            "parent_name": "数据范围类型",
+            "parent_code": "02",
+            "checkFn": self.check0201,
+        }
+        self.errors_tables = {"0201": rule_0201}
+
+    def check0201(self, subtype: str, bidopentime: int, publishtime: int) -> bool:
+        """
+        Flag records whose publish time is later than the bid-opening time.
+
+        :param subtype: second-level announcement category
+        :param bidopentime: bid-opening timestamp
+        :param publishtime: publish timestamp
+        :return: True (abnormal) when ``subtype`` is one of the checked
+                 subtypes, both timestamps are truthy and
+                 ``bidopentime < publishtime``; False in every other case
+        """
+        if subtype not in self._CHECKED_SUBTYPES:
+            return False
+        if not bidopentime or not publishtime:
+            # One of the two timestamps is empty: nothing to compare.
+            return False
+        return bidopentime < publishtime