|
@@ -16,6 +16,8 @@ from tables.fields.subpackage import SubpackageChecker
|
|
from tables import CatchContentObject
|
|
from tables import CatchContentObject
|
|
from tables.fields.fieldtype import FieldTypeChecker
|
|
from tables.fields.fieldtype import FieldTypeChecker
|
|
from tables.fields.purchasing import PurchasingChecker
|
|
from tables.fields.purchasing import PurchasingChecker
|
|
|
|
+from tables.fields.bidopentime import BidopentimeChecker
|
|
|
|
+from tables.fields.publishtime import PublishtimeChecker
|
|
|
|
|
|
area_checker = AreaChecker()
|
|
area_checker = AreaChecker()
|
|
winner_checker = WinnerChecker()
|
|
winner_checker = WinnerChecker()
|
|
@@ -26,10 +28,13 @@ title_checker = TitleChecker()
|
|
nofield_checker = NoFieldChecker()
|
|
nofield_checker = NoFieldChecker()
|
|
projectname_checker = ProjectnameChecker()
|
|
projectname_checker = ProjectnameChecker()
|
|
projectcode_checker = ProjectcodeChecker()
|
|
projectcode_checker = ProjectcodeChecker()
|
|
|
|
+publishtime_checker = PublishtimeChecker()
|
|
|
|
+bidopentime_checker = BidopentimeChecker()
|
|
multipackage_checker = SubpackageChecker()
|
|
multipackage_checker = SubpackageChecker()
|
|
fieldtype_checker = FieldTypeChecker()
|
|
fieldtype_checker = FieldTypeChecker()
|
|
purchasing_checker = PurchasingChecker()
|
|
purchasing_checker = PurchasingChecker()
|
|
|
|
|
|
|
|
+
|
|
# 定义检查链
|
|
# 定义检查链
|
|
check_chain = {
|
|
check_chain = {
|
|
"title": {
|
|
"title": {
|
|
@@ -56,6 +61,12 @@ check_chain = {
|
|
"area": {
|
|
"area": {
|
|
"checker": area_checker,
|
|
"checker": area_checker,
|
|
},
|
|
},
|
|
|
|
+ "bidopentime":{
|
|
|
|
+ "checker":bidopentime_checker,
|
|
|
|
+ },
|
|
|
|
+ "publishtime":{
|
|
|
|
+ "checker":publishtime_checker,
|
|
|
|
+ },
|
|
"multipackage": {
|
|
"multipackage": {
|
|
"checker": multipackage_checker,
|
|
"checker": multipackage_checker,
|
|
},
|
|
},
|
|
@@ -69,7 +80,7 @@ def check_params(func, param_rows: dict, is_check_type=False) -> (bool, list):
|
|
"""
|
|
"""
|
|
函数传参数
|
|
函数传参数
|
|
"""
|
|
"""
|
|
- _default_params = {"attach_text": {}, "bidamount": 0, "budget": 0,"supervisorrate":0,"city":"郑州市","district":"金水区"} # 必要参数的默认值
|
|
|
|
|
|
+ _default_params = {"attach_text": {}, "bidamount": 0, "budget": 0,"bidopentime":0,"publishtime":0,"subtype":"","supervisorrate":0,"city":"郑州市","district":"金水区"} # 必要参数的默认值
|
|
func_params = inspect.getfullargspec(func) # 取出函数的参数
|
|
func_params = inspect.getfullargspec(func) # 取出函数的参数
|
|
args_annotations = func_params.annotations # 参数类型dict,取出参数类型
|
|
args_annotations = func_params.annotations # 参数类型dict,取出参数类型
|
|
args_list = func_params.args # 参数list
|
|
args_list = func_params.args # 参数list
|
|
@@ -90,7 +101,7 @@ def check_params(func, param_rows: dict, is_check_type=False) -> (bool, list):
|
|
return False, params
|
|
return False, params
|
|
return True, params # 满足参数需求
|
|
return True, params # 满足参数需求
|
|
|
|
|
|
-#根据检查结果打分
|
|
|
|
|
|
+#根据检查结果打分
|
|
def bid_score(error_dist,obj):
|
|
def bid_score(error_dist,obj):
|
|
score=100
|
|
score=100
|
|
site = obj.get("site")
|
|
site = obj.get("site")
|
|
@@ -127,9 +138,8 @@ def check(obj: any, rules) -> any:
|
|
field_qa = {}
|
|
field_qa = {}
|
|
for field in rules: # 获取检查字段
|
|
for field in rules: # 获取检查字段
|
|
qa = {}
|
|
qa = {}
|
|
-
|
|
|
|
# 字段检查
|
|
# 字段检查
|
|
- if field not in obj:
|
|
|
|
|
|
+ if field not in obj and field in nofield_checker.errors_tables:
|
|
func = nofield_checker.errors_tables[field]
|
|
func = nofield_checker.errors_tables[field]
|
|
# 开始执行函数
|
|
# 开始执行函数
|
|
if func(obj, catch_content):
|
|
if func(obj, catch_content):
|
|
@@ -144,6 +154,8 @@ def check(obj: any, rules) -> any:
|
|
qa["0001"] = f"{field}:类型不正确、空值"
|
|
qa["0001"] = f"{field}:类型不正确、空值"
|
|
field_qa["%s_qa" % field] = qa
|
|
field_qa["%s_qa" % field] = qa
|
|
continue
|
|
continue
|
|
|
|
+ if field not in check_chain:
|
|
|
|
+ continue
|
|
checker = check_chain[field]["checker"] # 获取检测器
|
|
checker = check_chain[field]["checker"] # 获取检测器
|
|
for err,err_detail in rules[field].items(): # 获取检测的规则
|
|
for err,err_detail in rules[field].items(): # 获取检测的规则
|
|
# 错误类型检查
|
|
# 错误类型检查
|
|
@@ -175,7 +187,7 @@ if __name__ == '__main__':
|
|
"comeintime" : int(1698739748),
|
|
"comeintime" : int(1698739748),
|
|
"area" : "内蒙古",
|
|
"area" : "内蒙古",
|
|
"purchasingsource" : "[{\"start\": 13, \"verify\": \"确定\", \"direction\": \"h\", \"type\": \"识别\", \"header\": {\"itemname\": \"产品名称\", \"model\": \"技术规格\", \"number\": \"数量\", \"unitprice\": \"单价(元)\", \"totalprice\": \"金额(元)\"}, \"file_name\": \"公告\"}]",
|
|
"purchasingsource" : "[{\"start\": 13, \"verify\": \"确定\", \"direction\": \"h\", \"type\": \"识别\", \"header\": {\"itemname\": \"产品名称\", \"model\": \"技术规格\", \"number\": \"数量\", \"unitprice\": \"单价(元)\", \"totalprice\": \"金额(元)\"}, \"file_name\": \"公告\"}]",
|
|
- "toptype" : "结果",
|
|
|
|
|
|
+ # "toptype" : "结果",
|
|
"spidercode" : "nm_nmgzzqzfcgw_dzmc_zgcgcjgs",
|
|
"spidercode" : "nm_nmgzzqzfcgw_dzmc_zgcgcjgs",
|
|
"extracttype" : int(1),
|
|
"extracttype" : int(1),
|
|
"s_sha" : "9269588cb73151d8ec09cf16347539678549f81395559589a731ca1394828582",
|
|
"s_sha" : "9269588cb73151d8ec09cf16347539678549f81395559589a731ca1394828582",
|
|
@@ -183,9 +195,10 @@ if __name__ == '__main__':
|
|
"purchasing" : "台式计算机",
|
|
"purchasing" : "台式计算机",
|
|
"site" : "湖南省政府采购电子卖场",
|
|
"site" : "湖南省政府采购电子卖场",
|
|
"title" : "二连浩特",
|
|
"title" : "二连浩特",
|
|
- "dataging" : int(0),
|
|
|
|
- "publishtime" : int(1698739410),
|
|
|
|
- "subtype" : "成交",
|
|
|
|
|
|
+ "dataging" : int(0),
|
|
|
|
+ "bidopentime":int(1798739414),
|
|
|
|
+ "publishtime" : int(1698739410),
|
|
|
|
+ "subtype" : "招标",
|
|
"purchasinglist" : [
|
|
"purchasinglist" : [
|
|
{
|
|
{
|
|
"score" : 0.8275,
|
|
"score" : 0.8275,
|
|
@@ -218,11 +231,11 @@ if __name__ == '__main__':
|
|
"basicClass" : "货物",
|
|
"basicClass" : "货物",
|
|
"rate" : "97%",
|
|
"rate" : "97%",
|
|
"autoid" : int(249351001),
|
|
"autoid" : int(249351001),
|
|
- "bidamount" : 15440.0,
|
|
|
|
|
|
+ "bidamount" : None,
|
|
"bidway" : "电子投标",
|
|
"bidway" : "电子投标",
|
|
- "budget" : 15440.0,
|
|
|
|
|
|
+ "budget" : None,
|
|
"supervisorrate": 0.03,
|
|
"supervisorrate": 0.03,
|
|
- "buyer" : "二连浩特市人民医院",
|
|
|
|
|
|
+ "buyer" : "哈密市伊州区花园乡人民政府",
|
|
"buyerclass" : "医疗",
|
|
"buyerclass" : "医疗",
|
|
"dataprocess" : int(8),
|
|
"dataprocess" : int(8),
|
|
"description" : "二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:一、项目概述采购编号:ELHTSZFCG-DD-2023-407774采购单位:二连浩特市人民医院所属区域:二连浩特市本级预算金额(元):15,440.00采购人及",
|
|
"description" : "二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:一、项目概述采购编号:ELHTSZFCG-DD-2023-407774采购单位:二连浩特市人民医院所属区域:二连浩特市本级预算金额(元):15,440.00采购人及",
|
|
@@ -237,7 +250,7 @@ if __name__ == '__main__':
|
|
"purchasing_tag" : "台式计算机,计算机,摄像头,液晶显示器,DR,M9,液晶,4G",
|
|
"purchasing_tag" : "台式计算机,计算机,摄像头,液晶显示器,DR,M9,液晶,4G",
|
|
"s_subscopeclass" : "信息技术_其他,行政办公_通用办公设备",
|
|
"s_subscopeclass" : "信息技术_其他,行政办公_通用办公设备",
|
|
"s_topscopeclass" : "行政办公,信息技术",
|
|
"s_topscopeclass" : "行政办公,信息技术",
|
|
- "s_winner" : "二连浩特市智慧真彩文体办公",
|
|
|
|
|
|
+ # "s_winner" : "二连浩特市智慧真彩文体办公",
|
|
"subscopeclass" : [
|
|
"subscopeclass" : [
|
|
"信息技术_其他",
|
|
"信息技术_其他",
|
|
"行政办公_通用办公设备"
|
|
"行政办公_通用办公设备"
|
|
@@ -247,48 +260,198 @@ if __name__ == '__main__':
|
|
"信息技术d",
|
|
"信息技术d",
|
|
"信息技术t"
|
|
"信息技术t"
|
|
],
|
|
],
|
|
- "winner" : "二连浩特市智慧真彩文体办公",
|
|
|
|
|
|
+ # "winner" : "二连浩特市智慧真彩文体办公",
|
|
"pici" : int(1698740066)
|
|
"pici" : int(1698740066)
|
|
}
|
|
}
|
|
- result=check(row,rules={
|
|
|
|
- "title": {
|
|
|
|
- "0101": {
|
|
|
|
- "name": "<=5个字",
|
|
|
|
- "parent_name": "长度类型",
|
|
|
|
- "parent_code": "01"
|
|
|
|
- },
|
|
|
|
- "0102": {
|
|
|
|
- "name": ">=100个字",
|
|
|
|
- "parent_name": "长度类型",
|
|
|
|
|
|
+ # result=check(row,rules={
|
|
|
|
+ # "bidamount": {
|
|
|
|
+ # "0101": {
|
|
|
|
+ # "name": "互相校验(预算和中标金额的比例)",
|
|
|
|
+ # "parent_name": "金额错误",
|
|
|
|
+ # "parent_code": "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0102": {
|
|
|
|
+ # "name": "过大过小[100,10亿]",
|
|
|
|
+ # "parent_name": "金额错误",
|
|
|
|
+ # "parent_code": "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0104": {
|
|
|
|
+ # "name": "中标金额存在费率,折扣率",
|
|
|
|
+ # "parent_name": "金额错误",
|
|
|
|
+ # "parent_code": "01"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "budget" : {
|
|
|
|
+ # "0101" : {
|
|
|
|
+ # "name" : "互相校验(预算和中标金额的比例)",
|
|
|
|
+ # "parent_name" : "金额错误",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0102" : {
|
|
|
|
+ # "name" : "过大过小[100,10亿]",
|
|
|
|
+ # "parent_name" : "金额错误",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "title" : {
|
|
|
|
+ # "0101" : {
|
|
|
|
+ # "name" : "标题长度小于等于5",
|
|
|
|
+ # "parent_name" : "长度类型",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0102" : {
|
|
|
|
+ # "name" : "长度大于等于100",
|
|
|
|
+ # "parent_name" : "长度类型",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0201" : {
|
|
|
|
+ # "name" : "非汉字占比>55%",
|
|
|
|
+ # "parent_name" : "汉字占比",
|
|
|
|
+ # "parent_code" : "02"
|
|
|
|
+ # },
|
|
|
|
+ # "0303" : {
|
|
|
|
+ # "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
|
|
|
|
+ # "parent_name" : "语义表述不完整",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "projectname" : {
|
|
|
|
+ # "0101" : {
|
|
|
|
+ # "name" : "项目名称长度小于等于5",
|
|
|
|
+ # "parent_name" : "长度类型",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0102" : {
|
|
|
|
+ # "name" : "长度大于等于100",
|
|
|
|
+ # "parent_name" : "长度类型",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0201" : {
|
|
|
|
+ # "name" : "非汉字占比>55%",
|
|
|
|
+ # "parent_name" : "汉字占比",
|
|
|
|
+ # "parent_code" : "02"
|
|
|
|
+ # },
|
|
|
|
+ # "0303" : {
|
|
|
|
+ # "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
|
|
|
|
+ # "parent_name" : "语义表述不完整",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "winner" : {
|
|
|
|
+ # "0103" : {
|
|
|
|
+ # "name" : "包含叠词,异常词汇,特殊词汇",
|
|
|
|
+ # "parent_name" : "名称错误",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "projectcode" : {
|
|
|
|
+ # "0102" : {
|
|
|
|
+ # "name" : "长度大于2小于等于4",
|
|
|
|
+ # "parent_name" : "长度类型",
|
|
|
|
+ # "parent_code" : "02"
|
|
|
|
+ # },
|
|
|
|
+ # "0103" : {
|
|
|
|
+ # "name" : "长度大于50",
|
|
|
|
+ # "parent_name" : "长度类型",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # },
|
|
|
|
+ # "0201" : {
|
|
|
|
+ # "name" : "检查日期格式",
|
|
|
|
+ # "parent_name" : "日期格式",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0202" : {
|
|
|
|
+ # "name" : "包含异常关键字",
|
|
|
|
+ # "parent_name" : "异常关键字",
|
|
|
|
+ # "parent_code" : "02"
|
|
|
|
+ # },
|
|
|
|
+ # "0203" : {
|
|
|
|
+ # "name" : "不包含数字字母",
|
|
|
|
+ # "parent_name" : "不包含数字字母",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # },
|
|
|
|
+ # "0301" : {
|
|
|
|
+ # "name" : "汉字占比>60%且不包含中国电信",
|
|
|
|
+ # "parent_name" : "汉字占比",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0302" : {
|
|
|
|
+ # "name" : "连续汉字超过9个",
|
|
|
|
+ # "parent_name" : "汉字占比",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "buyer" : {
|
|
|
|
+ # "0103" : {
|
|
|
|
+ # "name" : "包含叠词,异常词汇,特殊词汇",
|
|
|
|
+ # "parent_name" : "名称错误",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0104" : {
|
|
|
|
+ # "name" : "名称不完整",
|
|
|
|
+ # "parent_name" : "名称错误",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ #
|
|
|
|
+ # "area" : {
|
|
|
|
+ # "0101" : {
|
|
|
|
+ # "name" : "全国类数据",
|
|
|
|
+ # "parent_name" : "全国类型",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # },
|
|
|
|
+ # "0301" : {
|
|
|
|
+ # "name" : "省份不在[2,3]个字之间",
|
|
|
|
+ # "parent_name" : "长度异常类型",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # },
|
|
|
|
+ # "0302" : {
|
|
|
|
+ # "name" : "城市不在[3,11]个字之间",
|
|
|
|
+ # "parent_name" : "长度异常类型",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # },
|
|
|
|
+ # "0303" : {
|
|
|
|
+ # "name" : "区县不在[2,15]个字之间",
|
|
|
|
+ # "parent_name" : "长度异常类型",
|
|
|
|
+ # "parent_code" : "03"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "multipackage" : {
|
|
|
|
+ # "1000" : {
|
|
|
|
+ # "name" : "分包类数据",
|
|
|
|
+ # "parent_name" : "分包类型",
|
|
|
|
+ # "parent_code" : "01"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "toptype": {
|
|
|
|
+ # },
|
|
|
|
+ # "subtype": {
|
|
|
|
+ # },
|
|
|
|
+ # "publishtime": {
|
|
|
|
+ # "0201": {
|
|
|
|
+ # "name": "发布时间 > 开标时间 ",
|
|
|
|
+ # "parent_name": "数据范围类型",
|
|
|
|
+ # "parent_code": "02"
|
|
|
|
+ # }
|
|
|
|
+ # },
|
|
|
|
+ # "bidopentime": {
|
|
|
|
+ # "0201": {
|
|
|
|
+ # "name": "发布时间 > 开标时间",
|
|
|
|
+ # "parent_name": "数据范围类型",
|
|
|
|
+ # "parent_code": "02"
|
|
|
|
+ # }
|
|
|
|
+ # }
|
|
|
|
+ # })
|
|
|
|
+ result = check(row, rules={
|
|
|
|
+ "buyer": {
|
|
|
|
+ "0103": {
|
|
|
|
+ "name": "包含叠词,异常词汇,特殊词汇",
|
|
|
|
+ "parent_name": "名称错误",
|
|
"parent_code": "01"
|
|
"parent_code": "01"
|
|
},
|
|
},
|
|
- "0201": {
|
|
|
|
- "name": "非汉字占比>55%",
|
|
|
|
- "parent_name": "汉字占比",
|
|
|
|
- "parent_code": "02"
|
|
|
|
- }
|
|
|
|
- },
|
|
|
|
- "projectname": {
|
|
|
|
- "0101": {
|
|
|
|
- "name": "<=5个字",
|
|
|
|
- "parent_name": "长度类型",
|
|
|
|
- "parent_code": "01"
|
|
|
|
- },
|
|
|
|
- "0102": {
|
|
|
|
- "name": ">=100个字",
|
|
|
|
- "parent_name": "长度类型",
|
|
|
|
- "parent_code": "01"
|
|
|
|
- },
|
|
|
|
- "0201": {
|
|
|
|
- "name": "非汉字占比>55%",
|
|
|
|
- "parent_name": "汉字占比",
|
|
|
|
- "parent_code": "02"
|
|
|
|
- }
|
|
|
|
- },
|
|
|
|
- "purchasinglist": {
|
|
|
|
- "score": {
|
|
|
|
- "name": "标的物评估",
|
|
|
|
- "parent_name": "标的物",
|
|
|
|
|
|
+ "0104": {
|
|
|
|
+ "name": "名称不完整",
|
|
|
|
+ "parent_name": "名称错误",
|
|
"parent_code": "01"
|
|
"parent_code": "01"
|
|
}
|
|
}
|
|
}
|
|
}
|