# coding:utf-8
"""Row-level data-quality checks and scoring for bid/tender records.

``check`` runs the configured rule set against one record and returns the
detected problems per field together with an overall score computed by
``bid_score``.
"""
import inspect
import csv
from pymongo import MongoClient
from docs.config import abnormal_config
from tables.fields.winner import WinnerChecker
from tables.fields.buyer import BuyerChecker
from tables.fields.bidamount import BidAmountChecker
from tables.fields.budget import BudgetChecker
from tables.fields.NoField import NoFieldChecker
from tables.fields.title import TitleChecker
from tables.fields.area import AreaChecker
from tables.fields.projectcode import ProjectcodeChecker
from tables.fields.projectname import ProjectnameChecker
from tables.fields.subpackage import SubpackageChecker
from tables import CatchContentObject
from tables.fields.fieldtype import FieldTypeChecker
from tables.fields.purchasing import PurchasingChecker
from tables.fields.bidopentime import BidopentimeChecker
from tables.fields.publishtime import PublishtimeChecker

area_checker = AreaChecker()
winner_checker = WinnerChecker()
buyer_checker = BuyerChecker()
amount_checker = BidAmountChecker()
budget_checker = BudgetChecker()
title_checker = TitleChecker()
nofield_checker = NoFieldChecker()
projectname_checker = ProjectnameChecker()
projectcode_checker = ProjectcodeChecker()
publishtime_checker = PublishtimeChecker()
bidopentime_checker = BidopentimeChecker()
multipackage_checker = SubpackageChecker()
fieldtype_checker = FieldTypeChecker()
purchasing_checker = PurchasingChecker()

# Check chain: maps a record field name to the checker that validates it.
check_chain = {
    "title": {
        "checker": title_checker,
    },
    "projectname": {
        "checker": projectname_checker,
    },
    "s_winner": {
        "checker": winner_checker,
    },
    "buyer": {
        "checker": buyer_checker,
    },
    "projectcode": {
        "checker": projectcode_checker,
    },
    "bidamount": {
        "checker": amount_checker,
    },
    "budget": {
        "checker": budget_checker,
    },
    "area": {
        "checker": area_checker,
    },
    "bidopentime": {
        "checker": bidopentime_checker,
    },
    "publishtime": {
        "checker": publishtime_checker,
    },
    "com_package": {
        "checker": multipackage_checker,
    },
    "purchasinglist": {
        "checker": purchasing_checker,
    },
}


# Multi-parameter handling
def check_params(func, param_rows: dict, is_check_type=False) -> tuple:
    """Build the keyword arguments needed to call ``func`` from a record.

    Each positional parameter of ``func`` (``self`` excluded) is looked up in
    ``param_rows``; parameters missing from the record fall back to a built-in
    default when one exists.

    :param func: the check function that is about to be called.
    :param param_rows: the record (field name -> value) supplying arguments.
    :param is_check_type: when true, a supplied value must be an instance of
        the parameter's annotation; parameters without a usable class
        annotation are not type-checked.
    :return: ``(True, params)`` when every parameter could be filled,
        ``(False, params)`` with the partial dict when some are missing, and
        ``(False, [])`` when a value fails the type check.
    """
    # Defaults for required parameters that a record may not carry.
    _default_params = {
        "attach_text": {},
        "bidamount": 0,
        "budget": 0,
        "s_winner": "",
        "bidopentime": 0,
        "publishtime": 0,
        "subtype": "",
        "supervisorrate": 0,
        "city": "郑州市",
        "district": "金水区",
        "com_package": [],
    }
    func_params = inspect.getfullargspec(func)  # signature of the function
    args_annotations = func_params.annotations  # parameter name -> annotation
    args_list = [arg for arg in func_params.args if arg != "self"]
    params = {}  # collected keyword arguments
    for arg in args_list:
        if arg in param_rows:  # the record supplies this parameter
            if is_check_type:
                expected = args_annotations.get(arg)
                # Only real classes can be fed to isinstance(); an absent,
                # string or typing-generic annotation is skipped instead of
                # raising KeyError/TypeError.
                if isinstance(expected, type) and not isinstance(param_rows[arg], expected):
                    return False, []  # value does not satisfy the annotation
            params[arg] = param_rows[arg]
        elif arg in _default_params:  # fall back to the default value
            params[arg] = _default_params[arg]
    if len(params) != len(args_list):  # some parameter could not be filled
        return False, params
    return True, params  # every parameter is available


# Score a record from its check results
def bid_score(error_dist, obj):
    """Turn per-field check results into a score starting from 100.

    Every field with a non-empty result counts as one error, except
    ``purchasinglist_qa`` which is ignored. When the record's
    ``(site, toptype, subtype)`` triple appears in the whitelist file
    configured at ``abnormal_config["table_field_config"]["path7"]`` each
    error costs 1 point, otherwise each error costs 10 points.

    :param error_dist: mapping of ``"<field>_qa"`` to that field's findings.
    :param obj: the record; ``site``, ``toptype`` and ``subtype`` are read.
    :return: the resulting score (also printed to stdout).
    """
    score = 100
    site = obj.get("site")
    toptype = obj.get("toptype")
    subtype = obj.get("subtype")
    num = sum(
        1
        for key, value in error_dist.items()
        if key != "purchasinglist_qa" and value
    )
    trusted = False
    # NOTE(review): the file is opened with the platform default encoding, as
    # before; pass an explicit encoding once the file's encoding is confirmed.
    with open(abnormal_config["table_field_config"]["path7"], "r", newline="") as f:
        for w in csv.reader(f):
            if not w:  # blank line in the whitelist file
                continue
            # Each line holds tab-separated site / toptype / subtype.
            result = w[0].split("\t")
            if len(result) < 3:  # malformed line, cannot match
                continue
            if result[0] == site and result[1] == toptype and result[2] == subtype:
                # Trusted source. Stop at the first hit so that duplicate
                # whitelist lines cannot deduct the penalty more than once.
                trusted = True
                break
    if trusted:
        score -= num
    else:
        score -= num * 10
    print(score)
    return score


# Check a record and score it
def check(obj: dict, rules) -> dict:
    """Run the quality checks described by ``rules`` on a single record.

    For every field named in ``rules`` the following happens in order:

    1. If the field is absent and a missing-field check exists for it, that
       check runs; a truthy result records error ``"0000"`` and ends the
       processing of the field.
    2. If the field is present and a type check exists for it, that check
       runs; a result of ``True`` records error ``"0001"`` and ends the
       processing of the field.
    3. Otherwise each rule code of the field is run through the field's
       checker from ``check_chain``.

    ``obj`` is modified: a fresh ``CatchContentObject`` is stored under
    ``"catch_content"``.

    :param obj: the record to check.
    :param rules: mapping of field name -> {error code -> rule details}.
    :return: mapping of ``"<field>_qa"`` to ``{error code: message or float}``
        plus a ``"score"`` entry produced by ``bid_score``.
    """
    catch_content = CatchContentObject()
    obj["catch_content"] = catch_content
    field_qa = {}
    # NOTE(review): the nesting of the two early ``continue`` branches below
    # mirrors the inner-loop pattern (record, store, continue inside the
    # ``if``); confirm against the intended behaviour.
    for field in rules:
        qa = {}
        qa_key = "%s_qa" % field
        # Missing-field check.
        if field not in obj and field in nofield_checker.errors_tables:
            func = nofield_checker.errors_tables[field]
            if func(obj, catch_content):
                qa["0000"] = f"{field}:不存在!!"
                field_qa[qa_key] = qa
                continue
        # Type check for fields that are present.
        if field in obj and field in fieldtype_checker.errors_tables:
            func_type = fieldtype_checker.errors_tables[field]
            value = func_type(obj.get(field))
            if value is True:
                qa["0001"] = f"{field}:类型不正确、空值"
                field_qa[qa_key] = qa
                continue
        if field not in check_chain:
            continue
        checker = check_chain[field]["checker"]  # checker for this field
        for err, err_detail in rules[field].items():
            # The rule code must be known to the checker.
            if err not in checker.errors_tables:
                qa[err] = f"{field}:服务端未定义错误类型"
                field_qa[qa_key] = qa
                continue
            func = checker.errors_tables[err]["checkFn"]  # check function
            status, params = check_params(func, obj)  # resolve its arguments
            if not status:
                # A parameter (field) the check needs is unavailable.
                qa[err] = f"{field}:必须参数(字段)缺失"
                continue
            result = func(**params)
            if isinstance(result, float):
                # A float is stored as-is: it is a score, not an error flag.
                qa[err] = result
            elif result:
                qa[err] = err_detail.get("name", "")
        field_qa[qa_key] = qa
    score = bid_score(field_qa, obj)
    field_qa["score"] = score
    return field_qa


if __name__ == '__main__':
    # Sample record for a manual run. Commented-out fields can be enabled to
    # exercise the corresponding checks.
    # NOTE(review): line breaks inside "detail" and "contenthtml" are kept as
    # they appear in the sample data.
    row = {
        "comeintime": int(1698739748),
        "area": "内蒙古",
        "purchasingsource": "[{\"start\": 13, \"verify\": \"确定\", \"direction\": \"h\", \"type\": \"识别\", \"header\": {\"itemname\": \"产品名称\", \"model\": \"技术规格\", \"number\": \"数量\", \"unitprice\": \"单价(元)\", \"totalprice\": \"金额(元)\"}, \"file_name\": \"公告\"}]",
        "toptype": "结果",
        "spidercode": "nm_nmgzzqzfcgw_dzmc_zgcgcjgs",
        "extracttype": int(1),
        "s_sha": "9269588cb73151d8ec09cf16347539678549f81395559589a731ca1394828582",
        "detail": """.二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:
一、项目概述
采购编号:ELHTSZFCG-DD-2023-407774
采购单位:二连浩特市人民医院
所属区域:二连浩特市本级
预算金额(元):15,440.00
采购人及联系方式:白瑜/7535324
采购计划备案书/核准书编号:二财购备字(电子)[2023]00864号
采购方式:电子卖场(协议采购)
二、采购结果
成交供应商:二连浩特市智慧真彩文体办公
成交时间:2023-10-31 15:24
成交金额:15440.00,大写(人民币):壹万伍仟肆佰肆拾元整。
产品名称 技术规格 备注 数量 单价(元) 金额(元)
台式计算机 联想/LENOVO,
联想/LENOVOThinkCentre M930Z-GEN2 台式计算机 I5-10500/8G/512G/DVDRW/2G独立/WIFI/摄像头/W10-HOME / 23寸液晶屏,
M930Z,
数量:2;
2 ¥5450.0000
¥10900.00
台式计算机 联想/LENOVO,
联想 启天M420 台式计算机 商用办公计算机 处理器I3-9100 / 4G / 1T / 集显 / 21.5寸液晶显示器,
M420,
数量:1;
1 ¥4540.0000
¥4540.00
合计 ¥15440.00 大写(人民币): 壹万伍仟肆佰肆拾元整

采购单位:二连浩特市人民医院
2023年10月31日""",
        "purchasing": "台式计算机",
        "site": "湖南省政府采购电子卖场",
        "title": "黄冈罗田碳和瑞新能源科技有限公司罗田分公司租用罗田县三里畈镇尹家垸村三组尹小丹农户屋顶新建35.75分布式光伏发电项目",
        "dataging": int(0),
        # "bidopentime":int(1798739414),
        # "publishtime" : int(1751937052),
        "subtype": "成交",
        "purchasinglist": [
            {
                "score": 0.8275,
                "itemname": "台式计算机",
                "model": "联想/LENOVO, 联想/LENOVOThinkCentre M930Z-GEN2 台式计算机 I5-10500/8G^~^",
                "number": 2.0,
                "unitprice": 5450.0,
                "totalprice": 10900.0,
                "table": int(0),
            },
            {
                "unitprice": 4540.0,
                "totalprice": 4540.0,
                "table": int(0),
                "score": 0.8275,
                "itemname": "台式计算机",
                "model": "联想/LENOVO, 联想 启天M420 台式计算机 商用办公计算机 处理器I3-9100 / 4G / 1T / 集显^~^",
                "number": 1.0,
            },
        ],
        "goods_start": int(1698739805),
        "type": "",
        "city": "",
        "areaval": int(16),
        "contenthtml": """\n
\n
\n

\n

二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:

\n

一、项目概述

\n

采购编号:ELHTSZFCG-DD-2023-407774

\n

采购单位:二连浩特市人民医院

\n

所属区域:二连浩特市本级

\n

预算金额(元):15,440.00

\n

采购人及联系方式:白瑜/7535324

\n

采购计划备案书/核准书编号:二财购备字(电子)[2023]00864号

\n

采购方式:电子卖场(协议采购)

\n

二、采购结果

\n

成交供应商:二连浩特市智慧真彩文体办公

\n

成交时间:2023-10-31 15:24

\n

成交金额:15440.00,大写(人民币):壹万伍仟肆佰肆拾元整。

\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n
产品名称技术规格备注数量单价(元)金额(元)
台式计算机\n 联想/LENOVO,\n 联想/LENOVOThinkCentre M930Z-GEN2 台式计算机 I5-10500/8G/512G/DVDRW/2G独立/WIFI/摄像头/W10-HOME / 23寸液晶屏,\n M930Z,\n 数量:2;\n
\n \n
2\n ¥5450.0000\n \n ¥10900.00\n
台式计算机\n 联想/LENOVO,\n 联想 启天M420 台式计算机 商用办公计算机 处理器I3-9100 / 4G / 1T / 集显 / 21.5寸液晶显示器,\n M420,\n 数量:1;\n
\n \n
1\n ¥4540.0000\n \n ¥4540.00\n
合计\n ¥15440.00 大写(人民币): 壹万伍仟肆佰肆拾元整
\n

采购单位:二连浩特市人民医院

\n

2023年10月31日

\n \n
\n
\n """,
        "infoformat": 1.0,
        "href": "https://www.ccgp-neimenggu.gov.cn/category/onlinetender?tb_id=001076&p_id=2000957690&type=2",
        "channel": "电子卖场-直购采购成交公示",
        "detail_isvalidity": int(1),
        "basicClass": "货物",
        "rate": "97%",
        "autoid": int(249351001),
        # "bidamount" : 1034.1111,
        "bidway": "电子投标",
        # "budget" : None,
        "supervisorrate": 0.03,
        "buyer": "哈密市伊州区花园乡人民政府",
        "buyerclass": "医疗",
        "dataprocess": int(8),
        "description": "二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:一、项目概述采购编号:ELHTSZFCG-DD-2023-407774采购单位:二连浩特市人民医院所属区域:二连浩特市本级预算金额(元):15,440.00采购人及",
        "district": "",
        "entidlist": [
            "ff3d9d9a24326937b4a9f45edbfab35a"
        ],
        "keywords": "二连浩特市人民医院,台式计算机,订购,成交,公告",
        "multipackage": int(0),
        # "projectcode" : "ELHTSZFCG-DD-2023-407774",
        "projectname": "二连浩特市人民医院台式计算机(等)直接订购",
        "purchasing_tag": "台式计算机,计算机,摄像头,液晶显示器,DR,M9,液晶,4G",
        "s_subscopeclass": "信息技术_其他,行政办公_通用办公设备",
        "s_topscopeclass": "行政办公,信息技术",
        "s_winner": "二连浩特市智慧真彩文体办公院院",
        "subscopeclass": [
            "信息技术_其他",
            "行政办公_通用办公设备"
        ],
        "topscopeclass": [
            "行政办公d",
            "信息技术d",
            "信息技术t"
        ],
        "com_package": [
            {
                "projectcode": "S202501020015",
                "packagecode": "",
                "contractcode": "",
                "package_id": "5bc4bbc85a4b461cb0aa0104f5fb5ee9",
                "name": "不干胶标签"
            },
            {
                "projectcode": "S202501020015",
                "packagecode": "",
                "contractcode": "",
                "package_id": "5bc4bbc85a4b461cb0aa0104f5fb5ee9",
                "name": "不干胶标签"
            }
        ],
        "pici": int(1698740066)
    }

    # Reference rule set (disabled). Any subset of these entries can be passed
    # as ``rules`` to exercise the corresponding checkers.
    #
    # rules = {
    #     "bidamount": {
    #         "0101": {"name": "互相校验(预算和中标金额的比例)", "parent_name": "金额错误", "parent_code": "01"},
    #         "0102": {"name": "过大过小[100,10亿]", "parent_name": "金额错误", "parent_code": "01"},
    #         "0103": {"name": "中标金额小数点位数超过4位", "parent_name": "金额错误", "parent_code": "01", "checkFn": "01"},
    #         "0104": {"name": "中标金额存在费率,折扣率", "parent_name": "金额错误", "parent_code": "01"},
    #     },
    #     "budget": {
    #         "0101": {"name": "互相校验(预算和中标金额的比例)", "parent_name": "金额错误", "parent_code": "01"},
    #         "0102": {"name": "过大过小[100,10亿]", "parent_name": "金额错误", "parent_code": "01"},
    #     },
    #     "title": {
    #         "0101": {"name": "标题长度小于等于5", "parent_name": "长度类型", "parent_code": "01"},
    #         "0102": {"name": "长度大于等于100", "parent_name": "长度类型", "parent_code": "01"},
    #         "0201": {"name": "非汉字占比>55%", "parent_name": "汉字占比", "parent_code": "02"},
    #         "0303": {"name": "包含叠词,异常词汇,特殊词汇(测试,公告公告等)", "parent_name": "语义表述不完整", "parent_code": "03"},
    #     },
    #     "projectname": {
    #         "0101": {"name": "项目名称长度小于等于5", "parent_name": "长度类型", "parent_code": "01"},
    #         "0102": {"name": "长度大于等于100", "parent_name": "长度类型", "parent_code": "01"},
    #         "0201": {"name": "非汉字占比>55%", "parent_name": "汉字占比", "parent_code": "02"},
    #         "0303": {"name": "包含叠词,异常词汇,特殊词汇(测试,公告公告等)", "parent_name": "语义表述不完整", "parent_code": "03"},
    #     },
    #     "winner": {
    #         "0103": {"name": "包含叠词,异常词汇,特殊词汇", "parent_name": "名称错误", "parent_code": "01"},
    #     },
    #     "projectcode": {
    #         "0102": {"name": "长度大于2小于等于4", "parent_name": "长度类型", "parent_code": "02"},
    #         "0103": {"name": "长度大于50", "parent_name": "长度类型", "parent_code": "03"},
    #         "0201": {"name": "检查日期格式", "parent_name": "日期格式", "parent_code": "01"},
    #         "0202": {"name": "包含异常关键字", "parent_name": "异常关键字", "parent_code": "02"},
    #         "0203": {"name": "不包含数字字母", "parent_name": "不包含数字字母", "parent_code": "03"},
    #         "0301": {"name": "汉字占比>60%且不包含中国电信", "parent_name": "汉字占比", "parent_code": "01"},
    #         "0302": {"name": "连续汉字超过9个", "parent_name": "汉字占比", "parent_code": "03"},
    #     },
    #     "buyer": {
    #         "0103": {"name": "包含叠词,异常词汇,特殊词汇", "parent_name": "名称错误", "parent_code": "01"},
    #         "0104": {"name": "名称不完整", "parent_name": "名称错误", "parent_code": "01"},
    #     },
    #     "area": {
    #         "0101": {"name": "全国类数据", "parent_name": "全国类型", "parent_code": "01"},
    #         "0301": {"name": "省份不在[2,3]个字之间", "parent_name": "长度异常类型", "parent_code": "03"},
    #         "0302": {"name": "城市不在[3,11]个字之间", "parent_name": "长度异常类型", "parent_code": "03"},
    #         "0303": {"name": "区县不在[2,15]个字之间", "parent_name": "长度异常类型", "parent_code": "03"},
    #     },
    #     "multipackage": {
    #         "1000": {"name": "分包类数据", "parent_name": "分包类型", "parent_code": "01"},
    #     },
    #     "toptype": {},
    #     "subtype": {},
    #     "publishtime": {
    #         "0201": {"name": "发布时间 > 开标时间 ", "parent_name": "数据范围类型", "parent_code": "02"},
    #     },
    #     "bidopentime": {
    #         "0201": {"name": "发布时间 > 开标时间", "parent_name": "数据范围类型", "parent_code": "02"},
    #     },
    # }

    result = check(row, rules={
        "s_winner": {
            "0103": {
                "name": "包含叠词,异常词汇,特殊词汇",
                "parent_name": "名称错误",
                "parent_code": "01"
            }
        },
        "publishtime": {
            "0201": {
                "name": "发布时间 > 开标时间 ",
                "parent_name": "数据范围类型",
                "parent_code": "02"
            },
            "0202": {
                "name": "发布时间 > 当前时间",
                "parent_name": "数据范围类型",
                "parent_code": "02"
            }
        },
    })
    print(result)