app.py 33 KB


  1. # coding:utf-8
import csv
import inspect
import logging

from pymongo import MongoClient

from docs.config import abnormal_config
from tables.fields.winner import WinnerChecker
from tables.fields.buyer import BuyerChecker
from tables.fields.bidamount import BidAmountChecker
from tables.fields.budget import BudgetChecker
from tables.fields.NoField import NoFieldChecker
from tables.fields.title import TitleChecker
from tables.fields.area import AreaChecker
from tables.fields.projectcode import ProjectcodeChecker
from tables.fields.projectname import ProjectnameChecker
from tables.fields.subpackage import SubpackageChecker
from tables import CatchContentObject
from tables.fields.fieldtype import FieldTypeChecker
from tables.fields.purchasing import PurchasingChecker
from tables.fields.bidopentime import BidopentimeChecker
from tables.fields.publishtime import PublishtimeChecker
  21. area_checker = AreaChecker()
  22. winner_checker = WinnerChecker()
  23. buyer_checker = BuyerChecker()
  24. amount_checker = BidAmountChecker()
  25. budget_checker = BudgetChecker()
  26. title_checker = TitleChecker()
  27. nofield_checker = NoFieldChecker()
  28. projectname_checker = ProjectnameChecker()
  29. projectcode_checker = ProjectcodeChecker()
  30. publishtime_checker = PublishtimeChecker()
  31. bidopentime_checker = BidopentimeChecker()
  32. multipackage_checker = SubpackageChecker()
  33. fieldtype_checker = FieldTypeChecker()
  34. purchasing_checker = PurchasingChecker()
  35. # 定义检查链
  36. check_chain = {
  37. "title": {
  38. "checker": title_checker,
  39. },
  40. "projectname": {
  41. "checker": projectname_checker,
  42. },
  43. "s_winner": {
  44. "checker": winner_checker,
  45. },
  46. "buyer": {
  47. "checker": buyer_checker,
  48. },
  49. "projectcode": {
  50. "checker": projectcode_checker,
  51. },
  52. "bidamount": {
  53. "checker": amount_checker,
  54. },
  55. "budget": {
  56. "checker": budget_checker,
  57. },
  58. "area": {
  59. "checker": area_checker,
  60. },
  61. "bidopentime":{
  62. "checker":bidopentime_checker,
  63. },
  64. "publishtime":{
  65. "checker":publishtime_checker,
  66. },
  67. "com_package": {
  68. "checker": multipackage_checker,
  69. },
  70. "purchasinglist": {
  71. "checker": purchasing_checker,
  72. }
  73. }
  74. #多参数处理
  75. def check_params(func, param_rows: dict, is_check_type=False) -> (bool, list):
  76. """
  77. 函数传参数
  78. """
  79. _default_params = {"attach_text": {}, "bidamount": 0, "budget": 0,"s_winner":"","bidopentime":0,"publishtime":0,"subtype":"","supervisorrate":0,"city":"郑州市","district":"金水区","com_package":[]} # 必要参数的默认值
  80. func_params = inspect.getfullargspec(func) # 取出函数的参数
  81. args_annotations = func_params.annotations # 参数类型dict,取出参数类型
  82. args_list = func_params.args # 参数list
  83. if "self" in args_list:
  84. args_list.remove("self")
  85. params = {} # 返回参数列表
  86. for arg in args_list:
  87. if arg in param_rows: # 存在参数
  88. if is_check_type and not isinstance(param_rows[arg], args_annotations[arg]):
  89. # 不满足参数类型需求
  90. return False, []
  91. params[arg] = param_rows[arg]
  92. elif arg in _default_params: # 使用默认值
  93. params[arg] = _default_params[arg]
  94. # print(params, args_list)
  95. if len(params) != len(args_list):
  96. # 参数不够
  97. return False, params
  98. return True, params # 满足参数需求
  99. #根据检查结果打分top
  100. def bid_score(error_dist,obj):
  101. score=100
  102. site = obj.get("site")
  103. toptype = obj.get("toptype")
  104. subtype = obj.get("subtype")
  105. num=0
  106. for key,value in error_dist.items():
  107. if key != "purchasinglist_qa":
  108. if value :
  109. num+=1
  110. flag=0
  111. with open(abnormal_config["table_field_config"]["path7"], "r") as f:
  112. reads = csv.reader(f)
  113. for w in reads:
  114. result=w[0].split("\t")
  115. #可信度比较高的网站,打分时减少分值
  116. if result[0]==site and result[1]==toptype and result[2]==subtype:
  117. flag = 1
  118. score-=num
  119. print(score)
  120. if flag == 0:
  121. score=score-num*10
  122. print(score)
  123. return score
  124. #检查并打分
  125. def check(obj: any, rules) -> any:
  126. """
  127. 单行数据质量检查
  128. :return:any
  129. """
  130. catch_content = CatchContentObject()
  131. obj["catch_content"] = catch_content
  132. field_qa = {}
  133. for field in rules: # 获取检查字段
  134. qa = {}
  135. # 字段不存在检查
  136. if field not in obj and field in nofield_checker.errors_tables:
  137. func = nofield_checker.errors_tables[field]
  138. # 开始执行函数
  139. if func(obj, catch_content):
  140. qa["0000"] = f"{field}:不存在!!"
  141. field_qa["%s_qa" % field] = qa
  142. continue
  143. # 字段存在检查,判断字段值的类型是否正确,判断类型是否正确如值为null的,并把英文括号转换为中文括号
  144. if field in obj and field in fieldtype_checker.errors_tables:
  145. func_type = fieldtype_checker.errors_tables[field]
  146. value = func_type(obj.get(field))
  147. if value is True:
  148. qa["0001"] = f"{field}:类型不正确、空值"
  149. field_qa["%s_qa" % field] = qa
  150. continue
  151. if field not in check_chain:
  152. continue
  153. checker = check_chain[field]["checker"] # 获取检测器
  154. for err,err_detail in rules[field].items(): # 获取检测的规则
  155. # 错误类型检查
  156. if err not in checker.errors_tables:
  157. qa[err] = f"{field}:服务端未定义错误类型"
  158. field_qa["%s_qa" % field] = qa
  159. continue
  160. func = checker.errors_tables[err]["checkFn"] # 获取检查方法
  161. status, params = check_params(func, obj) # 多参数解决方案
  162. if status:
  163. # 开始执行函数
  164. #判断返回是否是数值,是数值的话,为标的物的检查规则,标的物返回的是打分结果(float类型),不是错误类型
  165. result = func(**params)
  166. if isinstance(result, float):
  167. qa[err]=result
  168. else:
  169. if result:
  170. qa[err] = err_detail.get("name","")
  171. else:
  172. # 参数不满足要求
  173. qa[err] = f"{field}:必须参数(字段)缺失"
  174. field_qa["%s_qa" % field] = qa
  175. score=bid_score(field_qa,obj)
  176. field_qa["score"]=score
  177. return field_qa
  178. if __name__ == '__main__':
  179. row={
  180. "comeintime" : int(1698739748),
  181. "area" : "内蒙古",
  182. "purchasingsource" : "[{\"start\": 13, \"verify\": \"确定\", \"direction\": \"h\", \"type\": \"识别\", \"header\": {\"itemname\": \"产品名称\", \"model\": \"技术规格\", \"number\": \"数量\", \"unitprice\": \"单价(元)\", \"totalprice\": \"金额(元)\"}, \"file_name\": \"公告\"}]",
  183. "toptype" : "结果",
  184. "spidercode" : "nm_nmgzzqzfcgw_dzmc_zgcgcjgs",
  185. "extracttype" : int(1),
  186. "s_sha" : "9269588cb73151d8ec09cf16347539678549f81395559589a731ca1394828582",
  187. "detail" : ".二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:<br/>一、项目概述<br/>采购编号:ELHTSZFCG-DD-2023-407774<br/>采购单位:二连浩特市人民医院<br/>所属区域:二连浩特市本级<br/>预算金额(元):15,440.00<br/>采购人及联系方式:白瑜/7535324<br/>采购计划备案书/核准书编号:二财购备字(电子)[2023]00864号<br/>采购方式:电子卖场(协议采购)<br/>二、采购结果<br/>成交供应商:二连浩特市智慧真彩文体办公<br/>成交时间:2023-10-31 15:24<br/>成交金额:15440.00,大写(人民币):壹万伍仟肆佰肆拾元整。<br/> <table border=\"1\"> <tbody><tr> <th>产品名称</th> <th>技术规格</th> <th>备注</th> <th>数量</th> <th>单价(元)</th> <th>金额(元)</th> </tr> <tr> <td>台式计算机</td> <td> 联想/LENOVO,<br/> 联想/LENOVOThinkCentre M930Z-GEN2 台式计算机 I5-10500/8G/512G/DVDRW/2G独立/WIFI/摄像头/W10-HOME / 23寸液晶屏,<br/> M930Z,<br/> 数量:2;<br/> </td> <td></td> <td>2</td> <td> ¥5450.0000<br/> </td> <td> ¥10900.00<br/> </td> </tr> <tr> <td>台式计算机</td> <td> 联想/LENOVO,<br/> 联想 启天M420 台式计算机 商用办公计算机 处理器I3-9100 / 4G / 1T / 集显 / 21.5寸液晶显示器,<br/> M420,<br/> 数量:1;<br/> </td> <td></td> <td>1</td> <td> ¥4540.0000<br/> </td> <td> ¥4540.00<br/> </td> </tr> <tr> <td>合计</td> <td colspan=\"5\"> ¥15440.00 大写(人民币): 壹万伍仟肆佰肆拾元整</td> </tr> </tbody></table><br/> 采购单位:二连浩特市人民医院<br/> 2023年10月31日",
  188. "purchasing" : "台式计算机",
  189. "site" : "湖南省政府采购电子卖场",
  190. "title" : "黄冈罗田碳和瑞新能源科技有限公司罗田分公司租用罗田县三里畈镇尹家垸村三组尹小丹农户屋顶新建35.75分布式光伏发电项目",
  191. "dataging" : int(0),
  192. # "bidopentime":int(1798739414),
  193. # "publishtime" : int(1751937052),
  194. "subtype" : "成交",
  195. "purchasinglist" : [
  196. {
  197. "score" : 0.8275,
  198. "itemname" : "台式计算机",
  199. "model" : "联想/LENOVO, 联想/LENOVOThinkCentre M930Z-GEN2 台式计算机 I5-10500/8G^~^",
  200. "number" : 2.0,
  201. "unitprice" : 5450.0,
  202. "totalprice" : 10900.0,
  203. "table" : int(0)
  204. },
  205. {
  206. "unitprice" : 4540.0,
  207. "totalprice" : 4540.0,
  208. "table" : int(0),
  209. "score" : 0.8275,
  210. "itemname" : "台式计算机",
  211. "model" : "联想/LENOVO, 联想 启天M420 台式计算机 商用办公计算机 处理器I3-9100 / 4G / 1T / 集显^~^",
  212. "number" : 1.0
  213. }
  214. ],
  215. "goods_start" : int(1698739805),
  216. "type" : "",
  217. "city" : "",
  218. "areaval" : int(16),
  219. "contenthtml" : "\n <div class=\"noticeArea\">\n <div class=\"contractWrap\" style=\"page-break-after: always;\">\n <p></p>\n <p>二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:</p>\n <h3><strong>一、项目概述</strong></h3>\n <p>采购编号:ELHTSZFCG-DD-2023-407774</p>\n <p>采购单位:二连浩特市人民医院</p>\n <p>所属区域:二连浩特市本级</p>\n <p>预算金额(元):15,440.00</p>\n <p>采购人及联系方式:白瑜/7535324</p>\n <p>采购计划备案书/核准书编号:二财购备字(电子)[2023]00864号</p>\n <p>采购方式:电子卖场(协议采购)</p>\n <h3><strong>二、采购结果</strong></h3>\n <p>成交供应商:二连浩特市智慧真彩文体办公</p>\n <p>成交时间:2023-10-31 15:24</p>\n <p>成交金额:15440.00,大写(人民币):壹万伍仟肆佰肆拾元整。</p>\n <table cellpadding=\"0\" cellspacing=\"0\" class=\"noticeTable\" border=\"1\" style=\"width:100%\">\n <tbody><tr style=\"height:40px\">\n <th style=\"width:18%;text-align:center;\">产品名称</th>\n <th style=\"width:32%;text-align:center;\">技术规格</th>\n <th style=\"width:12%;text-align:center;\">备注</th>\n <th style=\"width:12%;text-align:center;\">数量</th>\n <th style=\"width:13%;text-align:center;\">单价(元)</th>\n <th style=\"width:13%;text-align:center;\">金额(元)</th>\n </tr>\n <!--订单商品集合-->\n <tr style=\"height:36px;\">\n <td style=\"padding:3px 5px;\">台式计算机</td>\n <td style=\"padding:3px 5px;\">\n 联想/LENOVO,\n 联想/LENOVOThinkCentre M930Z-GEN2 台式计算机 I5-10500/8G/512G/DVDRW/2G独立/WIFI/摄像头/W10-HOME / 23寸液晶屏,\n M930Z,\n 数量:2;\n <br/>\n \n </td>\n <td></td>\n <td style=\"padding:3px 5px;text-align:center\">2</td>\n <td style=\"padding:3px 5px;text-align:right\">\n ¥5450.0000\n </td>\n <td style=\"padding:3px 5px;text-align:right\">\n ¥10900.00\n </td>\n </tr>\n<!--订单商品集合-->\n <tr style=\"height:36px;\">\n <td style=\"padding:3px 5px;\">台式计算机</td>\n <td style=\"padding:3px 5px;\">\n 联想/LENOVO,\n 联想 启天M420 台式计算机 商用办公计算机 处理器I3-9100 / 4G / 1T / 集显 / 21.5寸液晶显示器,\n M420,\n 数量:1;\n <br/>\n \n </td>\n <td></td>\n <td style=\"padding:3px 5px;text-align:center\">1</td>\n <td style=\"padding:3px 5px;text-align:right\">\n ¥4540.0000\n </td>\n <td style=\"padding:3px 5px;text-align:right\">\n ¥4540.00\n </td>\n </tr>\n <tr 
style=\"height:36px\">\n <td style=\"text-align: center;padding:3px 5px;\">合计</td>\n <td colspan=\"5\" style=\"text-align: left;padding:3px 5px;\">\n ¥15440.00 大写(人民币): 壹万伍仟肆佰肆拾元整</td>\n </tr>\n </tbody></table>\n <p class=\"text-right\" style=\"font-family: FangSong ;font-size:12pt\">采购单位:二连浩特市人民医院</p>\n <p class=\"text-right\" style=\"font-family: FangSong ;font-size:12pt\">2023年10月31日</p>\n <style>\n /*网页规范:\n 宋体,2.5行高,默认16号字\n 大标题用h2标签,字号为24号加粗\n 一级标题用h3标签,字号为18号加粗\n 二级标题用h4标签,字号为16号加粗\n 三级标题用h5标签,字号为16号加粗,首行缩进28px\n 段落字号16px,首行缩进28px\n 普通文字为16号字体\n 表格标题为加粗,表格字号为16号字体,高度为32px,表格外部上下间距10px,左右居中;单元格内部上下间距5px,左右间距8px;\n 标题加粗,自己改为<strong></strong>或者加样式。\n 标题段落间距为8px\n */\n\n .noticeArea {\n line-height: 2.5;\n font-size: 16px;\n text-align: justify;\n font-family: '宋体';\n }\n\n .noticeArea * {\n padding: 0;\n margin: 0;\n\t\t\t\tfont-family: '宋体';\n }\n\n .noticeArea h2 {\n font-size: 24px;\n text-align: center;\n margin-bottom: 20px;\n }\n\n .noticeArea h3 {\n font-size: 18px;\n font-weight: normal;\n }\n\n .noticeArea h4 {\n font-size: 16px;\n font-weight: normal;\n }\n\n .noticeArea h5 {\n font-size: 16px;\n text-indent: 28px;\n font-weight: normal;\n }\n\n .noticeArea p {\n text-indent: 28px;\n font-size: 16px;\n }\n\n .noticeArea .noticeTable {\n font-size: 16px;\n border-collapse: collapse;\n border-spacing: 0;\n width: 100%;\n margin: 10px auto;\n }\n\n .noticeArea .noticeTable tr td {\n border-collapse: collapse;\n border: 1px solid #333;\n text-align: center;\n padding: 5px 8px;\n }\n\n .noticeArea .noticeTable tr th {\n font-weight: bold;\n text-align: center;\n border: 1px solid #333;\n padding: 5px 8px;\n }\n\n .noticeArea .noticeTable tr {\n height: 32px;\n }\n\n @media print {\n /*打印规范:\n 宋体,1.8行高\n 大标题用h2标签,字号为34号加粗,对应WORD为二号\n 一级标题用h3标签,字号为24号加粗,对应WORD为小三号\n 二级标题用h4标签,字号为21号加粗。对应WORD为四号\n 三级标题用h5标签,字号为18号加粗。对应WORD为小四号,首行缩进28px\n 段落普通文字为18号字体,对应WORD为小四号,首行缩进28px\n 普通文字为18号字体,对应WORD为小四号\n 
表格标题为加粗,表格字号为16号字体,高度为32px;考虑内容多,用16号,对应WORD为五号;表格外部上下间距10px,左右居中;单元格内部上下间距5px,左右间距8px\n 标题加粗,自己改为<strong></strong>或者加样式。\n */\n .noticeArea {\n font-family: '宋体';\n line-height: 1.8;\n font-size: 18px;\n text-align: justify;\n }\n\n .noticeArea * {\n padding: 0;\n margin: 0\n }\n\n .noticeArea .focuscontent {\n color: #337ab7;\n }\n\n .noticeArea h2 {\n font-size: 34px;\n text-align: center;\n margin-bottom: 25px;\n font-family: SimHei;\n }\n\n .noticeArea h3 {\n font-size: 24px;\n font-weight: normal;\n }\n\n .noticeArea h4 {\n font-size: 21px;\n font-weight: normal;\n }\n\n .noticeArea h5 {\n font-size: 18px;\n font-weight: normal;\n }\n\n .noticeArea p {\n text-indent: 32px;\n font-size: 18px;\n }\n\n .noticeArea .noticeTable {\n font-size: 16px;\n border-collapse: collapse;\n border-spacing: 0;\n width: 100%;\n margin: 15px auto;\n }\n\n .noticeArea .noticeTable tr td {\n border-collapse: collapse;\n border: 1px solid #333;\n text-align: center;\n padding: 2px 6px;\n }\n\n .noticeArea .noticeTable tr th {\n font-weight: bold;\n text-align: center;\n padding: 2px 6px;\n }\n\n .noticeArea .noticeTable tr {\n height: 32px;\n }\n\n }\n </style>\n </div>\n </div>\n ",
  220. "infoformat" : 1.0,
  221. "href" : "https://www.ccgp-neimenggu.gov.cn/category/onlinetender?tb_id=001076&p_id=2000957690&type=2",
  222. "channel" : "电子卖场-直购采购成交公示",
  223. "detail_isvalidity" : int(1),
  224. "basicClass" : "货物",
  225. "rate" : "97%",
  226. "autoid" : int(249351001),
  227. # "bidamount" : 1034.1111,
  228. "bidway" : "电子投标",
  229. # "budget" : None,
  230. "supervisorrate": 0.03,
  231. "buyer" : "哈密市伊州区花园乡人民政府",
  232. "buyerclass" : "医疗",
  233. "dataprocess" : int(8),
  234. "description" : "二连浩特市人民医院在政采商城电子卖场完成协议供货直购采购,采购结果确认如下:一、项目概述采购编号:ELHTSZFCG-DD-2023-407774采购单位:二连浩特市人民医院所属区域:二连浩特市本级预算金额(元):15,440.00采购人及",
  235. "district" : "",
  236. "entidlist" : [
  237. "ff3d9d9a24326937b4a9f45edbfab35a"
  238. ],
  239. "keywords" : "二连浩特市人民医院,台式计算机,订购,成交,公告",
  240. "multipackage" : int(0),
  241. # "projectcode" : "ELHTSZFCG-DD-2023-407774",
  242. "projectname" : "二连浩特市人民医院台式计算机(等)直接订购",
  243. "purchasing_tag" : "台式计算机,计算机,摄像头,液晶显示器,DR,M9,液晶,4G",
  244. "s_subscopeclass" : "信息技术_其他,行政办公_通用办公设备",
  245. "s_topscopeclass" : "行政办公,信息技术",
  246. "s_winner" : "二连浩特市智慧真彩文体办公院院",
  247. "subscopeclass" : [
  248. "信息技术_其他",
  249. "行政办公_通用办公设备"
  250. ],
  251. "topscopeclass" : [
  252. "行政办公d",
  253. "信息技术d",
  254. "信息技术t"
  255. ],
  256. "com_package": [
  257. {
  258. "projectcode": "S202501020015",
  259. "packagecode": "",
  260. "contractcode": "",
  261. "package_id": "5bc4bbc85a4b461cb0aa0104f5fb5ee9",
  262. "name": "不干胶标签"
  263. },
  264. {
  265. "projectcode": "S202501020015",
  266. "packagecode": "",
  267. "contractcode": "",
  268. "package_id": "5bc4bbc85a4b461cb0aa0104f5fb5ee9",
  269. "name": "不干胶标签"
  270. }
  271. ],
  272. "pici" : int(1698740066)
  273. }
  274. # result=check(row,rules={
  275. # "bidamount": {
  276. # "0101": {
  277. # "name": "互相校验(预算和中标金额的比例)",
  278. # "parent_name": "金额错误",
  279. # "parent_code": "01"
  280. # },
  281. # "0102": {
  282. # "name": "过大过小[100,10亿]",
  283. # "parent_name": "金额错误",
  284. # "parent_code": "01"
  285. # },
  286. # "0103": {
  287. # "name": "中标金额小数点位数超过4位",
  288. # "parent_name": "金额错误",
  289. # "parent_code": "01",
  290. # "checkFn": "01",
  291. # },
  292. # "0104": {
  293. # "name": "中标金额存在费率,折扣率",
  294. # "parent_name": "金额错误",
  295. # "parent_code": "01"
  296. # }
  297. # },
  298. # # "budget" : {
  299. # # "0101" : {
  300. # # "name" : "互相校验(预算和中标金额的比例)",
  301. # # "parent_name" : "金额错误",
  302. # # "parent_code" : "01"
  303. # # },
  304. # # "0102" : {
  305. # # "name" : "过大过小[100,10亿]",
  306. # # "parent_name" : "金额错误",
  307. # # "parent_code" : "01"
  308. # # }
  309. # # },
  310. # # "title" : {
  311. # # "0101" : {
  312. # # "name" : "标题长度小于等于5",
  313. # # "parent_name" : "长度类型",
  314. # # "parent_code" : "01"
  315. # # },
  316. # # "0102" : {
  317. # # "name" : "长度大于等于100",
  318. # # "parent_name" : "长度类型",
  319. # # "parent_code" : "01"
  320. # # },
  321. # # "0201" : {
  322. # # "name" : "非汉字占比>55%",
  323. # # "parent_name" : "汉字占比",
  324. # # "parent_code" : "02"
  325. # # },
  326. # # "0303" : {
  327. # # "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
  328. # # "parent_name" : "语义表述不完整",
  329. # # "parent_code" : "03"
  330. # # }
  331. # # },
  332. # # "projectname" : {
  333. # # "0101" : {
  334. # # "name" : "项目名称长度小于等于5",
  335. # # "parent_name" : "长度类型",
  336. # # "parent_code" : "01"
  337. # # },
  338. # # "0102" : {
  339. # # "name" : "长度大于等于100",
  340. # # "parent_name" : "长度类型",
  341. # # "parent_code" : "01"
  342. # # },
  343. # # "0201" : {
  344. # # "name" : "非汉字占比>55%",
  345. # # "parent_name" : "汉字占比",
  346. # # "parent_code" : "02"
  347. # # },
  348. # # "0303" : {
  349. # # "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
  350. # # "parent_name" : "语义表述不完整",
  351. # # "parent_code" : "03"
  352. # # }
  353. # # },
  354. # # "winner" : {
  355. # # "0103" : {
  356. # # "name" : "包含叠词,异常词汇,特殊词汇",
  357. # # "parent_name" : "名称错误",
  358. # # "parent_code" : "01"
  359. # # }
  360. # # },
  361. # # "projectcode" : {
  362. # # "0102" : {
  363. # # "name" : "长度大于2小于等于4",
  364. # # "parent_name" : "长度类型",
  365. # # "parent_code" : "02"
  366. # # },
  367. # # "0103" : {
  368. # # "name" : "长度大于50",
  369. # # "parent_name" : "长度类型",
  370. # # "parent_code" : "03"
  371. # # },
  372. # # "0201" : {
  373. # # "name" : "检查日期格式",
  374. # # "parent_name" : "日期格式",
  375. # # "parent_code" : "01"
  376. # # },
  377. # # "0202" : {
  378. # # "name" : "包含异常关键字",
  379. # # "parent_name" : "异常关键字",
  380. # # "parent_code" : "02"
  381. # # },
  382. # # "0203" : {
  383. # # "name" : "不包含数字字母",
  384. # # "parent_name" : "不包含数字字母",
  385. # # "parent_code" : "03"
  386. # # },
  387. # # "0301" : {
  388. # # "name" : "汉字占比>60%且不包含中国电信",
  389. # # "parent_name" : "汉字占比",
  390. # # "parent_code" : "01"
  391. # # },
  392. # # "0302" : {
  393. # # "name" : "连续汉字超过9个",
  394. # # "parent_name" : "汉字占比",
  395. # # "parent_code" : "03"
  396. # # }
  397. # # },
  398. # # "buyer" : {
  399. # # "0103" : {
  400. # # "name" : "包含叠词,异常词汇,特殊词汇",
  401. # # "parent_name" : "名称错误",
  402. # # "parent_code" : "01"
  403. # # },
  404. # # "0104" : {
  405. # # "name" : "名称不完整",
  406. # # "parent_name" : "名称错误",
  407. # # "parent_code" : "01"
  408. # # }
  409. # # },
  410. # #
  411. # # "area" : {
  412. # # "0101" : {
  413. # # "name" : "全国类数据",
  414. # # "parent_name" : "全国类型",
  415. # # "parent_code" : "01"
  416. # # },
  417. # # "0301" : {
  418. # # "name" : "省份不在[2,3]个字之间",
  419. # # "parent_name" : "长度异常类型",
  420. # # "parent_code" : "03"
  421. # # },
  422. # # "0302" : {
  423. # # "name" : "城市不在[3,11]个字之间",
  424. # # "parent_name" : "长度异常类型",
  425. # # "parent_code" : "03"
  426. # # },
  427. # # "0303" : {
  428. # # "name" : "区县不在[2,15]个字之间",
  429. # # "parent_name" : "长度异常类型",
  430. # # "parent_code" : "03"
  431. # # }
  432. # # },
  433. # # "multipackage" : {
  434. # # "1000" : {
  435. # # "name" : "分包类数据",
  436. # # "parent_name" : "分包类型",
  437. # # "parent_code" : "01"
  438. # # }
  439. # # },
  440. # # "toptype": {
  441. # # },
  442. # # "subtype": {
  443. # # },
  444. # # "publishtime": {
  445. # # "0201": {
  446. # # "name": "发布时间 > 开标时间 ",
  447. # # "parent_name": "数据范围类型",
  448. # # "parent_code": "02"
  449. # # }
  450. # # },
  451. # # "bidopentime": {
  452. # # "0201": {
  453. # # "name": "发布时间 > 开标时间",
  454. # # "parent_name": "数据范围类型",
  455. # # "parent_code": "02"
  456. # # }
  457. # # }
  458. # # })
  459. # # result = check(row, rules={
  460. # # "title": {
  461. # # "0101" : {
  462. # # "name" : "标题长度小于等于5",
  463. # # "parent_name" : "长度类型",
  464. # # "parent_code" : "01"
  465. # # },
  466. # # "0102" : {
  467. # # "name" : "长度大于等于100",
  468. # # "parent_name" : "长度类型",
  469. # # "parent_code" : "01"
  470. # # },
  471. # # "0201" : {
  472. # # "name" : "非汉字占比>55%",
  473. # # "parent_name" : "汉字占比",
  474. # # "parent_code" : "02"
  475. # # },
  476. # # "0303" : {
  477. # # "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
  478. # # "parent_name" : "语义表述不完整",
  479. # # "parent_code" : "03"
  480. # # }
  481. # # },
  482. # # "area": {
  483. # # "0101": {
  484. # # "name": "全国类数据",
  485. # # "parent_name": "全国类型",
  486. # # "parent_code": "01"
  487. # # },
  488. # # "0301": {
  489. # # "name": "省份不在[2,3]个字之间",
  490. # # "parent_name": "长度异常类型",
  491. # # "parent_code": "03"
  492. # # },
  493. # # "0302": {
  494. # # "name": "城市不在[3,11]个字之间",
  495. # # "parent_name": "长度异常类型",
  496. # # "parent_code": "03"
  497. # # },
  498. # # "0303": {
  499. # # "name": "区县不在[2,15]个字之间",
  500. # # "parent_name": "长度异常类型",
  501. # # "parent_code": "03"
  502. # # }
  503. # # }
  504. # })
  505. result = check(row, rules={
  506. "s_winner": {
  507. "0103": {
  508. "name": "包含叠词,异常词汇,特殊词汇",
  509. "parent_name": "名称错误",
  510. "parent_code": "01"
  511. }
  512. },
  513. "publishtime": {
  514. "0201": {
  515. "name": "发布时间 > 开标时间 ",
  516. "parent_name": "数据范围类型",
  517. "parent_code": "02"
  518. },
  519. "0202": {
  520. "name": "发布时间 > 当前时间",
  521. "parent_name": "数据范围类型",
  522. "parent_code": "02"
  523. }
  524. },
  525. # "bidamount": {
  526. # "0101": {
  527. # "name": "互相校验(预算和中标金额的比例)",
  528. # "parent_name": "金额错误",
  529. # "parent_code": "01"
  530. # },
  531. # "0102": {
  532. # "name": "过大过小[100,10亿]",
  533. # "parent_name": "金额错误",
  534. # "parent_code": "01"
  535. # },
  536. # "0103": {
  537. # "name": "中标金额小数点位数超过4位",
  538. # "parent_name": "金额错误",
  539. # "parent_code": "01",
  540. # "checkFn": "01",
  541. # },
  542. # "0104": {
  543. # "name": "中标金额存在费率,折扣率",
  544. # "parent_name": "金额错误",
  545. # "parent_code": "01"
  546. # }
  547. # },
  548. })
  549. print(result)