purchasing.py 11 KB


  1. # coding:utf-8
import json
import math

import numpy as np
from sklearn.preprocessing import Normalizer

from tables.ai import product_detail_server
# Shared scikit-learn Normalizer instance; calculate_score() uses it to
# rescale a score vector before turning it into softmax weights.
Scaler = Normalizer()
  7. def softmax(x):
  8. # 对输入向量进行指数运算
  9. exp_values = np.exp(x)
  10. # 计算指数值的和
  11. sum_exp_values = np.sum(exp_values)
  12. # 计算每个元素的softmax概率值
  13. softmax_values = exp_values / sum_exp_values
  14. return softmax_values
  15. def calculate_score(score_list):
  16. '''
  17. 最终得分计算
  18. :param score_list:
  19. :return:
  20. '''
  21. if not score_list:
  22. return 0
  23. global Scaler
  24. # 适应并转换数据
  25. score_np = np.array(score_list)
  26. scaled_data = Scaler.fit_transform([score_list])
  27. soft_ret = softmax(scaled_data[0])
  28. return sum(score_np * soft_ret)
  29. class SourceEvaluate(object):
  30. def __init__(self):
  31. """
  32. 初始化
  33. """
  34. self.step = {}
  35. self.direction_step = {"h": 1, "v": 0.75, "h/v": 0.5, "无": 0.25}
  36. self.table_type_step = {"识别": 1, "推断": 0.8, "字段": 0.6, "标题": 0.4, "物品": 0.2}
  37. self.verify_step = {"确定": 1, "不确定": 0.7}
  38. self.file_name_step = {"公告": 1, "标题": 0.4}
  39. def calculate_score(self, file_name, table):
  40. """
  41. 计算得分
  42. :return:
  43. """
  44. verify = table.get("verify", "")
  45. direction = table.get("direction", "")
  46. table_type = table.get("type", "")
  47. direction_score = self.direction_step.get(direction, 0)
  48. table_type_score = self.table_type_step.get(table_type, 0)
  49. verify_step_score = self.verify_step.get(verify, 0)
  50. file_name_step_score = self.file_name_step.get(file_name, 0.8)
  51. return (direction_score + table_type_score + verify_step_score + file_name_step_score) / 4
  52. class ItemNameClassify(object):
  53. def __init__(self):
  54. self.step = []
  55. @staticmethod
  56. def calculate_score(name_score):
  57. """
  58. 计算得分
  59. :return:
  60. """
  61. if name_score > 0.98:
  62. return 0.99
  63. elif name_score > 0.86:
  64. return 0.94
  65. else:
  66. return 0.68
  67. def item_name_evaluate(item_score, header):
  68. '''
  69. 名称
  70. :param item_score:
  71. :param header:
  72. :return:
  73. '''
  74. item_name_field = header.get("itemname", "")
  75. if not item_name_field:
  76. return 0.3
  77. if [w for w in ["产品", "设备", "货物", "商品", "标的", "物资", "材料", "服务", "物料", "印刷品", "医疗设备"] if
  78. w in item_name_field] and "名称" in item_name_field:
  79. return item_score
  80. else:
  81. return item_score * 0.9
  82. def number_price_verify(target):
  83. """
  84. 数量价格验证
  85. :param target:
  86. :return:
  87. """
  88. number = target.get("number", 0)
  89. unit_price = target.get("unitprice", 0)
  90. total_price = target.get("totalprice", 0)
  91. if number and unit_price and total_price:
  92. if number * unit_price == total_price:
  93. return 1, 1, 1
  94. else:
  95. return 0, 0, 0
  96. return 1, 1, 1
  97. def number_unit_evaluate(header, target):
  98. '''
  99. 数量单位
  100. :param header:
  101. :param target:
  102. :return:
  103. '''
  104. number_header = header.get("number", "")
  105. number = target.get("number", "")
  106. unit_header = header.get("unitname", "")
  107. unitname = target.get("unitname", "")
  108. number_score, unit_score = 0, 0
  109. if number:
  110. number_score = 1 if "数量" in number_header or "数量" in unit_header else 0.5
  111. if unitname:
  112. unit_score = 1 if "单位" in number_header or "单位" in unit_header else 0.8
  113. return number_score, unit_score
  114. def total_unit_price_evaluate(header, target):
  115. '''
  116. 单价、总价
  117. :param header:
  118. :param target:
  119. :return:
  120. '''
  121. total_price_header = header.get("totalprice", "")
  122. unit_price_header = header.get("unitprice", "")
  123. unit_price = target.get("unitprice", "")
  124. total_price = target.get("totalprice", "")
  125. unit_price_score, total_price_score = 0, 0
  126. if total_price:
  127. if not total_price_header:
  128. total_price_score = 0.5
  129. else:
  130. total_price_score = 1 if "总价" in total_price_header else 0.8
  131. if unit_price:
  132. if not unit_price_header:
  133. unit_price_score = 0.5
  134. else:
  135. unit_price_score = 1 if "单价" in unit_price_header else 0.8
  136. return unit_price_score, total_price_score
  137. def entity_model_server(text, prefix=""):
  138. """
  139. 品牌、规格、型号的实体识别模型
  140. :param text:
  141. :param prefix:
  142. :return:
  143. """
  144. brands = [] # 品牌
  145. models = [] # 型号
  146. specs = [] # 规格
  147. product = [] # 产品
  148. if text:
  149. text = prefix + text
  150. model_dict = product_detail_server(text)
  151. output = model_dict.get('output', [])
  152. for row in output:
  153. _type = row.get("type", "")
  154. span = row.get("span", "")
  155. if _type == "品牌":
  156. brands.append(span)
  157. elif _type == "型号":
  158. models.append(span)
  159. elif _type == "规格":
  160. specs.append(span)
  161. elif "产品" in _type:
  162. product.append(span)
  163. return {"brands": brands, "models": models, "specs": specs, "product": product}
  164. else:
  165. return {"brands": [], "models": [], "specs": [], "product": []}
  166. def get_brand_model(target, title_result):
  167. """
  168. 获取品牌和规格
  169. :param target:
  170. :return:
  171. """
  172. # 关注的字段
  173. brand = target.get("brandname", "")
  174. model = target.get("model", "")
  175. # 实体识别
  176. brand_result = entity_model_server(brand, "品牌:")
  177. model_result = entity_model_server(model, "型号:")
  178. # 品牌
  179. if brand_result.get("brands", []):
  180. brands = [brand_result.get("brands", []), "brandname", 1]
  181. elif model_result.get("brands", []):
  182. brands = [model_result.get("brands", []), "model", 0.8]
  183. elif title_result.get("brands", []):
  184. brands = [title_result.get("brands", []), "itemname", 0.5]
  185. else:
  186. brands = [[], "", 0]
  187. # 型号
  188. if model_result.get("models", []):
  189. models = [model_result.get("models", []), "model", 1]
  190. elif brand_result.get("models", []):
  191. models = [brand_result.get("models", []), "brandname", 0.8]
  192. else:
  193. if model_result.get("specs", []):
  194. models = [model_result.get("specs", []), "model", 1]
  195. elif brand_result.get("specs", []):
  196. models = [brand_result.get("specs", []), "brandname", 0.8]
  197. elif title_result.get("models", []) or title_result.get("specs", []):
  198. models = [title_result.get("models", []), "itemname", 0.5]
  199. else:
  200. models = [[], "", 0]
  201. # 规格
  202. # if model_result.get("specs", []):
  203. # specs = [model_result.get("specs", []), "model", 1]
  204. # elif brand_result.get("brands", []):
  205. # specs = [brand_result.get("specs", []), "brand", 0.8]
  206. # else:
  207. # specs = [title_result.get("specs", []), "itemname", 0.5]
  208. return brands, models
  209. def brand_model_evaluate(header, target, title_ner):
  210. '''
  211. 品牌规格型号
  212. :param target:
  213. :return:
  214. '''
  215. model_score, brand_score, model, brand = 0, 0, "", ""
  216. brands, models = get_brand_model(target, title_ner)
  217. if brands:
  218. brand_score = brands[-1]
  219. brand = ";".join(brands[0])
  220. if models:
  221. model_score = models[-1]
  222. model = ";".join(models[0])
  223. model_header = header.get("model", "")
  224. brand_header = header.get("brandname", "")
  225. if target.get("brandname") and "品牌" in brand_header:
  226. brand_score = brand_score if brand_score * 1.1 > 1 else brand_score * 1.1
  227. if target.get("model") and "型号" in model_header or "规格" in model_header:
  228. model_score = model_score if model_score * 1.1 > 1 else model_score * 1.1
  229. print(brand)
  230. return brand_score, model_score, brand, model
# Module-level SourceEvaluate instance used by evaluate() to score the table
# an item was taken from.
source = SourceEvaluate()
# Field mapping: name -> 'itemname', brand -> 'brandname', model -> 'model',
# quantity -> 'number', unit of measure -> 'unitname',
# unit price -> 'unitprice', total price -> 'totalprice'
  233. def evaluate(target, table):
  234. '''
  235. 标的物评估
  236. :param target:
  237. :return:
  238. '''
  239. file_name = target.get("file_name", "")
  240. header = table.get("header", {})
  241. item_name_score = target.get("score", 0)
  242. source_score = source.calculate_score(file_name, table)
  243. if not header:
  244. return [source_score, 0, 0, 0, 0, 0, 0, 0], {"table_score": round(source_score, 2)}
  245. # 名称
  246. itemname = target.get("itemname", "")
  247. item_name_score = item_name_evaluate(item_name_score, header)
  248. title_ner = entity_model_server(itemname, "")
  249. # 品牌型号
  250. brand_score, model_score, brand, model = brand_model_evaluate(header, target, title_ner)
  251. # 数量单位
  252. number_score, unit_score = number_unit_evaluate(header, target)
  253. # 单价、总价
  254. unit_price_score, total_price_score = total_unit_price_evaluate(header, target)
  255. # 校验
  256. number_price_weight = number_price_verify(target)
  257. # 生成最终的成绩
  258. if number_price_weight[0] == 0:
  259. number_score, unit_price_score, total_price_score = 0, 0, 0
  260. score_list = [source_score, item_name_score, brand_score, model_score, number_score, unit_score, unit_price_score,
  261. total_price_score]
  262. # result_score = calculate_score(score_list)
  263. score_dict = {
  264. "table_score": round(source_score, 2),
  265. "itemname_score": round(item_name_score, 2),
  266. "brand_score": round(brand_score, 2),
  267. "model_score": round(model_score, 2),
  268. "number_score": round(number_score, 2),
  269. "unit_score": round(unit_score, 2),
  270. "unitprice_score": round(unit_price_score, 2),
  271. "totalpricescore": round(total_price_score, 2)
  272. }
  273. return score_list, score_dict
  274. def purchasing_evaluate_start(targets, tables):
  275. '''
  276. 标的物评估函数
  277. :param targets:
  278. :return:
  279. '''
  280. purchasing_score_list = []
  281. purchasing_evaluate_list = []
  282. tables = json.loads(tables) if tables else []
  283. for target in targets:
  284. table_ind = target.get("table", -1)
  285. table_ind = int(table_ind)
  286. table = tables[table_ind] if table_ind > -1 else {}
  287. score, score_dict = evaluate(target, table)
  288. purchasing_score_list.append(score)
  289. purchasing_evaluate_list.append(score_dict)
  290. score_list = []
  291. for score in purchasing_score_list:
  292. result_score = calculate_score(score)
  293. score_list.append(result_score)
  294. merge_score = calculate_score(score_list)
  295. return purchasing_evaluate_list, round(merge_score, 2)
  296. class PurchasingChecker(object):
  297. """
  298. 中标字段检查
  299. """
  300. def __init__(self):
  301. self.errors_tables = {
  302. "score": {
  303. "name": "标的物评估",
  304. "parent_name": "标的物",
  305. "parent_code": "01",
  306. "checkFn": self.check0101
  307. }
  308. }
  309. def check0101(self, purchasinglist, purchasingsource):
  310. """
  311. 意图结果检测
  312. :param header:
  313. :return:
  314. """
  315. purchasing_evaluate_list, score = purchasing_evaluate_start(purchasinglist, purchasingsource)
  316. print(score)
  317. #大于0.85的通过验证的
  318. # if score < 0.85:
  319. # return True
  320. # else:
  321. # return False
  322. return score