tool.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. package tool
  2. import (
  3. "data_ai/extract"
  4. "data_ai/ul"
  5. log "github.com/donnie4w/go-logger/logger"
  6. "go.mongodb.org/mongo-driver/bson/primitive"
  7. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  8. "strings"
  9. "sync"
  10. )
  // unset_check names the fields that StartToolInfo removes with "$unset" when
  // getCheckDataAI reports that a document should no longer carry them. It is
  // used both as the "$unset" document and as the list of keys to probe.
  var unset_check = map[string]interface{}{
  	"winner":      1,
  	"s_winner":    1,
  	"bidamount":   1,
  	"winnerorder": 1,
  }
  12. // 工具修正程序
  13. func StartToolInfo() {
  14. log.Debug("工具开始大模型修正数据······")
  15. q := map[string]interface{}{}
  16. pool_mgo := make(chan bool, ul.Reading)
  17. wg_mgo := &sync.WaitGroup{}
  18. sess := ul.SourceMgo.GetMgoConn()
  19. defer ul.SourceMgo.DestoryMongoConn(sess)
  20. total, isok := 0, 0
  21. it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
  22. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  23. if total%100 == 0 {
  24. log.Debug("cur index ", total)
  25. }
  26. isok++
  27. pool_mgo <- true
  28. wg_mgo.Add(1)
  29. go func(tmp map[string]interface{}) {
  30. defer func() {
  31. <-pool_mgo
  32. wg_mgo.Done()
  33. }()
  34. u_id := ul.BsonTOStringId(tmp["_id"])
  35. data := extract.ResolveInfo(tmp)
  36. if len(data) > 0 || u_id == "" {
  37. tmp["ai_zhipu"] = data
  38. update_check := make(map[string]interface{}, 0)
  39. is_unset := getCheckDataAI(tmp, &update_check)
  40. //最终计算是否清洗
  41. if len(update_check) > 0 {
  42. //$set
  43. ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
  44. "$set": update_check,
  45. })
  46. }
  47. if is_unset {
  48. //"$unset"
  49. ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
  50. "$unset": unset_check,
  51. })
  52. }
  53. }
  54. }(tmp)
  55. tmp = make(map[string]interface{})
  56. }
  57. wg_mgo.Wait()
  58. log.Debug("ai is over ...")
  59. }
  60. // 大模型与抽取数据合并计算
  61. func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) bool {
  62. if tmp["ai_zhipu"] == nil {
  63. return false
  64. }
  65. //记录抽取原值
  66. //记录抽取原值
  67. ext_ai_record := map[string]interface{}{}
  68. ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
  69. //分类字段···
  70. s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
  71. ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
  72. if ns_toptype != s_toptype || ns_subtype != s_subtype {
  73. ext_ai_record["s_toptype"] = ns_toptype
  74. ext_ai_record["s_subtype"] = ns_subtype
  75. }
  76. //赋值···
  77. s_toptype, s_subtype = ns_toptype, ns_subtype
  78. if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
  79. s_toptype = qu.ObjToString(tmp["toptype"])
  80. s_subtype = qu.ObjToString(tmp["subtype"])
  81. } else {
  82. if s_toptype != "" && s_subtype != "" {
  83. (*update_check)["toptype"] = s_toptype
  84. (*update_check)["subtype"] = s_subtype
  85. ext_ai_record["toptype"] = tmp["toptype"]
  86. ext_ai_record["subtype"] = tmp["subtype"]
  87. } else {
  88. s_toptype = qu.ObjToString(tmp["toptype"])
  89. s_subtype = qu.ObjToString(tmp["subtype"])
  90. }
  91. }
  92. //基础字段···
  93. if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
  94. (*update_check)["buyer"] = s_buyer
  95. ext_ai_record["buyer"] = tmp["buyer"]
  96. if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
  97. delete((*update_check), "buyer")
  98. delete(ext_ai_record, "buyer")
  99. }
  100. }
  101. if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
  102. (*update_check)["projectname"] = s_projectname
  103. ext_ai_record["projectname"] = tmp["projectname"]
  104. }
  105. if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
  106. (*update_check)["projectcode"] = s_projectcode
  107. ext_ai_record["projectcode"] = tmp["projectcode"]
  108. }
  109. if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
  110. (*update_check)["budget"] = s_budget
  111. ext_ai_record["budget"] = tmp["budget"]
  112. }
  113. //地域字段···
  114. o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
  115. s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
  116. if s_area != "" && s_area != "全国" {
  117. (*update_check)["area"] = s_area
  118. if s_city != "" {
  119. (*update_check)["city"] = s_city
  120. if o_district != "" {
  121. //判断抽取的区县是否合理···
  122. isT := false
  123. if ds := ul.S_DistrictDict[o_district]; ds != nil {
  124. for _, v := range ds {
  125. if v.C_Name == s_city && v.P_Name == s_area {
  126. isT = true
  127. break
  128. }
  129. }
  130. }
  131. if !isT {
  132. (*update_check)["district"] = ""
  133. }
  134. }
  135. } else {
  136. if o_area != s_area {
  137. (*update_check)["city"] = ""
  138. (*update_check)["district"] = ""
  139. }
  140. }
  141. ext_ai_record["area"] = tmp["area"]
  142. ext_ai_record["city"] = tmp["city"]
  143. ext_ai_record["district"] = tmp["district"]
  144. }
  145. if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
  146. //先用外围字段替换
  147. if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
  148. (*update_check)["bidamount"] = s_bidamount
  149. ext_ai_record["bidamount"] = tmp["bidamount"]
  150. }
  151. if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
  152. (*update_check)["s_winner"] = s_winner
  153. ext_ai_record["s_winner"] = tmp["s_winner"]
  154. (*update_check)["winner"] = s_winner
  155. ext_ai_record["winner"] = tmp["winner"]
  156. //对于winner来说...规则值有包含关系,采用规则值
  157. if winner := qu.ObjToString(tmp["winner"]); winner != "" {
  158. if strings.Contains(s_winner, winner) {
  159. delete((*update_check), "winner")
  160. delete(ext_ai_record, "winner")
  161. }
  162. }
  163. }
  164. isRulePkg := false
  165. if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
  166. if !staffInfo(pkg) {
  167. isRulePkg = true
  168. }
  169. }
  170. if isRulePkg { //优先采用大模型分包-值替换
  171. if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
  172. if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
  173. if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
  174. (*update_check)["s_winner"] = p_winner
  175. (*update_check)["winner"] = p_winner
  176. ext_ai_record["s_winner"] = tmp["s_winner"]
  177. ext_ai_record["winner"] = tmp["winner"]
  178. }
  179. if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
  180. (*update_check)["bidamount"] = p_bidamount
  181. ext_ai_record["bidamount"] = tmp["bidamount"]
  182. }
  183. if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
  184. (*update_check)["package"] = s_package
  185. ext_ai_record["package"] = tmp["package"]
  186. }
  187. }
  188. }
  189. }
  190. } else if s_subtype == "单一" {
  191. if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
  192. (*update_check)["bidamount"] = s_bidamount
  193. ext_ai_record["bidamount"] = tmp["bidamount"]
  194. }
  195. if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
  196. (*update_check)["s_winner"] = s_winner
  197. (*update_check)["winner"] = s_winner
  198. ext_ai_record["s_winner"] = tmp["s_winner"]
  199. ext_ai_record["winner"] = tmp["winner"]
  200. }
  201. } else {
  202. (*update_check)["ext_ai_record"] = ext_ai_record
  203. for k, _ := range unset_check {
  204. if tmp[k] != nil {
  205. return true
  206. }
  207. }
  208. }
  209. (*update_check)["ext_ai_record"] = ext_ai_record
  210. //根据识别金额的进行选取与修正
  211. if r_budget := qu.Float64All((*update_check)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
  212. if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
  213. if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
  214. (*update_check)["budget"] = filterAmount(r_budget, o_budget)
  215. }
  216. }
  217. }
  218. if r_bidamount := qu.Float64All((*update_check)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
  219. if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
  220. if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
  221. (*update_check)["bidamount"] = filterAmount(r_bidamount, o_bidamount)
  222. }
  223. }
  224. }
  225. return false
  226. }
  // filterAmount picks one of two candidate amounts: the larger one, unless the
  // larger one exceeds 100000000, in which case the smaller one is returned.
  // When neither value compares greater than the other, f1 is returned.
  func filterAmount(f1 float64, f2 float64) float64 {
  	var larger, smaller float64
  	switch {
  	case f1 > f2:
  		larger, smaller = f1, f2
  	case f1 < f2:
  		larger, smaller = f2, f1
  	default:
  		return f1
  	}
  	if larger > 100000000.0 {
  		return smaller
  	}
  	return larger
  }
  246. // 核算分包信息
  247. func staffInfo(pkg map[string]interface{}) bool {
  248. //鉴定中标单位
  249. is_w := 0
  250. for _, v := range pkg {
  251. info := *qu.ObjToMap(v)
  252. if winner := qu.ObjToString(info["winner"]); winner != "" {
  253. is_w++
  254. }
  255. }
  256. //鉴定中标金额
  257. is_b := 0
  258. for _, v := range pkg {
  259. info := *qu.ObjToMap(v)
  260. if bidamount := qu.Float64All(info["bidamount"]); bidamount > 0.0 {
  261. is_b++
  262. }
  263. }
  264. if is_w != len(pkg) && is_w > 0 {
  265. return false
  266. }
  267. if is_b != len(pkg) && is_b > 0 {
  268. return false
  269. }
  270. if is_w == 0 || is_b == 0 {
  271. return false
  272. }
  273. return true
  274. }
  275. func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
  276. toptype_rule := qu.ObjToString(data["toptype"])
  277. subtype_rule := qu.ObjToString(data["subtype"])
  278. //1、结果类 中标和成交错误校正
  279. s_winner := qu.ObjToString(data["s_winner"])
  280. winnerorder := IsMarkInterfaceMap(data["winnerorder"])
  281. if toptype_ai == "结果" && toptype_rule == "结果" {
  282. if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { //规则、大模型都错
  283. return "结果", "中标"
  284. }
  285. if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
  286. if len(winnerorder) > 0 { //有中标候选人->中标
  287. return toptype_ai, "中标"
  288. }
  289. if s_winner != "" || data["bidamount"] != nil {
  290. return toptype_ai, "成交"
  291. }
  292. }
  293. }
  294. //2、招标、结果错误校正
  295. if toptype_ai != "结果" && toptype_rule == "结果" {
  296. //return toptype_rule,subtype_rule//默认规则为准
  297. if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
  298. return toptype_rule, subtype_rule
  299. } else {
  300. return toptype_ai, subtype_ai
  301. }
  302. } else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { //结果-变更
  303. //return toptype_rule,subtype_rule//默认规则为准
  304. if len(winnerorder) > 0 { //有中标候选人->中标
  305. return toptype_ai, "中标" //这里subtype返回"中标",避免ai识别错误
  306. } else if s_winner != "" || data["bidamount"] != nil {
  307. return toptype_ai, "成交" //这里subtype返回"成交",避免ai识别错误
  308. } else {
  309. return toptype_ai, subtype_ai
  310. }
  311. }
  312. return toptype_ai, subtype_ai
  313. }
  314. func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
  315. p_list := []map[string]interface{}{}
  316. if list_3, ok_3 := t.([]map[string]interface{}); ok_3 {
  317. p_list = list_3
  318. return p_list
  319. }
  320. if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
  321. p_list = qu.ObjArrToMapArr(yl_list_1)
  322. } else {
  323. if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
  324. p_list = qu.ObjArrToMapArr(yl_list_2)
  325. }
  326. }
  327. return p_list
  328. }