tool.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. package tool
  2. import (
  3. "data_ai/extract"
  4. "data_ai/ul"
  5. log "github.com/donnie4w/go-logger/logger"
  6. "go.mongodb.org/mongo-driver/bson/primitive"
  7. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  8. "sync"
  9. )
  // unset_check is the "$unset" document StartToolInfo sends when
  // getCheckDataAI reports that award fields must be removed; getCheckDataAI
  // also iterates its keys to see whether any of those fields is present.
  var unset_check = map[string]interface{}{
  	"winner":      1,
  	"s_winner":    1,
  	"bidamount":   1,
  	"winnerorder": 1,
  }
  11. func StartToolInfo() {
  12. log.Debug("开始大模型修正数据···")
  13. q := map[string]interface{}{}
  14. pool_mgo := make(chan bool, 50)
  15. wg_mgo := &sync.WaitGroup{}
  16. sess := ul.SourceMgo.GetMgoConn()
  17. defer ul.SourceMgo.DestoryMongoConn(sess)
  18. total, isok := 0, 0
  19. it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
  20. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  21. if total%1000 == 0 {
  22. log.Debug("cur index ", total)
  23. }
  24. isok++
  25. pool_mgo <- true
  26. wg_mgo.Add(1)
  27. go func(tmp map[string]interface{}) {
  28. defer func() {
  29. <-pool_mgo
  30. wg_mgo.Done()
  31. }()
  32. u_id := ul.BsonTOStringId(tmp["_id"])
  33. data := extract.ResolveInfo(tmp)
  34. if len(data) > 0 || u_id == "" {
  35. tmp["ai_zhipu"] = data
  36. update_check := make(map[string]interface{}, 0)
  37. is_unset := getCheckDataAI(tmp, &update_check)
  38. //最终计算是否清洗
  39. if len(update_check) > 0 {
  40. //$set
  41. ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
  42. "$set": update_check,
  43. })
  44. }
  45. if is_unset {
  46. //"$unset"
  47. ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
  48. "$unset": unset_check,
  49. })
  50. }
  51. }
  52. }(tmp)
  53. tmp = make(map[string]interface{})
  54. }
  55. wg_mgo.Wait()
  56. log.Debug("ai is over ...")
  57. }
  58. // 大模型与抽取数据合并计算
  59. func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) bool {
  60. if tmp["ai_zhipu"] == nil {
  61. return false
  62. }
  63. //记录抽取原值
  64. ext_ai_record := map[string]interface{}{}
  65. ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
  66. //分类字段···
  67. s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
  68. ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
  69. if ns_toptype != s_toptype || ns_subtype != s_subtype {
  70. ext_ai_record["s_toptype"] = ns_toptype
  71. ext_ai_record["s_subtype"] = ns_subtype
  72. }
  73. //赋值···
  74. s_toptype, s_subtype = ns_toptype, ns_subtype
  75. if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
  76. s_toptype = qu.ObjToString(tmp["toptype"])
  77. s_subtype = qu.ObjToString(tmp["subtype"])
  78. } else {
  79. if s_toptype != "" && s_subtype != "" {
  80. (*update_check)["toptype"] = s_toptype
  81. (*update_check)["subtype"] = s_subtype
  82. ext_ai_record["toptype"] = tmp["toptype"]
  83. ext_ai_record["subtype"] = tmp["subtype"]
  84. } else {
  85. s_toptype = qu.ObjToString(tmp["toptype"])
  86. s_subtype = qu.ObjToString(tmp["subtype"])
  87. }
  88. }
  89. //基础字段···
  90. if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
  91. (*update_check)["buyer"] = s_buyer
  92. ext_ai_record["buyer"] = tmp["buyer"]
  93. if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
  94. delete((*update_check), "buyer")
  95. delete(ext_ai_record, "buyer")
  96. }
  97. }
  98. if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
  99. (*update_check)["projectname"] = s_projectname
  100. ext_ai_record["projectname"] = tmp["projectname"]
  101. }
  102. if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
  103. (*update_check)["projectcode"] = s_projectcode
  104. ext_ai_record["projectcode"] = tmp["projectcode"]
  105. }
  106. if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
  107. (*update_check)["budget"] = s_budget
  108. ext_ai_record["budget"] = tmp["budget"]
  109. }
  110. //地域字段···
  111. o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
  112. s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
  113. if s_area != "" && s_area != "全国" {
  114. (*update_check)["area"] = s_area
  115. if s_city != "" {
  116. (*update_check)["city"] = s_city
  117. if o_district != "" {
  118. //判断抽取的区县是否合理···
  119. isT := false
  120. if ds := ul.S_DistrictDict[o_district]; ds != nil {
  121. for _, v := range ds {
  122. if v.C_Name == s_city && v.P_Name == s_area {
  123. isT = true
  124. break
  125. }
  126. }
  127. }
  128. if !isT {
  129. (*update_check)["district"] = ""
  130. }
  131. }
  132. } else {
  133. if o_area != s_area {
  134. (*update_check)["city"] = ""
  135. (*update_check)["district"] = ""
  136. }
  137. }
  138. ext_ai_record["area"] = tmp["area"]
  139. ext_ai_record["city"] = tmp["city"]
  140. ext_ai_record["district"] = tmp["district"]
  141. }
  142. if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
  143. //先用外围字段替换
  144. if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
  145. (*update_check)["bidamount"] = s_bidamount
  146. ext_ai_record["bidamount"] = tmp["bidamount"]
  147. }
  148. if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
  149. (*update_check)["s_winner"] = s_winner
  150. (*update_check)["winner"] = s_winner
  151. ext_ai_record["s_winner"] = tmp["s_winner"]
  152. ext_ai_record["winner"] = tmp["winner"]
  153. }
  154. isRulePkg := false
  155. if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
  156. if !staffInfo(pkg) {
  157. isRulePkg = true
  158. }
  159. }
  160. if isRulePkg { //优先采用大模型分包-值替换
  161. if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
  162. if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
  163. if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
  164. (*update_check)["s_winner"] = p_winner
  165. (*update_check)["winner"] = p_winner
  166. ext_ai_record["s_winner"] = tmp["s_winner"]
  167. ext_ai_record["winner"] = tmp["winner"]
  168. }
  169. if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
  170. (*update_check)["bidamount"] = p_bidamount
  171. ext_ai_record["bidamount"] = tmp["bidamount"]
  172. }
  173. if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
  174. (*update_check)["package"] = s_package
  175. ext_ai_record["package"] = tmp["package"]
  176. }
  177. }
  178. }
  179. }
  180. } else if s_subtype == "单一" {
  181. if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
  182. (*update_check)["bidamount"] = s_bidamount
  183. ext_ai_record["bidamount"] = tmp["bidamount"]
  184. }
  185. if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
  186. (*update_check)["s_winner"] = s_winner
  187. (*update_check)["winner"] = s_winner
  188. ext_ai_record["s_winner"] = tmp["s_winner"]
  189. ext_ai_record["winner"] = tmp["winner"]
  190. }
  191. } else {
  192. (*update_check)["ext_ai_record"] = ext_ai_record
  193. for k, _ := range unset_check {
  194. if tmp[k] != nil {
  195. return true
  196. }
  197. }
  198. }
  199. (*update_check)["ext_ai_record"] = ext_ai_record
  200. return false
  201. }
  202. // 核算分包信息
  203. func staffInfo(pkg map[string]interface{}) bool {
  204. //鉴定中标单位
  205. is_w := 0
  206. for _, v := range pkg {
  207. info := *qu.ObjToMap(v)
  208. if winner := qu.ObjToString(info["winner"]); winner != "" {
  209. is_w++
  210. }
  211. }
  212. //鉴定中标金额
  213. is_b := 0
  214. for _, v := range pkg {
  215. info := *qu.ObjToMap(v)
  216. if bidamount := qu.Float64All(info["bidamount"]); bidamount > 0.0 {
  217. is_b++
  218. }
  219. }
  220. if is_w != len(pkg) && is_w > 0 {
  221. return false
  222. }
  223. if is_b != len(pkg) && is_b > 0 {
  224. return false
  225. }
  226. if is_w == 0 || is_b == 0 {
  227. return false
  228. }
  229. return true
  230. }
  231. func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
  232. toptype_rule := qu.ObjToString(data["toptype"])
  233. subtype_rule := qu.ObjToString(data["subtype"])
  234. //1、结果类 中标和成交错误校正
  235. s_winner := qu.ObjToString(data["s_winner"])
  236. winnerorder := IsMarkInterfaceMap(data["winnerorder"])
  237. if toptype_ai == "结果" && toptype_rule == "结果" {
  238. if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { //规则、大模型都错
  239. return "结果", "中标"
  240. }
  241. if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
  242. if len(winnerorder) > 0 { //有中标候选人->中标
  243. return toptype_ai, "中标"
  244. }
  245. if s_winner != "" || data["bidamount"] != nil {
  246. return toptype_ai, "成交"
  247. }
  248. }
  249. }
  250. //2、招标、结果错误校正
  251. if toptype_ai != "结果" && toptype_rule == "结果" {
  252. //return toptype_rule,subtype_rule//默认规则为准
  253. if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
  254. return toptype_rule, subtype_rule
  255. } else {
  256. return toptype_ai, subtype_ai
  257. }
  258. } else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { //结果-变更
  259. //return toptype_rule,subtype_rule//默认规则为准
  260. if len(winnerorder) > 0 { //有中标候选人->中标
  261. return toptype_ai, "中标" //这里subtype返回"中标",避免ai识别错误
  262. } else if s_winner != "" || data["bidamount"] != nil {
  263. return toptype_ai, "成交" //这里subtype返回"成交",避免ai识别错误
  264. } else {
  265. return toptype_ai, subtype_ai
  266. }
  267. }
  268. return toptype_ai, subtype_ai
  269. }
  270. func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
  271. p_list := []map[string]interface{}{}
  272. if list_3, ok_3 := t.([]map[string]interface{}); ok_3 {
  273. p_list = list_3
  274. return p_list
  275. }
  276. if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
  277. p_list = qu.ObjArrToMapArr(yl_list_1)
  278. } else {
  279. if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
  280. p_list = qu.ObjArrToMapArr(yl_list_2)
  281. }
  282. }
  283. return p_list
  284. }