package tool import ( "data_ai/extract" "data_ai/ul" log "github.com/donnie4w/go-logger/logger" "go.mongodb.org/mongo-driver/bson/primitive" qu "jygit.jydev.jianyu360.cn/data_processing/common_utils" "strings" "sync" ) var unset_check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1} // 工具修正程序 func StartToolInfo() { log.Debug("工具开始大模型修正数据······") q := map[string]interface{}{} pool_mgo := make(chan bool, ul.Reading) wg_mgo := &sync.WaitGroup{} sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) total, isok := 0, 0 it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%100 == 0 { log.Debug("cur index ", total) } isok++ pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() u_id := ul.BsonTOStringId(tmp["_id"]) data := extract.ResolveInfo(tmp) if len(data) > 0 || u_id == "" { tmp["ai_zhipu"] = data update_check := make(map[string]interface{}, 0) is_unset := getCheckDataAI(tmp, &update_check) //最终计算是否清洗 if len(update_check) > 0 { //$set ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{ "$set": update_check, }) } if is_unset { //"$unset" ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{ "$unset": unset_check, }) } } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("ai is over ...") } // 大模型与抽取数据合并计算 func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) bool { if tmp["ai_zhipu"] == nil { return false } //记录抽取原值 //记录抽取原值 ext_ai_record := map[string]interface{}{} ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"]) //分类字段··· s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"]) ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp) if ns_toptype != s_toptype || ns_subtype != s_subtype { ext_ai_record["s_toptype"] = ns_toptype ext_ai_record["s_subtype"] = ns_subtype } //赋值··· s_toptype, s_subtype = ns_toptype, ns_subtype if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" { s_toptype = qu.ObjToString(tmp["toptype"]) s_subtype = qu.ObjToString(tmp["subtype"]) } else { if s_toptype != "" && s_subtype != "" { (*update_check)["toptype"] = s_toptype (*update_check)["subtype"] = s_subtype ext_ai_record["toptype"] = tmp["toptype"] ext_ai_record["subtype"] = tmp["subtype"] } else { s_toptype = qu.ObjToString(tmp["toptype"]) s_subtype = qu.ObjToString(tmp["subtype"]) } } //基础字段··· if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" { (*update_check)["buyer"] = s_buyer ext_ai_record["buyer"] = tmp["buyer"] if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer { delete((*update_check), "buyer") delete(ext_ai_record, "buyer") } } if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" { (*update_check)["projectname"] = s_projectname ext_ai_record["projectname"] = tmp["projectname"] } if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" { (*update_check)["projectcode"] = s_projectcode ext_ai_record["projectcode"] = tmp["projectcode"] } if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 { (*update_check)["budget"] = s_budget ext_ai_record["budget"] = tmp["budget"] } //地域字段··· o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"]) s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"]) if s_area != "" && s_area != "全国" { (*update_check)["area"] = s_area if s_city != "" { (*update_check)["city"] = s_city if o_district != "" { //判断抽取的区县是否合理··· isT := false if ds := ul.S_DistrictDict[o_district]; ds != nil { for _, v := range ds { if v.C_Name == s_city && v.P_Name == s_area { isT = true break } } } if !isT { (*update_check)["district"] = "" } } } else { if o_area != s_area { (*update_check)["city"] = "" (*update_check)["district"] = "" } } ext_ai_record["area"] = tmp["area"] ext_ai_record["city"] = tmp["city"] ext_ai_record["district"] = tmp["district"] } if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" { //先用外围字段替换 if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 { (*update_check)["bidamount"] = s_bidamount ext_ai_record["bidamount"] = tmp["bidamount"] } if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" { (*update_check)["s_winner"] = s_winner ext_ai_record["s_winner"] = tmp["s_winner"] (*update_check)["winner"] = s_winner ext_ai_record["winner"] = tmp["winner"] //对于winner来说...规则值有包含关系,采用规则值 if winner := qu.ObjToString(tmp["winner"]); winner != "" { if strings.Contains(s_winner, winner) { delete((*update_check), "winner") delete(ext_ai_record, "winner") } } } isRulePkg := false if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") { if !staffInfo(pkg) { isRulePkg = true } } if isRulePkg { //优先采用大模型分包-值替换 if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok { if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil { if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" { (*update_check)["s_winner"] = p_winner (*update_check)["winner"] = p_winner ext_ai_record["s_winner"] = tmp["s_winner"] ext_ai_record["winner"] = tmp["winner"] } if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 { (*update_check)["bidamount"] = p_bidamount ext_ai_record["bidamount"] = tmp["bidamount"] } if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil { (*update_check)["package"] = s_package ext_ai_record["package"] = tmp["package"] } } } } } else if s_subtype == "单一" { if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 { (*update_check)["bidamount"] = s_bidamount ext_ai_record["bidamount"] = tmp["bidamount"] } if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" { (*update_check)["s_winner"] = s_winner (*update_check)["winner"] = s_winner ext_ai_record["s_winner"] = tmp["s_winner"] ext_ai_record["winner"] = tmp["winner"] } } else { (*update_check)["ext_ai_record"] = ext_ai_record for k, _ := range unset_check { if tmp[k] != nil { return true } } } (*update_check)["ext_ai_record"] = ext_ai_record //根据识别金额的进行选取与修正 if r_budget := qu.Float64All((*update_check)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 { if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 { if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { (*update_check)["budget"] = filterAmount(r_budget, o_budget) } } } if r_bidamount := qu.Float64All((*update_check)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 { if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 { if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 { (*update_check)["bidamount"] = filterAmount(r_bidamount, o_bidamount) } } } return false } // 筛选金额 func filterAmount(f1 float64, f2 float64) float64 { //选取一个合适的金额 ... if f1 > f2 { if f1 > 100000000.0 { return f2 } else { return f1 } } else if f1 < f2 { if f2 > 100000000.0 { return f1 } else { return f2 } } else { return f1 } } // 核算分包信息 func staffInfo(pkg map[string]interface{}) bool { //鉴定中标单位 is_w := 0 for _, v := range pkg { info := *qu.ObjToMap(v) if winner := qu.ObjToString(info["winner"]); winner != "" { is_w++ } } //鉴定中标金额 is_b := 0 for _, v := range pkg { info := *qu.ObjToMap(v) if bidamount := qu.Float64All(info["bidamount"]); bidamount > 0.0 { is_b++ } } if is_w != len(pkg) && is_w > 0 { return false } if is_b != len(pkg) && is_b > 0 { return false } if is_w == 0 || is_b == 0 { return false } return true } func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) { toptype_rule := qu.ObjToString(data["toptype"]) subtype_rule := qu.ObjToString(data["subtype"]) //1、结果类 中标和成交错误校正 s_winner := qu.ObjToString(data["s_winner"]) winnerorder := IsMarkInterfaceMap(data["winnerorder"]) if toptype_ai == "结果" && toptype_rule == "结果" { if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { //规则、大模型都错 return "结果", "中标" } if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") { if len(winnerorder) > 0 { //有中标候选人->中标 return toptype_ai, "中标" } if s_winner != "" || data["bidamount"] != nil { return toptype_ai, "成交" } } } //2、招标、结果错误校正 if toptype_ai != "结果" && toptype_rule == "结果" { //return toptype_rule,subtype_rule//默认规则为准 if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil { return toptype_rule, subtype_rule } else { return toptype_ai, subtype_ai } } else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { //结果-变更 //return toptype_rule,subtype_rule//默认规则为准 if len(winnerorder) > 0 { //有中标候选人->中标 return toptype_ai, "中标" //这里subtype返回"中标",避免ai识别错误 } else if s_winner != "" || data["bidamount"] != nil { return toptype_ai, "成交" //这里subtype返回"成交",避免ai识别错误 } else { return toptype_ai, subtype_ai } } return toptype_ai, subtype_ai } func IsMarkInterfaceMap(t interface{}) []map[string]interface{} { p_list := []map[string]interface{}{} if list_3, ok_3 := t.([]map[string]interface{}); ok_3 { p_list = list_3 return p_list } if yl_list_1, ok_1 := t.(primitive.A); ok_1 { p_list = qu.ObjArrToMapArr(yl_list_1) } else { if yl_list_2, ok_2 := t.([]interface{}); ok_2 { p_list = qu.ObjArrToMapArr(yl_list_2) } } return p_list }