123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341 |
- package tool
- import (
- "data_ai/extract"
- "data_ai/ul"
- log "github.com/donnie4w/go-logger/logger"
- "go.mongodb.org/mongo-driver/bson/primitive"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "strings"
- "sync"
- )
// unset_check names the award-related fields that StartToolInfo strips from a
// record (through a "$unset" update) whenever getCheckDataAI returns true.
// getCheckDataAI only ranges over the keys; the value 1 is a placeholder.
var unset_check = map[string]interface{}{
	"winner":      1,
	"s_winner":    1,
	"bidamount":   1,
	"winnerorder": 1,
}
- // 工具修正程序
- func StartToolInfo() {
- log.Debug("工具开始大模型修正数据······")
- q := map[string]interface{}{}
- pool_mgo := make(chan bool, ul.Reading)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total, isok := 0, 0
- it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%100 == 0 {
- log.Debug("cur index ", total)
- }
- isok++
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- u_id := ul.BsonTOStringId(tmp["_id"])
- data := extract.ResolveInfo(tmp)
- if len(data) > 0 || u_id == "" {
- tmp["ai_zhipu"] = data
- update_check := make(map[string]interface{}, 0)
- is_unset := getCheckDataAI(tmp, &update_check)
- //最终计算是否清洗
- if len(update_check) > 0 {
- //$set
- ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
- "$set": update_check,
- })
- }
- if is_unset {
- //"$unset"
- ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
- "$unset": unset_check,
- })
- }
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("ai is over ...")
- }
- // 大模型与抽取数据合并计算
- func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) bool {
- if tmp["ai_zhipu"] == nil {
- return false
- }
- //记录抽取原值
- //记录抽取原值
- ext_ai_record := map[string]interface{}{}
- ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
- //分类字段···
- s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
- ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
- if ns_toptype != s_toptype || ns_subtype != s_subtype {
- ext_ai_record["s_toptype"] = ns_toptype
- ext_ai_record["s_subtype"] = ns_subtype
- }
- //赋值···
- s_toptype, s_subtype = ns_toptype, ns_subtype
- if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
- s_toptype = qu.ObjToString(tmp["toptype"])
- s_subtype = qu.ObjToString(tmp["subtype"])
- } else {
- if s_toptype != "" && s_subtype != "" {
- (*update_check)["toptype"] = s_toptype
- (*update_check)["subtype"] = s_subtype
- ext_ai_record["toptype"] = tmp["toptype"]
- ext_ai_record["subtype"] = tmp["subtype"]
- } else {
- s_toptype = qu.ObjToString(tmp["toptype"])
- s_subtype = qu.ObjToString(tmp["subtype"])
- }
- }
- //基础字段···
- if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
- (*update_check)["buyer"] = s_buyer
- ext_ai_record["buyer"] = tmp["buyer"]
- if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
- delete((*update_check), "buyer")
- delete(ext_ai_record, "buyer")
- }
- }
- if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
- (*update_check)["projectname"] = s_projectname
- ext_ai_record["projectname"] = tmp["projectname"]
- }
- if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
- (*update_check)["projectcode"] = s_projectcode
- ext_ai_record["projectcode"] = tmp["projectcode"]
- }
- if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
- (*update_check)["budget"] = s_budget
- ext_ai_record["budget"] = tmp["budget"]
- }
- //地域字段···
- o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
- s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
- if s_area != "" && s_area != "全国" {
- (*update_check)["area"] = s_area
- if s_city != "" {
- (*update_check)["city"] = s_city
- if o_district != "" {
- //判断抽取的区县是否合理···
- isT := false
- if ds := ul.S_DistrictDict[o_district]; ds != nil {
- for _, v := range ds {
- if v.C_Name == s_city && v.P_Name == s_area {
- isT = true
- break
- }
- }
- }
- if !isT {
- (*update_check)["district"] = ""
- }
- }
- } else {
- if o_area != s_area {
- (*update_check)["city"] = ""
- (*update_check)["district"] = ""
- }
- }
- ext_ai_record["area"] = tmp["area"]
- ext_ai_record["city"] = tmp["city"]
- ext_ai_record["district"] = tmp["district"]
- }
- if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
- //先用外围字段替换
- if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
- (*update_check)["bidamount"] = s_bidamount
- ext_ai_record["bidamount"] = tmp["bidamount"]
- }
- if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
- (*update_check)["s_winner"] = s_winner
- ext_ai_record["s_winner"] = tmp["s_winner"]
- (*update_check)["winner"] = s_winner
- ext_ai_record["winner"] = tmp["winner"]
- //对于winner来说...规则值有包含关系,采用规则值
- if winner := qu.ObjToString(tmp["winner"]); winner != "" {
- if strings.Contains(s_winner, winner) {
- delete((*update_check), "winner")
- delete(ext_ai_record, "winner")
- }
- }
- }
- isRulePkg := false
- if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
- if !staffInfo(pkg) {
- isRulePkg = true
- }
- }
- if isRulePkg { //优先采用大模型分包-值替换
- if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
- if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
- if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
- (*update_check)["s_winner"] = p_winner
- (*update_check)["winner"] = p_winner
- ext_ai_record["s_winner"] = tmp["s_winner"]
- ext_ai_record["winner"] = tmp["winner"]
- }
- if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
- (*update_check)["bidamount"] = p_bidamount
- ext_ai_record["bidamount"] = tmp["bidamount"]
- }
- if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
- (*update_check)["package"] = s_package
- ext_ai_record["package"] = tmp["package"]
- }
- }
- }
- }
- } else if s_subtype == "单一" {
- if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
- (*update_check)["bidamount"] = s_bidamount
- ext_ai_record["bidamount"] = tmp["bidamount"]
- }
- if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
- (*update_check)["s_winner"] = s_winner
- (*update_check)["winner"] = s_winner
- ext_ai_record["s_winner"] = tmp["s_winner"]
- ext_ai_record["winner"] = tmp["winner"]
- }
- } else {
- (*update_check)["ext_ai_record"] = ext_ai_record
- for k, _ := range unset_check {
- if tmp[k] != nil {
- return true
- }
- }
- }
- (*update_check)["ext_ai_record"] = ext_ai_record
- //根据识别金额的进行选取与修正
- if r_budget := qu.Float64All((*update_check)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
- if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
- if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
- (*update_check)["budget"] = filterAmount(r_budget, o_budget)
- }
- }
- }
- if r_bidamount := qu.Float64All((*update_check)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
- if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
- if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
- (*update_check)["bidamount"] = filterAmount(r_bidamount, o_bidamount)
- }
- }
- }
- return false
- }
// filterAmount picks one of two amounts. The larger one is returned unless it
// exceeds 100,000,000, in which case the smaller one is returned instead.
// When neither value is greater than the other, f1 is returned.
func filterAmount(f1 float64, f2 float64) float64 {
	// Amounts above this threshold are treated as implausible.
	const plausibleMax = 100000000.0

	var larger, smaller float64
	switch {
	case f1 > f2:
		larger, smaller = f1, f2
	case f1 < f2:
		larger, smaller = f2, f1
	default:
		return f1
	}
	if larger > plausibleMax {
		return smaller
	}
	return larger
}
- // 核算分包信息
- func staffInfo(pkg map[string]interface{}) bool {
- //鉴定中标单位
- is_w := 0
- for _, v := range pkg {
- info := *qu.ObjToMap(v)
- if winner := qu.ObjToString(info["winner"]); winner != "" {
- is_w++
- }
- }
- //鉴定中标金额
- is_b := 0
- for _, v := range pkg {
- info := *qu.ObjToMap(v)
- if bidamount := qu.Float64All(info["bidamount"]); bidamount > 0.0 {
- is_b++
- }
- }
- if is_w != len(pkg) && is_w > 0 {
- return false
- }
- if is_b != len(pkg) && is_b > 0 {
- return false
- }
- if is_w == 0 || is_b == 0 {
- return false
- }
- return true
- }
- func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
- toptype_rule := qu.ObjToString(data["toptype"])
- subtype_rule := qu.ObjToString(data["subtype"])
- //1、结果类 中标和成交错误校正
- s_winner := qu.ObjToString(data["s_winner"])
- winnerorder := IsMarkInterfaceMap(data["winnerorder"])
- if toptype_ai == "结果" && toptype_rule == "结果" {
- if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { //规则、大模型都错
- return "结果", "中标"
- }
- if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
- if len(winnerorder) > 0 { //有中标候选人->中标
- return toptype_ai, "中标"
- }
- if s_winner != "" || data["bidamount"] != nil {
- return toptype_ai, "成交"
- }
- }
- }
- //2、招标、结果错误校正
- if toptype_ai != "结果" && toptype_rule == "结果" {
- //return toptype_rule,subtype_rule//默认规则为准
- if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
- return toptype_rule, subtype_rule
- } else {
- return toptype_ai, subtype_ai
- }
- } else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { //结果-变更
- //return toptype_rule,subtype_rule//默认规则为准
- if len(winnerorder) > 0 { //有中标候选人->中标
- return toptype_ai, "中标" //这里subtype返回"中标",避免ai识别错误
- } else if s_winner != "" || data["bidamount"] != nil {
- return toptype_ai, "成交" //这里subtype返回"成交",避免ai识别错误
- } else {
- return toptype_ai, subtype_ai
- }
- }
- return toptype_ai, subtype_ai
- }
- func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
- p_list := []map[string]interface{}{}
- if list_3, ok_3 := t.([]map[string]interface{}); ok_3 {
- p_list = list_3
- return p_list
- }
- if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
- p_list = qu.ObjArrToMapArr(yl_list_1)
- } else {
- if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
- p_list = qu.ObjArrToMapArr(yl_list_2)
- }
- }
- return p_list
- }
|