package extract import ( "data_ai/clean" "data_ai/prompt" "data_ai/ul" "fmt" log "github.com/donnie4w/go-logger/logger" new_xlsx "github.com/tealeg/xlsx/v3" qu "jygit.jydev.jianyu360.cn/data_processing/common_utils" "os" "strings" "sync" "unicode/utf8" ) func TestSingleFieldInfo(name string, tmpid string) { tmp := ul.BidMgo.FindById(name, tmpid) if len(tmp) == 0 || tmp == nil { log.Debug("未查询到数据...", tmpid) return } data := ResolveInfo(tmp) //最终结果... for k, v := range data { log.Debug(k, "~", v) } } // 导出需要修复的 func TestFullJinOrCodeInfo() { q := map[string]interface{}{} pool_mgo := make(chan bool, 20) wg_mgo := &sync.WaitGroup{} sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) total := 0 it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%10000 == 0 { log.Debug("cur index ", total) } pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(tmp["_id"]) isPcode, update := false, map[string]interface{}{} ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]) o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0 if ext_ai_record != nil { o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"]) o_budget = qu.Float64All((*ext_ai_record)["budget"]) o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"]) } if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 { if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { update["budget"] = filterAmount(r_budget, o_budget) } } if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 { if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 { update["bidamount"] = filterAmount(r_bidamount, o_bidamount) } } //对于编号 if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" { if o_projectcode != projectcode { if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil { fns := getpnsinfo(data) //获取附件名字 for _, v := range fns { if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) { if strings.Contains(v, projectcode) { isPcode = true break } } } if isPcode { update["projectcode"] = o_projectcode } } } } if len(update) > 0 { //更新抽取表 ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{ "$set": update, }) //保存待修复表 update["_id"] = tmp["_id"] ul.SourceMgo.Save("zzzzz_kkk_uc_0907", update) } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("repair ai is over ...") } // 修复金额和编号 func TestRepairJinOrCodeInfo() { q := map[string]interface{}{} pool_mgo := make(chan bool, 20) wg_mgo := &sync.WaitGroup{} sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) total := 0 it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%10000 == 0 { log.Debug("cur index ", total) } pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(tmp["_id"]) isPcode, update := false, map[string]interface{}{} ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]) o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0 if ext_ai_record != nil { o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"]) o_budget = qu.Float64All((*ext_ai_record)["budget"]) o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"]) } if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 { if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { update["budget"] = filterAmount(r_budget, o_budget) } } if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 { if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 { update["bidamount"] = filterAmount(r_bidamount, o_bidamount) } } //对于编号 if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" { if o_projectcode != projectcode { if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil { fns := getpnsinfo(data) //获取附件名字 for _, v := range fns { if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) { if strings.Contains(v, projectcode) { isPcode = true break } } } if isPcode { update["projectcode"] = o_projectcode } } } } if len(update) > 0 { ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{ "$set": update, }) } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("repair ai is over ...") } // 筛选金额 func filterAmount(f1 float64, f2 float64) float64 { //选取一个合适的金额 ... if f1 > f2 { if f1 > 100000000.0 { return f2 } else { return f1 } } else if f1 < f2 { if f2 > 100000000.0 { return f1 } else { return f2 } } else { return f1 } } func TestExportJinErInfo() { q := map[string]interface{}{} pool_mgo := make(chan bool, 20) wg_mgo := &sync.WaitGroup{} sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) total, isok := 0, 0 it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%10000 == 0 { log.Debug("cur index ", total) } isok++ pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(tmp["_id"]) budget := qu.Float64All(tmp["budget"]) bidamount := qu.Float64All(tmp["bidamount"]) saveinfo := map[string]interface{}{} if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil { ext_budget := qu.Float64All((*ext_ai_record)["budget"]) ext_bidamount := qu.Float64All((*ext_ai_record)["bidamount"]) if budget > 0.0 && ext_budget > 0.0 { if budget/ext_budget == 10000.0 || ext_budget/budget == 10000.0 { saveinfo["budget"] = budget saveinfo["ext_budget"] = ext_budget } } if bidamount > 0.0 && ext_bidamount > 0.0 { if bidamount/ext_bidamount == 10000.0 || ext_bidamount/bidamount == 10000.0 { saveinfo["bidamount"] = bidamount saveinfo["ext_bidamount"] = ext_bidamount } } } if len(saveinfo) > 0 && tmpid != "" { saveinfo["toptype"] = tmp["toptype"] saveinfo["subtype"] = tmp["subtype"] saveinfo["href"] = tmp["href"] saveinfo["jyhref"] = tmp["jytest_href"] ul.SourceMgo.Save("zktest_zzzzzkkk_0903", saveinfo) } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("repair ai is over ...", isok) } // 修正buyer等字段 func TestRepairBuyerInfo(name string) { q := map[string]interface{}{} pool_mgo := make(chan bool, 20) wg_mgo := &sync.WaitGroup{} sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) total, isok := 0, 0 it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%10000 == 0 { log.Debug("cur index ", total) } isok++ pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(tmp["_id"]) buyer := qu.ObjToString(tmp["buyer"]) agency := qu.ObjToString(tmp["agency"]) winner := qu.ObjToString(tmp["winner"]) update := map[string]interface{}{} if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil { o_buyer := qu.ObjToString((*ext_ai_record)["buyer"]) if buyer == agency && o_buyer != "" { update["buyer"] = o_buyer } o_winner := qu.ObjToString((*ext_ai_record)["winner"]) if o_winner != "" && strings.Contains(winner, o_winner) && o_winner != o_winner { update["winner"] = o_winner } } if len(update) > 0 && tmpid != "" { ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{ "$set": update, }) } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("repair ai is over ...", isok) } func TestDelUpBuyerAi() { dataArr, _ := ul.SourceMgo.Find("zktest_buyer_0828_new", map[string]interface{}{}, nil, nil) pool_mgo := make(chan bool, 50) wg_mgo := &sync.WaitGroup{} for k, v := range dataArr { if k%1000 == 0 { log.Debug(k, "~", v["_id"]) } pool_mgo <- true wg_mgo.Add(1) go func(v map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() buyer := qu.ObjToString(v["buyer"]) tmpid := ul.BsonTOStringId(v["_id"]) data1 := ul.SourceMgo.FindById("result_20220218", tmpid) if len(data1) > 0 { ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{ "$set": map[string]interface{}{"buyer": buyer}, }) } data2 := ul.SourceMgo.FindById("result_20220219", tmpid) if len(data2) > 0 { ul.SourceMgo.UpdateById("result_20220219", tmpid, map[string]interface{}{ "$set": map[string]interface{}{"buyer": buyer}, }) } }(v) } wg_mgo.Wait() log.Debug("del ai is over ...") } func TestAiBuyerInfo() { //dataArr, _ := ul.SourceMgo.Find("zktest_buyer_info", map[string]interface{}{}, nil, nil) q := map[string]interface{}{} pool_mgo := make(chan bool, 50) wg_mgo := &sync.WaitGroup{} sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) total, isok := 0, 0 it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%1000 == 0 { log.Debug("cur index ", total) } isok++ pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(tmp["_id"]) if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" { if zp_buyer := prompt.AcquireBuyerInfo(buyer); zp_buyer["实体单位"] != nil { if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" { ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{ "$set": map[string]interface{}{"buyer": ns_buyer}, }) } } } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("repair ai is over ...", isok) } func TestExportAiBuyer() { sess := ul.SourceMgo.GetMgoConn() defer ul.SourceMgo.DestoryMongoConn(sess) pool_mgo := make(chan bool, 10) wg_mgo := &sync.WaitGroup{} q, total := map[string]interface{}{ "_id": map[string]interface{}{ "$lte": ul.StringTOBsonId("66cd8299b25c3e1deb9488dd"), }, }, 0 it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Select(map[string]interface{}{ "ai_zhipu": 1, "ext_ai_record": 1, }).Iter() for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total%10000 == 0 { log.Debug("cur index ", total, "~", tmp["_id"]) } pool_mgo <- true wg_mgo.Add(1) go func(tmp map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() ai_buyer, ext_buyer := "", "" if ai_zhipu := qu.ObjToMap(tmp["ai_zhipu"]); ai_zhipu != nil { ai_buyer = qu.ObjToString((*ai_zhipu)["s_buyer"]) } if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil { ext_buyer = qu.ObjToString((*ext_ai_record)["buyer"]) } if ai_buyer != "" { ul.SourceMgo.Save("zktest_buyer_0827", map[string]interface{}{ "_id": tmp["_id"], "ai_buyer": ai_buyer, "ext_buyer": ext_buyer, }) } }(tmp) tmp = make(map[string]interface{}) } wg_mgo.Wait() log.Debug("export is over ", total) } func TestIsPackage() { tmpArr := []string{ "669e83fe66cf0db42a6520b3", "669e892066cf0db42a652c9b", "669e904966cf0db42a653b5d", "669f16f466cf0db42a669069", "669f186c66cf0db42a669bf0", "669efb6766cf0db42a65e0b4", "669f004266cf0db42a65f201", "669f02a666cf0db42a65fff3", "669f172766cf0db42a669193", "669ec89566cf0db42a659020", "669e86b266cf0db42a6526ac", "669e86e466cf0db42a6527b7", "669e87b766cf0db42a652a3e", "669f082d66cf0db42a662323", "669e95e966cf0db42a654dd1", "669ea39466cf0db42a656311", "669f140366cf0db42a66772f", "669ee59466cf0db42a65b8aa", "669f05a166cf0db42a66117b", "669e90d666cf0db42a653e0a", "669f08c466cf0db42a66273c", "669f155166cf0db42a6682c7", "669ef0ff66cf0db42a65c83a", "669efdc166cf0db42a65e8f3", "669f090066cf0db42a6629d0", "669f111366cf0db42a665ce7", "669f15fb66cf0db42a668901", "669f0baa66cf0db42a663a72", "669f039766cf0db42a66044e", "669eff3e66cf0db42a65ee73", "669f12c366cf0db42a666b9d", "669e913b66cf0db42a653ffc", "669e833466cf0db42a651e3a", "669f071e66cf0db42a661b03", "669f1a1266cf0db42a66a892", "669f0aec66cf0db42a6635e8", "669f169c66cf0db42a668e1d", "669ed6c966cf0db42a65a75d", "669f072866cf0db42a661b26", "669f185866cf0db42a669af0", "669f15d366cf0db42a6687aa", "669f182466cf0db42a669960", "669f0ed066cf0db42a664e5c", "669f076466cf0db42a661cd4", "669f172966cf0db42a6691c0", "669f198466cf0db42a66a385", "669f1ad366cf0db42a66afb9", "669f156666cf0db42a668403", "669f093c66cf0db42a662c08", "669f0d8266cf0db42a6646cb", "669f06e866cf0db42a661a1d", "669f1bd766cf0db42a66b86e", "669efcd066cf0db42a65e4f4", } pkgArr := []int{ 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, } ok := 0 for k, v := range tmpArr { data := ul.SourceMgo.FindById("ai_41411", v) if len(data) == 0 { data = ul.SourceMgo.FindById("ai_294", v) } detail := qu.ObjToString(data["detail"]) ispkg := prompt.AcquireIsPackageInfo(detail) if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) { ok++ } else { log.Debug("错误~", v) } } log.Debug("is over ~ ", len(tmpArr)-ok) } func TestPackageInfo() { query := map[string]interface{}{ "new_pkg": map[string]interface{}{ "$exists": 1, }, } dataArr, _ := ul.SourceMgo.Find("ai_41411_zhipu", query, nil, map[string]interface{}{}) log.Debug("查询数量...", len(dataArr)) os.Remove("test.xlsx") f := new_xlsx.NewFile() sheet, _ := f.AddSheet("数据信息") row := sheet.AddRow() writeRow(row, []string{"唯一标识", "站点", "剑鱼链接", "子包名称", "子包单位", "子包金额"}) for _, v := range dataArr { tmpid := ul.BsonTOStringId(v["_id"]) ttt := ul.SourceMgo.FindById("ai_41411", tmpid) site := qu.ObjToString(ttt["site"]) jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid)) p_info := *qu.ObjToMap(v["new_pkg"]) p_arr := ul.IsMarkInterfaceMap(p_info["分包信息"]) for _, v1 := range p_arr { row = sheet.AddRow() arr := []string{} arr = append(arr, tmpid) arr = append(arr, site) arr = append(arr, jyhref) arr = append(arr, qu.ObjToString(v1["包项目名称"])) arr = append(arr, qu.ObjToString(v1["中标单位"])) arr = append(arr, qu.ObjToString(v1["中标金额"])) writeRow(row, arr) } } if err := f.Save("test.xlsx"); err != nil { fmt.Println("保存xlsx失败:", err) } else { fmt.Println("保存xlsx成功:", err) } log.Debug("is over ...") return //分包判断,获取信息 pool_mgo := make(chan bool, 80) wg_mgo := &sync.WaitGroup{} for k, v := range dataArr { if k%10 == 0 { log.Debug(k, "~", v["_id"]) } pool_mgo <- true wg_mgo.Add(1) go func(v map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(v["_id"]) data := ul.SourceMgo.FindById("ai_41411", tmpid) if detail := qu.ObjToString(data["detail"]); utf8.RuneCountInString(detail) > 100 { pkg := prompt.AcquireMultiplePackageInfo(detail) //最终结果... ul.SourceMgo.UpdateById("ai_41411_zhipu", tmpid, map[string]interface{}{ "$set": map[string]interface{}{ "new_pkg": pkg, }, }) } }(v) } wg_mgo.Wait() } // 更新链接 func TestUpdateJyhref(name string) { dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1}) for _, v := range dataArr { tmpid := ul.BsonTOStringId(v["_id"]) jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid)) ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{ "$set": map[string]interface{}{ "jyhref": jyhref, }, }) } log.Debug("is over ...") } func writeRow(row *new_xlsx.Row, arr []string) { for _, v := range arr { row.AddCell().Value = v } }