package extract

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"sync"
	"time"

	"data_ai/ul"
	log "github.com/donnie4w/go-logger/logger"
	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
)

// deepseekCleanURL is the endpoint post1 sends its JSON payload to.
const deepseekCleanURL = "http://127.0.0.1:12321/clean/deepseek"

var (
	// deepseekCleanClient is shared by all post1 calls so connections can be
	// reused; the 2s timeout matches the original per-call client.
	deepseekCleanClient = &http.Client{Timeout: 2 * time.Second}

	// punctNormalizer maps full-width punctuation to its ASCII form and strips
	// spaces and the ideographic comma. The non-ASCII characters are written as
	// escapes so the source survives encoding normalization.
	punctNormalizer = strings.NewReplacer(
		"\uff08", "(", // full-width left parenthesis
		"\uff09", ")", // full-width right parenthesis
		"\uff0c", ",", // full-width comma
		" ", "",
		"\u3001", "", // ideographic comma
	)
)

// safeObjToMap wraps qu.ObjToMap and returns a nil map instead of a nil
// pointer, so callers can index or nil-check the result without risking a
// nil-pointer dereference.
func safeObjToMap(v interface{}) map[string]interface{} {
	if m := qu.ObjToMap(v); m != nil {
		return *m
	}
	return nil
}

// multiPackageFlag returns 1 when ai["s_pkg"]["com_package"], as interpreted
// by ul.IsMarkInterfaceMap, holds more than one entry, and 0 otherwise
// (including when "s_pkg" is missing or not a map).
func multiPackageFlag(ai map[string]interface{}) int {
	sPkg := safeObjToMap(ai["s_pkg"])
	if len(ul.IsMarkInterfaceMap(sPkg["com_package"])) > 1 {
		return 1
	}
	return 0
}

// duibifenbao reads every document of "zktest_sample_data_source_deepseek"
// and, for each of the "ai_zhipu", "ai_deepseek" and "package" values that is
// a map, records a 0/1 flag telling whether it holds more than one package
// entry. Documents with at least one flag are saved to "zktest_0225" together
// with their _id, href and a generated jyhref.
func duibifenbao() {
	// The second return value of Find is discarded as in the original code;
	// its type is not visible from this file.
	dataArr, _ := ul.BidMgo.Find("zktest_sample_data_source_deepseek", map[string]interface{}{}, nil, map[string]interface{}{})
	for _, v := range dataArr {
		tmpid := ul.BsonTOStringId(v["_id"])
		data := map[string]interface{}{}
		if ai := qu.ObjToMap(v["ai_zhipu"]); ai != nil {
			data["zhipu"] = multiPackageFlag(*ai)
		}
		if ai := qu.ObjToMap(v["ai_deepseek"]); ai != nil {
			data["deepseek"] = multiPackageFlag(*ai)
		}
		if pkg := qu.ObjToMap(v["package"]); pkg != nil {
			if len(*pkg) > 1 {
				data["extract"] = 1
			} else {
				data["extract"] = 0
			}
		}
		if len(data) == 0 {
			continue
		}
		data["_id"] = v["_id"]
		data["href"] = v["href"]
		data["jyhref"] = fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
		ul.BidMgo.Save("zktest_0225", data)
	}
}

// test1 logs every key of dict2 that is missing from dict1. Both maps are
// created empty here, so at present it only emits the final log line.
func test1() {
	dict1 := map[string]interface{}{}
	dict2 := map[string]interface{}{}
	for k := range dict2 {
		if dict1[k] == nil {
			log.Debug(k)
		}
	}
	log.Debug("is over ...")
}

// merge1 loads every document of ai_coll and, for documents that carry
// ai_key, copies that value into "ai_zhipu" and runs ul.ChooseCheckDataAI on
// the document. At most 10 documents are processed concurrently.
//
// NOTE(review): processing is restricted to one hard-coded _id and the
// write-back is commented out; both look like debugging leftovers and are
// kept unchanged.
func merge1(ai_coll string, ai_key string) {
	const onlyID = "664af2af66cf0db42a3d217e"

	dataArr, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
	log.Debug("查询量···", len(dataArr))
	pool := make(chan bool, 10) // bounds the number of in-flight goroutines
	wg := &sync.WaitGroup{}
	for k, v := range dataArr {
		if k%100 == 0 {
			log.Debug("cur index ", k)
		}
		if v[ai_key] == nil {
			continue
		}
		pool <- true
		wg.Add(1)
		go func(tmp map[string]interface{}) {
			defer func() {
				<-pool
				wg.Done()
			}()
			if ul.BsonTOStringId(tmp["_id"]) != onlyID {
				return
			}
			data := safeObjToMap(tmp[ai_key])
			if data == nil {
				// The value is not a map; the original code would panic here.
				return
			}
			tmp["ai_zhipu"] = data
			updateInfo := make(map[string]interface{})
			ul.ChooseCheckDataAI(tmp, &updateInfo)
			// Write-back is disabled:
			//ul.SourceMgo.UpdateById(ai_coll, u_id, map[string]interface{}{
			//	"$set": update_info,
			//})
		}(v)
	}
	wg.Wait()
	log.Debug("is over ... ")
}

// compare1 compares the documents of ai_coll against the annotated documents
// in "standard_sample_data" and logs, per field, how many records were
// counted, how many matched, and the match rate in percent.
func compare1(ai_coll string) {
	fields := map[string]string{
		"toptype":     "string",
		"subtype":     "string",
		"area":        "string",
		"city":        "string",
		"projectname": "string",
		"projectcode": "string",
		"buyer":       "string",
		"s_winner":    "string",
		"budget":      "float",
		"bidamount":   "float",
	}
	dataArr1, _ := ul.PyMgo.Find("standard_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
	dataArr2, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
	log.Debug("查询数量:", len(dataArr1), len(dataArr2))

	biaozhu, checkExclude, excludeAll := creat(dataArr1, false) // annotated data
	deepseek, _, _ := creat(dataArr2, false)
	// Drop the raw query results before the comparison runs.
	dataArr1, dataArr2 = nil, nil

	tj := duibi(fields, biaozhu, deepseek, checkExclude, excludeAll)
	log.Debug("...................")

	// Fixed report order; ranging over the fields map would be random.
	order := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
	for _, field := range order {
		total, same := tj[field]["total"], tj[field]["same"]
		rate := 0.0
		if total > 0 { // avoid 0/0, which would print NaN
			rate = float64(same) / float64(total) * 100.0
		}
		log.Debug(fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", field, total, same, rate, "%"))
	}
}

// compareInfo extracts the ten compared fields from doc. Every key except the
// winner is read as prefix+name; the winner is always read from "s_winner".
func compareInfo(doc map[string]interface{}, prefix string) map[string]interface{} {
	return map[string]interface{}{
		"toptype":     qu.ObjToString(doc[prefix+"toptype"]),
		"subtype":     qu.ObjToString(doc[prefix+"subtype"]),
		"area":        qu.ObjToString(doc[prefix+"area"]),
		"city":        qu.ObjToString(doc[prefix+"city"]),
		"projectname": qu.ObjToString(doc[prefix+"projectname"]),
		"projectcode": qu.ObjToString(doc[prefix+"projectcode"]),
		"budget":      qu.Float64All(doc[prefix+"budget"]),
		"bidamount":   qu.Float64All(doc[prefix+"bidamount"]),
		"buyer":       qu.ObjToString(doc[prefix+"buyer"]),
		"s_winner":    qu.ObjToString(doc["s_winner"]),
	}
}

// creat builds the lookup tables used by duibi from dataArr. It returns, keyed
// by document id: the compared field values, the "check_exclude" map of each
// document that has one, and the integer "exclude_all" value of each document
// that has one.
//
// When zhipu is true the field values are read with an "s_" prefix from the
// document's "ai_deepseek" sub-map (or from the document itself when that
// value is absent or not a map); otherwise they are read unprefixed from the
// document.
func creat(dataArr []map[string]interface{}, zhipu bool) (map[string]map[string]interface{}, map[string]map[string]interface{}, map[string]interface{}) {
	dict := map[string]map[string]interface{}{}
	checkExclude := map[string]map[string]interface{}{}
	excludeAll := map[string]interface{}{}
	for _, doc := range dataArr {
		tmpid := ul.BsonTOStringId(doc["_id"])
		if m := safeObjToMap(doc["check_exclude"]); m != nil {
			checkExclude[tmpid] = m
		}
		if doc["exclude_all"] != nil {
			excludeAll[tmpid] = qu.IntAll(doc["exclude_all"])
		}
		if !zhipu {
			dict[tmpid] = compareInfo(doc, "")
			continue
		}
		src := doc
		if sub := safeObjToMap(doc["ai_deepseek"]); sub != nil {
			src = sub
		}
		dict[tmpid] = compareInfo(src, "s_")
	}
	return dict, checkExclude, excludeAll
}

// duibi counts, per field, how many records of source were compared against
// biaozhu ("total") and how many had equal values ("same").
//
// A record is skipped entirely when exclude_all marks it with 1, and a single
// field is skipped when check_exclude has a non-nil entry for it. A field
// whose value is empty (string) or zero (float) on both sides is not counted.
// String values are normalized with c before comparison; floats are compared
// exactly. Fields with a type other than "string" or "float" get an empty
// counter.
func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}, check_exclude map[string]map[string]interface{}, exclude_all map[string]interface{}) map[string]map[string]int {
	tj := map[string]map[string]int{}
	for tmpid, tmp := range source {
		if qu.IntAll(exclude_all[tmpid]) == 1 {
			continue // whole record excluded
		}
		bz := biaozhu[tmpid]
		exclude := check_exclude[tmpid]
		for field, kind := range fields {
			if exclude[field] != nil {
				continue
			}
			nums := tj[field]
			if nums == nil {
				nums = map[string]int{}
				tj[field] = nums
			}
			var counted, same bool
			switch kind {
			case "string":
				b, s := c(qu.ObjToString(bz[field])), c(qu.ObjToString(tmp[field]))
				counted = b != "" || s != ""
				same = b == s
			case "float":
				b, s := qu.Float64All(bz[field]), qu.Float64All(tmp[field])
				counted = b != 0.0 || s != 0.0
				same = b == s
			}
			if !counted {
				continue
			}
			nums["total"]++
			if same {
				nums["same"]++
			}
		}
	}
	return tj
}

// update1 copies "zktest_deepseek_0124" into "zktest_deepseek_0124_1", keeping
// only _id and "ai_zhipu". Before saving, values from ai_zhipu["s_pkg"] are
// promoted to the top level of ai_zhipu: s_budget and s_bidamount when they
// are positive and larger than the existing value, s_winner when non-empty.
// Documents whose "ai_zhipu" is missing or not a map are skipped.
func update1() {
	dataArr, _ := ul.BidMgo.Find("zktest_deepseek_0124", map[string]interface{}{}, nil, map[string]interface{}{})
	for _, v := range dataArr {
		ai := safeObjToMap(v["ai_zhipu"])
		if ai == nil {
			continue
		}
		if pkg := safeObjToMap(ai["s_pkg"]); pkg != nil {
			if budget := qu.Float64All(pkg["s_budget"]); budget > 0.0 && budget > qu.Float64All(ai["s_budget"]) {
				ai["s_budget"] = budget
			}
			if amount := qu.Float64All(pkg["s_bidamount"]); amount > 0.0 && amount > qu.Float64All(ai["s_bidamount"]) {
				ai["s_bidamount"] = amount
			}
			if winner := qu.ObjToString(pkg["s_winner"]); winner != "" {
				ai["s_winner"] = winner
			}
		}
		ul.BidMgo.Save("zktest_deepseek_0124_1", map[string]interface{}{
			"ai_zhipu": ai,
			"_id":      v["_id"],
		})
	}
	log.Debug("is over ...")
}

// export1 looks up every _id of "zktest_sample_data" in the "bidding"
// collection and saves the documents found to "zktest_sample_data_source_4".
// Ids with no matching document are logged and skipped.
func export1() {
	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
	pool := make(chan bool, 1) // capacity 1: lookups run one at a time
	wg := &sync.WaitGroup{}
	for _, v := range dataArr {
		pool <- true
		wg.Add(1)
		go func(tmp map[string]interface{}) {
			defer func() {
				<-pool
				wg.Done()
			}()
			// Use the goroutine's own argument: before Go 1.22 the loop
			// variable is shared and may already point at the next document.
			tmpid := ul.BsonTOStringId(tmp["_id"])
			data := ul.BidMgo.FindById("bidding", tmpid)
			if len(data) == 0 {
				log.Debug("异常")
				return // nothing to save
			}
			ul.BidMgo.Save("zktest_sample_data_source_4", data)
		}(v)
	}
	wg.Wait()
	log.Debug("is over ...")
}

// c normalizes a string for comparison: full-width parentheses and commas
// become their ASCII forms, and spaces and ideographic commas are removed.
func c(s string) string {
	return punctNormalizer.Replace(s)
}

// post1 POSTs data as JSON to the local deepseek clean service and returns the
// decoded JSON object of the reply. On any failure (encoding, transport, read
// or decode) the error is logged and the map built so far — normally empty —
// is returned; the result is never nil. The HTTP status code is not inspected.
func post1(data map[string]interface{}) map[string]interface{} {
	info := map[string]interface{}{}
	payload, err := json.Marshal(data)
	if err != nil {
		log.Debug("post1 marshal: ", err)
		return info
	}
	resp, err := deepseekCleanClient.Post(deepseekCleanURL, "application/json", bytes.NewReader(payload))
	if err != nil {
		log.Debug("post1 request: ", err)
		return info
	}
	// The body must be closed or the underlying connection leaks.
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Debug("post1 read: ", err)
		return info
	}
	if err := json.Unmarshal(body, &info); err != nil {
		log.Debug("post1 decode: ", err)
	}
	if info == nil {
		// A JSON "null" reply resets the map to nil; keep the result non-nil.
		info = map[string]interface{}{}
	}
	return info
}