package extract import ( "data_ai/prompt" "data_ai/ul" "fmt" log "github.com/donnie4w/go-logger/logger" new_xlsx "github.com/tealeg/xlsx/v3" qu "jygit.jydev.jianyu360.cn/data_processing/common_utils" "os" "sync" "unicode/utf8" ) func TestSingleFieldInfo(name string, tmpid string) { tmp := ul.SourceMgo.FindById(name, tmpid) if len(tmp) == 0 || tmp == nil { log.Debug("未查询到数据...", tmpid) return } data := ResolveInfo(tmp) //最终结果... for k, v := range data { log.Debug(k, "~", v) } } func TestIsPackage() { tmpArr := []string{ "669e83fe66cf0db42a6520b3", "669e892066cf0db42a652c9b", "669e904966cf0db42a653b5d", "669f16f466cf0db42a669069", "669f186c66cf0db42a669bf0", "669efb6766cf0db42a65e0b4", "669f004266cf0db42a65f201", "669f02a666cf0db42a65fff3", "669f172766cf0db42a669193", "669ec89566cf0db42a659020", "669e86b266cf0db42a6526ac", "669e86e466cf0db42a6527b7", "669e87b766cf0db42a652a3e", "669f082d66cf0db42a662323", "669e95e966cf0db42a654dd1", "669ea39466cf0db42a656311", "669f140366cf0db42a66772f", "669ee59466cf0db42a65b8aa", "669f05a166cf0db42a66117b", "669e90d666cf0db42a653e0a", "669f08c466cf0db42a66273c", "669f155166cf0db42a6682c7", "669ef0ff66cf0db42a65c83a", "669efdc166cf0db42a65e8f3", "669f090066cf0db42a6629d0", "669f111366cf0db42a665ce7", "669f15fb66cf0db42a668901", "669f0baa66cf0db42a663a72", "669f039766cf0db42a66044e", "669eff3e66cf0db42a65ee73", "669f12c366cf0db42a666b9d", "669e913b66cf0db42a653ffc", "669e833466cf0db42a651e3a", "669f071e66cf0db42a661b03", "669f1a1266cf0db42a66a892", "669f0aec66cf0db42a6635e8", "669f169c66cf0db42a668e1d", "669ed6c966cf0db42a65a75d", "669f072866cf0db42a661b26", "669f185866cf0db42a669af0", "669f15d366cf0db42a6687aa", "669f182466cf0db42a669960", "669f0ed066cf0db42a664e5c", "669f076466cf0db42a661cd4", "669f172966cf0db42a6691c0", "669f198466cf0db42a66a385", "669f1ad366cf0db42a66afb9", "669f156666cf0db42a668403", "669f093c66cf0db42a662c08", "669f0d8266cf0db42a6646cb", "669f06e866cf0db42a661a1d", "669f1bd766cf0db42a66b86e", "669efcd066cf0db42a65e4f4", } pkgArr := []int{ 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, } ok := 0 for k, v := range tmpArr { data := ul.SourceMgo.FindById("ai_41411", v) if len(data) == 0 { data = ul.SourceMgo.FindById("ai_294", v) } detail := qu.ObjToString(data["detail"]) ispkg := prompt.AcquireIsPackageInfo(detail) if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) { ok++ } else { log.Debug("错误~", v) } } log.Debug("is over ~ ", len(tmpArr)-ok) } func TestPackageInfo() { query := map[string]interface{}{ "new_pkg": map[string]interface{}{ "$exists": 1, }, } dataArr, _ := ul.SourceMgo.Find("ai_41411_zhipu", query, nil, map[string]interface{}{}) log.Debug("查询数量...", len(dataArr)) os.Remove("test.xlsx") f := new_xlsx.NewFile() sheet, _ := f.AddSheet("数据信息") row := sheet.AddRow() writeRow(row, []string{"唯一标识", "站点", "剑鱼链接", "子包名称", "子包单位", "子包金额"}) for _, v := range dataArr { tmpid := ul.BsonTOStringId(v["_id"]) ttt := ul.SourceMgo.FindById("ai_41411", tmpid) site := qu.ObjToString(ttt["site"]) jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid)) p_info := *qu.ObjToMap(v["new_pkg"]) p_arr := ul.IsMarkInterfaceMap(p_info["分包信息"]) for _, v1 := range p_arr { row = sheet.AddRow() arr := []string{} arr = append(arr, tmpid) arr = append(arr, site) arr = append(arr, jyhref) arr = append(arr, qu.ObjToString(v1["包项目名称"])) arr = append(arr, qu.ObjToString(v1["中标单位"])) arr = append(arr, qu.ObjToString(v1["中标金额"])) writeRow(row, arr) } } if err := f.Save("test.xlsx"); err != nil { fmt.Println("保存xlsx失败:", err) } else { fmt.Println("保存xlsx成功:", err) } log.Debug("is over ...") return //分包判断,获取信息 pool_mgo := make(chan bool, 80) wg_mgo := &sync.WaitGroup{} for k, v := range dataArr { if k%10 == 0 { log.Debug(k, "~", v["_id"]) } pool_mgo <- true wg_mgo.Add(1) go func(v map[string]interface{}) { defer func() { <-pool_mgo wg_mgo.Done() }() tmpid := ul.BsonTOStringId(v["_id"]) data := ul.SourceMgo.FindById("ai_41411", tmpid) if detail := qu.ObjToString(data["detail"]); utf8.RuneCountInString(detail) > 100 { pkg := prompt.AcquireMultiplePackageInfo(detail) //最终结果... ul.SourceMgo.UpdateById("ai_41411_zhipu", tmpid, map[string]interface{}{ "$set": map[string]interface{}{ "new_pkg": pkg, }, }) } }(v) } wg_mgo.Wait() } // 更新链接 func TestUpdateJyhref(name string) { dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1}) for _, v := range dataArr { tmpid := ul.BsonTOStringId(v["_id"]) jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid)) ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{ "$set": map[string]interface{}{ "jyhref": jyhref, }, }) } log.Debug("is over ...") } func writeRow(row *new_xlsx.Row, arr []string) { for _, v := range arr { row.AddCell().Value = v } }