package extract

import (
	"sync"

	"data_ai/ul"

	log "github.com/donnie4w/go-logger/logger"
	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
)

// getExistsInfo collects the ids of the documents already present in the
// ul.Bid_Name collection reached through ul.SourceMgo.
//
// The returned map is used as a set: every known id is a key and its value is
// always the empty string, so `dict[id] != nil` is true exactly for ids that
// were seen during the scan.
//
// If the scan is interrupted by a cursor error, the error is logged and the
// ids gathered so far are returned.
func getExistsInfo() map[string]interface{} {
	log.Debug("开始构建已存在数据···")
	sess := ul.SourceMgo.GetMgoConn()
	defer ul.SourceMgo.DestoryMongoConn(sess)

	dict := map[string]interface{}{}
	q := map[string]interface{}{}
	// Only _id is read below, so ask the server for that field alone instead
	// of shipping every full document over the wire.
	fields := map[string]interface{}{"_id": 1}
	it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Select(fields).Sort("_id").Iter()

	total := 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
		if total%100000 == 0 {
			log.Debug("cur ai index ", total, tmp["_id"])
		}
		dict[ul.BsonTOStringId(tmp["_id"])] = ""
		// Decode the next document into a fresh map so no keys carry over.
		tmp = make(map[string]interface{})
	}
	// Next returns false both at the end of the data and on failure; Close
	// releases the cursor and reports which of the two happened.
	if err := it.Close(); err != nil {
		log.Error("exists scan stopped early, id set is incomplete: ", err)
	}

	log.Debug("is exists ...", total, "~", len(dict))
	return dict
}

// MovingFullInfo copies documents from the ul.Bid_Name collection of
// ul.BidMgo into the same-named collection of ul.SourceMgo, skipping every
// document whose id is already known to getExistsInfo.
//
// Documents are read in descending _id order, restricted to _id < eid. Those
// with infoformat == 1 are first passed through ResolveInfo; a non-empty
// result is stored under "ai_zhipu" and then merged into the document via
// ul.ChooseCheckDataAI. At most ul.Reading documents are processed
// concurrently. The function returns once every started worker has finished.
//
// NOTE(review): sid is accepted but never used; the query has no lower bound.
// Confirm with callers whether a "$gt"/"$gte" bound on sid was intended.
func MovingFullInfo(sid string, eid string) {
	dict := getExistsInfo()
	q := map[string]interface{}{
		"_id": map[string]interface{}{
			"$lt": ul.StringTOBsonId(eid),
		},
	}
	ul.FlashModel = "glm-4-flash"

	// pool is a counting semaphore bounding the number of in-flight workers.
	pool := make(chan bool, ul.Reading)
	wg := &sync.WaitGroup{}

	sess := ul.BidMgo.GetMgoConn()
	defer ul.BidMgo.DestoryMongoConn(sess)

	total := 0
	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("-_id").Iter()
	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
		if total%1000 == 0 {
			log.Debug("cur ai index ", total, tmp["_id"])
		}
		tmpid := ul.BsonTOStringId(tmp["_id"])
		if tmpid == "" || dict[tmpid] != nil {
			// Unusable id or already migrated: do not move it again.
			tmp = make(map[string]interface{})
			continue
		}

		pool <- true
		wg.Add(1)
		go func(tmp map[string]interface{}) {
			defer func() {
				<-pool
				wg.Done()
			}()

			if qu.IntAll(tmp["infoformat"]) == 1 {
				// Regular record: run the AI extraction before migrating.
				if data := ResolveInfo(tmp); len(data) > 0 {
					tmp["ai_zhipu"] = data
					updateCheck := make(map[string]interface{})
					isUnset := ul.ChooseCheckDataAI(tmp, updateCheck)
					for k, v := range updateCheck {
						tmp[k] = v // overwrite with the chosen value
					}
					if isUnset {
						for k := range ul.Unset_Check {
							delete(tmp, k) // drop the field
						}
					}
				}
			}

			// Migrate the (possibly enriched) document.
			ul.SourceMgo.Save(ul.Bid_Name, tmp)
		}(tmp)

		// The worker now owns the old map; decode the next document into a
		// fresh one.
		tmp = make(map[string]interface{})
	}
	// Release the cursor and surface a failed scan, which would otherwise be
	// indistinguishable from reaching the end of the data.
	if err := it.Close(); err != nil {
		log.Error("migration scan stopped early: ", err)
	}

	wg.Wait()
	log.Debug("ai is over ...", total)
}