123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- package extract
- import (
- "data_ai/ul"
- log "github.com/donnie4w/go-logger/logger"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "sync"
- )
- // 获取已存在数据···
- func getExistsInfo() map[string]interface{} {
- log.Debug("开始构建已存在数据···")
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- dict := map[string]interface{}{}
- q, total := map[string]interface{}{}, 0
- it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%100000 == 0 {
- log.Debug("cur ai index ", total, tmp["_id"])
- }
- tmpid := ul.BsonTOStringId(tmp["_id"])
- dict[tmpid] = ""
- tmp = make(map[string]interface{})
- }
- log.Debug("is exists ...", total, "~", len(dict))
- return dict
- }
- // 识别结构化字段
- func MovingFullInfo(sid string, eid string) {
- q := map[string]interface{}{
- "_id": map[string]interface{}{
- "$lt": ul.StringTOBsonId(eid),
- },
- }
- log.Debug("迁移语句:", q)
- ul.FlashModel = "glm-4-flash"
- pool_mgo := make(chan bool, ul.Reading)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.BidMgo.GetMgoConn()
- defer ul.BidMgo.DestoryMongoConn(sess)
- total := 0
- it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("-_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%1000 == 0 {
- log.Debug("cur ai index ", total, tmp["_id"])
- }
- tmpid := ul.BsonTOStringId(tmp["_id"])
- if tmpid == "" {
- tmp = make(map[string]interface{})
- continue
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- infoformat := qu.IntAll(tmp["infoformat"])
- if infoformat == 1 || infoformat == 0 { //正常数据处理···
- data := ResolveInfo(tmp)
- if len(data) > 0 {
- tmp["ai_zhipu"] = data
- update_check := make(map[string]interface{}, 0)
- is_unset := ul.ChooseCheckDataAI(tmp, &update_check)
- for k, v := range update_check {
- tmp[k] = v //覆盖值
- }
- if is_unset {
- for k, _ := range ul.Unset_Check {
- delete(tmp, k) //删除值
- }
- }
- }
- }
- //迁移数据···
- delete(tmp, "detail")
- delete(tmp, "contenthtml")
- ul.SourceMgo.Save(ul.Bid_Name, tmp)
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("ai is over ...", total)
- }
- func MovingFullInfoCopy(sid string, eid string) {
- q := map[string]interface{}{
- "_id": map[string]interface{}{
- "$gte": ul.StringTOBsonId(sid),
- },
- }
- log.Debug("迁移语句:", q)
- pool_mgo := make(chan bool, ul.Reading)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total := 0
- it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%1000 == 0 {
- log.Debug("cur move index ", total, tmp["_id"])
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- delete(tmp, "detail")
- delete(tmp, "contenthtml")
- ul.SourceMgo.Save("bidding_copy", tmp)
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("move is over ...", total)
- }
|