|
@@ -11,42 +11,54 @@ import (
|
|
|
)
|
|
|
|
|
|
// ExtractFieldInfo identifies structured fields.
// Diff view: '-' lines are the removed version, '+' lines the added version, unprefixed lines are shared context.
// Added version: selects documents whose _id is in the range (sid, eid], walks collection ul.Bid_Name
// with an iterator, runs ResolveInfo on each record that passes the filter below, and writes the
// result into field "ai_zhipu" of collection ul.Ext_Name.
// Removed version: took the collection as parameter `name`, loaded all matches with Find, and
// updated that same collection.
|
|
|
-func ExtractFieldInfo(sid string, eid string, name string) {
|
|
|
+func ExtractFieldInfo(sid string, eid string) {
|
|
|
// Range query on _id: strictly greater than sid, less than or equal to eid.
q := map[string]interface{}{
|
|
|
"_id": map[string]interface{}{
|
|
|
"$gt": ul.StringTOBsonId(sid),
|
|
|
"$lte": ul.StringTOBsonId(eid),
|
|
|
},
|
|
|
}
|
|
|
- pool_mgo := make(chan bool, 50)
|
|
|
+ // First query the extraction table to determine the range the large model needs to recognize
|
|
|
+ dict := ConfrimExtractInfo(q)
|
|
|
+ log.Debug("查询语句...", q, "~", len(dict))
|
|
|
+
|
|
|
// pool_mgo bounds the number of in-flight goroutines: a send happens before each goroutine
// starts and the matching receive runs in its deferred cleanup.
+ pool_mgo := make(chan bool, 90)
|
|
|
wg_mgo := &sync.WaitGroup{}
|
|
|
- dataArr, _ := ul.SourceMgo.Find(name, q, nil, nil)
|
|
|
- for k, v := range dataArr {
|
|
|
- if k%100 == 0 {
|
|
|
- log.Debug(k, "~", ul.BsonTOStringId(v["_id"]))
|
|
|
+
|
|
|
+ sess := ul.SourceMgo.GetMgoConn()
|
|
|
+ defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
// total counts every record visited; isok counts records that pass the filter.
// NOTE(review): no read of isok is visible in this function — confirm whether it is needed.
+ total, isok := 0, 0
|
|
|
// NOTE(review): no error check on the iterator is visible after the loop — confirm whether
// a failed cursor should be reported.
+ it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
// Progress log every 5000 visited records.
+ if total%5000 == 0 {
|
|
|
+ log.Debug("cur index ", total)
|
|
|
}
|
|
|
- infoformat := qu.IntAll(v["infoformat"])
|
|
|
- if infoformat != 1 {
|
|
|
+ tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
+ infoformat := qu.IntAll(tmp["infoformat"])
|
|
|
// Skip records whose infoformat is not 1 or whose id is absent from dict
// (a missing key in a map[string]interface{} reads as nil).
+ if infoformat != 1 || dict[tmpid] == nil {
|
|
|
// Allocate a fresh map for the next it.Next call before skipping.
+ tmp = make(map[string]interface{})
|
|
|
continue
|
|
|
}
|
|
|
+ isok++
|
|
|
// Blocks here while the channel buffer is full.
pool_mgo <- true
|
|
|
wg_mgo.Add(1)
|
|
|
- go func(v map[string]interface{}) {
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
defer func() {
|
|
|
<-pool_mgo
|
|
|
wg_mgo.Done()
|
|
|
}()
|
|
|
- tmpid := ul.BsonTOStringId(v["_id"])
|
|
|
- data := ResolveInfo(v)
|
|
|
- if len(data) > 0 || tmpid == "" {
|
|
|
- ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
|
|
|
+ u_id := ul.BsonTOStringId(tmp["_id"])
|
|
|
+ data := ResolveInfo(tmp)
|
|
|
// NOTE(review): as written, the update also runs when the id is empty; possibly
// `len(data) > 0 && u_id != ""` was intended — confirm before changing.
+ if len(data) > 0 || u_id == "" {
|
|
|
+ ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
|
|
|
"$set": map[string]interface{}{"ai_zhipu": data},
|
|
|
})
|
|
|
}
|
|
|
- }(v)
|
|
|
+ }(tmp)
|
|
|
// The map just handed to the goroutine is not reused: the loop variable gets a new map.
+ tmp = make(map[string]interface{})
|
|
|
}
|
|
|
// Wait until every started goroutine has called Done.
wg_mgo.Wait()
|
|
|
- log.Debug("is over ...", sid, "~", eid)
|
|
|
+ log.Debug("ai is over ...", sid, "~", eid)
|
|
|
}
|
|
|
|
|
|
// 获取处理数据...
|
|
@@ -84,6 +96,23 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
|
|
|
return f_data
|
|
|
}
|
|
|
|
|
|
// ConfrimExtractInfo runs query q against collection ul.Ext_Name, fetching only the _id field,
// and returns a map keyed by each document's string id (the value stored is the same id string).
// ExtractFieldInfo uses the result as a membership set via a nil comparison.
// NOTE(review): the name looks like a misspelling of "Confirm"; it is left unchanged because
// callers reference it.
+func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
|
|
|
+ dict := map[string]interface{}{}
|
|
|
+ sess := ul.SourceMgo.GetMgoConn()
|
|
|
+ defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
+ total := 0
|
|
|
// NOTE(review): no error check on the iterator is visible after the loop, so a failed cursor
// would presumably yield a partial map without any signal — confirm.
+ it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Select(map[string]interface{}{"_id": 1}).Iter()
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
// Progress log every 100000 visited records.
+ if total%100000 == 0 {
|
|
|
+ log.Debug("cur index ", total)
|
|
|
+ }
|
|
|
+ tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
+ dict[tmpid] = tmpid
|
|
|
// Allocate a fresh map for the next it.Next call.
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ return dict
|
|
|
+}
|
|
|
+
|
|
|
// 暂时不启用...无限重试
|
|
|
func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
|
|
|
//log.Debug("开始重置更新...", len(arr))
|