|
@@ -1,15 +1,17 @@
|
|
package main
|
|
package main
|
|
|
|
|
|
import (
|
|
import (
|
|
- "log"
|
|
|
|
- "strings"
|
|
|
|
- "time"
|
|
|
|
-
|
|
|
|
|
|
+ "fmt"
|
|
"github.com/robfig/cron/v3"
|
|
"github.com/robfig/cron/v3"
|
|
"github.com/spf13/viper"
|
|
"github.com/spf13/viper"
|
|
util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
es "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
|
|
es "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
|
|
"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
|
|
"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
|
|
|
|
+ "log"
|
|
|
|
+ "regexp"
|
|
|
|
+ "strings"
|
|
|
|
+ "sync"
|
|
|
|
+ "time"
|
|
)
|
|
)
|
|
|
|
|
|
var (
|
|
var (
|
|
@@ -24,6 +26,13 @@ var (
|
|
RuleE = make([]string, 0)
|
|
RuleE = make([]string, 0)
|
|
)
|
|
)
|
|
|
|
|
|
|
|
// RequestData is the record shape accepted by dealAi.
//
// NOTE(review): in the visible part of the file the struct is referenced only
// by dealAi's signature, so the field meanings below are inferred from the
// field names — confirm against the real callers.
type RequestData struct {
	Title, Projectname string   // presumably the announcement title and project name
	Class              []string // presumably the classification labels for the record
	ID                 string   // presumably the record identifier
}
|
|
|
|
+
|
|
const timeTypeAll = 1
|
|
const timeTypeAll = 1
|
|
const timeTypeInc = 2
|
|
const timeTypeInc = 2
|
|
|
|
|
|
@@ -47,7 +56,12 @@ func InitConfig() (err error) {
|
|
}
|
|
}
|
|
|
|
|
|
func Init() {
|
|
func Init() {
|
|
- InitConfig()
|
|
|
|
|
|
+ err := InitConfig()
|
|
|
|
+ if err != nil {
|
|
|
|
+ log.Println("配置文件错误", err)
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+
|
|
Mgo = &mongodb.MongodbSim{
|
|
Mgo = &mongodb.MongodbSim{
|
|
MongodbAddr: GF.Mongo.Host,
|
|
MongodbAddr: GF.Mongo.Host,
|
|
//MongodbAddr: "127.0.0.1:27083",
|
|
//MongodbAddr: "127.0.0.1:27083",
|
|
@@ -67,13 +81,15 @@ func Init() {
|
|
}
|
|
}
|
|
Esa.InitElasticSize()
|
|
Esa.InitElasticSize()
|
|
|
|
|
|
- Esb = &es.Elastic{
|
|
|
|
- S_esurl: GF.Esb.URL,
|
|
|
|
- I_size: 5,
|
|
|
|
- Username: GF.Esb.Username,
|
|
|
|
- Password: GF.Esb.Password,
|
|
|
|
|
|
+ if GF.Esb.URL != "" {
|
|
|
|
+ Esb = &es.Elastic{
|
|
|
|
+ S_esurl: GF.Esb.URL,
|
|
|
|
+ I_size: 5,
|
|
|
|
+ Username: GF.Esb.Username,
|
|
|
|
+ Password: GF.Esb.Password,
|
|
|
|
+ }
|
|
|
|
+ Esb.InitElasticSize()
|
|
}
|
|
}
|
|
- Esb.InitElasticSize()
|
|
|
|
|
|
|
|
RuleA = strings.Split(GF.Env.Rulea, "\n")
|
|
RuleA = strings.Split(GF.Env.Rulea, "\n")
|
|
RuleB = strings.Split(GF.Env.Ruleb, "\n")
|
|
RuleB = strings.Split(GF.Env.Ruleb, "\n")
|
|
@@ -84,7 +100,11 @@ func Init() {
|
|
|
|
|
|
func main() {
|
|
func main() {
|
|
Init()
|
|
Init()
|
|
- // dealAll() //存量数据
|
|
|
|
|
|
+ //dealAll() //规则处理存量数据
|
|
|
|
+ //dealInc()
|
|
|
|
+ //dealAllAi() //大模型
|
|
|
|
+
|
|
|
|
+ //select {}
|
|
|
|
|
|
local, _ := time.LoadLocation("Asia/Shanghai")
|
|
local, _ := time.LoadLocation("Asia/Shanghai")
|
|
c := cron.New(cron.WithLocation(local), cron.WithSeconds())
|
|
c := cron.New(cron.WithLocation(local), cron.WithSeconds())
|
|
@@ -101,6 +121,7 @@ func main() {
|
|
|
|
|
|
// dealInc 处理增量数据
|
|
// dealInc 处理增量数据
|
|
func dealInc() {
|
|
func dealInc() {
|
|
|
|
+ log.Println("开始处理增量数据")
|
|
now := time.Now()
|
|
now := time.Now()
|
|
start := time.Date(now.Year(), now.Month(), now.Day()+GF.Env.Start, 0, 0, 0, 0, now.Location())
|
|
start := time.Date(now.Year(), now.Month(), now.Day()+GF.Env.Start, 0, 0, 0, 0, now.Location())
|
|
end := time.Date(now.Year(), now.Month(), now.Day()+GF.Env.End, 0, 0, 0, 0, now.Location())
|
|
end := time.Date(now.Year(), now.Month(), now.Day()+GF.Env.End, 0, 0, 0, 0, now.Location())
|
|
@@ -112,39 +133,69 @@ func dealInc() {
|
|
},
|
|
},
|
|
}
|
|
}
|
|
dealTopInformation(where, timeTypeInc)
|
|
dealTopInformation(where, timeTypeInc)
|
|
|
|
+ //大模型处理,保存字段 tag_topinformation_zp
|
|
|
|
+ if GF.Env.Doai {
|
|
|
|
+ log.Println("开始处理大模型结果")
|
|
|
|
+ go dealTopInformationAi4(where)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ log.Println("增量数据处理完毕")
|
|
}
|
|
}
|
|
|
|
|
|
// dealAll 处理存量数据
|
|
// dealAll 处理存量数据
|
|
func dealAll() {
|
|
func dealAll() {
|
|
where := map[string]interface{}{
|
|
where := map[string]interface{}{
|
|
"comeintime": map[string]interface{}{
|
|
"comeintime": map[string]interface{}{
|
|
- "$gte": 1726070400,
|
|
|
|
- "$lt": 1726156800,
|
|
|
|
|
|
+ "$gte": 1704038400,
|
|
|
|
+ "$lt": 1726851634,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
|
|
dealTopInformation(where, timeTypeAll) //处理情报标签一级
|
|
dealTopInformation(where, timeTypeAll) //处理情报标签一级
|
|
}
|
|
}
|
|
|
|
|
|
-// dealTopInformation 处理情报标签一级
|
|
|
|
|
|
+func dealAllAi() {
|
|
|
|
+ //where := map[string]interface{}{
|
|
|
|
+ // //"comeintime": map[string]interface{}{
|
|
|
|
+ // // //"$gte": 1726070400,
|
|
|
|
+ // // "$lt": 1727078591,
|
|
|
|
+ // //},
|
|
|
|
+ //}
|
|
|
|
+
|
|
|
|
+ log.Println("开始处理数据")
|
|
|
|
+ //dealTopInformationAi(where) //处理情报标签一级
|
|
|
|
+ //dealTopInformationAi2(where) //处理情报标签一级;50个一组调用
|
|
|
|
+ //dealTopInformationAi3(where) //单个数据调用
|
|
|
|
+ dealTopInformationAi4(nil) //5一组,开启携程
|
|
|
|
+ //dealTopInformationAi5(nil) //单条调用智普,开启携程
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+// dealTopInformation 处理情报标签一级;剑鱼码 处理方式
|
|
func dealTopInformation(where interface{}, timeType int) {
|
|
func dealTopInformation(where interface{}, timeType int) {
|
|
if where == nil {
|
|
if where == nil {
|
|
log.Println("查询条件为空")
|
|
log.Println("查询条件为空")
|
|
- return
|
|
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ log.Println(where)
|
|
defer util.Catch()
|
|
defer util.Catch()
|
|
sess := Mgo.GetMgoConn()
|
|
sess := Mgo.GetMgoConn()
|
|
defer Mgo.DestoryMongoConn(sess)
|
|
defer Mgo.DestoryMongoConn(sess)
|
|
count := 0
|
|
count := 0
|
|
- it := sess.DB("qfw").C("bidding").Find(where).Select(nil).Iter()
|
|
|
|
|
|
+ ch := make(chan bool, 5)
|
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
|
+ it := sess.DB(GF.Mongo.DB).C("bidding").Find(where).Select(nil).Iter()
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
|
|
- if count%5000 == 0 {
|
|
|
|
- log.Println("current:", count, tmp["title"])
|
|
|
|
|
|
+ if count%1000 == 0 {
|
|
|
|
+ log.Println("dealTopInformation,current:", count, tmp["title"], tmp["_id"])
|
|
}
|
|
}
|
|
-
|
|
|
|
- if gov_classify, ok := tmp["gov_classify"]; !ok {
|
|
|
|
- continue
|
|
|
|
- } else {
|
|
|
|
|
|
+ ch <- true
|
|
|
|
+ wg.Add(1)
|
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
|
+ defer func() {
|
|
|
|
+ <-ch
|
|
|
|
+ wg.Done()
|
|
|
|
+ }()
|
|
|
|
+ //
|
|
topinformation := make([]string, 0) //
|
|
topinformation := make([]string, 0) //
|
|
hasNew := false
|
|
hasNew := false
|
|
if existTop, okk := tmp["tag_topinformation"]; okk {
|
|
if existTop, okk := tmp["tag_topinformation"]; okk {
|
|
@@ -155,21 +206,25 @@ func dealTopInformation(where interface{}, timeType int) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- if govMap, ok3 := gov_classify.(map[string]interface{}); ok3 {
|
|
|
|
|
|
+ if govMap, ok3 := tmp["gov_classify"].(map[string]interface{}); ok3 {
|
|
zc_code := util.ObjToString(govMap["zc_code"])
|
|
zc_code := util.ObjToString(govMap["zc_code"])
|
|
if IsInStringArray(zc_code, RuleA) {
|
|
if IsInStringArray(zc_code, RuleA) {
|
|
hasNew = true
|
|
hasNew = true
|
|
topinformation = append(topinformation, "情报_环境采购")
|
|
topinformation = append(topinformation, "情报_环境采购")
|
|
- } else if IsInStringArray(zc_code, RuleB) {
|
|
|
|
|
|
+ }
|
|
|
|
+ if IsInStringArray(zc_code, RuleB) {
|
|
hasNew = true
|
|
hasNew = true
|
|
topinformation = append(topinformation, "情报_印务商机")
|
|
topinformation = append(topinformation, "情报_印务商机")
|
|
- } else if IsInStringArray(zc_code, RuleC) {
|
|
|
|
|
|
+ }
|
|
|
|
+ if IsInStringArray(zc_code, RuleC) {
|
|
hasNew = true
|
|
hasNew = true
|
|
topinformation = append(topinformation, "情报_家具招投标")
|
|
topinformation = append(topinformation, "情报_家具招投标")
|
|
- } else if IsInStringArray(zc_code, RuleD) {
|
|
|
|
|
|
+ }
|
|
|
|
+ if IsInStringArray(zc_code, RuleD) {
|
|
hasNew = true
|
|
hasNew = true
|
|
topinformation = append(topinformation, "情报_车辆租赁")
|
|
topinformation = append(topinformation, "情报_车辆租赁")
|
|
- } else if IsInStringArray(zc_code, RuleE) {
|
|
|
|
|
|
+ }
|
|
|
|
+ if IsInStringArray(zc_code, RuleE) {
|
|
hasNew = true
|
|
hasNew = true
|
|
topinformation = append(topinformation, "情报_安防")
|
|
topinformation = append(topinformation, "情报_安防")
|
|
}
|
|
}
|
|
@@ -190,29 +245,284 @@ func dealTopInformation(where interface{}, timeType int) {
|
|
//log.Println("hasNew", " ====== ", biddingID)
|
|
//log.Println("hasNew", " ====== ", biddingID)
|
|
Mgo.UpdateById("bidding", biddingID, map[string]interface{}{"$set": updateMgo})
|
|
Mgo.UpdateById("bidding", biddingID, map[string]interface{}{"$set": updateMgo})
|
|
//ToDo 2.更新es
|
|
//ToDo 2.更新es
|
|
- // 存量数据
|
|
|
|
- if timeType == timeTypeAll {
|
|
|
|
- if util.IntAll(tmp["extracttype"]) != 1 {
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- // 增量数据
|
|
|
|
- if util.IntAll(tmp["dataprocess"]) != 8 {
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
|
|
+ if GF.Esa.URL != "" {
|
|
|
|
+ _ = Esa.UpdateDocument("bidding", biddingID, updateEs)
|
|
}
|
|
}
|
|
|
|
|
|
- err := Esa.UpdateDocument("bidding", biddingID, updateEs)
|
|
|
|
- if err != nil && err.Error() != "Document not updated: noop" {
|
|
|
|
- log.Println("esa update err", biddingID, err)
|
|
|
|
|
|
+ if GF.Esb.URL != "" {
|
|
|
|
+ _ = Esb.UpdateDocument("bidding", biddingID, updateEs)
|
|
}
|
|
}
|
|
- err = Esb.UpdateDocument("bidding", biddingID, updateEs)
|
|
|
|
- if err != nil && err.Error() != "Document not updated: noop" {
|
|
|
|
- log.Println("esb update err", biddingID, err)
|
|
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ }(tmp)
|
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ wg.Wait()
|
|
|
|
+ log.Println("剑鱼码规则处理数据处理完毕")
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+// dealTopInformationAi2的基础上,开启协程
|
|
|
|
+func dealTopInformationAi4(where interface{}) {
|
|
|
|
+ defer util.Catch()
|
|
|
|
+ sess := Mgo.GetMgoConn()
|
|
|
|
+ defer Mgo.DestoryMongoConn(sess)
|
|
|
|
+ count := 0
|
|
|
|
+
|
|
|
|
+ key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
|
|
|
|
+ model := "glm-4-flash"
|
|
|
|
+ //key := "6c86cea8659ff1d33b161ea7213ea97c.m4OcENaRan8NeLSZ" //我自己的key
|
|
|
|
+
|
|
|
|
+ re := regexp.MustCompile(`title:(.*?),projectname:(.*?),id:(.*?),class:(.*?)(?:\s*$|\n)`)
|
|
|
|
+ var lines = make([]string, 0)
|
|
|
|
+ it := sess.DB(GF.Mongo.DB).C(GF.Mongo.Coll).Find(where).Select(nil).Sort("-_id").Iter()
|
|
|
|
+ resultsChan := make(chan map[string]string, 5000)
|
|
|
|
+ sem := make(chan struct{}, 100) // 控制并发数量
|
|
|
|
+ wg := sync.WaitGroup{}
|
|
|
|
+ wg.Add(1)
|
|
|
|
+ go func() {
|
|
|
|
+ defer wg.Done()
|
|
|
|
+ count2 := 0
|
|
|
|
+ for item := range resultsChan {
|
|
|
|
+ count2++
|
|
|
|
+ // 处理返回结果,例如更新数据库
|
|
|
|
+ tags := make([]string, 0)
|
|
|
|
+ class := util.ObjToString(item["class"])
|
|
|
|
+ if class == "其他分类" || class == "" || item["id"] == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ for _, v2 := range strings.Split(class, ",") {
|
|
|
|
+ if v2 == "车辆领域" {
|
|
|
|
+ tags = append(tags, "情报_车辆租赁")
|
|
|
|
+ } else if v2 == "安防领域" {
|
|
|
|
+ tags = append(tags, "情报_安防")
|
|
|
|
+ } else if v2 == "印务领域" {
|
|
|
|
+ tags = append(tags, "情报_印务商机")
|
|
|
|
+ } else if v2 == "环境领域" {
|
|
|
|
+ tags = append(tags, "情报_环境采购")
|
|
|
|
+ } else if v2 == "家具领域" {
|
|
|
|
+ tags = append(tags, "情报_家具招投标")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+ if len(tags) > 0 {
|
|
|
|
+ id := util.ObjToString(item["id"])
|
|
|
|
+ updateMgo := map[string]interface{}{
|
|
|
|
+ "tag_topinformation_zp": tags,
|
|
|
|
+ //"topinformation_time": time.Now().Unix(),
|
|
|
|
+ }
|
|
|
|
+ Mgo.UpdateById("bidding", id, map[string]interface{}{"$set": updateMgo})
|
|
|
|
+ updateEs := map[string]interface{}{
|
|
|
|
+ "tag_topinformation_zp": tags,
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if count2%100 == 0 {
|
|
|
|
+ log.Println("update es", id, tags)
|
|
|
|
+ }
|
|
|
|
+ if GF.Esa.URL != "" {
|
|
|
|
+ _ = Esa.UpdateDocument("bidding", id, updateEs)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if GF.Esb.URL != "" {
|
|
|
|
+ _ = Esb.UpdateDocument("bidding", id, updateEs)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }()
|
|
|
|
+
|
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
|
|
|
|
+ if count%1000 == 0 {
|
|
|
|
+ log.Println("current:", count, tmp["title"], tmp["_id"])
|
|
|
|
+ }
|
|
|
|
+ title := util.ObjToString(tmp["title"])
|
|
|
|
+ projectname := util.ObjToString(tmp["projectname"])
|
|
|
|
+ biddingID := mongodb.BsonIdToSId(tmp["_id"])
|
|
|
|
+ lines = append(lines, fmt.Sprintf("title:%s,projectname:%s,id:%s", title, projectname, biddingID))
|
|
|
|
+ if len(lines) == 5 {
|
|
|
|
+ sem <- struct{}{} // 增加并发控制
|
|
|
|
+ go func(lines []string) {
|
|
|
|
+ defer func() { <-sem }() // 释放并发控制
|
|
|
|
+ report := strings.Join(lines, "\n")
|
|
|
|
+ resu, err := ZpAI4(key, model, report)
|
|
|
|
+ lines = make([]string, 0)
|
|
|
|
+
|
|
|
|
+ if err != nil {
|
|
|
|
+ log.Println("智普请求失败,", err, report)
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ splitLines := strings.Split(resu, `;`)
|
|
|
|
+ for _, line := range splitLines {
|
|
|
|
+ matches := re.FindStringSubmatch(line)
|
|
|
|
+ if len(matches) == 5 {
|
|
|
|
+ result := map[string]string{
|
|
|
|
+ "title": matches[1],
|
|
|
|
+ "projectname": matches[2],
|
|
|
|
+ "id": matches[3],
|
|
|
|
+ "class": matches[4],
|
|
|
|
+ }
|
|
|
|
+ resultsChan <- result
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }(lines)
|
|
|
|
+ // 清空 lines 切片而不是 reportBuilder
|
|
|
|
+ lines = make([]string, 0)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ if len(lines) > 0 {
|
|
|
|
+ report := strings.Join(lines, "\n")
|
|
|
|
+ resu, err := ZpAI4(key, model, report)
|
|
|
|
+ if err != nil {
|
|
|
|
+ log.Println("智普请求失败,尝试再次请求", err)
|
|
|
|
+ }
|
|
|
|
+ resu, err = ZpAI4(key, model, report)
|
|
|
|
+ if err == nil && resu != "" {
|
|
|
|
+ splitLines := strings.Split(resu, "\n")
|
|
|
|
+ for _, line := range splitLines {
|
|
|
|
+ matches := re.FindStringSubmatch(line)
|
|
|
|
+ if len(matches) == 5 {
|
|
|
|
+ result := map[string]string{
|
|
|
|
+ "title": matches[1],
|
|
|
|
+ "projectname": matches[2],
|
|
|
|
+ "id": matches[3],
|
|
|
|
+ "class": matches[4],
|
|
|
|
+ }
|
|
|
|
+ resultsChan <- result
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ close(resultsChan) // 关闭 channel
|
|
|
|
+ wg.Wait()
|
|
|
|
+ log.Println("大模型处理数据处理完毕")
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+// dealTopInformationAi5 单个调用ZpAI4,多协程
|
|
|
|
+func dealTopInformationAi5(where interface{}) {
|
|
|
|
+ defer util.Catch()
|
|
|
|
+ sess := Mgo.GetMgoConn()
|
|
|
|
+ defer Mgo.DestoryMongoConn(sess)
|
|
|
|
+ count := 0
|
|
|
|
+
|
|
|
|
+ key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
|
|
|
|
+ model := "glm-4-flash"
|
|
|
|
+ re := regexp.MustCompile(`title:(.*?),projectname:(.*?),id:(.*?),class:(.*?)(?:\s*$|\n)`)
|
|
|
|
+ ch := make(chan bool, 50)
|
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
|
+ it := sess.DB(GF.Mongo.DB).C(GF.Mongo.Coll).Find(where).Select(nil).Sort("-_id").Iter()
|
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
|
|
|
|
+ if count%100 == 0 {
|
|
|
|
+ log.Println("current:", count, tmp["title"], tmp["_id"])
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ ch <- true
|
|
|
|
+ wg.Add(1)
|
|
|
|
+
|
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
|
+ defer func() {
|
|
|
|
+ <-ch
|
|
|
|
+ wg.Done()
|
|
|
|
+ }()
|
|
|
|
+ ////
|
|
|
|
+ title := util.ObjToString(tmp["title"])
|
|
|
|
+ projectname := util.ObjToString(tmp["projectname"])
|
|
|
|
+ biddingID := mongodb.BsonIdToSId(tmp["_id"])
|
|
|
|
+ text := fmt.Sprintf("title:%s,projectname:%s,id:%s\n", title, projectname, biddingID)
|
|
|
|
+ resu, err := ZpAI4(key, model, text)
|
|
|
|
+ if len(resu) > 0 && err != nil {
|
|
|
|
+ splitLines := strings.Split(resu, `;`)
|
|
|
|
+ for _, line := range splitLines {
|
|
|
|
+ matches := re.FindStringSubmatch(line)
|
|
|
|
+ if len(matches) == 5 {
|
|
|
|
+ class := matches[4]
|
|
|
|
+ if class != "" && class != "其他分类" {
|
|
|
|
+ tags := make([]string, 0)
|
|
|
|
+ for _, v2 := range strings.Split(class, ",") {
|
|
|
|
+ if v2 == "车辆领域" {
|
|
|
|
+ tags = append(tags, "情报_车辆租赁")
|
|
|
|
+ } else if v2 == "安防领域" {
|
|
|
|
+ tags = append(tags, "情报_安防")
|
|
|
|
+ } else if v2 == "印务领域" {
|
|
|
|
+ tags = append(tags, "情报_印务商机")
|
|
|
|
+ } else if v2 == "环境领域" {
|
|
|
|
+ tags = append(tags, "情报_环境采购")
|
|
|
|
+ } else if v2 == "家具领域" {
|
|
|
|
+ tags = append(tags, "情报_家具招投标")
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(tags) > 0 {
|
|
|
|
+ if biddingID != "" {
|
|
|
|
+ updateMgo := map[string]interface{}{
|
|
|
|
+ "tag_topinformation_zp": tags,
|
|
|
|
+ "topinformation_time": time.Now().Unix(),
|
|
|
|
+ }
|
|
|
|
+ Mgo.UpdateById("bidding", biddingID, map[string]interface{}{"$set": updateMgo})
|
|
|
|
+
|
|
|
|
+ if count%100 == 0 {
|
|
|
|
+ log.Println("update es", biddingID, tags)
|
|
|
|
+ }
|
|
|
|
+ updateEs := map[string]interface{}{
|
|
|
|
+ "tag_topinformation_zp": tags,
|
|
|
|
+ }
|
|
|
|
+ if GF.Esa.URL != "" {
|
|
|
|
+ log.Println("更新es ,", biddingID)
|
|
|
|
+ err := Esa.UpdateDocument("bidding", biddingID, updateEs)
|
|
|
|
+ if err != nil && err.Error() != "Document not updated: noop" {
|
|
|
|
+ log.Println("esa update err", biddingID, err)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if GF.Esb.URL != "" {
|
|
|
|
+ err := Esb.UpdateDocument("bidding", biddingID, updateEs)
|
|
|
|
+ if err != nil && err.Error() != "Document not updated: noop" {
|
|
|
|
+ log.Println("esb update err", biddingID, err)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }(tmp)
|
|
|
|
+
|
|
|
|
+ wg.Wait()
|
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
log.Println("数据处理完毕")
|
|
log.Println("数据处理完毕")
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+func dealAi(datas []RequestData) {
|
|
|
|
+
|
|
|
|
+ // 有匹配新的标签,需要更新MongoDB以及ES 数据
|
|
|
|
+ //if hasNew {
|
|
|
|
+ // topinformation = removeDuplicates(topinformation) //去重
|
|
|
|
+ // //ToDo 1.更新MongoDB
|
|
|
|
+ // updateMgo := map[string]interface{}{
|
|
|
|
+ // "tag_topinformation": topinformation,
|
|
|
|
+ // "topinformation_time": time.Now().Unix(),
|
|
|
|
+ // }
|
|
|
|
+ // updateEs := map[string]interface{}{
|
|
|
|
+ // "tag_topinformation": topinformation,
|
|
|
|
+ // }
|
|
|
|
+ // //log.Println("hasNew", " ====== ", biddingID)
|
|
|
|
+ // Mgo.UpdateById("bidding", biddingID, map[string]interface{}{"$set": updateMgo})
|
|
|
|
+ // //ToDo 2.更新es
|
|
|
|
+ //
|
|
|
|
+ // if GF.Esa.URL != "" {
|
|
|
|
+ // err := Esa.UpdateDocument("bidding", biddingID, updateEs)
|
|
|
|
+ // if err != nil && err.Error() != "Document not updated: noop" {
|
|
|
|
+ // log.Println("esa update err", biddingID, err)
|
|
|
|
+ // }
|
|
|
|
+ // }
|
|
|
|
+ //
|
|
|
|
+ // if GF.Esb.URL != "" {
|
|
|
|
+ // err := Esb.UpdateDocument("bidding", biddingID, updateEs)
|
|
|
|
+ // if err != nil && err.Error() != "Document not updated: noop" {
|
|
|
|
+ // log.Println("esb update err", biddingID, err)
|
|
|
|
+ // }
|
|
|
|
+ // }
|
|
|
|
+ //}
|
|
|
|
+}
|