|
@@ -5,14 +5,15 @@ import (
|
|
"log"
|
|
"log"
|
|
mu "mfw/util"
|
|
mu "mfw/util"
|
|
"net"
|
|
"net"
|
|
- "qfw/util"
|
|
|
|
|
|
+ "qfw/common/src/qfw/util"
|
|
|
|
+ qu "qfw/util"
|
|
"sync"
|
|
"sync"
|
|
"time"
|
|
"time"
|
|
)
|
|
)
|
|
|
|
|
|
-//开始判重程序
|
|
|
|
-func taskRepeat(mapInfo map[string]interface{}) {
|
|
|
|
- defer util.Catch()
|
|
|
|
|
|
+//开始增量判重程序
|
|
|
|
+func increaseRepeat(mapInfo map[string]interface{}) {
|
|
|
|
+ defer qu.Catch()
|
|
//区间id
|
|
//区间id
|
|
q := map[string]interface{}{
|
|
q := map[string]interface{}{
|
|
"_id": map[string]interface{}{
|
|
"_id": map[string]interface{}{
|
|
@@ -20,31 +21,18 @@ func taskRepeat(mapInfo map[string]interface{}) {
|
|
"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
|
|
"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
|
|
},
|
|
},
|
|
}
|
|
}
|
|
- //全量
|
|
|
|
- if IsFull && gtept!="" && ltept!=""{
|
|
|
|
- log.Println("执行全量分段模式:",gtept,"---",ltept)
|
|
|
|
- q = map[string]interface{}{
|
|
|
|
- "publishtime": map[string]interface{}{
|
|
|
|
- "$gte": util.Int64All(gtept),
|
|
|
|
- "$lte": util.Int64All(ltept),
|
|
|
|
- },
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- //临时赋值
|
|
|
|
log.Println("开始数据判重~查询条件:",mgo.DbName, extract, q)
|
|
log.Println("开始数据判重~查询条件:",mgo.DbName, extract, q)
|
|
-
|
|
|
|
sess := mgo.GetMgoConn()
|
|
sess := mgo.GetMgoConn()
|
|
defer mgo.DestoryMongoConn(sess)
|
|
defer mgo.DestoryMongoConn(sess)
|
|
it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
|
|
it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
|
|
- pool := make(chan bool, threadNum)
|
|
|
|
- wg := &sync.WaitGroup{}
|
|
|
|
- n, repeateN := 0, 0
|
|
|
|
|
|
+ n, isok ,repeatN:= 0,0,0
|
|
|
|
+ dataAllDict := make(map[string][]map[string]interface{},0)
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
if n%1000 == 0 {
|
|
if n%1000 == 0 {
|
|
- log.Println("current:", n, tmp["_id"],tmp["publishtime"], "repeateN:", repeateN)
|
|
|
|
|
|
+ log.Println("index: ", n, isok)
|
|
}
|
|
}
|
|
if util.IntAll(tmp["repeat"]) == 1 {
|
|
if util.IntAll(tmp["repeat"]) == 1 {
|
|
- repeateN++
|
|
|
|
|
|
+ repeatN++
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
@@ -52,79 +40,88 @@ func taskRepeat(mapInfo map[string]interface{}) {
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+ isok++
|
|
//数据分组-按照类别分组
|
|
//数据分组-按照类别分组
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
|
|
+ subtype := qu.ObjToString(tmp["subtype"])
|
|
|
|
+ if subtype=="招标"||subtype=="邀标"||subtype=="询价"||
|
|
|
|
+ subtype=="竞谈"||subtype=="竞价" {
|
|
|
|
+ subtype = "招标"
|
|
|
|
+ }
|
|
|
|
+ dataArr := dataAllDict[subtype]
|
|
|
|
+ if dataArr==nil {
|
|
|
|
+ dataArr = []map[string]interface{}{}
|
|
|
|
+ }
|
|
|
|
+ dataArr = append(dataArr,tmp)
|
|
|
|
+ dataAllDict[subtype] = dataArr
|
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
|
+ }
|
|
|
|
+ log.Println("类别组划分完毕:",len(dataAllDict),"组","~","需要判重:",isok,"条")
|
|
|
|
+ pool := make(chan bool, threadNum)
|
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
|
+ for _,dataArr := range dataAllDict {
|
|
pool <- true
|
|
pool <- true
|
|
wg.Add(1)
|
|
wg.Add(1)
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
|
|
+ go func(dataArr []map[string]interface{}) {
|
|
defer func() {
|
|
defer func() {
|
|
<-pool
|
|
<-pool
|
|
wg.Done()
|
|
wg.Done()
|
|
}()
|
|
}()
|
|
- info := NewInfo(tmp)
|
|
|
|
- //正常判重
|
|
|
|
- b, source, reason := DM.check(info)
|
|
|
|
- if b {
|
|
|
|
- repeateN++
|
|
|
|
- var updateID = map[string]interface{}{} //记录更新判重的
|
|
|
|
- updateID["_id"] = StringTOBsonId(info.id)
|
|
|
|
- repeat_ids:=source.repeat_ids
|
|
|
|
- repeat_ids = append(repeat_ids,info.id)
|
|
|
|
- source.repeat_ids = repeat_ids
|
|
|
|
- //替换数据池-更新
|
|
|
|
- DM.replacePoolData(source)
|
|
|
|
- Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
|
- updateID,
|
|
|
|
- map[string]interface{}{
|
|
|
|
- "$set": map[string]interface{}{
|
|
|
|
- "repeat": 1,
|
|
|
|
- "repeat_reason": reason,
|
|
|
|
- "repeat_id": source.id,
|
|
|
|
- "dataging": 0,
|
|
|
|
- "updatetime_repeat" :util.Int64All(time.Now().Unix()),
|
|
|
|
|
|
+ num := 0
|
|
|
|
+ for _,tmp := range dataArr{
|
|
|
|
+ info := NewInfo(tmp)
|
|
|
|
+ b, source, reason := DM.check(info)
|
|
|
|
+ if b {
|
|
|
|
+ num++
|
|
|
|
+ var updateID = map[string]interface{}{} //记录更新判重的
|
|
|
|
+ updateID["_id"] = StringTOBsonId(info.id)
|
|
|
|
+ repeat_ids:=source.repeat_ids
|
|
|
|
+ repeat_ids = append(repeat_ids,info.id)
|
|
|
|
+ source.repeat_ids = repeat_ids
|
|
|
|
+ DM.replacePoolData(source)//替换数据池-更新
|
|
|
|
+ Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
|
+ updateID,
|
|
|
|
+ map[string]interface{}{
|
|
|
|
+ "$set": map[string]interface{}{
|
|
|
|
+ "repeat": 1,
|
|
|
|
+ "repeat_reason": reason,
|
|
|
|
+ "repeat_id": source.id,
|
|
|
|
+ "dataging": 0,
|
|
|
|
+ "updatetime_repeat" :util.Int64All(time.Now().Unix()),
|
|
|
|
+ },
|
|
},
|
|
},
|
|
- },
|
|
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
|
|
+ updatelock.Lock()
|
|
|
|
+ repeatN+=num
|
|
|
|
+ updatelock.Unlock()
|
|
|
|
+ }(dataArr)
|
|
}
|
|
}
|
|
wg.Wait()
|
|
wg.Wait()
|
|
-
|
|
|
|
- log.Println("this current task over.", n, "repeateN:", repeateN, mapInfo["stop"])
|
|
|
|
- //log.Println("当前数据池的数量:",DM.currentTotalCount())
|
|
|
|
- //睡眠时间30s 目的是让数据池更新所有数据...
|
|
|
|
- time.Sleep(15 * time.Second)
|
|
|
|
|
|
+ log.Println("this cur task over.", n, "repeateN:", repeatN)
|
|
//更新Ocr的标记
|
|
//更新Ocr的标记
|
|
- if !IsFull {
|
|
|
|
- updateOcrFileData(mapInfo["lteid"].(string))
|
|
|
|
- //任务完成,开始发送广播通知下面节点
|
|
|
|
- if n >= repeateN && mapInfo["stop"] == nil {
|
|
|
|
- log.Println("判重任务完成发送udp")
|
|
|
|
- for _, to := range nextNode {
|
|
|
|
- sid, _ := mapInfo["gtid"].(string)
|
|
|
|
- eid, _ := mapInfo["lteid"].(string)
|
|
|
|
- key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
|
|
|
|
- by, _ := json.Marshal(map[string]interface{}{
|
|
|
|
- "gtid": sid,
|
|
|
|
- "lteid": eid,
|
|
|
|
- "stype": util.ObjToString(to["stype"]),
|
|
|
|
- "key": key,
|
|
|
|
- })
|
|
|
|
- addr := &net.UDPAddr{
|
|
|
|
- IP: net.ParseIP(to["addr"].(string)),
|
|
|
|
- Port: util.IntAll(to["port"]),
|
|
|
|
- }
|
|
|
|
- node := &udpNode{by, addr, time.Now().Unix(), 0}
|
|
|
|
- udptaskmap.Store(key, node)
|
|
|
|
- udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
|
|
|
|
- }
|
|
|
|
|
|
+ updateOcrFileData(mapInfo["lteid"].(string))
|
|
|
|
+ time.Sleep(15 * time.Second)
|
|
|
|
+ //任务完成,开始发送广播通知下面节点
|
|
|
|
+ log.Println("判重任务完成发送udp")
|
|
|
|
+ for _, to := range nextNode {
|
|
|
|
+ sid, _ := mapInfo["gtid"].(string)
|
|
|
|
+ eid, _ := mapInfo["lteid"].(string)
|
|
|
|
+ key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
|
|
|
|
+ by, _ := json.Marshal(map[string]interface{}{
|
|
|
|
+ "gtid": sid,
|
|
|
|
+ "lteid": eid,
|
|
|
|
+ "stype": util.ObjToString(to["stype"]),
|
|
|
|
+ "key": key,
|
|
|
|
+ })
|
|
|
|
+ addr := &net.UDPAddr{
|
|
|
|
+ IP: net.ParseIP(to["addr"].(string)),
|
|
|
|
+ Port: util.IntAll(to["port"]),
|
|
}
|
|
}
|
|
|
|
+ node := &udpNode{by, addr, time.Now().Unix(), 0}
|
|
|
|
+ udptaskmap.Store(key, node)
|
|
|
|
+ udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
|
|
}
|
|
}
|
|
-
|
|
|
|
}
|
|
}
|
|
//更新ocr表
|
|
//更新ocr表
|
|
func updateOcrFileData(cur_lteid string) {
|
|
func updateOcrFileData(cur_lteid string) {
|