|
@@ -7,15 +7,12 @@ package main
|
|
|
import (
|
|
|
"encoding/json"
|
|
|
"flag"
|
|
|
- "github.com/cron"
|
|
|
- "gopkg.in/mgo.v2/bson"
|
|
|
"log"
|
|
|
mu "mfw/util"
|
|
|
"net"
|
|
|
"os"
|
|
|
"qfw/util"
|
|
|
"regexp"
|
|
|
- "strconv"
|
|
|
"sync"
|
|
|
"time"
|
|
|
)
|
|
@@ -190,7 +187,6 @@ func main() {
|
|
|
|
|
|
log.Println("测试:全量判重-准备开始")
|
|
|
taskRepeat(mapinfo)
|
|
|
-
|
|
|
time.Sleep(99999 * time.Hour)
|
|
|
}
|
|
|
}
|
|
@@ -208,7 +204,6 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
|
|
|
key = "udpok"
|
|
|
}
|
|
|
udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
|
|
|
-
|
|
|
//插入任务-判断任务-是否存在
|
|
|
updatelock.Lock()
|
|
|
taskList = append(taskList,mapInfo)
|
|
@@ -225,58 +220,6 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
|
|
|
}
|
|
|
|
|
|
|
|
|
-//upd接收
|
|
|
-//func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
|
|
|
-// select {
|
|
|
-// case udptask <- struct{}{}:
|
|
|
-// log.Println("...接收段落,通道正常...")
|
|
|
-// switch act {
|
|
|
-// case mu.OP_TYPE_DATA: //上个节点的数据
|
|
|
-// var mapInfo map[string]interface{}
|
|
|
-// err := json.Unmarshal(data, &mapInfo)
|
|
|
-// if err != nil {
|
|
|
-// log.Println("error data:", err)
|
|
|
-// udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
|
|
|
-// } else if mapInfo != nil {
|
|
|
-// key, _ := mapInfo["key"].(string)
|
|
|
-// if key == "" {
|
|
|
-// key = "udpok"
|
|
|
-// }
|
|
|
-// log.Println("当前段落,需要判重...",mapInfo)
|
|
|
-// udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
|
|
|
-// task(data, mapInfo)
|
|
|
-// }
|
|
|
-// log.Println("此段任务结束...",err,mapInfo)
|
|
|
-// <-udptask
|
|
|
-// case mu.OP_NOOP: //下个节点回应
|
|
|
-// ok := string(data)
|
|
|
-// if ok != "" {
|
|
|
-// log.Println("下节点回应-ok:", ok)
|
|
|
-// udptaskmap.Delete(ok)
|
|
|
-// }
|
|
|
-// <-udptask
|
|
|
-// }
|
|
|
-// case <-time.After(2 * time.Second):
|
|
|
-// switch act {
|
|
|
-// case mu.OP_TYPE_DATA: //上个节点的数据
|
|
|
-// log.Println("通道堵塞中...上节点")
|
|
|
-// udpclient.WriteUdp([]byte("repeat_busy"), mu.OP_NOOP, ra)
|
|
|
-// case mu.OP_NOOP: //下个节点回应
|
|
|
-// log.Println("通道堵塞中...下节点")
|
|
|
-// ok := string(data)
|
|
|
-// if ok != "" {
|
|
|
-// log.Println("下节点回应-ok:", ok)
|
|
|
-// udptaskmap.Delete(ok)
|
|
|
-// }
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// //udptask <- struct{}{}
|
|
|
-// //defer func() {
|
|
|
-// // <-udptask
|
|
|
-// //}()
|
|
|
-//}
|
|
|
-
|
|
|
//监听-获取-分发判重任务
|
|
|
func getRepeatTask() {
|
|
|
for {
|
|
@@ -302,527 +245,28 @@ func getRepeatTask() {
|
|
|
|
|
|
|
|
|
|
|
|
-//开始判重程序
|
|
|
-func taskRepeat(mapInfo map[string]interface{}) {
|
|
|
- defer util.Catch()
|
|
|
- //区间id
|
|
|
- q := map[string]interface{}{
|
|
|
- "_id": map[string]interface{}{
|
|
|
- "$gt": StringTOBsonId(mapInfo["gtid"].(string)),
|
|
|
- "$lte": StringTOBsonId(mapInfo["lteid"].(string)),
|
|
|
- },
|
|
|
- }
|
|
|
- //全量
|
|
|
- if IsFull && gtept!="" && ltept!=""{
|
|
|
- log.Println("执行全量分段模式")
|
|
|
- log.Println(gtept,"---",ltept)
|
|
|
- q = map[string]interface{}{
|
|
|
- "publishtime": map[string]interface{}{
|
|
|
- "$gte": util.Int64All(gtept),
|
|
|
- "$lte": util.Int64All(ltept),
|
|
|
- },
|
|
|
- }
|
|
|
- }
|
|
|
- //临时赋值
|
|
|
- log.Println("开始数据判重~查询条件:",mgo.DbName, extract, q)
|
|
|
-
|
|
|
-
|
|
|
- sess := mgo.GetMgoConn()
|
|
|
- defer mgo.DestoryMongoConn(sess)
|
|
|
- it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
|
|
|
- pool := make(chan bool, threadNum)
|
|
|
- wg := &sync.WaitGroup{}
|
|
|
- n, repeateN := 0, 0
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
|
- if n%1000 == 0 {
|
|
|
- log.Println("current:", n, tmp["_id"],tmp["publishtime"], "repeateN:", repeateN)
|
|
|
- }
|
|
|
|
|
|
- if util.IntAll(tmp["repeat"]) == 1 {
|
|
|
- repeateN++
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- continue
|
|
|
- }
|
|
|
|
|
|
- if util.IntAll(tmp["dataging"]) == 1 && !IsFull{
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- continue
|
|
|
- }
|
|
|
|
|
|
- pool <- true
|
|
|
- wg.Add(1)
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
- defer func() {
|
|
|
- <-pool
|
|
|
- wg.Done()
|
|
|
- }()
|
|
|
- info := NewInfo(tmp)
|
|
|
- //正常判重
|
|
|
- b, source, reason := DM.check(info)
|
|
|
- if b {
|
|
|
- repeateN++
|
|
|
- var updateID = map[string]interface{}{} //记录更新判重的
|
|
|
- updateID["_id"] = StringTOBsonId(info.id)
|
|
|
- repeat_ids:=source.repeat_ids
|
|
|
- repeat_ids = append(repeat_ids,info.id)
|
|
|
- source.repeat_ids = repeat_ids
|
|
|
- //替换数据池-更新
|
|
|
- DM.replacePoolData(source)
|
|
|
-
|
|
|
- //Update.updatePool <- []map[string]interface{}{//原始数据打标签
|
|
|
- // map[string]interface{}{
|
|
|
- // "_id": StringTOBsonId(source.id),
|
|
|
- // },
|
|
|
- // map[string]interface{}{
|
|
|
- // "$set": map[string]interface{}{
|
|
|
- // "repeat_ids": repeat_ids,
|
|
|
- // },
|
|
|
- // },
|
|
|
- //}
|
|
|
- Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
- updateID,
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat": 1,
|
|
|
- "repeat_reason": reason,
|
|
|
- "repeat_id": source.id,
|
|
|
- "dataging": 0,
|
|
|
- "updatetime_repeat" :util.Int64All(time.Now().Unix()),
|
|
|
- },
|
|
|
- },
|
|
|
- }
|
|
|
- }
|
|
|
- }(tmp)
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
- wg.Wait()
|
|
|
-
|
|
|
- log.Println("this current task over.", n, "repeateN:", repeateN, mapInfo["stop"])
|
|
|
- //log.Println("当前数据池的数量:",DM.currentTotalCount())
|
|
|
- //睡眠时间30s 目的是让数据池更新所有数据...
|
|
|
- time.Sleep(15 * time.Second)
|
|
|
- //更新Ocr的标记
|
|
|
- if !IsFull {
|
|
|
- updateOcrFileData(mapInfo["lteid"].(string))
|
|
|
- //任务完成,开始发送广播通知下面节点
|
|
|
- if n >= repeateN && mapInfo["stop"] == nil {
|
|
|
- log.Println("判重任务完成发送udp")
|
|
|
- for _, to := range nextNode {
|
|
|
- sid, _ := mapInfo["gtid"].(string)
|
|
|
- eid, _ := mapInfo["lteid"].(string)
|
|
|
- key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
|
|
|
- by, _ := json.Marshal(map[string]interface{}{
|
|
|
- "gtid": sid,
|
|
|
- "lteid": eid,
|
|
|
- "stype": util.ObjToString(to["stype"]),
|
|
|
- "key": key,
|
|
|
- })
|
|
|
- addr := &net.UDPAddr{
|
|
|
- IP: net.ParseIP(to["addr"].(string)),
|
|
|
- Port: util.IntAll(to["port"]),
|
|
|
- }
|
|
|
- node := &udpNode{by, addr, time.Now().Unix(), 0}
|
|
|
- udptaskmap.Store(key, node)
|
|
|
- udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
-}
|
|
|
|
|
|
-func updateOcrFileData(cur_lteid string) {
|
|
|
- //更新ocr 分类表-判重的状态
|
|
|
- log.Println("开始更新Ocr表-标记",cur_lteid)
|
|
|
- task_sess := task_mgo.GetMgoConn()
|
|
|
- defer task_mgo.DestoryMongoConn(task_sess)
|
|
|
- q_task:=map[string]interface{}{}
|
|
|
- it_last := task_sess.DB(task_mgo.DbName).C(task_collName).Find(&q_task).Sort("-_id").Iter()
|
|
|
- isUpdateOcr:=false
|
|
|
- updateOcrFile:=[][]map[string]interface{}{}
|
|
|
- for tmp := make(map[string]interface{}); it_last.Next(&tmp); {
|
|
|
- cur_id := BsonTOStringId(tmp["_id"])
|
|
|
- lteid:=util.ObjToString(tmp["lteid"])
|
|
|
- if (lteid==cur_lteid) { //需要更新
|
|
|
- log.Println("找到该lteid数据",cur_lteid,cur_id)
|
|
|
- isUpdateOcr = true
|
|
|
- updateOcrFile = append(updateOcrFile, []map[string]interface{}{//重复数据打标签
|
|
|
- map[string]interface{}{
|
|
|
- "_id": tmp["_id"],
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "is_repeat_status": 1,
|
|
|
- "is_repeat_time" : util.Int64All(time.Now().Unix()),
|
|
|
- },
|
|
|
- },
|
|
|
- })
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- break
|
|
|
- }else {
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
- }
|
|
|
- if !isUpdateOcr {
|
|
|
- log.Println("出现异常问题,查询不到ocr的lteid",cur_lteid)
|
|
|
- }else {
|
|
|
- if len(updateOcrFile) > 0 {
|
|
|
- task_mgo.UpSertBulk(task_collName, updateOcrFile...)
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
|
|
|
-//历史判重
|
|
|
-func historyTaskDay() {
|
|
|
- defer util.Catch()
|
|
|
|
|
|
- for {
|
|
|
- start:=time.Now().Unix()
|
|
|
|
|
|
- if gtid=="" {
|
|
|
- log.Println("请传gtid,否则无法运行")
|
|
|
- os.Exit(0)
|
|
|
- return
|
|
|
- }
|
|
|
- if lteid!="" {
|
|
|
- //先进行数据迁移
|
|
|
- log.Println("开启一次迁移任务",gtid,lteid)
|
|
|
- moveHistoryData(gtid,lteid)
|
|
|
- gtid = lteid //替换数据
|
|
|
- }
|
|
|
|
|
|
- //查询表最后一个id
|
|
|
- task_sess := task_mgo.GetMgoConn()
|
|
|
- defer task_mgo.DestoryMongoConn(task_sess)
|
|
|
- q:=map[string]interface{}{}
|
|
|
- between_time := time.Now().Unix() - (86400 * timingPubScope)//两年周期
|
|
|
- it_last := task_sess.DB(task_mgo.DbName).C(task_collName).Find(&q).Sort("-_id").Iter()
|
|
|
-
|
|
|
- isRepeatStatus:=false
|
|
|
- for tmp := make(map[string]interface{}); it_last.Next(&tmp); {
|
|
|
- is_repeat_status:=util.IntAll(tmp["is_repeat_status"])
|
|
|
- if is_repeat_status == 1 {
|
|
|
- lteid = util.ObjToString(tmp["lteid"])
|
|
|
- log.Println("查询的最后一个已标记的任务lteid:",lteid)
|
|
|
- isRepeatStatus = true
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- break
|
|
|
- }else {
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- if !isRepeatStatus {
|
|
|
- log.Println("查询不到有标记的lteid数据")
|
|
|
- log.Println("睡眠5分钟 gtid:",gtid,"lteid:",lteid)
|
|
|
- time.Sleep(5 * time.Minute)
|
|
|
- continue
|
|
|
- }
|
|
|
|
|
|
- log.Println("查询完毕-找到有标记的lteid-先睡眠5分钟",gtid,lteid)
|
|
|
- time.Sleep(5 * time.Minute)
|
|
|
-
|
|
|
- sess := mgo.GetMgoConn()//连接器
|
|
|
- defer mgo.DestoryMongoConn(sess)
|
|
|
- //开始判重
|
|
|
- q = map[string]interface{}{
|
|
|
- "_id": map[string]interface{}{
|
|
|
- "$gt": StringTOBsonId(gtid),
|
|
|
- "$lte": StringTOBsonId(lteid),
|
|
|
- },
|
|
|
- }
|
|
|
- log.Println("历史判重查询条件:",q,"时间:", between_time)
|
|
|
- it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
|
|
|
- num,oknum,outnum, deterTime:= int64(0),int64(0),int64(0),int64(0) //计数
|
|
|
- pendAllArr:=[][]map[string]interface{}{}//待处理数组
|
|
|
- dayArr := []map[string]interface{}{}
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); num++ {
|
|
|
- if num%10000 == 0 {
|
|
|
- log.Println("正序遍历:", num)
|
|
|
- }
|
|
|
- //取-符合-发布时间X年内的数据
|
|
|
- if util.IntAll(tmp["dataging"]) == 1 {
|
|
|
- pubtime := util.Int64All(tmp["publishtime"])
|
|
|
- if pubtime > 0 && pubtime >= between_time {
|
|
|
- oknum++
|
|
|
- if deterTime==0 {
|
|
|
- log.Println("找到第一条符合条件的数据")
|
|
|
- deterTime = util.Int64All(tmp["publishtime"])
|
|
|
- dayArr = append(dayArr,tmp)
|
|
|
- }else {
|
|
|
- if pubtime-deterTime >timingSpanDay*86400 {
|
|
|
- //新数组重新构建,当前组数据加到全部组数据
|
|
|
- pendAllArr = append(pendAllArr,dayArr)
|
|
|
- dayArr = []map[string]interface{}{}
|
|
|
- deterTime = util.Int64All(tmp["publishtime"])
|
|
|
- dayArr = append(dayArr,tmp)
|
|
|
- }else {
|
|
|
- dayArr = append(dayArr,tmp)
|
|
|
- }
|
|
|
- }
|
|
|
- }else {
|
|
|
- outnum++
|
|
|
- //不在两年内的也清标记
|
|
|
- Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
- map[string]interface{}{
|
|
|
- "_id": tmp["_id"],
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "dataging": 0,
|
|
|
- "history_updatetime":util.Int64All(time.Now().Unix()),
|
|
|
- },
|
|
|
- },
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
|
|
|
- if len(dayArr)>0 {
|
|
|
- pendAllArr = append(pendAllArr,dayArr)
|
|
|
- dayArr = []map[string]interface{}{}
|
|
|
- }
|
|
|
|
|
|
- log.Println("查询数量:",num,"符合条件:",oknum,"未在两年内:",outnum)
|
|
|
|
|
|
- if len(pendAllArr) <= 0 {
|
|
|
- log.Println("没找到dataging==1的数据")
|
|
|
- }
|
|
|
|
|
|
- //测试分组数量是否正确
|
|
|
- testNum:=0
|
|
|
- for k,v:=range pendAllArr {
|
|
|
- log.Println("第",k,"组--","数量:",len(v))
|
|
|
- testNum = testNum+len(v)
|
|
|
- }
|
|
|
- log.Println("本地构建分组完成:",len(pendAllArr),"组","测试-总计数量:",testNum)
|
|
|
-
|
|
|
- n, repeateN := 0, 0
|
|
|
- log.Println("线程数:",threadNum)
|
|
|
- pool := make(chan bool, threadNum)
|
|
|
- wg := &sync.WaitGroup{}
|
|
|
- for k,v:=range pendAllArr { //每组结束更新一波数据
|
|
|
- pool <- true
|
|
|
- wg.Add(1)
|
|
|
- go func(k int, v []map[string]interface{}) {
|
|
|
- defer func() {
|
|
|
- <-pool
|
|
|
- wg.Done()
|
|
|
- }()
|
|
|
- //相关ids 跨表
|
|
|
- groupOtherExtract := [][]map[string]interface{}{}
|
|
|
-
|
|
|
- //构建当前组的数据池
|
|
|
- log.Println("构建第", k, "组---(数据池)")
|
|
|
- //当前组的第一个发布时间
|
|
|
- first_pt := util.Int64All(v[len(v)-1]["publishtime"])
|
|
|
- curTM := TimedTaskDatamap(dupdays+int(timingSpanDay)+1, first_pt+86400, int(k))
|
|
|
- log.Println("开始遍历判重第", k, "组 共计数量:", len(v))
|
|
|
- n = n + len(v)
|
|
|
- log.Println("统计目前总数量:", n, "重复数量:", repeateN)
|
|
|
- for _, tmp := range v {
|
|
|
- info := NewInfo(tmp)
|
|
|
- b, source, reason := curTM.check(info)
|
|
|
- if b { //有重复,生成更新语句,更新抽取和更新招标
|
|
|
- repeateN++
|
|
|
- //重复数据打标签
|
|
|
- repeat_ids:=source.repeat_ids
|
|
|
- repeat_ids = append(repeat_ids,info.id)
|
|
|
- source.repeat_ids = repeat_ids
|
|
|
-
|
|
|
- updatelock.Lock()
|
|
|
- //替换数据池-更新
|
|
|
- DM.replacePoolData(source)
|
|
|
- //更新数据源
|
|
|
- //判断是否在当前段落
|
|
|
- if judgeIsCurIds(gtid,lteid,source.id) {
|
|
|
- Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
- map[string]interface{}{
|
|
|
- "_id": StringTOBsonId(source.id),
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat_ids": repeat_ids,
|
|
|
- },
|
|
|
- },
|
|
|
- }
|
|
|
- }else {
|
|
|
- groupOtherExtract = append(groupOtherExtract, []map[string]interface{}{//重复数据打标签
|
|
|
- map[string]interface{}{
|
|
|
- "_id": StringTOBsonId(source.id),
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat_ids": repeat_ids,
|
|
|
- },
|
|
|
- },
|
|
|
- })
|
|
|
- }
|
|
|
- Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
- map[string]interface{}{
|
|
|
- "_id": tmp["_id"],
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat": 1,
|
|
|
- "repeat_reason": reason,
|
|
|
- "repeat_id": source.id,
|
|
|
- "dataging": 0,
|
|
|
- "history_updatetime":util.Int64All(time.Now().Unix()),
|
|
|
- },
|
|
|
- },
|
|
|
- }
|
|
|
- if len(groupOtherExtract) >= 500 {
|
|
|
- mgo.UpSertBulk(extract_back, groupOtherExtract...)
|
|
|
- groupOtherExtract = [][]map[string]interface{}{}
|
|
|
- }
|
|
|
-
|
|
|
- updatelock.Unlock()
|
|
|
-
|
|
|
-
|
|
|
- } else {
|
|
|
- Update.updatePool <- []map[string]interface{}{//重复数据打标签
|
|
|
- map[string]interface{}{
|
|
|
- "_id": tmp["_id"],
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "dataging": 0, //符合条件的都为dataging==0
|
|
|
- "history_updatetime":util.Int64All(time.Now().Unix()),
|
|
|
- },
|
|
|
- },
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- //每组数据结束-更新数据
|
|
|
- updatelock.Lock()
|
|
|
- if len(groupOtherExtract) > 0 {
|
|
|
- mgo.UpSertBulk(extract_back, groupOtherExtract...)
|
|
|
- }
|
|
|
- updatelock.Unlock()
|
|
|
-
|
|
|
- }(k, v)
|
|
|
|
|
|
- }
|
|
|
|
|
|
- wg.Wait()
|
|
|
-
|
|
|
- log.Println("this timeTask over.", n, "repeateN:", repeateN,gtid,lteid)
|
|
|
-
|
|
|
- time.Sleep(30 * time.Second)
|
|
|
- //任务完成,开始发送广播通知下面节点 发udp 去升索引待定 + 合并
|
|
|
- if n >= repeateN && gtid!=lteid{
|
|
|
- for _, to := range nextNode {
|
|
|
- next_sid := util.BsonIdToSId(gtid)
|
|
|
- next_eid := util.BsonIdToSId(lteid)
|
|
|
- key := next_sid + "-" + next_eid + "-" + util.ObjToString(to["stype"])
|
|
|
- by, _ := json.Marshal(map[string]interface{}{
|
|
|
- "gtid": next_sid,
|
|
|
- "lteid": next_eid,
|
|
|
- "stype": util.ObjToString(to["stype"]),
|
|
|
- "key": key,
|
|
|
- })
|
|
|
- addr := &net.UDPAddr{
|
|
|
- IP: net.ParseIP(to["addr"].(string)),
|
|
|
- Port: util.IntAll(to["port"]),
|
|
|
- }
|
|
|
- node := &udpNode{by, addr, time.Now().Unix(), 0}
|
|
|
- udptaskmap.Store(key, node)
|
|
|
- udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- end:=time.Now().Unix()
|
|
|
|
|
|
- log.Println(gtid,lteid)
|
|
|
|
|
|
- if end-start<60*5 {
|
|
|
- log.Println("睡眠.............")
|
|
|
- time.Sleep(5 * time.Minute)
|
|
|
- }
|
|
|
- log.Println("继续下一段的历史判重")
|
|
|
- }
|
|
|
-}
|
|
|
|
|
|
-//判断是否在当前id段落
|
|
|
-func judgeIsCurIds (gtid string,lteid string,curid string) bool {
|
|
|
|
|
|
- gt_time, _ := strconv.ParseInt(gtid[:8], 16, 64)
|
|
|
- lte_time, _ := strconv.ParseInt(lteid[:8], 16, 64)
|
|
|
- cur_time, _ := strconv.ParseInt(curid[:8], 16, 64)
|
|
|
- if cur_time>=gt_time&&cur_time<=lte_time {
|
|
|
- return true
|
|
|
- }
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-//迁移上一段数据
|
|
|
-func moveHistoryData(startid string,endid string) {
|
|
|
- sess := mgo.GetMgoConn()
|
|
|
- defer mgo.DestoryMongoConn(sess)
|
|
|
- year, month, day := time.Now().Date()
|
|
|
- q := map[string]interface{}{
|
|
|
- "_id": map[string]interface{}{
|
|
|
- "$gt": StringTOBsonId(startid),
|
|
|
- "$lte": StringTOBsonId(endid),
|
|
|
- },
|
|
|
- }
|
|
|
- log.Println(q)
|
|
|
- it := sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
|
|
|
- index := 0
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
|
|
|
- mgo.Save(extract_back, tmp)
|
|
|
- tmp = map[string]interface{}{}
|
|
|
- if index%1000 == 0 {
|
|
|
- log.Println("index", index)
|
|
|
- }
|
|
|
- }
|
|
|
- log.Println("save to", extract_back, " ok index", index)
|
|
|
-
|
|
|
- qv := map[string]interface{}{
|
|
|
- "comeintime": map[string]interface{}{
|
|
|
- "$lt": time.Date(year, month, day, 0, 0, 0, 0, time.Local).Add(-time.Duration(dupdays+1) * 24 * time.Hour*2).Unix(),
|
|
|
- },
|
|
|
- }
|
|
|
- delnum := mgo.Delete(extract, qv)
|
|
|
- log.Println("remove from ", extract, delnum)
|
|
|
-
|
|
|
-}
|
|
|
-
|
|
|
-func moveTimeoutData() {
|
|
|
- log.Println("部署迁移定时任务")
|
|
|
- c := cron.New()
|
|
|
- c.AddFunc("0 0 0 * * ?", func() { moveOnceTimeOut() })
|
|
|
- c.Start()
|
|
|
-}
|
|
|
-
|
|
|
-func moveOnceTimeOut() {
|
|
|
- log.Println("执行一次迁移超时数据")
|
|
|
- sess := mgo.GetMgoConn()
|
|
|
- defer mgo.DestoryMongoConn(sess)
|
|
|
- now:=time.Now()
|
|
|
- move_time := time.Date(now.Year()-2, now.Month(), now.Day(), 0, 0, 0, 0, time.Local)
|
|
|
- task_id := util.BsonIdToSId(bson.NewObjectIdWithTime(move_time))
|
|
|
- q := map[string]interface{}{
|
|
|
- "_id": map[string]interface{}{
|
|
|
- "$lt": StringTOBsonId(task_id),
|
|
|
- },
|
|
|
- }
|
|
|
-
|
|
|
- it := sess.DB(mgo.DbName).C("result_20200714").Find(&q).Iter()
|
|
|
- index := 0
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
|
|
|
- if index%10000 == 0 {
|
|
|
- log.Println("index", index)
|
|
|
- }
|
|
|
- del_id:=BsonTOStringId(tmp["_id"])
|
|
|
- mgo.Save("result_20200713", tmp)
|
|
|
- mgo.DeleteById("result_20200714",del_id)
|
|
|
- tmp = map[string]interface{}{}
|
|
|
- }
|
|
|
- log.Println("save and delete", " ok index", index)
|
|
|
-
|
|
|
-}
|
|
|
|
|
|
|
|
|
|