|
@@ -8,8 +8,6 @@ import (
|
|
|
"encoding/json"
|
|
|
"flag"
|
|
|
"fmt"
|
|
|
- "github.com/cron"
|
|
|
- "gopkg.in/mgo.v2/bson"
|
|
|
"log"
|
|
|
mu "mfw/util"
|
|
|
"net"
|
|
@@ -18,6 +16,9 @@ import (
|
|
|
"regexp"
|
|
|
"sync"
|
|
|
"time"
|
|
|
+
|
|
|
+ "github.com/cron"
|
|
|
+ "gopkg.in/mgo.v2/bson"
|
|
|
)
|
|
|
|
|
|
var (
|
|
@@ -31,7 +32,7 @@ var (
|
|
|
DM *datamap //
|
|
|
HM *historymap //判重数据
|
|
|
|
|
|
- lastid = ""
|
|
|
+ lastid = ""
|
|
|
|
|
|
//正则筛选相关
|
|
|
FilterRegTitle = regexp.MustCompile("^_$")
|
|
@@ -39,13 +40,13 @@ var (
|
|
|
FilterRegTitle_1 = regexp.MustCompile("^_$")
|
|
|
FilterRegTitle_2 = regexp.MustCompile("^_$")
|
|
|
|
|
|
- isMerger bool //是否合并
|
|
|
- Is_Sort bool //是否排序
|
|
|
- threadNum int //线程数量
|
|
|
- SiteMap map[string]map[string]interface{} //站点map
|
|
|
- LowHeavy bool //低质量数据判重
|
|
|
- TimingTask bool //是否定时任务
|
|
|
- sid, eid string //测试人员判重使用
|
|
|
+ isMerger bool //是否合并
|
|
|
+ Is_Sort bool //是否排序
|
|
|
+ threadNum int //线程数量
|
|
|
+ SiteMap map[string]map[string]interface{} //站点map
|
|
|
+ LowHeavy bool //低质量数据判重
|
|
|
+ TimingTask bool //是否定时任务
|
|
|
+ sid, eid string //测试人员判重使用
|
|
|
)
|
|
|
|
|
|
func init() {
|
|
@@ -74,8 +75,8 @@ func init() {
|
|
|
isMerger = Sysconfig["isMerger"].(bool)
|
|
|
Is_Sort = Sysconfig["isSort"].(bool)
|
|
|
threadNum = util.IntAllDef(Sysconfig["threads"], 1)
|
|
|
- LowHeavy = Sysconfig["lowHeavy"].(bool)
|
|
|
- TimingTask = Sysconfig["timingTask"].(bool)
|
|
|
+ LowHeavy = Sysconfig["lowHeavy"].(bool)
|
|
|
+ TimingTask = Sysconfig["timingTask"].(bool)
|
|
|
//站点配置
|
|
|
site := mconf["site"].(map[string]interface{})
|
|
|
SiteMap = make(map[string]map[string]interface{}, 0)
|
|
@@ -102,15 +103,11 @@ func main() {
|
|
|
udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
|
|
|
if TimingTask {
|
|
|
go timedTaskDay()
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
udpclient.Listen(processUdpMsg)
|
|
|
log.Println("Udp服务监听", updport)
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
time.Sleep(99999 * time.Hour)
|
|
|
}
|
|
|
|
|
@@ -127,7 +124,6 @@ func mainT() {
|
|
|
sid = "5da3f2c5a5cb26b9b79847fc"
|
|
|
eid = "5db2735ba5cb26b9b7c99c6f"
|
|
|
|
|
|
-
|
|
|
mapinfo := map[string]interface{}{}
|
|
|
if sid == "" || eid == "" {
|
|
|
log.Println("sid,eid参数不能为空")
|
|
@@ -176,6 +172,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
//开始判重程序
|
|
|
func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
log.Println("开始数据判重")
|
|
@@ -208,7 +205,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
if n%10000 == 0 {
|
|
|
log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
|
|
|
}
|
|
|
- if util.IntAll(tmp["repeat"]) == 1||util.IntAll(tmp["repeat"]) == -1 {
|
|
|
+ if util.IntAll(tmp["repeat"]) == 1 || util.IntAll(tmp["repeat"]) == -1 {
|
|
|
tmp = make(map[string]interface{})
|
|
|
repeateN++
|
|
|
continue
|
|
@@ -221,7 +218,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
wg.Done()
|
|
|
}()
|
|
|
info := NewInfo(tmp)
|
|
|
- if !LowHeavy { //是否进行低质量数据判重
|
|
|
+ if !LowHeavy { //是否进行低质量数据判重
|
|
|
if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
|
|
|
updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
map[string]interface{}{
|
|
@@ -229,7 +226,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
},
|
|
|
map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "repeat": -1,//无效数据标签
|
|
|
+ "repeat": -1, //无效数据标签
|
|
|
},
|
|
|
},
|
|
|
})
|
|
@@ -251,9 +248,9 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
var merge_idMap = map[string]interface{}{} //记录合并的
|
|
|
repeat_idMap["_id"] = StringTOBsonId(info.id)
|
|
|
merge_idMap["_id"] = StringTOBsonId(source.id)
|
|
|
- repeat_id := source.id//初始化一个数据
|
|
|
+ repeat_id := source.id //初始化一个数据
|
|
|
|
|
|
- if isMerger {//合并相关
|
|
|
+ if isMerger { //合并相关
|
|
|
basic_bool := basicDataScore(source, info)
|
|
|
if basic_bool {
|
|
|
//已原始数据为标准 - 对比数据打判重标签-
|
|
@@ -317,7 +314,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
merge_map,
|
|
|
})
|
|
|
}
|
|
|
- }else { //高质量数据
|
|
|
+ } else { //高质量数据
|
|
|
basic_bool := basicDataScore(source, info)
|
|
|
if !basic_bool {
|
|
|
DM.replaceSourceData(info, source.id) //替换
|
|
@@ -383,7 +380,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
sess := mgo.GetMgoConn()
|
|
|
defer mgo.DestoryMongoConn(sess)
|
|
|
|
|
|
- q:= map[string]interface{}{
|
|
|
+ q := map[string]interface{}{
|
|
|
"_id": map[string]interface{}{
|
|
|
"$gt": StringTOBsonId(mapInfo["gtid"].(string)),
|
|
|
"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
|
|
@@ -461,7 +458,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
wg.Done()
|
|
|
}()
|
|
|
info := NewInfo(tmp)
|
|
|
- if !LowHeavy { //是否进行低质量数据判重
|
|
|
+ if !LowHeavy { //是否进行低质量数据判重
|
|
|
if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
|
|
|
updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
map[string]interface{}{
|
|
@@ -469,7 +466,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
},
|
|
|
map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "repeat": -1,//无效数据标签
|
|
|
+ "repeat": -1, //无效数据标签
|
|
|
},
|
|
|
},
|
|
|
})
|
|
@@ -573,7 +570,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
merge_map,
|
|
|
})
|
|
|
}
|
|
|
- }else { //高质量数据
|
|
|
+ } else { //高质量数据
|
|
|
basic_bool := basicDataScore(source, info)
|
|
|
if !basic_bool {
|
|
|
HM.replaceSourceData(info, source.id) //替换
|
|
@@ -633,23 +630,23 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
//定时任务
|
|
|
-func timedTaskDay() {
|
|
|
+func timedTaskDay() {
|
|
|
c := cron.New()
|
|
|
c.AddFunc("0 0 0 * * ?", func() { timedTaskOnce() }) //每天凌晨执行一次
|
|
|
+ c.AddFunc("0 1 0 * * ?", func() { movedata() }) //每天凌晨1点执行一次
|
|
|
c.Start()
|
|
|
timedTaskOnce()
|
|
|
}
|
|
|
-func timedTaskOnce() {
|
|
|
+func timedTaskOnce() {
|
|
|
log.Println("开始一次定时任务")
|
|
|
now := time.Now()
|
|
|
- preTime:=time.Date(now.Year(),now.Month(),now.Day()-1,0,0,0,0,time.Local)
|
|
|
- curTime:=time.Date(now.Year(),now.Month(),now.Day(),0,0,0,0,time.Local)
|
|
|
+ preTime := time.Date(now.Year(), now.Month(), now.Day()-1, 0, 0, 0, 0, time.Local)
|
|
|
+ curTime := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local)
|
|
|
task_sid := util.BsonIdToSId(bson.NewObjectIdWithTime(preTime))
|
|
|
task_eid := util.BsonIdToSId(bson.NewObjectIdWithTime(curTime))
|
|
|
- lastid :=task_sid
|
|
|
- log.Println(task_sid,task_eid)
|
|
|
+ lastid := task_sid
|
|
|
+ log.Println(task_sid, task_eid)
|
|
|
|
|
|
//ObjectId("5da3f31aa5cb26b9b798d3aa")
|
|
|
//ObjectId("5da418c4a5cb26b9b7e3e9a6")
|
|
@@ -660,7 +657,7 @@ func timedTaskOnce() {
|
|
|
//区间id
|
|
|
q := map[string]interface{}{
|
|
|
"_id": map[string]interface{}{
|
|
|
- "$gte": StringTOBsonId(task_sid),
|
|
|
+ "$gte": StringTOBsonId(task_sid),
|
|
|
"$lte": StringTOBsonId(task_eid),
|
|
|
},
|
|
|
}
|
|
@@ -672,14 +669,13 @@ func timedTaskOnce() {
|
|
|
if startNum%10000 == 0 {
|
|
|
log.Println("正序遍历:", startNum)
|
|
|
}
|
|
|
- if util.IntAll(tmp_start["dataging"])==1 {//取起始id
|
|
|
+ if util.IntAll(tmp_start["dataging"]) == 1 { //取起始id
|
|
|
lastid = BsonTOStringId(tmp_start["_id"])
|
|
|
break
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
- DM = NewDatamap(dupdays,lastid)
|
|
|
+ DM = NewDatamap(dupdays, lastid)
|
|
|
log.Println("本地数据加载完成,定时任务数据判重开始")
|
|
|
sess := mgo.GetMgoConn()
|
|
|
defer mgo.DestoryMongoConn(sess)
|
|
@@ -693,11 +689,11 @@ func timedTaskOnce() {
|
|
|
if n%10000 == 0 {
|
|
|
log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
|
|
|
}
|
|
|
- if util.IntAll(tmp["repeat"]) == 1||util.IntAll(tmp["repeat"]) == -1{
|
|
|
+ if util.IntAll(tmp["repeat"]) == 1 || util.IntAll(tmp["repeat"]) == -1 {
|
|
|
tmp = make(map[string]interface{})
|
|
|
continue
|
|
|
}
|
|
|
- if util.IntAll(tmp["dataging"])!=1 {
|
|
|
+ if util.IntAll(tmp["dataging"]) != 1 {
|
|
|
tmp = make(map[string]interface{})
|
|
|
continue
|
|
|
}
|
|
@@ -710,7 +706,7 @@ func timedTaskOnce() {
|
|
|
wg.Done()
|
|
|
}()
|
|
|
info := NewInfo(tmp)
|
|
|
- if !LowHeavy { //是否进行低质量数据判重
|
|
|
+ if !LowHeavy { //是否进行低质量数据判重
|
|
|
if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
|
|
|
updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
map[string]interface{}{
|
|
@@ -718,7 +714,7 @@ func timedTaskOnce() {
|
|
|
},
|
|
|
map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "repeat": -1,//无效数据标签
|
|
|
+ "repeat": -1, //无效数据标签
|
|
|
"dataging": 0,
|
|
|
},
|
|
|
},
|
|
@@ -741,9 +737,9 @@ func timedTaskOnce() {
|
|
|
var merge_idMap = map[string]interface{}{} //记录合并的
|
|
|
repeat_idMap["_id"] = StringTOBsonId(info.id)
|
|
|
merge_idMap["_id"] = StringTOBsonId(source.id)
|
|
|
- repeat_id := source.id//初始化一个数据
|
|
|
+ repeat_id := source.id //初始化一个数据
|
|
|
|
|
|
- if isMerger {//合并相关
|
|
|
+ if isMerger { //合并相关
|
|
|
basic_bool := basicDataScore(source, info)
|
|
|
if basic_bool {
|
|
|
//已原始数据为标准 - 对比数据打判重标签-
|
|
@@ -767,7 +763,7 @@ func timedTaskOnce() {
|
|
|
if is_replace { //有过合并-更新数据
|
|
|
merge_map = map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "merge": newData.mergemap,
|
|
|
+ "merge": newData.mergemap,
|
|
|
"dataging": 0,
|
|
|
},
|
|
|
}
|
|
@@ -808,7 +804,7 @@ func timedTaskOnce() {
|
|
|
merge_map,
|
|
|
})
|
|
|
}
|
|
|
- }else { //高质量数据
|
|
|
+ } else { //高质量数据
|
|
|
basic_bool := basicDataScore(source, info)
|
|
|
if !basic_bool {
|
|
|
DM.replaceSourceData(info, source.id) //替换
|
|
@@ -825,7 +821,7 @@ func timedTaskOnce() {
|
|
|
"repeat": 1,
|
|
|
"repeat_reason": reason,
|
|
|
"repeat_id": repeat_id,
|
|
|
- "dataging": 0,
|
|
|
+ "dataging": 0,
|
|
|
},
|
|
|
},
|
|
|
})
|
|
@@ -844,7 +840,6 @@ func timedTaskOnce() {
|
|
|
}
|
|
|
log.Println("this timeTask over.", n, "repeateN:", repeateN)
|
|
|
|
|
|
-
|
|
|
//任务完成,开始发送广播通知下面节点 发udp 去升索引待定 + 合并
|
|
|
if n > repeateN {
|
|
|
for _, to := range nextNode {
|
|
@@ -868,15 +863,6 @@ func timedTaskOnce() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
//合并字段-并更新merge字段的值
|
|
|
func mergeDataFields(source *Info, info *Info) (*Info, []int64, bool) {
|
|
|
|
|
@@ -1183,3 +1169,26 @@ func invalidData(d1 string, d2 string, d3 string, d4 string) bool {
|
|
|
}
|
|
|
return false
|
|
|
}
|
|
|
+
|
|
|
+//迁移数据dupdays+5之前的数据
|
|
|
+func movedata() {
|
|
|
+ sess := mgo.GetMgoConn()
|
|
|
+ defer mgo.DestoryMongoConn(sess)
|
|
|
+ year, month, day := time.Now().Date()
|
|
|
+ q := map[string]interface{}{
|
|
|
+ "comeintime": map[string]interface{}{
|
|
|
+ "$lt": time.Date(year, month, day, 0, 0, 0, 0, time.Local).Add(-time.Duration(dupdays+5) * 24 * time.Hour).Unix(),
|
|
|
+ },
|
|
|
+ }
|
|
|
+ log.Println(q)
|
|
|
+ it := sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
|
|
|
+ index := 0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
|
|
|
+ mgo.Save(extract+"_back", tmp)
|
|
|
+ tmp = map[string]interface{}{}
|
|
|
+ if index%1000 == 0 {
|
|
|
+ log.Println("index", index)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.Println("movedata ok index", index)
|
|
|
+}
|