|
@@ -674,10 +674,19 @@ func timedTaskOnce() {
|
|
//task_eid = "5db2735ba5cb26b9b7c99c6f"
|
|
//task_eid = "5db2735ba5cb26b9b7c99c6f"
|
|
//between_time := int64(1331898261)
|
|
//between_time := int64(1331898261)
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ ObjectId("5e20965785a9271abf0ad6bd")
|
|
|
|
+ ObjectId("5e20968d85a9271abf0ad6c2")
|
|
|
|
+ ObjectId("5e20965785a9271abf0ad6bd")
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ task_sid = "5e20965785a9271abf0ad6bd"
|
|
|
|
+ task_eid = "5e20968d85a9271abf0ad6c2"
|
|
|
|
+ between_time := int64(1565801997)
|
|
|
|
|
|
//发布时间间隔时间 半年
|
|
//发布时间间隔时间 半年
|
|
- between_time := curTime.Unix()-(86400*timingPubScope)
|
|
|
|
- lastid := task_sid
|
|
|
|
|
|
+ //between_time := curTime.Unix()-(86400*timingPubScope)
|
|
lasttime := int64(0)
|
|
lasttime := int64(0)
|
|
log.Println(task_sid, task_eid,curTime.Unix(),between_time)
|
|
log.Println(task_sid, task_eid,curTime.Unix(),between_time)
|
|
//区间id
|
|
//区间id
|
|
@@ -697,10 +706,9 @@ func timedTaskOnce() {
|
|
}
|
|
}
|
|
//log.Println(util.Int64All(tmp_start["publishtime"]))
|
|
//log.Println(util.Int64All(tmp_start["publishtime"]))
|
|
//取-符合-发布时间半年内的数据
|
|
//取-符合-发布时间半年内的数据
|
|
- if util.IntAll(tmp_start["dataging"]) != 1 {
|
|
|
|
|
|
+ if util.IntAll(tmp_start["dataging"]) == 1 {
|
|
pubtime := util.Int64All(tmp_start["publishtime"])
|
|
pubtime := util.Int64All(tmp_start["publishtime"])
|
|
if pubtime>0 && pubtime>between_time {
|
|
if pubtime>0 && pubtime>between_time {
|
|
- lastid = BsonTOStringId(tmp_start["_id"])
|
|
|
|
lasttime = pubtime
|
|
lasttime = pubtime
|
|
log.Println("找到第一条符合条件的数据")
|
|
log.Println("找到第一条符合条件的数据")
|
|
break
|
|
break
|
|
@@ -708,8 +716,14 @@ func timedTaskOnce() {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- log.Println("... ...",lasttime)
|
|
|
|
|
|
+ log.Println("... ...",lasttime,)
|
|
|
|
+ if lasttime <=0 {
|
|
|
|
+ log.Println("没找到dataging==1的数据")
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+
|
|
//构建第一条需要判重的数据 (数据池)
|
|
//构建第一条需要判重的数据 (数据池)
|
|
|
|
+ log.Println("开始构建第一条需要判重的数据 ---(数据池)")
|
|
DM = TimedTaskDatamap(dupdays,lasttime)
|
|
DM = TimedTaskDatamap(dupdays,lasttime)
|
|
|
|
|
|
|
|
|
|
@@ -718,12 +732,15 @@ func timedTaskOnce() {
|
|
|
|
|
|
q := map[string]interface{}{
|
|
q := map[string]interface{}{
|
|
"_id": map[string]interface{}{
|
|
"_id": map[string]interface{}{
|
|
- "$gte": StringTOBsonId(lastid),
|
|
|
|
|
|
+ "$gte": StringTOBsonId(task_sid),
|
|
"$lte": StringTOBsonId(task_eid),
|
|
"$lte": StringTOBsonId(task_eid),
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
|
|
- it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
|
|
|
|
|
|
+ log.Println("正式判重:",q)
|
|
|
|
+ it := sess.DB(mgo.DbName).C(extract_back).Find(&q).Sort("publishtime").Iter()
|
|
|
|
+
|
|
|
|
+
|
|
updateExtract := [][]map[string]interface{}{}
|
|
updateExtract := [][]map[string]interface{}{}
|
|
log.Println("线程数:", threadNum)
|
|
log.Println("线程数:", threadNum)
|
|
pool := make(chan bool, threadNum)
|
|
pool := make(chan bool, threadNum)
|
|
@@ -734,6 +751,7 @@ func timedTaskOnce() {
|
|
if n%10000 == 0 {
|
|
if n%10000 == 0 {
|
|
log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
|
|
log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
|
|
}
|
|
}
|
|
|
|
+
|
|
if util.IntAll(tmp["repeat"]) == 1 || util.IntAll(tmp["repeat"]) == -1 {
|
|
if util.IntAll(tmp["repeat"]) == 1 || util.IntAll(tmp["repeat"]) == -1 {
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
continue
|
|
continue
|
|
@@ -749,12 +767,12 @@ func timedTaskOnce() {
|
|
<-pool
|
|
<-pool
|
|
wg.Done()
|
|
wg.Done()
|
|
}()
|
|
}()
|
|
-
|
|
|
|
if pre_publishtime==0 {
|
|
if pre_publishtime==0 {
|
|
pre_publishtime = util.Int64All(tmp["publishtime"])
|
|
pre_publishtime = util.Int64All(tmp["publishtime"])
|
|
}else {
|
|
}else {
|
|
|
|
+ log.Println("现在时间差:",util.Int64All(tmp["publishtime"])-pre_publishtime,"跨度--",86400*timingSpanDay)
|
|
//时间跨度是否大于X天
|
|
//时间跨度是否大于X天
|
|
- if util.Int64All(tmp["publishtime"])-pre_publishtime >86400*timingSpanDay {
|
|
|
|
|
|
+ if (util.Int64All(tmp["publishtime"])-pre_publishtime) >(86400*timingSpanDay) {
|
|
//重新构建数据池
|
|
//重新构建数据池
|
|
log.Println("超过跨度-重新构建:",util.Int64All(tmp["publishtime"]),"---",pre_publishtime)
|
|
log.Println("超过跨度-重新构建:",util.Int64All(tmp["publishtime"]),"---",pre_publishtime)
|
|
pre_publishtime = util.Int64All(tmp["publishtime"])
|
|
pre_publishtime = util.Int64All(tmp["publishtime"])
|
|
@@ -776,7 +794,7 @@ func timedTaskOnce() {
|
|
},
|
|
},
|
|
})
|
|
})
|
|
if len(updateExtract) > 500 {
|
|
if len(updateExtract) > 500 {
|
|
- mgo.UpSertBulk(extract, updateExtract...)
|
|
|
|
|
|
+ //mgo.UpSertBulk(extract, updateExtract...)
|
|
updateExtract = [][]map[string]interface{}{}
|
|
updateExtract = [][]map[string]interface{}{}
|
|
}
|
|
}
|
|
return
|
|
return
|
|
@@ -885,14 +903,14 @@ func timedTaskOnce() {
|
|
}
|
|
}
|
|
}(tmp)
|
|
}(tmp)
|
|
if len(updateExtract) > 500 {
|
|
if len(updateExtract) > 500 {
|
|
- mgo.UpSertBulk(extract, updateExtract...)
|
|
|
|
|
|
+ //mgo.UpSertBulk(extract, updateExtract...)
|
|
updateExtract = [][]map[string]interface{}{}
|
|
updateExtract = [][]map[string]interface{}{}
|
|
}
|
|
}
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
}
|
|
}
|
|
wg.Wait()
|
|
wg.Wait()
|
|
if len(updateExtract) > 0 {
|
|
if len(updateExtract) > 0 {
|
|
- mgo.UpSertBulk(extract, updateExtract...)
|
|
|
|
|
|
+ //mgo.UpSertBulk(extract, updateExtract...)
|
|
}
|
|
}
|
|
log.Println("this timeTask over.", n, "repeateN:", repeateN)
|
|
log.Println("this timeTask over.", n, "repeateN:", repeateN)
|
|
|
|
|