|
@@ -3,7 +3,6 @@ package main
|
|
|
import (
|
|
|
"fmt"
|
|
|
"log"
|
|
|
- "math"
|
|
|
qutil "qfw/util"
|
|
|
"qfw/util/mongodb"
|
|
|
"strconv"
|
|
@@ -39,13 +38,13 @@ type Info struct {
|
|
|
titleSpecialWord bool //标题特殊词
|
|
|
specialWord bool //再次判断的特殊词
|
|
|
mergemap map[string]interface{} //合并记录
|
|
|
- accurateTime int64 //最终准确的时间
|
|
|
|
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
|
|
+}
|
|
|
+
|
|
|
var datelimit float64 = 432000 // five days (5 * 86400); presumably seconds — confirm against the timestamps it is compared with
|
|
|
var reason string // records the reason a record was judged a duplicate
|
|
|
|
|
@@ -85,7 +84,6 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
|
continuSum++
|
|
|
} else {
|
|
|
cm := tmp["comeintime"] //时间单位?
|
|
|
- //cm := tmp["publishtime"]
|
|
|
comeintime := qutil.Int64All(cm)
|
|
|
if comeintime == 0 {
|
|
|
id := qutil.BsonIdToSId(tmp["_id"])[0:8]
|
|
@@ -238,14 +236,6 @@ func NewInfo(tmp map[string]interface{}) *Info {
|
|
|
}
|
|
|
|
|
|
|
|
|
- info.accurateTime = qutil.Int64All(tmp["publishtime"])
|
|
|
- if info.accurateTime ==0 {
|
|
|
- info.accurateTime = qutil.Int64All(tmp["comeintime"])
|
|
|
- if info.accurateTime ==0{
|
|
|
- info.accurateTime, _ = strconv.ParseInt(qutil.BsonIdToSId(tmp["_id"]), 16, 64)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
return info
|
|
|
}
|
|
|
//判重方法
|
|
@@ -262,6 +252,7 @@ func (d *datamap) check(info *Info) (b bool, source *Info,reasons string) {
|
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
L:
|
|
|
for _, k := range keys {
|
|
|
data := d.data[k]
|
|
@@ -271,10 +262,6 @@ L:
|
|
|
if v.id == info.id {//正常重复
|
|
|
return false, v,""
|
|
|
}
|
|
|
- //备份 新增发布时间为空-取入库时间-在为空取id
|
|
|
- if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
|
|
|
- continue //是否为5天内数据
|
|
|
- }
|
|
|
//类型分组
|
|
|
if info.subtype==v.subtype {
|
|
|
//站点配置--
|
|
@@ -368,7 +355,16 @@ L:
|
|
|
|
|
|
//往预存数据 d 添加
|
|
|
if !b {
|
|
|
- ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
|
|
|
+ ct:=int64(0)
|
|
|
+ if info.publishtime>0 {
|
|
|
+ ct = info.publishtime
|
|
|
+ }else {
|
|
|
+ ct, _ = strconv.ParseInt(info.id[:8], 16, 64)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ //ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
|
|
|
dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
|
|
|
k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
|
|
|
data := d.data[k]
|
|
@@ -384,6 +380,7 @@ L:
|
|
|
d.data[k] = data
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
return
|
|
|
}
|
|
|
|
|
@@ -409,10 +406,6 @@ L:
|
|
|
if v.id == info.id {//正常重复
|
|
|
return false, v,""
|
|
|
}
|
|
|
- //备份 新增发布时间为空-取入库时间-在为空取id
|
|
|
- if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
|
|
|
- continue //是否为5天内数据
|
|
|
- }
|
|
|
//类型分组
|
|
|
if info.subtype==v.subtype {
|
|
|
//站点配置--
|