Selaa lähdekoodia

以 publishtime 时间为标准

apple 5 vuotta sitten
vanhempi
commit
8f4089774b
4 muutettua tiedostoa, joissa 24 lisäystä ja 24 poistoa
  1. 2 2
      udpfilterdup/src/config.json
  2. 14 21
      udpfilterdup/src/datamap.go
  3. 4 1
      udpfilterdup/src/main.go
  4. 4 0
      udps/main.go

+ 2 - 2
udpfilterdup/src/config.json

@@ -4,8 +4,8 @@
     "mongodb": {
         "addr": "192.168.3.207:27082",
         "pool": 15,
-        "db": "zhaolongyue",
-        "extract": "aliyun_0102",
+        "db": "extract_kf",
+        "extract": "a_testbidding_new",
         "extract_copy": "a_testbidding",
         "bidding": "bidding_126"
     },

+ 14 - 21
udpfilterdup/src/datamap.go

@@ -3,7 +3,6 @@ package main
 import (
 	"fmt"
 	"log"
-	"math"
 	qutil "qfw/util"
 	"qfw/util/mongodb"
 	"strconv"
@@ -39,13 +38,13 @@ type Info struct {
 	titleSpecialWord   bool 		//标题特殊词
 	specialWord        bool	 		//再次判断的特殊词
 	mergemap           map[string]interface{}   //合并记录
-	accurateTime       int64		//最终准确的时间
 
 
-}
 
 
 
+}
+
 var datelimit = float64(432000)  //五天
 var reason string //判重原因记录
 
@@ -85,7 +84,6 @@ func NewDatamap(days int, lastid string) *datamap {
 			continuSum++
 		} else {
 			cm := tmp["comeintime"] //时间单位?
-			//cm := tmp["publishtime"]
 			comeintime := qutil.Int64All(cm)
 			if comeintime == 0 {
 				id := qutil.BsonIdToSId(tmp["_id"])[0:8]
@@ -238,14 +236,6 @@ func NewInfo(tmp map[string]interface{}) *Info {
 	}
 
 
-	info.accurateTime = qutil.Int64All(tmp["publishtime"])
-	if info.accurateTime ==0 {
-		info.accurateTime = qutil.Int64All(tmp["comeintime"])
-		if info.accurateTime ==0{
-			info.accurateTime, _ = strconv.ParseInt(qutil.BsonIdToSId(tmp["_id"]), 16, 64)
-		}
-	}
-
 	return info
 }
 //判重方法
@@ -262,6 +252,7 @@ func (d *datamap) check(info *Info) (b bool,  source *Info,reasons string) {
 			keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
 		}
 	}
+
 L:
 	for _, k := range keys {
 		data := d.data[k]
@@ -271,10 +262,6 @@ L:
 				if v.id == info.id {//正常重复
 					return false, v,""
 				}
-				//备份  新增发布时间为空-取入库时间-在为空取id
-				if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
-					continue   //是否为5天内数据
-				}
 				//类型分组
 				if info.subtype==v.subtype {
 					//站点配置--
@@ -368,7 +355,16 @@ L:
 
 	//往预存数据 d 添加
 	if !b {
-		ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
+		ct:=int64(0)
+		if info.publishtime>0 {
+			ct = info.publishtime
+		}else {
+			ct, _ = strconv.ParseInt(info.id[:8], 16, 64)
+		}
+
+
+
+		//ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
 		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
 		k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
 		data := d.data[k]
@@ -384,6 +380,7 @@ L:
 			d.data[k] = data
 		}
 	}
+
 	return
 }
 
@@ -409,10 +406,6 @@ L:
 				if v.id == info.id {//正常重复
 					return false, v,""
 				}
-				//备份  新增发布时间为空-取入库时间-在为空取id
-				if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
-					continue   //是否为5天内数据
-				}
 				//类型分组
 				if info.subtype==v.subtype {
 					//站点配置--

+ 4 - 1
udpfilterdup/src/main.go

@@ -35,6 +35,9 @@ var (
 	DM           *datamap                 //
 	HM           *historymap                 //判重数据
 	lastid       = ""
+	/*
+	5da3f2c5a5cb26b9b79847fc
+	*/
 	//正则筛选相关
 	FilterRegTitle = regexp.MustCompile("^_$")
 	FilterRegTitle_1 = regexp.MustCompile("^_$")
@@ -158,7 +161,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 			"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
 		},
 	}
-	it := sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
+	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
 	updateExtract := [][]map[string]interface{}{}
 	pool := make(chan bool, 16)
 	wg := &sync.WaitGroup{}

+ 4 - 0
udps/main.go

@@ -25,7 +25,11 @@ func main() {
 
 
 	/*
+	ObjectId("5da3f2c5a5cb26b9b79847fc")
+	ObjectId("5db2735ba5cb26b9b7c99c6f")
 
+	5da3f2c5a5cb26b9b79847fc
+	5db2735ba5cb26b9b7c99c6f
 	*/
 	flag.StringVar(&sid, "sid", "", "开始id")
 	flag.StringVar(&eid, "eid", "", "结束id")