Browse Source

修改打印

apple 5 years ago
parent
commit
c5bd3d428a
3 changed files with 22 additions and 41 deletions
  1. 2 2
      udpfilterdup/src/config.json
  2. 9 28
      udpfilterdup/src/datamap.go
  3. 11 11
      udpfilterdup/src/main.go

+ 2 - 2
udpfilterdup/src/config.json

@@ -21,10 +21,10 @@
     "threads": 1,
     "isMerger": false,
     "isSort":true,
-    "lowHeavy":false,
+    "lowHeavy":true,
     "timingTask":false,
     "timingSpanDay": 3,
-    "timingPubScope": 720,
+    "timingPubScope": 1080,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
     "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批|期)",

+ 9 - 28
udpfilterdup/src/datamap.go

@@ -261,10 +261,8 @@ L:
 			for _, v := range data {
 				reason = ""
 				if v.id == info.id { //正常重复
-					//log.Println("相同id",info.id)
 					return false, v, ""
 				}
-
 				//if v.id == "5c761a4fa5cb26b9b73d9512" &&info.id=="5c767bd1a5cb26b9b7a61597" {
 				//	log.Println("测试数据")
 				//}
@@ -579,17 +577,12 @@ func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
 			isValue++
 		}
 		if isValue==0 {
-			//if info.site!=v.site {
-			//	log.Println("符合低质量条件条件0",info.id,"--",v.id)
-			//}
-			//log.Println("符合低质量条件条件0",info.id,"--",v.id)
-			reason = reason + "---要素均为空,标题包含关系"
+			reason = reason + "---低质量-要素均为空,标题包含关系"
 			return true, reason
 		}else if isValue==1 {
 			isMeet := false
 			if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
-				//log.Println("符合低质量条件条件1",info.id,"--",v.id)
-				reason = reason + "---有且一个要素组合"
+				reason = reason + "---低质量-有且一个要素组合"
 				return true, reason
 			}
 		}else {
@@ -841,7 +834,7 @@ func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 		p11 = true
 	}
 
-	if (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) ||
+	if  (p1 && p2 && p4) || (p1 && p2 && p9) ||
 		(p1 && p2 && p10) || (p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
 		(p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
 		(p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
@@ -933,15 +926,6 @@ func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 		ss = ss + "p3(编号组)-"
 		p3 = true
 	}
-	//if v.bidamount != 0 && v.bidamount == info.bidamount {
-	//	ss = ss + "p5(中标金)-"
-	//	p5 = true
-	//}
-	//if v.winner != "" && v.winner == info.winner {
-	//	ss = ss + "p6(中标人)-"
-	//	p6 = true
-	//}
-
 	if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
 		ss = ss + "p5(中标金)-"
 		p5 = true
@@ -1011,16 +995,13 @@ func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
 //中标_C
 func winningRepeat_C(v *Info, info *Info) bool {
 
-	//if v.bidamount != 0 && info.bidamount != 0 && v.bidamount != info.bidamount {
-	//	return true
-	//}
-	//if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
-	//	return true
-	//}
+	if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
+		return true
+	}
 	//
-	//if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
-	//	return true
-	//}
+	if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
+		return true
+	}
 	//原始地址...
 	if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
 		return true

+ 11 - 11
udpfilterdup/src/main.go

@@ -51,7 +51,7 @@ var (
 )
 
 func init() {
-	
+
 	flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
 	flag.StringVar(&sid, "sid", "", "开始id")
 	flag.StringVar(&eid, "eid", "", "结束id")
@@ -141,9 +141,9 @@ func mainT() {
 		}
 		mapinfo["gtid"] = sid
 		mapinfo["lteid"] = eid
-		//mapinfo["stop"] = "true"
+		mapinfo["stop"] = "true"
 		task([]byte{}, mapinfo)
-		time.Sleep(99999 * time.Second)
+		time.Sleep(99999 * time.Hour)
 	}
 }
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
@@ -204,17 +204,16 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	defer mgo.DestoryMongoConn(sess)
 
 	//是否排序
-	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("_id").Iter()
+	sortName :="_id"
 	if Is_Sort {
-		log.Println("排序:publishtime")
-		it = sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
+		sortName = "publishtime"
 	}
+	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort(sortName).Iter()
 	updateExtract := [][]map[string]interface{}{}
 	log.Println("线程数:", threadNum)
 	pool := make(chan bool, threadNum)
 	wg := &sync.WaitGroup{}
 	n, repeateN := 0, 0
-
 	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
 		if n%10000 == 0 {
 			log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
@@ -244,7 +243,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 							},
 						},
 					})
-					if len(updateExtract) > 500 {
+					if len(updateExtract) >= 200 {
 						mgo.UpSertBulk(extract, updateExtract...)
 						updateExtract = [][]map[string]interface{}{}
 					}
@@ -351,10 +350,11 @@ func task(data []byte, mapInfo map[string]interface{}) {
 						repeat_id = info.id
 					}
 				}
+				if repeateN%90==0&&repeateN>0 {
+					fmt.Println("最终结果","目标id:",repeat_idMap["_id"])
+				}
 
 
-				log.Println("最终结果","目标id:",repeat_idMap["_id"])
-
 
 				//重复数据打标签
 				updateExtract = append(updateExtract, []map[string]interface{}{
@@ -370,7 +370,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 
 			}
 		}(tmp)
-		if len(updateExtract) > 500 {
+		if len(updateExtract) >= 200 {
 			mgo.UpSertBulk(extract, updateExtract...)
 			updateExtract = [][]map[string]interface{}{}
 		}