Browse Source

正式定时-删除无效数据

apple 5 years ago
parent
commit
dd702a84c2
3 changed files with 14 additions and 15 deletions
  1. + 8 - 8
      udpfilterdup/src/config.json
  2. + 3 - 3
      udpfilterdup/src/datamap.go
  3. + 3 - 4
      udpfilterdup/src/main.go

+ 8 - 8
udpfilterdup/src/config.json

@@ -2,13 +2,13 @@
     "udpport": ":1485",
     "dupdays": 5,
     "mongodb": {
-        "addr": "192.168.3.207:27092",
+        "addr": "172.17.4.187:27083",
         "pool": 10,
-        "db": "extract_kf",
-        "extract": "zk_test_2019_new",
-        "extract_back": "zk_test_2019_new",
+        "db": "qfw",
+        "extract": "result_file_20200410_test",
+        "extract_back": "result_file_20200409",
         "site": {
-            "dbname": "extract_kf",
+            "dbname": "qfw",
             "coll": "site"
         }
     },
@@ -19,11 +19,11 @@
     "nextNode": [],
     "threads": 1,
     "isMerger": false,
-    "isSort":true,
+    "isSort":false,
     "lowHeavy":false,
-    "timingTask":false,
+    "timingTask":true,
     "timingSpanDay": 2,
-    "timingPubScope": 180,
+    "timingPubScope": 360,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
     "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)",

+ 3 - 3
udpfilterdup/src/datamap.go

@@ -387,9 +387,9 @@ L:
 					return false, v, ""
 				}
 
-				if v.id == "5c761a4fa5cb26b9b73d9512" &&info.id=="5c767bd1a5cb26b9b7a61597" {
-					log.Println("测试数据")
-				}
+				//if v.id == "5c761a4fa5cb26b9b73d9512" &&info.id=="5c767bd1a5cb26b9b7a61597" {
+				//	log.Println("测试数据")
+				//}
 
 				if info.subtype == v.subtype {
 					if info.site != "" {

+ 3 - 4
udpfilterdup/src/main.go

@@ -698,7 +698,6 @@ func timedTaskOnce() {
 		//取-符合-发布时间半年内的数据
 		if util.IntAll(tmp_start["dataging"]) == 1 {
 			pubtime := util.Int64All(tmp_start["publishtime"])
-			//log.Println(startNum,"--",pubtime,"--",between_time)
 			if pubtime > 0 && pubtime >= between_time {
 				lasttime = pubtime
 				log.Println("找到第一条符合条件的数据")
@@ -735,7 +734,7 @@ func timedTaskOnce() {
 	pre_publishtime := int64(0)
 	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
 		if n%10000 == 0 {
-			log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
+			log.Println("定时任务判重当前数量current:", n, tmp["_id"], "repeateN:", repeateN)
 		}
 
 		//log.Println("当前测试重复数量:",repeateN)
@@ -772,7 +771,7 @@ func timedTaskOnce() {
 			info := NewInfo(tmp)
 			if !LowHeavy { //是否进行低质量数据判重
 				if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
-					log.Println("测试-无效数据")
+					log.Println("无效数据")
 					updateExtract = append(updateExtract, []map[string]interface{}{
 						map[string]interface{}{
 							"_id": tmp["_id"],
@@ -793,7 +792,7 @@ func timedTaskOnce() {
 			}
 
 			b, source, reason := DM.check(info)
-			log.Println("判重结果", b, reason)
+			//log.Println("判重结果", b, reason)
 			if b { //有重复,生成更新语句,更新抽取和更新招标
 				repeateN++
 				var is_replace = false