浏览代码

站点判重条件

apple 5 年之前
父节点
当前提交
19422d779f
共有 4 个文件被更改，包括 32 次插入和 22 次删除
  1. +12 -4
      udpfilterdup/src/config.json
  2. +10 -7
      udpfilterdup/src/datamap.go
  3. +7 -8
      udpfilterdup/src/main.go
  4. +3 -3
      udps/main.go

+ 12 - 4
udpfilterdup/src/config.json

@@ -2,11 +2,11 @@
     "udpport": ":1485",
     "dupdays": 5,
     "mongodb": {
-        "addr": "172.17.145.163:27080",
+        "addr": "192.168.3.207:27082",
         "pool": 15,
-        "db": "qfw",
-        "extract": "result_20190906",
-        "extract_copy": "result_20190906",
+        "db": "extract_kf",
+        "extract": "bidding_20190910_01",
+        "extract_copy": "bidding_20190910_01_copy",
         "bidding": "bidding_126"
     },
     "jkmail": {
@@ -27,5 +27,13 @@
             "memo": "创建招标数据索引"
         }
     ],
+    "site": [
+        {
+            "addr": "信息网"
+        },
+        {
+            "addr": "招标网"
+        }
+    ],
     "specialwords": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"
 }

+ 10 - 7
udpfilterdup/src/datamap.go

@@ -233,8 +233,16 @@ L:
 						c_3 :=conditionTAB(v.title,info.title,v.buyer,info.buyer) //标题+采购单位
 
 
-						//同站点判断
-						if info.site != "" && v.site == info.site {
+						site_b := false
+						if info.site != "" {
+							for _,v := range siteArr {
+								if info.site==v["addr"] {
+									site_b=true
+									break
+								}
+							}
+						}
+						if info.site != "" && v.site == info.site&&site_b {
 							if n>1||c_1||c_2 {
 								b = true
 								id = v.id
@@ -335,11 +343,6 @@ L:
 	}
 
 
-
-
-
-
-
 	//往预存数据 d 添加
 	if !b {
 		ct, _ := strconv.ParseInt(info.id[:8], 16, 64)

+ 7 - 8
udpfilterdup/src/main.go

@@ -6,7 +6,6 @@ package main
 
 import (
 	"encoding/json"
-	"flag"
 	"fmt"
 	"gopkg.in/mgo.v2/bson"
 	"log"
@@ -31,24 +30,25 @@ var (
 	dupdays      = 5                      //初始化判重范围
 	DM           *datamap                 //判重数据
 	FilterRegexp = regexp.MustCompile("^_$")
-	lastid       = ""
+	lastid       = "5d767728a5cb26b9b7748868"
 
 
 	FilterRegTitle = regexp.MustCompile("[0-9一二三四五六七八九十零123456789](次|包|标段)")
-
+	inV_n int   //无效数据数量
+	siteArr     []map[string]interface{} //站点
 	//56404035af5374672e00059c
 	//5d767728a5cb26b9b7748868
-
-	inV_n int
 )
 
 func init() {
-	flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
-	flag.Parse()
+	//flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
+	//flag.Parse()
 	//172.17.145.163:27080
 	util.ReadConfig(&Sysconfig)
 	nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
+	siteArr = util.ObjArrToMapArr(Sysconfig["site"].([]interface{}))
 	mconf = Sysconfig["mongodb"].(map[string]interface{})
+
 	mgo = &mongodb.MongodbSim{
 		MongodbAddr: mconf["addr"].(string),
 		DbName:      mconf["db"].(string),
@@ -61,7 +61,6 @@ func init() {
 	mgo.InitPool()
 
 
-
 	//测试临时注释
 	dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
 	//加载数据

+ 3 - 3
udps/main.go

@@ -30,12 +30,12 @@ func main() {
 	56404035af5374672e00059c
 	5d4da9c8a5cb26b9b7b6bbcd  100万
 	*/
-	flag.StringVar(&sid, "sid", "", "开始id")
-	flag.StringVar(&eid, "eid", "", "结束id")
+	flag.StringVar(&sid, "sid", "5d767728a5cb26b9b7748868", "开始id")
+	flag.StringVar(&eid, "eid", "5d77c881a5cb26b9b7de209d", "结束id")
 	flag.StringVar(&startDate, "start", "", "开始日期2006-01-02")
 	flag.StringVar(&endDate, "end", "", "结束日期2006-01-02")
 	flag.StringVar(&ip, "ip", "127.0.0.1", "ip")
-	flag.StringVar(&port, "port", "1112", "dup端口")
+	flag.StringVar(&port, "port", "1485", "dup端口")
 	flag.StringVar(&stype, "stype", "", "stype")
 	flag.Parse()
 	var startid, endid bson.ObjectId