Przeglądaj źródła

判重相关功能-更新-前置条件等

apple 5 lat temu
rodzic
commit
cf6b11dae4
3 zmienione pliki: 97 dodań i 30 usunięć
  1. 0 8
      udpfilterdup/src/config.json
  2. 38 1
      udpfilterdup/src/datamap.go
  3. 59 21
      udpfilterdup/src/main.go

+ 0 - 8
udpfilterdup/src/config.json

@@ -27,14 +27,6 @@
             "memo": "创建招标数据索引"
         }
     ],
-    "site": [
-        {
-            "addr": "信息网"
-        },
-        {
-            "addr": "招标网"
-        }
-    ],
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包)",
     "specialtitle_2": "项目([0-9a-zA-Z一二三四五六七八九十零123456789])",

+ 38 - 1
udpfilterdup/src/datamap.go

@@ -268,8 +268,36 @@ L:
 				if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
 					continue   //是否为5天内数据
 				}
-				//类型分组-相同类型继续
+				//类型分组
 				if info.subtype==v.subtype {
+					//站点配置--
+					if info.site!="" {
+						dict := SiteMap[info.site].(map[string]string)
+						if dict!=nil{
+							//临时改变--具体值
+							if info.area=="全国" &&dict["area"]!="" {
+								info.area = dict["area"]
+								info.city = dict["city"]
+							}else {
+								if info.city=="" &&dict["city"]!="" {
+									info.area = dict["area"]
+									info.city = dict["city"]
+								}
+							}
+						}
+					}
+
+					//前置条件
+					if info.titleSpecialWord&&info.title!=v.title&&v.title!="" {
+						continue
+					}
+
+					if info.buyer != "" &&v.buyer == info.buyer {
+						//满足标题
+						if len([]rune(v.title)) >= 10 && len([]rune(info.title)) >= 10 && v.title != info.title && (info.specialWord || v.specialWord) {
+							continue
+						}
+					}
 					//代理机构相同-非空相等
 					if v.agency != "" && info.agency != "" && v.agency == info.agency {
 						reason = fmt.Sprintf(reason,"同机构,")
@@ -718,6 +746,12 @@ func tenderRepeat_C(v *Info ,info *Info) bool {
 	if v.agencyaddr!=""&&info.agencyaddr!=""&&v.agencyaddr!=info.agencyaddr {
 		return true
 	}
+	if info.specialWord||v.specialWord||info.titleSpecialWord||v.titleSpecialWord{
+		return true
+	}
+
+
+
 	return false
 }
 
@@ -811,6 +845,9 @@ func winningRepeat_C(v *Info ,info *Info) bool {
 	}
 	//原始地址...
 
+	if info.specialWord||v.specialWord||info.titleSpecialWord||v.titleSpecialWord{
+		return true
+	}
 
 	return false
 }

+ 59 - 21
udpfilterdup/src/main.go

@@ -18,11 +18,14 @@ import (
 	"time"
 )
 
+
+
+
 var (
 	Sysconfig    map[string]interface{} //配置文件
 	mconf        map[string]interface{} //mongodb配置信息
 	mgo          *mongodb.MongodbSim    //mongodb操作对象
-
+	siteMgo             *mongodb.MongodbSim
 	//mgoTest          *mongodb.MongodbSim    //mongodb操作对象
 
 	extract      string
@@ -42,8 +45,9 @@ var (
 	FilterRegTitle_2 = regexp.MustCompile("^_$")
 
 
-	siteArr     []map[string]interface{} //站点
-	inV_n int   //无效数据数量
+
+
+	SiteMap  map[string]interface{} //站点map
 )
 
 func init() {
@@ -52,7 +56,6 @@ func init() {
 	//172.17.145.163:27080
 	util.ReadConfig(&Sysconfig)
 	nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
-	siteArr = util.ObjArrToMapArr(Sysconfig["site"].([]interface{}))
 	mconf = Sysconfig["mongodb"].(map[string]interface{})
 
 	mgo = &mongodb.MongodbSim{
@@ -70,8 +73,8 @@ func init() {
 	dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
 	//加载数据
 	DM = NewDatamap(dupdays, lastid)
-	fmt.Println(DM.keys)
-	fmt.Println(DM.data)
+	//fmt.Println(DM.keys)
+	//fmt.Println(DM.data)
 	FilterRegTitle = regexp.MustCompile(util.ObjToString(Sysconfig["specialwords"]))
 	FilterRegTitle_1 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_1"]))
 	FilterRegTitle_2 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_2"]))
@@ -79,15 +82,34 @@ func init() {
 
 
 
-	//数据库
-	//mongodb.InitMongodbPool(5, "192.168.3.207:27081", "")
+	//站点相关数据库
+	mongodb.InitMongodbPool(5, "192.168.3.207:27082", "")
+
+	siteMgo = &mongodb.MongodbSim{
+		MongodbAddr: "192.168.3.207:27082",
+		Size:        5,
+		DbName:      "zhaolongyue",
+	}
+	siteMgo.InitPool()
 
-	//mgoTest = &mongodb.MongodbSim{
-	//	MongodbAddr: "192.168.3.207:27081",
-	//	Size:        5,
-	//	DbName:      "qfw",
-	//}
-	//mgoTest.InitPool()
+
+	SiteMap = make(map[string]interface{},0)
+
+	start := int(time.Now().Unix())
+	//站点配置
+	sess_site := siteMgo.GetMgoConn()
+	defer sess_site.Close()
+	res_site := sess_site.DB("zhaolongyue").C("site").Find(nil).Sort("_id").Iter()
+	for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
+			data_map := map[string]string{
+				"area":util.ObjToString(site_dict["area"]),
+				"city":util.ObjToString(site_dict["city"]),
+				"district":util.ObjToString(site_dict["district"]),
+			}
+		SiteMap[site_dict["site"].(string)]= data_map
+	}
+	
+	fmt.Printf("用时:%d秒,%d个",int(time.Now().Unix())-start,len(SiteMap))
 
 
 }
@@ -225,6 +247,7 @@ func mainTest()  {
 
 
 func main() {
+	return
 	go checkMapJob()
 
 	updport := Sysconfig["udpport"].(string)
@@ -306,7 +329,6 @@ func task(data []byte, mapInfo map[string]interface{}) {
 
 			//是否为无效数据
 			if invalidData(info.buyer,info.projectname,info.projectcode) {
-				inV_n++
 				mapLock.Lock()
 				updateExtract = append(updateExtract, []map[string]interface{}{
 					map[string]interface{}{
@@ -459,7 +481,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 		mgo.UpdateBulk(extract, updateExtract...)
 		//mgo.UpdateBulk(bidding, updateBidding...)
 	}
-	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"],"无效数据:",inV_n)
+	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"])
 
 	//任务完成,开始发送广播通知下面节点
 	if n > repeateN && mapInfo["stop"] == nil {
@@ -554,7 +576,6 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 
 			//是否为无效数据
 			if invalidData(info.buyer,info.projectname,info.projectcode) {
-				inV_n++
 				mapLock.Lock()
 				updateExtract = append(updateExtract, []map[string]interface{}{
 					map[string]interface{}{
@@ -578,9 +599,26 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 
 					if reason == "未判重记录" {
 						//把info的数据判重的标签更换,并新增字段
-
-
-
+						mapLock.Lock()
+						//构建数据库更新用到的
+						//对比的数据打判重标签
+						DM.replaceSourceData(info,info.id) //替换即添加
+						updateExtract = append(updateExtract, []map[string]interface{}{
+							map[string]interface{}{
+								"_id": tmp["_id"],
+							},
+							map[string]interface{}{
+								"$set": map[string]interface{}{
+									"repeat":   0,
+									"repeatid": "-1",
+								},
+							},
+						})
+						if len(updateExtract) > 500 {
+							mgo.UpdateBulk(extract, updateExtract...)
+							updateExtract = [][]map[string]interface{}{}
+						}
+						mapLock.Unlock()
 					}else {
 						repeateN++
 						mapLock.Lock()
@@ -688,7 +726,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 		mgo.UpdateBulk(extract, updateExtract...)
 		//mgo.UpdateBulk(bidding, updateBidding...)
 	}
-	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"],"无效数据:",inV_n)
+	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"])