fengweiqiang 4 年之前
父節點
當前提交
a4187490e9
共有 1 個文件被更改,包括 63 次插入和 73 次删除
  1. 63 73
      udpdataclear/udpSensitiveWords/util/udpdata.go

+ 63 - 73
udpdataclear/udpSensitiveWords/util/udpdata.go

@@ -33,7 +33,7 @@ func ExtractUdp() {
 	eid := "9fffffffffffffffffffffff"
 	QuerySensitiveWords(sid,eid )*/
 }
-
+var syc sync.WaitGroup
 func QuerySensitiveWords(sid, eid string) {
 	log.Println("QuerySensitiveWords:", sid, eid)
 	objSid, err := primitive.ObjectIDFromHex(sid)
@@ -46,7 +46,7 @@ func QuerySensitiveWords(sid, eid string) {
 		log.Println("转换eid err", err)
 		return
 	}
-	var num, unum int64
+	var num, unum int
 	mgoSess := QfwMgo85.GetMgoConn()
 	defer QfwMgo85.DestoryMongoConn(mgoSess)
 	iter := mgoSess.DB(QfwMgo85.DbName).C(Collection).Find(map[string]interface{}{
@@ -55,47 +55,56 @@ func QuerySensitiveWords(sid, eid string) {
 			"$lte": objEid,
 		},
 	}).Select(Fields).Iter()
-
+	c := make(chan struct{}, 3)
 	for tmp := map[string]interface{}{}; iter.Next(&tmp); tmp = map[string]interface{}{} {
-		up := make(map[string]string)
-		if win, isok := tmp["winner"].(string); isok {
-			if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
-				tmp["winner"] = fname
-				up["winner"] = fmt.Sprintf("%s_%s", flog, win)
-			}
+		c <- struct{}{}
+		syc.Add(1)
+		go handletmp(tmp, &unum, c)
+		num++
+	}
+	syc.Wait()
+	log.Printf("%s--->%s 处理完成:%d,更新数:%d\n", sid, eid, num, unum)
+}
+func handletmp(tmp map[string]interface{}, unum *int, c <-chan struct{}) {
+	defer func() {
+		<-c
+		syc.Done()
+	}()
+	up := make(map[string]string)
+	if win, isok := tmp["winner"].(string); isok {
+		if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
+			tmp["winner"] = fname
+			up["winner"] = fmt.Sprintf("%s_%s", flog, win)
 		}
-		if win, isok := tmp["s_winner"].(string); isok {
-			if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
-				tmp["s_winner"] = fname
-				up["s_winner"] = fmt.Sprintf("%s_%s", flog, win)
-			}
+	}
+	if win, isok := tmp["s_winner"].(string); isok {
+		if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
+			tmp["s_winner"] = fname
+			up["s_winner"] = fmt.Sprintf("%s_%s", flog, win)
 		}
+	}
 
-		if agency, isok := tmp["agency"].(string); isok {
-			if fok, flog, fname := cheakname(agency); fok && flog != "" && flog != "tremQuery" {
-				tmp["agency"] = fname
-				up["agency"] = fmt.Sprintf("%s_%s", flog, agency)
-			}
+	if agency, isok := tmp["agency"].(string); isok {
+		if fok, flog, fname := cheakname(agency); fok && flog != "" && flog != "tremQuery" {
+			tmp["agency"] = fname
+			up["agency"] = fmt.Sprintf("%s_%s", flog, agency)
 		}
+	}
 
-		if buyer, isok := tmp["buyer"].(string); isok {
-			if fok, flog, fname := cheakname(buyer); fok && flog != "" && flog != "tremQuery" {
-				tmp["buyer"] = fname
-				up["buyer"] = fmt.Sprintf("%s_%s", flog, buyer)
-			}
-		}
-		if len(up) > 0 {
-			unum++
-			tmp["log"] = up
-			id := tmp["_id"].(primitive.ObjectID).Hex()
-			log.Println(tmp)
-			QfwMgo85.UpdateById(Collection, id, map[string]interface{}{"$set":tmp})
+	if buyer, isok := tmp["buyer"].(string); isok {
+		if fok, flog, fname := cheakname(buyer); fok && flog != "" && flog != "tremQuery" {
+			tmp["buyer"] = fname
+			up["buyer"] = fmt.Sprintf("%s_%s", flog, buyer)
 		}
-		num++
 	}
-	log.Printf("%s--->%s 处理完成:%d,更新数:%d\n",sid, eid,  num, unum)
+	if len(up) > 0 {
+		*unum++
+		tmp["log"] = up
+		id := tmp["_id"].(primitive.ObjectID).Hex()
+		log.Println(tmp)
+		QfwMgo85.UpdateById(Collection, id, map[string]interface{}{"$set": tmp})
+	}
 }
-
 func cheakname(name string) (up bool, log, rname string) {
 	filter := sensitive.New()
 	var cheaklog string
@@ -322,13 +331,7 @@ func dealWithEsData(name string, tmpid string) {
 
 }
 
-
-
-
-
-
-
-func TemporaryTest()  {
+func TemporaryTest() {
 	log.Println("测试......导出数据")
 
 	QfwMgo85 = &MongodbSim{
@@ -344,10 +347,9 @@ func TemporaryTest()  {
 
 	es_type, es_index = "unique_qy", "unique_qy"
 
-
 	q := map[string]interface{}{
-		"check_history":map[string]interface{}{
-			"$exists":0,
+		"check_history": map[string]interface{}{
+			"$exists": 0,
 		},
 	}
 	sess := QfwMgo85.GetMgoConn()
@@ -356,61 +358,51 @@ func TemporaryTest()  {
 	pool_es := make(chan bool, 3)
 	wg_es := &sync.WaitGroup{}
 	it := sess.DB(QfwMgo85.DbName).C("winner_err_new").Find(&q).Iter()
-	total,isOK:=0,0
+	total, isOK := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total % 100 == 0 {
-			log.Println("current index",total,isOK)
+		if total%100 == 0 {
+			log.Println("current index", total, isOK)
 		}
 
-		name:=ObjToString(tmp["name"])
+		name := ObjToString(tmp["name"])
 		tmpid := BsonTOStringId(tmp["_id"])
 		pool_es <- true
 		wg_es.Add(1)
 
-		go func(name string,tmpid string) {
+		go func(name string, tmpid string) {
 			defer func() {
 				<-pool_es
 				wg_es.Done()
 			}()
 			//start := int(time.Now().Unix())
-			new_name,b,_ :=dealWithNameScoreRules(name)
+			new_name, b, _ := dealWithNameScoreRules(name)
 			//log.Println("耗时:",int(time.Now().Unix())-start,"秒",b,name,new_name,tmpid)
-			if new_name!="" && b {
+			if new_name != "" && b {
 				isOK++
-				QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
+				QfwMgo85.UpdateById("winner_err_new", tmpid, map[string]interface{}{
 					"$set": map[string]interface{}{
-						"is_word": 1,
-						"name_word" : new_name,
+						"is_word":   1,
+						"name_word": new_name,
 					},
 				})
-			}else {
-				QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
+			} else {
+				QfwMgo85.UpdateById("winner_err_new", tmpid, map[string]interface{}{
 					"$set": map[string]interface{}{
-						"is_word": -1,
-						"name_word" : new_name,
+						"is_word":   -1,
+						"name_word": new_name,
 					},
 				})
 			}
 
-		}(name,tmpid)
+		}(name, tmpid)
 		tmp = make(map[string]interface{})
 	}
 
 	wg_es.Wait()
 
-	log.Println("is over",total,isOK)
+	log.Println("is over", total, isOK)
 }
 
-
-
-
-
-
-
-
-
-
-
 var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
 	"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
 	"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
@@ -445,11 +437,9 @@ var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|？|%|代码标识|删
 
 var uncon_strReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")
 
-
-
 var startWordReg_1 *regexp.Regexp = regexp.MustCompile("^(.{1,5})(省|市|县|州|自治区|特别行政区)")
 var startWordReg_2 *regexp.Regexp = regexp.MustCompile("^(北京|天津|重庆|上海|河北|山西|" +
 	"浙江|江西|湖北|吉林|海南|甘肃|广东|陕西|辽宁|山东|河南|云南|黑龙江|福建|贵州|江苏|安徽|" +
 	"湖南|四川|青海|台湾|新疆|内蒙古|宁夏|西藏|广西|澳门|香港)")
 
-var endWordReg *regexp.Regexp = regexp.MustCompile("(有限公司|有限责任公司)$")
+var endWordReg *regexp.Regexp = regexp.MustCompile("(有限公司|有限责任公司)$")