Kaynağa Gözat

Merge branch 'dev3.4.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4.2

# Conflicts:
#	udpdataclear/udpSensitiveWords/util/udpdata.go
apple 4 yıl önce
ebeveyn
işleme
baba9f095d

+ 63 - 63
udpdataclear/udpSensitiveWords/util/udpdata.go

@@ -33,7 +33,7 @@ func ExtractUdp() {
 	eid := "9fffffffffffffffffffffff"
 	QuerySensitiveWords(sid,eid )*/
 }
-
+var syc sync.WaitGroup
 func QuerySensitiveWords(sid, eid string) {
 	log.Println("QuerySensitiveWords:", sid, eid)
 	objSid, err := primitive.ObjectIDFromHex(sid)
@@ -46,7 +46,7 @@ func QuerySensitiveWords(sid, eid string) {
 		log.Println("转换eid err", err)
 		return
 	}
-	var num, unum int64
+	var num, unum int
 	mgoSess := QfwMgo85.GetMgoConn()
 	defer QfwMgo85.DestoryMongoConn(mgoSess)
 	iter := mgoSess.DB(QfwMgo85.DbName).C(Collection).Find(map[string]interface{}{
@@ -55,47 +55,56 @@ func QuerySensitiveWords(sid, eid string) {
 			"$lte": objEid,
 		},
 	}).Select(Fields).Iter()
-
+	c := make(chan struct{}, 3)
 	for tmp := map[string]interface{}{}; iter.Next(&tmp); tmp = map[string]interface{}{} {
-		up := make(map[string]string)
-		if win, isok := tmp["winner"].(string); isok {
-			if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
-				tmp["winner"] = fname
-				up["winner"] = fmt.Sprintf("%s_%s", flog, win)
-			}
+		c <- struct{}{}
+		syc.Add(1)
+		go handletmp(tmp, &unum, c)
+		num++
+	}
+	syc.Wait()
+	log.Printf("%s--->%s 处理完成:%d,更新数:%d\n", sid, eid, num, unum)
+}
+func handletmp(tmp map[string]interface{}, unum *int, c <-chan struct{}) {
+	defer func() {
+		<-c
+		syc.Done()
+	}()
+	up := make(map[string]string)
+	if win, isok := tmp["winner"].(string); isok {
+		if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
+			tmp["winner"] = fname
+			up["winner"] = fmt.Sprintf("%s_%s", flog, win)
 		}
-		if win, isok := tmp["s_winner"].(string); isok {
-			if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
-				tmp["s_winner"] = fname
-				up["s_winner"] = fmt.Sprintf("%s_%s", flog, win)
-			}
+	}
+	if win, isok := tmp["s_winner"].(string); isok {
+		if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
+			tmp["s_winner"] = fname
+			up["s_winner"] = fmt.Sprintf("%s_%s", flog, win)
 		}
+	}
 
-		if agency, isok := tmp["agency"].(string); isok {
-			if fok, flog, fname := cheakname(agency); fok && flog != "" && flog != "tremQuery" {
-				tmp["agency"] = fname
-				up["agency"] = fmt.Sprintf("%s_%s", flog, agency)
-			}
+	if agency, isok := tmp["agency"].(string); isok {
+		if fok, flog, fname := cheakname(agency); fok && flog != "" && flog != "tremQuery" {
+			tmp["agency"] = fname
+			up["agency"] = fmt.Sprintf("%s_%s", flog, agency)
 		}
+	}
 
-		if buyer, isok := tmp["buyer"].(string); isok {
-			if fok, flog, fname := cheakname(buyer); fok && flog != "" && flog != "tremQuery" {
-				tmp["buyer"] = fname
-				up["buyer"] = fmt.Sprintf("%s_%s", flog, buyer)
-			}
+	if buyer, isok := tmp["buyer"].(string); isok {
+		if fok, flog, fname := cheakname(buyer); fok && flog != "" && flog != "tremQuery" {
+			tmp["buyer"] = fname
+			up["buyer"] = fmt.Sprintf("%s_%s", flog, buyer)
 		}
-		if len(up) > 0 {
-			unum++
-			tmp["log"] = up
-			id := tmp["_id"].(primitive.ObjectID).Hex()
-			log.Println(tmp)
-			QfwMgo85.UpdateById(Collection, id, map[string]interface{}{"$set":tmp})
-		}
-		num++
 	}
-	log.Printf("%s--->%s 处理完成:%d,更新数:%d\n",sid, eid,  num, unum)
+	if len(up) > 0 {
+		*unum++
+		tmp["log"] = up
+		id := tmp["_id"].(primitive.ObjectID).Hex()
+		log.Println(tmp)
+		QfwMgo85.UpdateById(Collection, id, map[string]interface{}{"$set": tmp})
+	}
 }
-
 func cheakname(name string) (up bool, log, rname string) {
 	filter := sensitive.New()
 	var cheaklog string
@@ -327,13 +336,7 @@ func dealWithEsData(name string, tmpid string) {
 
 }
 
-
-
-
-
-
-
-func TemporaryTest()  {
+func TemporaryTest() {
 	log.Println("测试......导出数据")
 
 
@@ -350,10 +353,9 @@ func TemporaryTest()  {
 
 	es_type, es_index = "unique_qy", "unique_qy"
 
-
 	q := map[string]interface{}{
-		"check_history":map[string]interface{}{
-			"$exists":0,
+		"check_history": map[string]interface{}{
+			"$exists": 0,
 		},
 	}
 	sess := QfwMgo85.GetMgoConn()
@@ -362,22 +364,17 @@ func TemporaryTest()  {
 	pool_es := make(chan bool, 3)
 	wg_es := &sync.WaitGroup{}
 	it := sess.DB(QfwMgo85.DbName).C("winner_err_new").Find(&q).Iter()
-	total,isOK:=0,0
+	total, isOK := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total % 100 == 0 {
-			log.Println("current index",total,isOK)
-		}
-
-		if total<91400 {
-			tmp = make(map[string]interface{})
-			continue
+		if total%100 == 0 {
+			log.Println("current index", total, isOK)
 		}
-		name:=ObjToString(tmp["name"])
+		name := ObjToString(tmp["name"])
 		tmpid := BsonTOStringId(tmp["_id"])
 		pool_es <- true
 		wg_es.Add(1)
 
-		go func(name string,tmpid string) {
+		go func(name string, tmpid string) {
 			defer func() {
 				<-pool_es
 				wg_es.Done()
@@ -385,34 +382,38 @@ func TemporaryTest()  {
 			//start := int(time.Now().Unix())
 			new_name,b,score,_ :=dealWithNameScoreRules(name)
 			//log.Println("耗时:",int(time.Now().Unix())-start,"秒",b,name,new_name,tmpid)
-			if new_name!="" && b {
+			if new_name != "" && b {
 				isOK++
-				QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
+				QfwMgo85.UpdateById("winner_err_new", tmpid, map[string]interface{}{
 					"$set": map[string]interface{}{
 						"is_word": 1,
 						"name_word" : new_name,
 						"score":score,
+
 					},
 				})
-			}else {
-				QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
+			} else {
+				QfwMgo85.UpdateById("winner_err_new", tmpid, map[string]interface{}{
 					"$set": map[string]interface{}{
+
 						"is_word": -1,
 						"name_word" : new_name,
 						"score":score,
+
 					},
 				})
 			}
 
-		}(name,tmpid)
+		}(name, tmpid)
 		tmp = make(map[string]interface{})
 	}
 
 	wg_es.Wait()
 
-	log.Println("is over",total,isOK)
+	log.Println("is over", total, isOK)
 }
 
+
 func TemporaryTestNewData()  {
 	log.Println("测试......导出新数据")
 	QfwMgo85 = &MongodbSim{
@@ -461,6 +462,7 @@ func TemporaryTestNewData()  {
 
 
 
+
 var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
 	"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
 	"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
@@ -495,11 +497,9 @@ var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|？|%|代码标识|删
 
 var uncon_strReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")
 
-
-
 var startWordReg_1 *regexp.Regexp = regexp.MustCompile("^(.{1,5})(省|市|县|州|自治区|特别行政区)")
 var startWordReg_2 *regexp.Regexp = regexp.MustCompile("^(北京|天津|重庆|上海|河北|山西|" +
 	"浙江|江西|湖北|吉林|海南|甘肃|广东|陕西|辽宁|山东|河南|云南|黑龙江|福建|贵州|江苏|安徽|" +
 	"湖南|四川|青海|台湾|新疆|内蒙古|宁夏|西藏|广西|澳门|香港)")
 
-var endWordReg *regexp.Regexp = regexp.MustCompile("(有限公司|有限责任公司)$")
+var endWordReg *regexp.Regexp = regexp.MustCompile("(有限公司|有限责任公司)$")

+ 1 - 1
udpdataclear/udpSensitiveWords/util/words.go

@@ -21,7 +21,7 @@ func dealWithNameScoreRules(name string) (string, bool, float64 , []map[string]i
 		query_name = strings.ReplaceAll(query_name,endstr,"")
 	}
 
-	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"unique_qy.name_word","query":"` + query_name + `"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
+	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"unique_qy.name_word","query":"` + query_name + `"}}],"must_not":[],"should":[]}},"from":"0","size":"200"}`
 	tmp := make(map[string]interface{})
 	json.Unmarshal([]byte(query), &tmp)
 	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()