Browse Source

备份恩

apple 4 years ago
parent
commit
eea2425165

+ 3 - 0
udpdataclear/udpSensitiveWords/go.mod

@@ -7,11 +7,14 @@ require (
 	github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
 	github.com/importcjj/sensitive v0.0.0-20200106142752-42d1c505be7b
 	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/olivere/elastic v6.2.35+incompatible // indirect
 	github.com/prometheus/client_golang v1.10.0
+	github.com/spf13/pflag v1.0.3 // indirect
 	go.mongodb.org/mongo-driver v1.5.1
 	google.golang.org/grpc v1.36.1
 	google.golang.org/protobuf v1.26.0
+	gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
 	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22
 	gopkg.in/olivere/elastic.v1 v1.0.1
 	gopkg.in/yaml.v2 v2.3.0

+ 25 - 10
udpdataclear/udpSensitiveWords/grpc_server/data.go

@@ -213,7 +213,7 @@ func dealWithDataMemory()  {
 			runtime.ReadMemStats(&m)
 			men :=util.ToMegaBytes(m.HeapAlloc)
 			log.Printf("current index %d\tos %.2f M",initnum, men)
-			if men>7.5*1024 { //7.5G
+			if men>5*1024 { //5G
 				saveIdArr = append(saveIdArr, map[string]string{
 					"start":start_id,
 					"end":util.BsonTOStringId(tmp["_id"]),
@@ -245,13 +245,17 @@ func dealWithDataMemory()  {
 
 func temporaryTest()  {
 	log.Println("测试......导出数据")
-	q := map[string]interface{}{}
+	q := map[string]interface{}{
+		"check_history":map[string]interface{}{
+			"$exists":0,
+		},
+	}
 	sess := MixDataMgo.GetMgoConn()
 	defer MixDataMgo.DestoryMongoConn(sess)
 	//多线程升索引
-	pool_es := make(chan bool, 10)
+	pool_es := make(chan bool, 20)
 	wg_es := &sync.WaitGroup{}
-	it := sess.DB(MixDataMgo.DbName).C("zk_company_test").Find(&q).Iter()
+	it := sess.DB(MixDataMgo.DbName).C("winner_err_new").Find(&q).Iter()
 	total,isOK:=0,0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total % 1000 == 0 {
@@ -259,27 +263,38 @@ func temporaryTest()  {
 		}
 
 		name:=util.ObjToString(tmp["name"])
+		tmpid := util.BsonTOStringId(tmp["_id"])
 		pool_es <- true
 		wg_es.Add(1)
 
-		go func(name string) {
+		go func(name string,tmpid string) {
 			defer func() {
 				<-pool_es
 				wg_es.Done()
 			}()
 			new_name,b :=dealWithNameScoreRules(name)
+			//log.Println(b,name,new_name,tmpid)
 			if new_name!="" && b {
 				isOK++
-				MixDataMgo.Save("zk_words_test_test", map[string]interface{}{
-					"name":name,
-					"new_name":new_name,
+				MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
+					"$set": map[string]interface{}{
+						"is_word": 1,
+						"name_word" : new_name,
+					},
+				})
+			}else {
+				MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
+					"$set": map[string]interface{}{
+						"is_word": -1,
+						"name_word" : new_name,
+					},
 				})
 			}
-		}(name)
+		}(name,tmpid)
 		tmp = make(map[string]interface{})
 	}
 
 	wg_es.Wait()
 
-	log.Println("is over",total)
+	log.Println("is over",total,isOK)
 }

+ 12 - 12
udpdataclear/udpSensitiveWords/grpc_server/main.go

@@ -41,6 +41,8 @@ var (
 	}, []string{"name"})
 )
 func init() {
+	return
+
 	yamlFile, err := ioutil.ReadFile(YAMLFILE)
 	if err != nil {
 		log.Fatalln("load conf error")
@@ -62,7 +64,7 @@ func init() {
 	Client_Es ,_= elastic.NewClient(http.DefaultClient, "http://192.168.3.11:9800")
 
 
-	es_type, es_index = "azktest","azktest"
+	es_type, es_index = "unique_qy","unique_qy"
 
 
 
@@ -74,25 +76,23 @@ func init() {
 
 func main() {
 
-/*
+	//临时测试
 	MixDataMgo = &util.MongodbSim{
-		MongodbAddr: "192.168.3.207:27092",
+		MongodbAddr: "172.17.4.187:27082,172.17.145.163:27083",
 		Size:        20,
-		DbName:      "zhengkun",
-		UserName:    "",
-		PassWord:    "",
+		DbName:      "mixdata",
+		UserName:    "fengweiqiang",
+		PassWord:    "fwq@123123",
 	}
 	MixDataMgo.InitPool()
 
-	Client_Es ,_= elastic.NewClient(http.DefaultClient, "http://192.168.3.11:9800")
-
-
-	es_type, es_index = "azktest","azktest"
+	Client_Es ,_= elastic.NewClient(http.DefaultClient, "http://172.17.145.170:9800")
 
+	es_type, es_index = "unique_qy","unique_qy"
 	temporaryTest()
+
+	//单独跑-分段
 	return
-*/
-	//淡赌跑断
 	if YamlConfig.IsAddTask == 0 {
 		initSensitiveWordsData() //初始化敏感词数据
 	} else {

+ 3 - 3
udpdataclear/udpSensitiveWords/grpc_server/words.go

@@ -15,12 +15,12 @@ func dealWithNameScoreRules(name string) (string,bool) {
 	if old_name=="" {
 		return "",false
 	}
-	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_2","query":"`+old_name+`"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
+	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"unique_qy.name_word","query":"`+old_name+`"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
 	tmp := make(map[string]interface{})
 	json.Unmarshal([]byte(query),&tmp)
 	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
 	if err != nil {
-		log.Println("从ES查询出错",name,old_name)
+		//log.Println("从ES查询出错",name,old_name)
 		return "",false
 	}
 	resNum := len(searchResult.Hits.Hits)
@@ -109,7 +109,7 @@ func escape(s string) string {
 		//}
 		if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
 			a := string([]rune{os.PathSeparator, '\\'})
-			news = news + a + string(c)
+			news = news + a + `\` + string(c)
 		} else {
 			news = news + string(c)
 		}