package main

import (
	"encoding/json"
	"log"
	"os"
	"strings"
	"unicode/utf8"

	"sensitiveWords.udp/util"
)

// dealWithNameScoreRules searches Elasticsearch for name and reports the name
// stored in the first hit together with whether that hit is accepted as a match.
//
// The input is passed through escape, spliced into a query_string query on the
// azktest.name_2 field (from 0, size 1) and sent through Client_Es. The first
// hit is then compared with the input by dealWithWordsRules. The hit is
// accepted when hit/total >= 1.0, or when hit/total >= 0.8 and the hit score
// is greater than 4.0.
//
// It returns ("", false) when the escaped name is empty, when the query text
// is not valid JSON, when the search fails, or when there are no hits.
// Otherwise it returns the first hit's name (possibly empty) and the
// acceptance flag.
func dealWithNameScoreRules(name string) (string, bool) {
	const (
		maxHits   = 5000 // result sets of this size or larger are not decoded
		fullRatio = 1.0  // ratio accepted regardless of score
		minRatio  = 0.8  // ratio accepted only together with minScore
		minScore  = 4.0
	)

	escaped := escape(name)
	if escaped == "" {
		return "", false
	}

	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_2","query":"` + escaped + `"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
	body := make(map[string]interface{})
	// The query is assembled by string concatenation, so a name that escape
	// does not neutralise (for example one containing a double quote) yields
	// invalid JSON. Searching with the resulting empty body would return
	// unrelated documents, so stop here instead.
	if err := json.Unmarshal([]byte(query), &body); err != nil {
		log.Println("building ES query failed", name, escaped, err)
		return "", false
	}

	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(body).Do()
	if err != nil {
		log.Println("从ES查询出错", name, escaped, err)
		return "", false
	}
	// Check Hits before touching Hits.Hits; it is a pointer and may be nil.
	if searchResult.Hits == nil || len(searchResult.Hits.Hits) == 0 {
		return "", false
	}

	hits := searchResult.Hits.Hits
	// first stays nil when the result set is too large; reading a nil map
	// yields nil values, which are handed to the util converters unchanged.
	var first map[string]interface{}
	if len(hits) < maxHits {
		top := hits[0]
		data := make(map[string]interface{})
		if top.Source != nil {
			if err := json.Unmarshal(*top.Source, &data); err != nil {
				log.Println("decoding ES hit failed", name, err)
			}
		}
		first = map[string]interface{}{"name": data["name"]}
		if top.Score != nil {
			first["score"] = *top.Score
		}
	} else {
		log.Println("查询结果太多,查询到:", len(hits), "条")
	}

	newName := util.ObjToString(first["name"])
	newScore := util.Float64All(first["score"])
	if newName == "" {
		return newName, false
	}

	// Analyse the hit ratio.
	total, hit := dealWithWordsRules(name, newName)
	if total <= 0 {
		// Nothing to compare against; avoid dividing by zero.
		return newName, false
	}
	ratio := float64(hit) / float64(total)
	accepted := ratio >= fullRatio || (ratio >= minRatio && newScore > minScore)
	return newName, accepted
}

// dealWithWordsRules counts how many fragments of info_name occur in
// source_name.
//
// Parentheses are removed from both names first. The fragments come from
// calculateWordCount. It returns (total, hit): total is the fragment count of
// source_name and hit is the number of info_name fragments that are substrings
// of source_name.
func dealWithWordsRules(info_name string, source_name string) (int, int) {
	// NOTE(review): SOURCE lists the ASCII pair "(" / ")" twice; the second
	// pair was presumably the full-width form before the text was normalised.
	// Both forms are stripped here, which is a superset of either reading.
	stripParens := strings.NewReplacer("(", "", ")", "", "（", "", "）", "")
	info_name = stripParens.Replace(info_name)
	source_name = stripParens.Replace(source_name)

	fragments, _ := calculateWordCount(info_name)
	_, total := calculateWordCount(source_name)

	hit := 0
	for _, frag := range fragments {
		if strings.Contains(source_name, frag) {
			hit++
		}
	}
	return total, hit
}

// calculateWordCount splits name into fragments and returns them with their
// count.
//
// With a window of space = 2 runes, a name of n runes has n-1 window
// positions. An empty name or a name shorter than the window yields an empty
// slice and 0.
//
// NOTE(review): the loop body and the return statement were missing from the
// damaged SOURCE (only "for i:=0;i" survived). They are reconstructed here as
// a sliding window of width space over the runes of name; confirm against
// version control.
func calculateWordCount(name string) ([]string, int) {
	const space = 2

	total := utf8.RuneCountInString(name) - (space - 1)
	if name == "" || total <= 0 {
		return make([]string, 0), 0
	}

	nameRune := []rune(name)
	arr := make([]string, 0, total)
	for i := 0; i < total; i++ {
		arr = append(arr, string(nameRune[i:i+space]))
	}
	return arr, len(arr)
}

// escape prefixes every query_string special character in s with an escape
// sequence and returns the result; other characters are copied unchanged.
//
// NOTE(review): the function header and the start of its loop were missing
// from the damaged SOURCE. The signature is inferred from the call
// escape(name) whose result is compared with ""; confirm against version
// control.
func escape(s string) string {
	// Characters that receive the escape prefix.
	// NOTE(review): SOURCE lists '(' , ')' and '?' more than once; the
	// duplicates were presumably full-width forms before normalisation, so
	// those are included as well.
	const special = "\\+-!():^[]{}~*?|&/#@><“”、." + "（）？"

	// NOTE(review): os.PathSeparator is '\\' on Windows but '/' on Unix-like
	// systems, so this prefix is two backslashes only on Windows. Elsewhere it
	// is "/\", which produces an invalid JSON escape once spliced into the
	// query text. Kept as in SOURCE; it looks like it should be a literal
	// pair of backslashes, so confirm the intended platform and fix.
	prefix := string([]rune{os.PathSeparator, '\\'})

	var out strings.Builder
	out.Grow(len(s))
	for _, c := range s {
		if strings.ContainsRune(special, c) {
			out.WriteString(prefix)
		}
		out.WriteRune(c)
	}
	return out.String()
}