package util

import (
	"encoding/json"
	"log"
	"strings"
	"unicode"
	"unicode/utf8"
)

// dealWithNameScoreRules searches Elasticsearch for the indexed name closest
// to name and decides whether that hit may be accepted as a match.
//
// It returns the best hit's name ("" when nothing was found), whether the hit
// is accepted, and the raw result list (one map with "name" and "score" keys
// per hit). The list is nil when the sanitized name is empty, when the search
// fails, or when the response carries no hits section.
//
// Acceptance rules, evaluated in order on the bigram hit proportion returned
// by dealWithWordsRules and on the hit score:
//  1. proportion >= 1.0: accepted.
//  2. proportion >= 0.8 and score > 4.0: accepted unless the startWordReg_1
//     matches of both names are non-empty and neither contains the other, or
//     the startWordReg_2 matches of both names are non-empty and differ.
//  3. score > 4.0: accepted only when both names are equal after removing a
//     fixed list of common company-name words.
//  4. otherwise: rejected.
func dealWithNameScoreRules(name string) (string, bool, []map[string]interface{}) {
	oldName := escapeNew(name)
	if oldName == "" {
		return "", false, nil
	}

	// Remove every occurrence of the first endWordReg match from the text that
	// is sent to the index (endWordReg is defined elsewhere in the package).
	queryName := oldName
	if endStr := endWordReg.FindString(queryName); endStr != "" {
		queryName = strings.ReplaceAll(queryName, endStr, "")
	}

	// Build the request body as a value instead of concatenating JSON text and
	// decoding it again: there is no decode error to lose and no way for the
	// name to break out of the JSON string.
	query := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []interface{}{
					map[string]interface{}{
						"query_string": map[string]interface{}{
							"default_field": "unique_qy.name_word",
							"query":         queryName,
						},
					},
				},
				"must_not": []interface{}{},
				"should":   []interface{}{},
			},
		},
		"from": "0",
		"size": "1",
	}

	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(query).Do()
	if err != nil {
		log.Println("ES查询出错", name, oldName, err)
		return "", false, nil
	}
	if searchResult.Hits == nil {
		return "", false, nil
	}

	hits := searchResult.Hits.Hits
	res := make([]map[string]interface{}, len(hits))
	if len(hits) < 1000 {
		for i, h := range hits {
			data := make(map[string]interface{})
			// Source and Score are pointers; guard them so a sparse hit cannot
			// panic the caller.
			if h.Source != nil {
				if err := json.Unmarshal(*h.Source, &data); err != nil {
					log.Println("ES结果解析出错", name, oldName, err)
				}
			}
			entry := map[string]interface{}{
				"name":  data["name"],
				"score": float64(0),
			}
			if h.Score != nil {
				entry["score"] = *h.Score
			}
			res[i] = entry
		}
	} else {
		// Too many results: the entries of res are left as nil maps.
		log.Println("查询结果太多,查询到:", len(hits), "条")
	}

	// Only the first hit is scored.
	newName, newScore := "", float64(0)
	if len(res) > 0 {
		newName = ObjToString(res[0]["name"])
		newScore = Float64All(res[0]["score"])
	}
	if newName == "" {
		return newName, false, res
	}

	// Share of bigrams hit. total is zero when the hit name has fewer than two
	// characters; treat that as "no overlap" rather than dividing by zero.
	total, hitCount := dealWithWordsRules(name, newName)
	proportion := 0.0
	if total > 0 {
		proportion = float64(hitCount) / float64(total)
	}

	switch {
	case proportion >= 1.0:
		return newName, true, res

	case proportion >= 0.8 && newScore > 4.0:
		a, b := startWordReg_1.FindString(name), startWordReg_1.FindString(newName)
		if a != "" && b != "" && !strings.Contains(a, b) && !strings.Contains(b, a) {
			return newName, false, res
		}
		a, b = startWordReg_2.FindString(name), startWordReg_2.FindString(newName)
		if a != "" && b != "" && a != b {
			return newName, false, res
		}
		return newName, true, res

	case newScore > 4.0:
		// Compare the names with common filler words removed. The words are
		// removed one after another (not in a single pass) on purpose: removing
		// one word can expose another.
		a, b := name, newName
		for _, word := range []string{"责任", "有限", "科技", "工程"} {
			a = strings.ReplaceAll(a, word, "")
			b = strings.ReplaceAll(b, word, "")
		}
		return newName, a == b, res
	}

	return newName, false, res
}

// dealWithWordsRules reports how well info_name overlaps source_name.
//
// Parentheses are stripped from both names first. The first result is the
// number of two-character windows in source_name; the second is how many of
// the two-character windows of info_name occur somewhere in source_name.
//
// NOTE(review): the previous code stripped the ASCII parentheses twice, which
// made the second pair of calls a no-op. The repeated pair is presumed to have
// been meant for the full-width forms U+FF08 and U+FF09, so both forms are
// stripped here — confirm against the upstream data.
func dealWithWordsRules(info_name string, source_name string) (int, int) {
	for _, bracket := range []string{"(", ")", "\uFF08", "\uFF09"} {
		info_name = strings.ReplaceAll(info_name, bracket, "")
		source_name = strings.ReplaceAll(source_name, bracket, "")
	}

	infoWords, _ := calculateWordCount(info_name)
	_, total := calculateWordCount(source_name)

	hit := 0
	for _, word := range infoWords {
		if strings.Contains(source_name, word) {
			hit++
		}
	}
	return total, hit
}

// calculateWordCount splits name into overlapping two-character windows
// (counted in runes, not bytes) and returns them together with their count.
// A name shorter than two characters yields an empty, non-nil slice and 0.
func calculateWordCount(name string) ([]string, int) {
	const width = 2

	count := utf8.RuneCountInString(name) - (width - 1)
	if count <= 0 {
		return []string{}, 0
	}

	runes := []rune(name)
	words := make([]string, 0, count)
	for i := 0; i < count; i++ {
		words = append(words, string(runes[i:i+width]))
	}
	return words, len(words)
}

// Retired implementation of escape, kept commented out for reference.
//func escape(s string) string {
//	news := ""
//	s = strings.ReplaceAll(s," ","")
//	for _, c := range s {
//		//if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
//		//	news = news + string(c)
//		//}else if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
//		//	|| c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
//		//	a := string([]rune{os.PathSeparator, '\\'})
//		//	news = news + a + string(c)
//		//} else {
//		//	return ""
//		//}
//		if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
//			a := string([]rune{os.PathSeparator,'\\'})
//			//news = news + a + `\` + string(c)
//			news = news + a + string(c)
//		} else {
//			news = news + string(c)
//		}
//
//	}
//	return news
//}

// escapeNew returns s reduced to the characters that are Han script, Unicode
// numbers, or Unicode letters; every other character (spaces, punctuation,
// symbols, invalid UTF-8 bytes) is dropped. The result may be empty.
func escapeNew(s string) string {
	// A Builder keeps this linear; the kept runes never need more bytes than s.
	var kept strings.Builder
	kept.Grow(len(s))
	for _, c := range s {
		if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
			kept.WriteRune(c)
		}
	}
	return kept.String()
}