|
@@ -0,0 +1,136 @@
|
|
|
+package util
|
|
|
+
|
|
|
+import (
|
|
|
+ "encoding/json"
|
|
|
+ "log"
|
|
|
+ "strings"
|
|
|
+ "unicode"
|
|
|
+ "unicode/utf8"
|
|
|
+)
|
|
|
+
|
|
|
+func dealWithNameScoreRules(name string) (string, bool, []map[string]interface{}) {
|
|
|
+ new_name, new_score, isok := "", float64(0), false
|
|
|
+ old_name := escapeNew(name)
|
|
|
+ if old_name == "" {
|
|
|
+ return "", false, nil
|
|
|
+ }
|
|
|
+ query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"unique_qy.name_word","query":"` + old_name + `"}}],"must_not":[],"should":[]}},"from":"0","size":"300"}`
|
|
|
+ tmp := make(map[string]interface{})
|
|
|
+ json.Unmarshal([]byte(query), &tmp)
|
|
|
+ searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
|
|
|
+ if err != nil {
|
|
|
+ log.Println("ES查询出错", name, old_name,err)
|
|
|
+ return "", false, nil
|
|
|
+ }
|
|
|
+ if searchResult.Hits!= nil{
|
|
|
+ resNum := len(searchResult.Hits.Hits)
|
|
|
+ res := make([]map[string]interface{}, resNum)
|
|
|
+ if searchResult.Hits != nil {
|
|
|
+ if resNum < 1000 {
|
|
|
+ for i, hit := range searchResult.Hits.Hits {
|
|
|
+ data := make(map[string]interface{}, 0)
|
|
|
+ json.Unmarshal(*hit.Source, &data)
|
|
|
+ res[i] = map[string]interface{}{
|
|
|
+ "name": data["name"],
|
|
|
+ "score": *hit.Score,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ log.Println("查询结果太多,查询到:", resNum, "条")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(res) > 0 {
|
|
|
+ //分析分数...取最大
|
|
|
+
|
|
|
+ new_name = ObjToString(res[0]["name"])
|
|
|
+ new_score = Float64All(res[0]["score"])
|
|
|
+ }
|
|
|
+ if new_name != "" { //分析hit比例
|
|
|
+ total, hit := dealWithWordsRules(name, new_name)
|
|
|
+ proportion := float64(hit) / float64(total)
|
|
|
+ if proportion >= 1.0 {
|
|
|
+ isok = true
|
|
|
+ } else {
|
|
|
+ if float64(hit)/float64(total) >= 0.8 && new_score > 4.0 {
|
|
|
+ isok = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return new_name, isok, res
|
|
|
+ }
|
|
|
+ return new_name,isok,nil
|
|
|
+}
|
|
|
+
|
|
|
+//击中数量以及比例
|
|
|
+func dealWithWordsRules(info_name string, source_name string) (int, int) {
|
|
|
+ total, hit := 0, 0
|
|
|
+
|
|
|
+ //字符串处理,替换指定字符
|
|
|
+ info_name = strings.ReplaceAll(info_name, "(", "")
|
|
|
+ info_name = strings.ReplaceAll(info_name, ")", "")
|
|
|
+ info_name = strings.ReplaceAll(info_name, "(", "")
|
|
|
+ info_name = strings.ReplaceAll(info_name, ")", "")
|
|
|
+ source_name = strings.ReplaceAll(source_name, "(", "")
|
|
|
+ source_name = strings.ReplaceAll(source_name, ")", "")
|
|
|
+ source_name = strings.ReplaceAll(source_name, "(", "")
|
|
|
+ source_name = strings.ReplaceAll(source_name, ")", "")
|
|
|
+
|
|
|
+ nameArr, _ := calculateWordCount(info_name)
|
|
|
+ _, total = calculateWordCount(source_name)
|
|
|
+ for _, v1 := range nameArr {
|
|
|
+ if strings.Contains(source_name, v1) {
|
|
|
+ hit++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return total, hit
|
|
|
+}
|
|
|
+
|
|
|
// calculateWordCount splits name into overlapping windows of two runes each
// ("abcd" -> "ab", "bc", "cd") and returns the windows together with their
// count. A name shorter than two runes yields an empty slice and 0.
func calculateWordCount(name string) ([]string, int) {
	const window = 2

	// Number of window start positions; non-positive when name is too short.
	starts := utf8.RuneCountInString(name) - window + 1
	if starts <= 0 {
		return []string{}, 0
	}

	runes := []rune(name)
	grams := []string{}
	for begin := range runes[:starts] {
		grams = append(grams, string(runes[begin:begin+window]))
	}
	return grams, len(grams)
}
|
|
|
+
|
|
|
+//func escape(s string) string {
|
|
|
+// news := ""
|
|
|
+// s = strings.ReplaceAll(s," ","")
|
|
|
+// for _, c := range s {
|
|
|
+// //if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
|
|
|
+// // news = news + string(c)
|
|
|
+// //}else if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
|
|
|
+// // a := string([]rune{os.PathSeparator, '\\'})
|
|
|
+// // news = news + a + string(c)
|
|
|
+// //} else {
|
|
|
+// // return ""
|
|
|
+// //}
|
|
|
+// if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
|
|
|
+// a := string([]rune{os.PathSeparator,'\\'})
|
|
|
+// //news = news + a + `\` + string(c)
|
|
|
+// news = news + a + string(c)
|
|
|
+// } else {
|
|
|
+// news = news + string(c)
|
|
|
+// }
|
|
|
+//
|
|
|
+// }
|
|
|
+// return news
|
|
|
+//}
|
|
|
+
|
|
|
// escapeNew returns s with every rune removed that is not a Han character,
// a number, or a letter (per the unicode package). Spaces, punctuation and
// symbols are therefore dropped; the result may be empty.
func escapeNew(s string) string {
	// strings.Map drops runes for which the mapping returns a negative value,
	// building the result in a single pass instead of repeated concatenation.
	return strings.Map(func(c rune) rune {
		if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
			return c
		}
		return -1
	}, s)
}
|