words.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. package util
  2. import (
  3. "encoding/json"
  4. "log"
  5. "strings"
  6. "unicode"
  7. "unicode/utf8"
  8. )
  9. func dealWithNameScoreRules(name string) (string, bool, []map[string]interface{}) {
  10. new_name, new_score, isok := "", float64(0), false
  11. old_name := escapeNew(name)
  12. if old_name == "" {
  13. return "", false, nil
  14. }
  15. query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"unique_qy.name_word","query":"` + old_name + `"}}],"must_not":[],"should":[]}},"from":"0","size":"300"}`
  16. tmp := make(map[string]interface{})
  17. json.Unmarshal([]byte(query), &tmp)
  18. searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
  19. if err != nil {
  20. log.Println("ES查询出错", name, old_name,err)
  21. return "", false, nil
  22. }
  23. if searchResult.Hits!= nil{
  24. resNum := len(searchResult.Hits.Hits)
  25. res := make([]map[string]interface{}, resNum)
  26. if searchResult.Hits != nil {
  27. if resNum < 1000 {
  28. for i, hit := range searchResult.Hits.Hits {
  29. data := make(map[string]interface{}, 0)
  30. json.Unmarshal(*hit.Source, &data)
  31. res[i] = map[string]interface{}{
  32. "name": data["name"],
  33. "score": *hit.Score,
  34. }
  35. }
  36. } else {
  37. log.Println("查询结果太多,查询到:", resNum, "条")
  38. }
  39. }
  40. if len(res) > 0 {
  41. //分析分数...取最大
  42. new_name = ObjToString(res[0]["name"])
  43. new_score = Float64All(res[0]["score"])
  44. }
  45. if new_name != "" { //分析hit比例
  46. total, hit := dealWithWordsRules(name, new_name)
  47. proportion := float64(hit) / float64(total)
  48. if proportion >= 1.0 {
  49. isok = true
  50. } else {
  51. if float64(hit)/float64(total) >= 0.8 && new_score > 4.0 {
  52. isok = true
  53. }
  54. }
  55. }
  56. return new_name, isok, res
  57. }
  58. return new_name,isok,nil
  59. }
  60. //击中数量以及比例
  61. func dealWithWordsRules(info_name string, source_name string) (int, int) {
  62. total, hit := 0, 0
  63. //字符串处理,替换指定字符
  64. info_name = strings.ReplaceAll(info_name, "(", "")
  65. info_name = strings.ReplaceAll(info_name, ")", "")
  66. info_name = strings.ReplaceAll(info_name, "(", "")
  67. info_name = strings.ReplaceAll(info_name, ")", "")
  68. source_name = strings.ReplaceAll(source_name, "(", "")
  69. source_name = strings.ReplaceAll(source_name, ")", "")
  70. source_name = strings.ReplaceAll(source_name, "(", "")
  71. source_name = strings.ReplaceAll(source_name, ")", "")
  72. nameArr, _ := calculateWordCount(info_name)
  73. _, total = calculateWordCount(source_name)
  74. for _, v1 := range nameArr {
  75. if strings.Contains(source_name, v1) {
  76. hit++
  77. }
  78. }
  79. return total, hit
  80. }
  // calculateWordCount splits name into overlapping two-rune pieces.
  //
  // For a name of n runes it returns the n-1 pieces name[0:2], name[1:3], ...
  // together with their count. Names shorter than two runes yield an empty,
  // non-nil slice and 0.
  func calculateWordCount(name string) ([]string, int) {
      const space = 2 // width of each piece, in runes

      total := utf8.RuneCountInString(name) - (space - 1)
      if total <= 0 {
          return []string{}, 0
      }

      nameRune := []rune(name)
      // The number of pieces is known up front, so size the slice once.
      arr := make([]string, 0, total)
      for i := 0; i < total; i++ {
          arr = append(arr, string(nameRune[i:i+space]))
      }
      return arr, len(arr)
  }
  // escape is an earlier escaping routine, kept commented out for reference;
  // dealWithNameScoreRules calls escapeNew instead.
  //func escape(s string) string {
  // news := ""
  // s = strings.ReplaceAll(s," ","")
  // for _, c := range s {
  // //if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
  // // news = news + string(c)
  // //}else if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
  // // a := string([]rune{os.PathSeparator, '\\'})
  // // news = news + a + string(c)
  // //} else {
  // // return ""
  // //}
  // if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
  // a := string([]rune{os.PathSeparator,'\\'})
  // //news = news + a + `\` + string(c)
  // news = news + a + string(c)
  // } else {
  // news = news + string(c)
  // }
  //
  // }
  // return news
  //}
  // escapeNew returns s with every character removed except those that are in
  // the Han script, numeric, or letters. Spaces, punctuation and symbols are
  // therefore dropped; the result is "" when nothing qualifies.
  func escapeNew(s string) string {
      var news strings.Builder
      // The output can never be longer than the input.
      news.Grow(len(s))
      for _, c := range s {
          if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
              news.WriteRune(c)
          }
      }
      return news.String()
  }