// words.go (3.1 KB)

  1. package main
  2. import (
  3. "encoding/json"
  4. "log"
  5. "os"
  6. "strings"
  7. "unicode/utf8"
  8. )
  9. func dealWithNameScoreRules(name string) (string,bool) {
  10. new_name,new_score,isok :="",float64(0),false
  11. old_name := escape(name)
  12. query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_2","query":"`+old_name+`"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
  13. tmp := make(map[string]interface{})
  14. json.Unmarshal([]byte(query),&tmp)
  15. searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
  16. if err != nil {
  17. log.Println("从ES查询出错", err.Error())
  18. }
  19. resNum := len(searchResult.Hits.Hits)
  20. res := make([]map[string]interface{}, resNum)
  21. if searchResult.Hits != nil {
  22. if resNum < 5000 {
  23. for i, hit := range searchResult.Hits.Hits {
  24. data := make(map[string]interface{},0)
  25. json.Unmarshal(*hit.Source, &data)
  26. res[i] = map[string]interface{}{
  27. "name":data["name"],
  28. "score":*hit.Score,
  29. }
  30. }
  31. } else {
  32. log.Println("查询结果太多,查询到:", resNum, "条")
  33. }
  34. }
  35. if len(res)>0 && res != nil {
  36. new_name = ObjToString(res[0]["name"])
  37. new_score = Float64All(res[0]["score"])
  38. }
  39. if new_name!="" { //分析hit比例
  40. total,hit := dealWithWordsRules(name,new_name)
  41. proportion := float64(hit)/float64(total)
  42. if proportion >=1.0 {
  43. isok = true
  44. }else {
  45. if float64(hit)/float64(total)>=0.8 && new_score> 4.0{
  46. isok = true
  47. }
  48. }
  49. }
  50. return new_name,isok
  51. }
  52. //击中数量以及比例
  53. func dealWithWordsRules(info_name string ,source_name string) (int,int){
  54. total,hit :=0,0
  55. //字符串处理,替换指定字符
  56. info_name = strings.ReplaceAll(info_name,"(","")
  57. info_name = strings.ReplaceAll(info_name,")","")
  58. info_name = strings.ReplaceAll(info_name,"(","")
  59. info_name = strings.ReplaceAll(info_name,")","")
  60. source_name = strings.ReplaceAll(source_name,"(","")
  61. source_name = strings.ReplaceAll(source_name,")","")
  62. source_name = strings.ReplaceAll(source_name,"(","")
  63. source_name = strings.ReplaceAll(source_name,")","")
  64. nameArr,_ := calculateWordCount(info_name)
  65. _,total = calculateWordCount(source_name)
  66. for _,v1 := range nameArr {
  67. if strings.Contains(source_name,v1) {
  68. hit++
  69. }
  70. }
  71. return total,hit
  72. }
  73. //分词结果
  74. func calculateWordCount(name string) ([]string,int) {
  75. arr ,space:= make([]string,0),2
  76. total := utf8.RuneCountInString(name)-(space-1)
  77. if name == "" || total<=0 {
  78. return arr,0
  79. }
  80. nameRune := []rune(name)
  81. for i:=0;i<total ;i++ {
  82. new_str := string(nameRune[i:space+i])
  83. arr = append(arr,new_str)
  84. }
  85. return arr,len(arr)
  86. }
  87. func escape(s string) string {
  88. news := ""
  89. for _, c := range s {
  90. if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
  91. a := string([]rune{os.PathSeparator, '\\'})
  92. news = news + a + `\` + string(c)
  93. } else {
  94. news = news + string(c)
  95. }
  96. }
  97. return news
  98. }