recomKws.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. package public
import (
	"log"
	"math/rand"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"

	. "app.yhyue.com/moapp/jybase/sort"
	. "github.com/thinxer/go-word2vec"
)
  12. var RecomKws = &recomKws{
  13. reqPool: make(chan bool, 5),
  14. }
  15. func init() {
  16. RecomKws.model, _ = Load("./zb.bin")
  17. }
  18. type recomKws struct {
  19. reqPool chan bool
  20. model *Model
  21. }
  22. func (rk *recomKws) GetRecomKws(value string, count int, recommendThreshold float32) []*map[string]interface{} {
  23. rk.reqPool <- true
  24. defer func() {
  25. <-rk.reqPool
  26. }()
  27. recomPool := make(chan bool, 5)
  28. wait := &sync.WaitGroup{}
  29. lock := &sync.Mutex{}
  30. maxCount := count * 3
  31. if maxCount > 100 {
  32. maxCount = 100
  33. }
  34. keys := strings.Split(value, " ")
  35. wordMap := map[string]bool{}
  36. randomNum := rk.generateRandomNumber(maxCount, count)
  37. keyMap := map[string]bool{}
  38. allKeyMap := map[string]bool{}
  39. for _, key := range keys {
  40. for _, v := range strings.Split(key, "+") {
  41. v = strings.TrimSpace(v)
  42. if v == "" {
  43. continue
  44. }
  45. allKeyMap[v] = true
  46. }
  47. }
  48. sl := &ComSortList{
  49. SortKeys: []*ComSortKey{
  50. &ComSortKey{
  51. Keys: []string{"sim"},
  52. Order: -1,
  53. Type: "float",
  54. },
  55. },
  56. List: []*map[string]interface{}{},
  57. }
  58. for _, key := range keys {
  59. key = strings.TrimSpace(key)
  60. if keyMap[key] {
  61. continue
  62. }
  63. keyMap[key] = true
  64. recomPool <- true
  65. wait.Add(1)
  66. go func(v string) {
  67. defer func() {
  68. <-recomPool
  69. wait.Done()
  70. }()
  71. //获取随机数
  72. pw, _ := rk.model.MostSimilar(strings.Split(v, "+"), []string{}, maxCount)
  73. for k, p := range pw {
  74. p.Word = strings.TrimSpace(p.Word)
  75. if sim := p.Sim; sim < recommendThreshold {
  76. continue
  77. }
  78. if p.Word == "" || !randomNum[k] || allKeyMap[p.Word] || len([]rune(p.Word)) == 1 {
  79. continue
  80. }
  81. if strings.HasSuffix(p.Word, "路") || DealString(p.Word) {
  82. continue
  83. }
  84. hzRegexp := regexp.MustCompile("[^A-Za-z0-9\u4e00-\u9fa5]")
  85. if hzRegexp.MatchString(p.Word) {
  86. continue //过滤乱码
  87. }
  88. lock.Lock()
  89. if !wordMap[p.Word] {
  90. sl.List = append(sl.List, &map[string]interface{}{
  91. "sim": p.Sim,
  92. "word": p.Word,
  93. })
  94. }
  95. wordMap[p.Word] = true
  96. lock.Unlock()
  97. }
  98. }(key)
  99. }
  100. wait.Wait()
  101. sort.Sort(sl)
  102. if len(sl.List) > 100 {
  103. return sl.List[:100]
  104. }
  105. return sl.List
  106. }
  107. //随机数查询
  108. func (rk *recomKws) generateRandomNumber(max int, count int) map[int]bool {
  109. nums := map[int]bool{}
  110. //随机数生成器,加入时间戳保证每次生成的随机数不一样
  111. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  112. for len(nums) < count {
  113. nums[r.Intn(max)] = true
  114. }
  115. return nums
  116. }