package public

import (
	"math/rand"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"

	. "app.yhyue.com/moapp/jybase/sort"
	. "github.com/thinxer/go-word2vec"
)

// recomKwsInvalidChars matches any character that is not an ASCII letter, an
// ASCII digit or a CJK ideograph in the U+4E00..U+9FA5 range. Candidate words
// containing such a character are treated as garbled and dropped. It is
// compiled once here instead of once per candidate word.
var recomKwsInvalidChars = regexp.MustCompile("[^A-Za-z0-9\u4e00-\u9fa5]")

// RecomKws is the package-wide keyword recommender. Its reqPool allows at most
// five GetRecomKws calls to run at the same time.
var RecomKws = &recomKws{
	reqPool: make(chan bool, 5),
}

func init() {
	// The load error is intentionally not propagated: init cannot return it.
	// When loading fails and the model is left nil, GetRecomKws returns an
	// empty result instead of dereferencing it.
	RecomKws.model, _ = Load("./zb.bin")
}

// recomKws recommends keywords similar to the ones a caller supplies, using a
// word2vec model.
type recomKws struct {
	reqPool chan bool // semaphore bounding concurrent GetRecomKws calls
	model   *Model    // word2vec model loaded in init; nil if loading failed
}

// GetRecomKws returns recommended keywords for value.
//
// value is a space-separated list of keys; each key may join several words
// with "+", and those words are passed together as the positive terms of one
// similarity query. For every distinct key up to min(count*3, 100) candidates
// are requested from the model, and only candidates at count randomly chosen
// positions of that ranking are considered. A candidate is kept when its
// similarity is at least recommendThreshold and it is not empty, not a single
// rune, not one of the queried words, does not end in "路", is not rejected by
// DealString, and contains only letters, digits and CJK ideographs.
//
// The result holds one map per distinct word with the keys "sim" and "word",
// sorted through ComSortList on "sim" (Order -1, presumably descending —
// confirm against ComSortList) and truncated to 100 entries. It is never nil.
// An empty list is returned when count is not positive or the model is not
// loaded.
func (rk *recomKws) GetRecomKws(value string, count int, recommendThreshold float32) []*map[string]interface{} {
	const (
		maxCandidates = 100 // upper bound on candidates requested per key
		maxResults    = 100 // upper bound on returned recommendations
		maxWorkers    = 5   // concurrent model queries per call
	)

	// Nothing can be recommended without a model or with a non-positive
	// count; the latter would also hand a zero or negative size to the model.
	if rk.model == nil || count <= 0 {
		return []*map[string]interface{}{}
	}

	rk.reqPool <- true
	defer func() {
		<-rk.reqPool
	}()

	// Same value as min(count*3, maxCandidates), written so that count*3 is
	// never evaluated for large counts.
	maxCount := maxCandidates
	if count <= maxCandidates/3 {
		maxCount = count * 3
	}
	keys := strings.Split(value, " ")
	randomNum := rk.generateRandomNumber(maxCount, count)

	// Every individual word the caller asked about; these are never
	// recommended back.
	allKeyMap := map[string]bool{}
	for _, key := range keys {
		for _, v := range strings.Split(key, "+") {
			v = strings.TrimSpace(v)
			if v == "" {
				continue
			}
			allKeyMap[v] = true
		}
	}

	sl := &ComSortList{
		SortKeys: []*ComSortKey{
			{
				Keys:  []string{"sim"},
				Order: -1,
				Type:  "float",
			},
		},
		List: []*map[string]interface{}{},
	}

	var (
		wait      sync.WaitGroup
		lock      sync.Mutex // guards wordMap and sl.List
		wordMap   = map[string]bool{}
		keyMap    = map[string]bool{}
		recomPool = make(chan bool, maxWorkers)
	)
	for _, key := range keys {
		key = strings.TrimSpace(key)
		// Consecutive spaces in value yield empty keys; there is nothing to
		// look up for them.
		if key == "" || keyMap[key] {
			continue
		}
		keyMap[key] = true

		recomPool <- true
		wait.Add(1)
		go func(v string) {
			defer func() {
				<-recomPool
				wait.Done()
			}()
			// Best effort: a failed lookup (presumably a word missing from
			// the vocabulary) simply contributes no candidates.
			pw, _ := rk.model.MostSimilar(strings.Split(v, "+"), []string{}, maxCount)
			for k, p := range pw {
				word := strings.TrimSpace(p.Word)
				if p.Sim < recommendThreshold {
					continue
				}
				if word == "" || !randomNum[k] || allKeyMap[word] || len([]rune(word)) == 1 {
					continue
				}
				if strings.HasSuffix(word, "路") || DealString(word) {
					continue
				}
				if recomKwsInvalidChars.MatchString(word) {
					continue // drop garbled words
				}
				lock.Lock()
				if !wordMap[word] {
					wordMap[word] = true
					sl.List = append(sl.List, &map[string]interface{}{
						"sim":  p.Sim,
						"word": word,
					})
				}
				lock.Unlock()
			}
		}(key)
	}
	wait.Wait()

	sort.Sort(sl)
	if len(sl.List) > maxResults {
		return sl.List[:maxResults]
	}
	return sl.List
}

// generateRandomNumber returns a set of distinct random indexes drawn from
// [0, max). The set holds count indexes, or all max of them when count is not
// smaller than max; it is empty when max or count is not positive.
func (rk *recomKws) generateRandomNumber(max int, count int) map[int]bool {
	nums := map[int]bool{}
	if max <= 0 || count <= 0 {
		// rand.Intn panics for a non-positive bound.
		return nums
	}
	if count >= max {
		// Only max distinct indexes exist; drawing until count of them are
		// collected would never finish when count > max.
		for i := 0; i < max; i++ {
			nums[i] = true
		}
		return nums
	}
	// Seed from the current time so every call draws a different selection.
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for len(nums) < count {
		nums[r.Intn(max)] = true
	}
	return nums
}