123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- package public
- import (
- "math/rand"
- "regexp"
- "sort"
- "strings"
- "sync"
- "time"
- . "app.yhyue.com/moapp/jybase/sort"
- . "github.com/thinxer/go-word2vec"
- )
- var RecomKws = &recomKws{
- reqPool: make(chan bool, 5),
- }
- func init() {
- RecomKws.model, _ = Load("./zb.bin")
- }
- type recomKws struct {
- reqPool chan bool
- model *Model
- }
- func (rk *recomKws) GetRecomKws(value string, count int, recommendThreshold float32) []*map[string]interface{} {
- rk.reqPool <- true
- defer func() {
- <-rk.reqPool
- }()
- recomPool := make(chan bool, 5)
- wait := &sync.WaitGroup{}
- lock := &sync.Mutex{}
- maxCount := count * 3
- if maxCount > 100 {
- maxCount = 100
- }
- keys := strings.Split(value, " ")
- wordMap := map[string]bool{}
- randomNum := rk.generateRandomNumber(maxCount, count)
- keyMap := map[string]bool{}
- allKeyMap := map[string]bool{}
- for _, key := range keys {
- for _, v := range strings.Split(key, "+") {
- v = strings.TrimSpace(v)
- if v == "" {
- continue
- }
- allKeyMap[v] = true
- }
- }
- sl := &ComSortList{
- SortKeys: []*ComSortKey{
- &ComSortKey{
- Keys: []string{"sim"},
- Order: -1,
- Type: "float",
- },
- },
- List: []*map[string]interface{}{},
- }
- for _, key := range keys {
- key = strings.TrimSpace(key)
- if keyMap[key] {
- continue
- }
- keyMap[key] = true
- recomPool <- true
- wait.Add(1)
- go func(v string) {
- defer func() {
- <-recomPool
- wait.Done()
- }()
- //获取随机数
- pw, _ := rk.model.MostSimilar(strings.Split(v, "+"), []string{}, maxCount)
- for k, p := range pw {
- p.Word = strings.TrimSpace(p.Word)
- if sim := p.Sim; sim < recommendThreshold {
- continue
- }
- if p.Word == "" || !randomNum[k] || allKeyMap[p.Word] || len([]rune(p.Word)) == 1 {
- continue
- }
- if strings.HasSuffix(p.Word, "路") || DealString(p.Word) {
- continue
- }
- hzRegexp := regexp.MustCompile("[^A-Za-z0-9\u4e00-\u9fa5]")
- if hzRegexp.MatchString(p.Word) {
- continue //过滤乱码
- }
- lock.Lock()
- if !wordMap[p.Word] {
- sl.List = append(sl.List, &map[string]interface{}{
- "sim": p.Sim,
- "word": p.Word,
- })
- }
- wordMap[p.Word] = true
- lock.Unlock()
- }
- }(key)
- }
- wait.Wait()
- sort.Sort(sl)
- if len(sl.List) > 100 {
- return sl.List[:100]
- }
- return sl.List
- }
- //随机数查询
- func (rk *recomKws) generateRandomNumber(max int, count int) map[int]bool {
- nums := map[int]bool{}
- //随机数生成器,加入时间戳保证每次生成的随机数不一样
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- for len(nums) < count {
- nums[r.Intn(max)] = true
- }
- return nums
- }
|