浏览代码

关键词分词处理

wangshan 2 年之前
父节点
当前提交
e9533ffc8b
共有 1 个文件被更改,包括 44 次插入和 9 次删除
  1. 44 9
      common/src/qfw/util/jy/jy.go

+ 44 - 9
common/src/qfw/util/jy/jy.go

@@ -1,14 +1,6 @@
 package jy
 
 import (
-	"encoding/json"
-	"fmt"
-	"log"
-	"regexp"
-	"sort"
-	"strings"
-	"time"
-
 	util "app.yhyue.com/moapp/jybase/common"
 	. "app.yhyue.com/moapp/jybase/date"
 	"app.yhyue.com/moapp/jybase/es"
@@ -20,6 +12,13 @@ import (
 	"app.yhyue.com/moapp/jybase/sms"
 	. "app.yhyue.com/moapp/jypkg/middleground"
 	"bp.jydev.jianyu360.cn/BaseService/userCenter/rpc/pb"
+	"encoding/json"
+	"fmt"
+	"log"
+	"regexp"
+	"sort"
+	"strings"
+	"time"
 )
 
 // 获取用户合并以前,合并以后的openid
@@ -55,7 +54,7 @@ var filterReg_1 = regexp.MustCompile("^([0-9]{1,3}|[零一二三四五六七八
 // filterReg matches a string that consists entirely (one or more characters)
 // of characters taken from the bracketed set. The set is a character class, so
 // the characters that appear in it more than once are redundant.
 // NOTE(review): the repeated runs look like whole words; if word-level
 // alternation was intended, a character class does not express it - confirm.
 var filterReg = regexp.MustCompile("^[的人号时元万公告项目地址电话邮编日期联系招标中结果成交项目项目采购采购项目政府采购公告更正公告]+$")
 // PhoneReg matches exactly eleven digits: a leading 1, a second digit in the
 // range 3-9, then nine more digits.
 var PhoneReg = regexp.MustCompile("^[1][3-9][0-9]{9}$")
 // EmailPattern matches an address of the form local@domain, where:
 //   - local is one or more letters, digits, '_', '-' or '.';
 //   - domain is either '[' followed by three dot-terminated groups of 1-3
 //     digits, or one or more dot-terminated labels of letters, digits and '-';
 //   - the final part is 2-4 letters or 1-3 digits, optionally followed by ']'.
 // A final alphabetic part longer than four letters is therefore not matched.
 var EmailPattern = regexp.MustCompile("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$")
-var filterReg_4 = regexp.MustCompile("([)>》】\\]\\}}〕,,;;::'\"“”。\\.\\??、/\\+=\\\\_—\\*&……\\^%$¥@!!`~·(\\(<《【\\[\\{{〔])")
+var filterReg_4 = regexp.MustCompile("([)>》】\\]\\}}〕,,;;::'\"“”。\\.\\??、/\\+=\\\\_—\\*&……\\^%$¥@!!`~·(\\(<《【\\[\\{{〔])")
 
 // P279删除通用过滤词,例如“公告”
 func FilteKey(k string) string {
@@ -425,3 +424,39 @@ func ClearUserCache(middleground *Middleground, positionId int64) {
 	})
 	middleground.PowerCheckCenter.DelCheckRedis("10000", positionId)
 }
+
+// 关键词分词处理
+func KeywordsProcessing(keywords, sep string) string {
+	keywords = MatchSpace.ReplaceAllString(keywords, " ")
+	if keywords == "" || len(strings.Split(keywords, sep)) == 1 {
+		return keywords
+	}
+	var newWords = make([]string, 0, 0)
+	if keywords != "" && len(strings.Split(keywords, sep)) > 1 {
+		var words = strings.Split(keywords, sep)
+		for k := 0; k < len(words); k++ {
+			v := words[k]
+			//连续性空格
+			if len([]rune(v)) == 0 {
+				continue
+			}
+			if len([]rune(v)) == 1 {
+				//compare
+				if k == 0 { //first
+					words[k+1] = v + words[k+1]
+				} else if k == len(words)-1 { //last
+					newWords[len(newWords)-1] += v
+				} else {
+					if len(newWords[len(newWords)-1]) < len(words[k+1]) {
+						newWords[len(newWords)-1] += v
+					} else {
+						words[k+1] = v + words[k+1]
+					}
+				}
+			} else {
+				newWords = append(newWords, v)
+			}
+		}
+	}
+	return strings.Join(newWords, sep)
+}