Pārlūkot izejas kodu

分词二次加工

wangshan pirms 4 mēnešiem
vecāks
revīzija
358a53f738
1 mainīts fails ar 27 papildinājumiem un 5 dzēšanām
  1. 27 5
      common/src/qfw/util/jy/jy.go

+ 27 - 5
common/src/qfw/util/jy/jy.go

@@ -448,14 +448,36 @@ func KeywordsProcessing(keywords, sep string) string {
 			if len([]rune(v)) == 1 {
 				//compare
 				if k == 0 { //first
-					words[k+1] = v + words[k+1]
+					newWord := v + words[k+1]
+					if strings.Contains(keywords, newWord) { //组合后 必须包含
+						words[k+1] = newWord
+					}
 				} else if k == len(words)-1 { //last
-					newWords[len(newWords)-1] += v
+					newWord := newWords[len(newWords)-1] + v
+					if strings.Contains(keywords, newWord) {
+						newWords[len(newWords)-1] = newWord
+					}
 				} else {
-					if len(newWords[len(newWords)-1]) < len(words[k+1]) {
-						newWords[len(newWords)-1] += v
+					if len([]rune(newWords[len(newWords)-1])) < len([]rune(words[k+1])) {
+						newWord := newWords[len(newWords)-1] + v
+						if strings.Contains(keywords, newWord) {
+							newWords[len(newWords)-1] = newWord
+						} else {
+							newWord = v + words[k+1]
+							if strings.Contains(keywords, newWord) { //组合后 必须包含
+								words[k+1] = newWord
+							}
+						}
 					} else {
-						words[k+1] = v + words[k+1]
+						newWord := v + words[k+1]
+						if strings.Contains(keywords, newWord) { //组合后 必须包含
+							words[k+1] = newWord
+						} else {
+							newWord = newWords[len(newWords)-1] + v
+							if strings.Contains(keywords, newWord) {
+								newWords[len(newWords)-1] = newWord
+							}
+						}
 					}
 				}
 			} else {