// Package match implements keyword matching over text with a rune-keyed
// prefix tree (trie).
package match

import (
	"context"
	"fmt"
	"unicode/utf8"

	"github.com/gogf/gf/v2/frame/g"
)

// TrieNode is a node of the prefix tree. The zero value is an empty tree that
// is ready to use as a root.
type TrieNode struct {
	Children map[rune]*TrieNode // child nodes keyed by the next rune
	IsEnd    bool               // true when the path from the root to this node spells a stored word
}

// childOrCreate returns the child of t for r, creating it (and the Children
// map) when it does not exist yet. A nil entry left in the exported Children
// map is replaced by a fresh node.
func (t *TrieNode) childOrCreate(r rune) *TrieNode {
	if t.Children == nil {
		t.Children = make(map[rune]*TrieNode)
	}
	child := t.Children[r]
	if child == nil {
		child = &TrieNode{}
		t.Children[r] = child
	}
	return child
}

// Insert adds every word to the tree. Inserting the empty string marks the
// receiver itself as a word end. The receiver must not be nil.
func (t *TrieNode) Insert(words ...string) {
	for _, word := range words {
		node := t
		for _, r := range word {
			node = node.childOrCreate(r)
		}
		node.IsEnd = true
	}
}

// Remove deletes every given word from the tree. Words that are not stored are
// skipped without affecting the remaining ones. A nil receiver is logged and
// otherwise ignored.
func (t *TrieNode) Remove(words ...string) {
	if t == nil {
		g.Log().Infof(context.Background(), "TrieNode Remove is nil")
		return
	}
	for _, word := range words {
		t.removeWord([]rune(word))
	}
}

// removeWord clears the end marker of one word and prunes the nodes that are
// no longer needed by any other stored word.
func (t *TrieNode) removeWord(runes []rune) {
	// path[i] is the node reached after consuming i runes; path[0] is the root.
	path := make([]*TrieNode, 1, len(runes)+1)
	path[0] = t
	for _, r := range runes {
		next := path[len(path)-1].Children[r]
		if next == nil {
			return // the word is not stored; nothing to remove
		}
		path = append(path, next)
	}

	path[len(path)-1].IsEnd = false

	// Walk back towards the root and drop every node that has become a dead
	// end: it terminates no word and has no children. The root is never
	// removed.
	for i := len(path) - 1; i > 0; i-- {
		node := path[i]
		if node.IsEnd || len(node.Children) > 0 {
			break
		}
		delete(path[i-1].Children, runes[i-1])
	}
}

//func (t *TrieNode) Print() {
//	g.Dump(t)
//}

// LoadDataToTrie loads the keys of data into the tree. The node path of every
// non-empty key is created; the terminal node is marked as a word end when the
// value is 1 and unmarked for any other value. Nodes along the way keep their
// existing end markers, so the result does not depend on map iteration order.
// The receiver must not be nil.
func (t *TrieNode) LoadDataToTrie(data map[string]int) {
	for key, value := range data {
		if key == "" {
			continue // an empty key addresses no node
		}
		node := t
		for _, r := range key {
			node = node.childOrCreate(r)
		}
		node.IsEnd = value == 1
	}
}

// FindWordInTrie reports whether word is stored in the tree as a complete
// word. A nil receiver stores nothing.
func (t *TrieNode) FindWordInTrie(word string) bool {
	node := t
	for _, r := range word {
		if node == nil {
			return false
		}
		node = node.Children[r]
	}
	return node != nil && node.IsEnd
}

// FindWords returns every distinct stored word that occurs in text. Words are
// listed in order of first occurrence: by start position, and for the same
// start position from shortest to longest. It returns nil when nothing
// matches.
func (t *TrieNode) FindWords(text string) []string {
	if t == nil {
		return nil
	}
	var found []string
	seen := make(map[string]struct{})
	runes := []rune(text)
	for start := range runes {
		node := t
		for end := start; end < len(runes); end++ {
			next := node.Children[runes[end]]
			if next == nil {
				break
			}
			node = next
			if !node.IsEnd {
				continue
			}
			word := string(runes[start : end+1])
			if _, dup := seen[word]; dup {
				continue
			}
			seen[word] = struct{}{}
			found = append(found, word)
		}
	}
	return found
}

// FindOneMaxStr returns the longest stored word that occurs in text, measured
// in runes, or "" when nothing matches. When several words share the maximum
// length, the one that occurs first in text wins.
func (t *TrieNode) FindOneMaxStr(text string) string {
	longest, longestLen := "", 0
	for _, word := range t.FindWords(text) {
		if n := utf8.RuneCountInString(word); n > longestLen {
			longest, longestLen = word, n
		}
	}
	return longest
}

// FindWordsInText searches detail and title for stored words and returns a map
// from each matched word to a snippet of detail.
//
// Detail is searched before title, and only the first occurrence of a word is
// recorded. For a match inside detail the snippet is the surrounding phrase
// produced by getDetailMatchStr. For a match inside title the snippet is the
// phrase anchored at the last rune of detail, or "" when detail is empty.
//
// NOTE(review): the original comment on the title branch says the snippet
// should come from the beginning of detail, while the code anchors it at the
// end. The code's behaviour is kept; confirm which one is intended.
func (t *TrieNode) FindWordsInText(title, detail string) map[string]string {
	words := map[string]string{}
	if t == nil {
		return words
	}

	detailRunes := []rune(detail)
	detailLen := len(detailRunes)
	// Detail and title are scanned as one sequence joined by a single space;
	// the separator sits at index detailLen.
	runes := []rune(fmt.Sprintf("%s %s", detail, title))

	var titleSnippet string // computed lazily, shared by all title matches
	for start := range runes {
		node := t
		for end := start; end < len(runes); end++ {
			if end == detailLen {
				// The separator is not part of either text: a match must
				// not start in detail and finish in title, or include the
				// separator itself.
				break
			}
			next := node.Children[runes[end]]
			if next == nil {
				break
			}
			node = next
			if !node.IsEnd {
				continue
			}
			word := string(runes[start : end+1])
			if _, dup := words[word]; dup {
				continue
			}
			if end < detailLen {
				// Matched inside detail.
				words[word] = getDetailMatchStr(detailRunes, start, end)
				continue
			}
			// Matched inside title.
			if titleSnippet == "" && detailLen > 0 {
				titleSnippet = getDetailMatchStr(detailRunes, detailLen-1, detailLen)
			}
			words[word] = titleSnippet
		}
	}
	return words
}

// splitRune is the set of runes treated as phrase separators when a snippet is
// trimmed. The original list contained ",", " " and ":" twice each, which
// looks like the fullwidth forms collapsed to ASCII, so the fullwidth forms
// are spelled out with escapes here.
var splitRune = map[int32]bool{
	' ':      true,
	',':      true,
	'.':      true,
	'-':      true,
	':':      true,
	'\u3000': true, // ideographic space
	'\u3001': true, // ideographic comma
	'\u3002': true, // ideographic full stop
	'\uFF0C': true, // fullwidth comma
	'\uFF1A': true, // fullwidth colon
}

// getDetailMatchStr returns the phrase of desc around a match.
//
// left is the index of the first rune of the match and right the index of its
// last rune. The snippet starts as a window of 80 runes beginning 40 runes
// before the match, clipped to desc and widened when needed so that the whole
// match fits. Each edge is then moved to a separator from splitRune if one
// lies within 19 runes of it, so the snippet tends to start and end on a
// phrase boundary. It returns "" for an empty desc.
func getDetailMatchStr(desc []rune, left, right int) string {
	l := len(desc)
	if l == 0 {
		return ""
	}
	// Clamp the match position so that the index arithmetic below cannot
	// leave desc.
	if left < 0 {
		left = 0
	}
	if left > l {
		left = l
	}
	if right < left {
		right = left
	}

	// Tentative window [sT, eT).
	sT := left - 40
	if sT < 0 {
		sT = 0
	}
	eT := sT + 80
	if eT <= right {
		eT = right + 1 // never cut a match that is longer than the window
	}
	if eT > l {
		eT = l
	}

	// s and e are the snapped edges; 0 means "no separator found yet". A
	// snapped edge is never 0, so the sentinel is unambiguous.
	s, e := 0, 0
	for i := 1; i < 20 && (s == 0 || e == 0); i++ {
		if s == 0 {
			// A separator after the window start, but before the match.
			if sT+i < left && splitRune[desc[sT+i]] {
				s = sT + i + 1
			}
			// A separator before the window start takes precedence.
			if sT-i > 0 && splitRune[desc[sT-i]] {
				s = sT - i + 1
			}
		}
		if e == 0 {
			// A separator after the window end.
			if eT+i < l && splitRune[desc[eT+i]] {
				e = eT + i
			}
			// A separator before the window end, but after the match, takes
			// precedence.
			if eT-i > right && splitRune[desc[eT-i]] {
				e = eT - i
			}
		}
	}
	if s == 0 {
		s = sT
	}
	if e == 0 {
		e = eT
	}
	return string(desc[s:e])
}