|
@@ -0,0 +1,142 @@
|
|
|
/**
 * Interest analysis.
 *
 */
|
|
|
+package dfa
|
|
|
+
|
|
|
import (
	"log"
	"sort"
	"strings"
)
|
|
|
+
|
|
|
// DFA is a keyword matcher backed by two byte-level tries.
//
// Every trie node below a root is stored as a *map[string]interface{} whose
// single-byte string keys lead to child nodes. Each such node also carries
// the entry "YN", set to "Y" when a word ends at that node and "N" otherwise.
// Nodes of linkAndWord that end a sub-word additionally carry the entry
// "key", a map[string]int from the owning "and" expression to the slot index
// of that sub-word inside the expression.
type DFA struct {
	// link is the root of the trie holding plain ("or") keywords.
	link map[string]interface{}
	// linkAnd maps each "+"-joined ("and") expression to the number of
	// sub-word slots that must all be seen for the expression to match.
	linkAnd map[string]int
	// linkAndWord is the root of the trie holding the individual sub-words
	// that "and" expressions were split into.
	linkAndWord map[string]interface{}
}
|
|
|
+
|
|
|
+//添加词组,用于初始化,该方法是可以调用多次的
|
|
|
+func (d *DFA) AddWord(words ...string) {
|
|
|
+ if d.link == nil {
|
|
|
+ d.link = make(map[string]interface{})
|
|
|
+ d.linkAnd = make(map[string]int)
|
|
|
+ d.linkAndWord = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ var nowMap *map[string]interface{}
|
|
|
+ for _, key := range words {
|
|
|
+ keys := strings.Split(key, "+")
|
|
|
+ lenkeys := len(keys)
|
|
|
+ if lenkeys > 1 {
|
|
|
+ d.linkAnd[key] = lenkeys
|
|
|
+ for k := 0; k < lenkeys; k++ {
|
|
|
+ minKey := keys[k]
|
|
|
+ nowMap = &d.linkAndWord
|
|
|
+ for i := 0; i < len(minKey); i++ {
|
|
|
+ kc := minKey[i : i+1]
|
|
|
+ if v, ok := (*nowMap)[kc]; ok {
|
|
|
+ nowMap, _ = v.(*map[string]interface{})
|
|
|
+ } else {
|
|
|
+ newMap := map[string]interface{}{}
|
|
|
+ newMap["YN"] = "N"
|
|
|
+ (*nowMap)[kc] = &newMap
|
|
|
+ nowMap = &newMap
|
|
|
+ }
|
|
|
+ if i == len(minKey)-1 {
|
|
|
+ (*nowMap)["YN"] = "Y"
|
|
|
+ if (*nowMap)["key"] == nil {
|
|
|
+ (*nowMap)["key"] = make(map[string]int)
|
|
|
+ }
|
|
|
+ (*nowMap)["key"].(map[string]int)[key] = k
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ nowMap = &d.link
|
|
|
+ for i := 0; i < len(key); i++ {
|
|
|
+ kc := key[i : i+1]
|
|
|
+ if v, ok := (*nowMap)[kc]; ok {
|
|
|
+ nowMap, _ = v.(*map[string]interface{})
|
|
|
+ } else {
|
|
|
+ newMap := map[string]interface{}{}
|
|
|
+ newMap["YN"] = "N"
|
|
|
+ (*nowMap)[kc] = &newMap
|
|
|
+ nowMap = &newMap
|
|
|
+ }
|
|
|
+
|
|
|
+ if i == len(key)-1 {
|
|
|
+ (*nowMap)["YN"] = "Y"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+func (d *DFA) Clear() {
|
|
|
+ d.link = nil
|
|
|
+}
|
|
|
+
|
|
|
+//从给定的内容中找出匹配上的关键词
|
|
|
+func (d *DFA) Analy(src string) []string {
|
|
|
+ if d.link == nil {
|
|
|
+ log.Println("请先添加词组")
|
|
|
+ return []string{}
|
|
|
+ }
|
|
|
+ keywords := []string{}
|
|
|
+ tempMap := make(map[string][]bool)
|
|
|
+ for i := 0; i < len(src); i++ {
|
|
|
+ nowMap := &d.link
|
|
|
+ length := 0 // 匹配标识数默认为0
|
|
|
+ //flag := false // 敏感词结束标识位:用于敏感词只有1位的情况
|
|
|
+ for j := i; j < len(src); j++ {
|
|
|
+ word := src[j : j+1]
|
|
|
+ nowMap, _ = (*nowMap)[word].(*map[string]interface{})
|
|
|
+ if nowMap != nil {
|
|
|
+ length = length + 1
|
|
|
+ tag, _ := (*nowMap)["YN"].(string)
|
|
|
+ if "Y" == tag {
|
|
|
+ //flag = true
|
|
|
+ keywords = append(keywords, src[i:i+length])
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ nowMap = &d.linkAndWord
|
|
|
+ length = 0
|
|
|
+ for j := i; j < len(src); j++ {
|
|
|
+ word := src[j : j+1]
|
|
|
+ nowMap, _ = (*nowMap)[word].(*map[string]interface{})
|
|
|
+ if nowMap != nil {
|
|
|
+ length = length + 1
|
|
|
+ tag, _ := (*nowMap)["YN"].(string)
|
|
|
+ if "Y" == tag {
|
|
|
+ mkeys := (*nowMap)["key"].(map[string]int)
|
|
|
+ for k, v := range mkeys {
|
|
|
+ tempBool := tempMap[k]
|
|
|
+ if tempBool == nil {
|
|
|
+ tempBool = make([]bool, d.linkAnd[k])
|
|
|
+ tempMap[k] = tempBool
|
|
|
+ }
|
|
|
+ tempBool[v] = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for k, v := range tempMap {
|
|
|
+ ball := true
|
|
|
+ for _, m := range v {
|
|
|
+ if !m {
|
|
|
+ ball = false
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ball {
|
|
|
+ keywords = append(keywords, k)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return keywords
|
|
|
+}
|