wangshan 3 vuotta sitten
vanhempi
commit
51bbb1ecd3
2 muutettua tiedostoa jossa 187 lisäystä ja 0 poistoa
  1. 142 0
      src/qfw/util/dfa/interestanalysis.go
  2. 45 0
      src/qfw/util/dfa/interestanalysis_test.go

+ 142 - 0
src/qfw/util/dfa/interestanalysis.go

@@ -0,0 +1,142 @@
+/**
+ *兴趣分析
+ *
+ */
+package dfa
+
+import (
+	"log"
+	"strings"
+)
+
+// DFA matches plain keywords and "+"-joined keyword combinations against text, using byte-level tries filled by AddWord.
+type DFA struct {
+	link        map[string]interface{} // trie of plain ("or") keywords: children are keyed by one-byte strings, "YN" is "Y" where a keyword ends
+	linkAnd     map[string]int         // each "+"-joined ("and") keyword mapped to its part count (the size of the match slice Analy allocates for it)
+	linkAndWord map[string]interface{} // trie of the individual parts of "and" keywords; terminal nodes hold a "key" map from full keyword to part index
+
+}
+
+// AddWord registers keywords with the matcher. It may be called any number of
+// times; the internal maps are created on first use (or after Clear).
+//
+// A word without "+" is a plain keyword stored in the link trie. A word that
+// contains "+" is an "and" keyword: it is split on "+", every part is stored
+// in the linkAndWord trie, and the terminal node of each part records the
+// full keyword together with the index of that part.
+//
+// Empty and repeated parts are dropped before indexing. Previously "a+a"
+// overwrote its own part index and "a+" reserved a slot for an empty part
+// that could never be marked, so such keywords were unmatchable. A word made
+// only of "+" characters has no usable part and is ignored.
+func (d *DFA) AddWord(words ...string) {
+	if d.link == nil {
+		d.link = make(map[string]interface{})
+		d.linkAnd = make(map[string]int)
+		d.linkAndWord = make(map[string]interface{})
+	}
+	for _, key := range words {
+		if !strings.Contains(key, "+") {
+			insertWord(&d.link, key)
+			continue
+		}
+		// Collect the distinct, non-empty parts in first-seen order.
+		var parts []string
+		seen := make(map[string]bool)
+		for _, part := range strings.Split(key, "+") {
+			if part == "" || seen[part] {
+				continue
+			}
+			seen[part] = true
+			parts = append(parts, part)
+		}
+		if len(parts) == 0 {
+			continue
+		}
+		// Analy sizes its per-keyword match slice from this count.
+		d.linkAnd[key] = len(parts)
+		for idx, part := range parts {
+			node := insertWord(&d.linkAndWord, part)
+			owners, ok := (*node)["key"].(map[string]int)
+			if !ok {
+				owners = make(map[string]int)
+				(*node)["key"] = owners
+			}
+			owners[key] = idx
+		}
+	}
+}
+
+// insertWord walks word byte by byte from root, creating missing nodes with
+// "YN" set to "N", marks the node of the last byte with "YN" = "Y" and returns
+// that node. Children are stored as *map[string]interface{}, which is the
+// representation Analy type-asserts on. An empty word leaves the trie
+// untouched and returns root.
+func insertWord(root *map[string]interface{}, word string) *map[string]interface{} {
+	node := root
+	for i := 0; i < len(word); i++ {
+		b := word[i : i+1]
+		if child, ok := (*node)[b].(*map[string]interface{}); ok && child != nil {
+			node = child
+			continue
+		}
+		fresh := map[string]interface{}{"YN": "N"}
+		(*node)[b] = &fresh
+		node = &fresh
+	}
+	if len(word) > 0 {
+		(*node)["YN"] = "Y"
+	}
+	return node
+}
+// Clear drops every registered keyword. All three internal maps are released,
+// not only link, so the "and" keyword data is no longer kept alive after a
+// reset. AddWord recreates the maps on its next call, and Analy reports that
+// no words have been added while link is nil.
+func (d *DFA) Clear() {
+	d.link = nil
+	d.linkAnd = nil
+	d.linkAndWord = nil
+}
+
+// Analy scans src and returns the keywords it contains.
+//
+// For every byte offset it walks the plain-keyword trie and appends each
+// keyword that ends there (one entry per occurrence), then walks the trie of
+// "and" parts and marks which parts of which "+"-joined keywords were seen.
+// After the scan, every "and" keyword whose parts were all marked is appended;
+// those come out in map iteration order. When no words have been added it logs
+// a message and returns an empty slice.
+func (d *DFA) Analy(src string) []string {
+	if d.link == nil {
+		log.Println("请先添加词组")
+		return []string{}
+	}
+	found := []string{}
+	hits := make(map[string][]bool)
+	total := len(src)
+	for start := 0; start < total; start++ {
+		// Plain keywords: extend the match one byte at a time.
+		node := &d.link
+		for end := start + 1; end <= total; end++ {
+			next, _ := (*node)[src[end-1:end]].(*map[string]interface{})
+			if next == nil {
+				break
+			}
+			node = next
+			if yn, _ := (*node)["YN"].(string); yn == "Y" {
+				found = append(found, src[start:end])
+			}
+		}
+		// Parts of "and" keywords: record which part index was seen.
+		node = &d.linkAndWord
+		for pos := start; pos < total; pos++ {
+			next, _ := (*node)[src[pos:pos+1]].(*map[string]interface{})
+			if next == nil {
+				break
+			}
+			node = next
+			if yn, _ := (*node)["YN"].(string); yn != "Y" {
+				continue
+			}
+			owners := (*node)["key"].(map[string]int)
+			for full, slot := range owners {
+				marks := hits[full]
+				if marks == nil {
+					marks = make([]bool, d.linkAnd[full])
+					hits[full] = marks
+				}
+				marks[slot] = true
+			}
+		}
+	}
+	for full, marks := range hits {
+		complete := true
+		for idx := 0; idx < len(marks) && complete; idx++ {
+			complete = marks[idx]
+		}
+		if complete {
+			found = append(found, full)
+		}
+	}
+	return found
+}

+ 45 - 0
src/qfw/util/dfa/interestanalysis_test.go

@@ -0,0 +1,45 @@
+package dfa
+
+import (
+	"log"
+	"strings"
+	"testing"
+	"time"
+)
+
+var d = &DFA{} // matcher shared by the tests in this file
+
+// copyMap returns a freshly allocated map holding the same key/value pairs as
+// m; the result is never nil and shares no storage with m.
+func copyMap(m map[string]int) (m2 map[string]int) {
+	m2 = map[string]int{}
+	for key := range m {
+		m2[key] = m[key]
+	}
+	return m2
+}
+
+// TestAnaly registers one plain keyword and three "+"-joined keywords, runs
+// Analy over a sample sentence and checks the outcome: the plain keyword and
+// the two combinations whose parts all occur must be returned exactly once,
+// and the combination with a missing part must not be returned. The
+// comparison ignores order because Analy emits "+"-joined keywords in map
+// iteration order.
+func TestAnaly(t *testing.T) {
+	d.AddWord("办公", "办+楼", "河+省", "完+你们8")
+	// Deferred so the shared matcher is reset even when an assertion fails.
+	defer d.Clear()
+	// A trailing "+" makes strings.Split yield a trailing empty element.
+	if parts := strings.Split("河+南+", "+"); len(parts) != 3 || parts[2] != "" {
+		t.Fatalf("unexpected split result: %q", parts)
+	}
+	t1 := time.Now()
+	got := d.Analy("这胡省锦河涛写给江泽民的信我们你们于办公楼上你完就是啊。")
+	log.Println(got, "=====")
+	log.Println(time.Since(t1).Seconds())
+	want := map[string]int{"办公": 1, "办+楼": 1, "河+省": 1}
+	counts := make(map[string]int)
+	for _, kw := range got {
+		counts[kw]++
+	}
+	if len(counts) != len(want) {
+		t.Fatalf("Analy returned %q, want exactly the keywords %v", got, want)
+	}
+	for kw, n := range want {
+		if counts[kw] != n {
+			t.Errorf("keyword %q returned %d times, want %d", kw, counts[kw], n)
+		}
+	}
+}
+
+// Test_Label exercises a labeled break on a nested loop. It logs a start
+// marker, then for each number 1..5 logs the number followed by the letters
+// "a" to "d", and finally logs an end marker. The break condition compares
+// against "add", which is not among the letters, so the inner loop always
+// runs to completion.
+func Test_Label(t *testing.T) {
+	log.Println("000----")
+	letters := []string{"a", "b", "c", "d"}
+	for n := 1; n <= 5; n++ {
+		log.Println(n)
+	inner:
+		for i := 0; i < len(letters); i++ {
+			log.Println(letters[i])
+			if letters[i] == "add" {
+				break inner
+			}
+		}
+	}
+	log.Println("111----")
+}