123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
/**
 * Interest analysis.
 */
- package util
- import (
- "log"
- //"strings"
- )
// DFA is a keyword matcher built on nested-map tries in which every edge is
// labelled with a single byte of a keyword.
type DFA struct {
	// link is the trie holding plain ("or") keywords.
	link map[string]interface{}

	// linkAnd maps each composite ("and") keyword to its number of parts.
	linkAnd map[string]int

	// linkAndWord is the trie holding the individual parts that composite
	// ("and") keywords are split into.
	linkAndWord map[string]interface{}
}
- //添加词组,用于初始化,该方法是可以调用多次的
- func (d *DFA) AddWord(words ...string) {
- if d.link == nil {
- d.link = make(map[string]interface{})
- d.linkAnd = make(map[string]int)
- d.linkAndWord = make(map[string]interface{})
- }
- var nowMap *map[string]interface{}
- for _, key := range words {
- //keys := strings.Split(key, "+")
- keys := []string{key}
- lenkeys := len(keys)
- if lenkeys > 1 {
- d.linkAnd[key] = lenkeys
- for k := 0; k < lenkeys; k++ {
- minKey := keys[k]
- nowMap = &d.linkAndWord
- for i := 0; i < len(minKey); i++ {
- kc := minKey[i : i+1]
- if v, ok := (*nowMap)[kc]; ok {
- nowMap, _ = v.(*map[string]interface{})
- } else {
- newMap := map[string]interface{}{}
- newMap["YN"] = "N"
- (*nowMap)[kc] = &newMap
- nowMap = &newMap
- }
- if i == len(minKey)-1 {
- (*nowMap)["YN"] = "Y"
- if (*nowMap)["key"] == nil {
- (*nowMap)["key"] = make(map[string]int)
- }
- (*nowMap)["key"].(map[string]int)[key] = k
- }
- }
- }
- } else {
- nowMap = &d.link
- for i := 0; i < len(key); i++ {
- kc := key[i : i+1]
- if v, ok := (*nowMap)[kc]; ok {
- nowMap, _ = v.(*map[string]interface{})
- } else {
- newMap := map[string]interface{}{}
- newMap["YN"] = "N"
- (*nowMap)[kc] = &newMap
- nowMap = &newMap
- }
- if i == len(key)-1 {
- (*nowMap)["YN"] = "Y"
- }
- }
- }
- }
- }
- func (d *DFA) Clear() {
- d.link = nil
- }
- //从给定的内容中找出匹配上的关键词
- func (d *DFA) Analy(src string) []string {
- if d.link == nil {
- log.Println("请先添加词组")
- return []string{}
- }
- keywords := []string{}
- tempMap := make(map[string][]bool)
- for i := 0; i < len(src); i++ {
- nowMap := &d.link
- length := 0 // 匹配标识数默认为0
- //flag := false // 敏感词结束标识位:用于敏感词只有1位的情况
- for j := i; j < len(src); j++ {
- word := src[j : j+1]
- nowMap, _ = (*nowMap)[word].(*map[string]interface{})
- if nowMap != nil {
- length = length + 1
- tag, _ := (*nowMap)["YN"].(string)
- if "Y" == tag {
- //flag = true
- keywords = append(keywords, src[i:i+length])
- }
- } else {
- break
- }
- }
- nowMap = &d.linkAndWord
- length = 0
- for j := i; j < len(src); j++ {
- word := src[j : j+1]
- nowMap, _ = (*nowMap)[word].(*map[string]interface{})
- if nowMap != nil {
- length = length + 1
- tag, _ := (*nowMap)["YN"].(string)
- if "Y" == tag {
- mkeys := (*nowMap)["key"].(map[string]int)
- for k, v := range mkeys {
- tempBool := tempMap[k]
- if tempBool == nil {
- tempBool = make([]bool, d.linkAnd[k])
- tempMap[k] = tempBool
- }
- tempBool[v] = true
- }
- }
- } else {
- break
- }
- }
- }
- for k, v := range tempMap {
- ball := true
- for _, m := range v {
- if !m {
- ball = false
- break
- }
- }
- if ball {
- keywords = append(keywords, k)
- }
- }
- return keywords
- }
|