123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- package megaloscope
- /**
- AC算法实现敏感词识别
- */
- import (
- "container/list"
- )
- type trieNode struct {
- count int
- fail *trieNode
- child map[rune]*trieNode
- index int
- }
- func newTrieNode() *trieNode {
- return &trieNode{
- count: 0,
- fail: nil,
- child: make(map[rune]*trieNode),
- index: -1,
- }
- }
- type Matcher struct {
- root *trieNode
- size int
- }
- type Term struct {
- Index int
- EndPosition int
- }
- func NewMatcher() *Matcher {
- return &Matcher{
- root: newTrieNode(),
- size: 0,
- }
- }
- func BuildNewMatcher(dictionary []string) *Matcher {
- m := &Matcher{
- root: newTrieNode(),
- size: 0,
- }
- m.Build(dictionary)
- return m
- }
- // initialize the ahocorasick
- func (m *Matcher) Build(dictionary []string) {
- for i := range dictionary {
- m.insert(dictionary[i])
- }
- m.build()
- }
- // string match search
- // return all strings matched as indexes into the original dictionary and their positions on matched string
- func (m *Matcher) Match(s string) []*Term {
- curNode := m.root
- var p *trieNode = nil
- mark := make([]bool, m.size)
- ret := make([]*Term, 0)
- for index, rune := range s {
- for curNode.child[rune] == nil && curNode != m.root {
- curNode = curNode.fail
- }
- curNode = curNode.child[rune]
- if curNode == nil {
- curNode = m.root
- }
- p = curNode
- for p != m.root && p.count > 0 && !mark[p.index] {
- mark[p.index] = true
- for i := 0; i < p.count; i++ {
- ret = append(ret, &Term{Index: p.index, EndPosition: index})
- }
- p = p.fail
- }
- }
- return ret
- }
- // just return the number of len(Match(s))
- func (m *Matcher) GetMatchResultSize(s string) int {
- curNode := m.root
- var p *trieNode = nil
- mark := make([]bool, m.size)
- num := 0
- for _, v := range s {
- for curNode.child[v] == nil && curNode != m.root {
- curNode = curNode.fail
- }
- curNode = curNode.child[v]
- if curNode == nil {
- curNode = m.root
- }
- p = curNode
- for p != m.root && p.count > 0 && !mark[p.index] {
- mark[p.index] = true
- num += p.count
- p = p.fail
- }
- }
- return num
- }
- func (m *Matcher) build() {
- ll := list.New()
- ll.PushBack(m.root)
- for ll.Len() > 0 {
- temp := ll.Remove(ll.Front()).(*trieNode)
- var p *trieNode = nil
- for i, v := range temp.child {
- if temp == m.root {
- v.fail = m.root
- } else {
- p = temp.fail
- for p != nil {
- if p.child[i] != nil {
- v.fail = p.child[i]
- break
- }
- p = p.fail
- }
- if p == nil {
- v.fail = m.root
- }
- }
- ll.PushBack(v)
- }
- }
- }
- func (m *Matcher) insert(s string) {
- curNode := m.root
- for _, v := range s {
- if curNode.child[v] == nil {
- curNode.child[v] = newTrieNode()
- }
- curNode = curNode.child[v]
- }
- curNode.count++
- curNode.index = m.size
- m.size++
- }
|