// Package megaloscope loads sensitive-word rules from a text file and builds
// matchers over the rule words and over their pinyin transliterations.
package megaloscope

import (
	"bufio"
	"os"
	"strings"

	"github.com/Chain-Zhang/pinyin"
)

type (
	// WordSlice is a list of words.
	WordSlice []string

	// Rule is one parsed line of the rule file.
	Rule struct {
		Raw            string    // rule text exactly as read from the file
		Words          WordSlice // words of the rule (the '+'-separated part)
		ExcludeWords   WordSlice // exclusion words (the '|'-separated part after '^'); nil when the rule has none
		WordsPY        WordSlice // pinyin of Words, index-aligned with Words
		ExcludeWordsPY WordSlice // pinyin of ExcludeWords, index-aligned; nil when there are no exclusion words
	}

	// Megaloscope is the sensitive-word detector state produced by loading a
	// rule file.
	Megaloscope struct {
		AllWords       WordSlice     // every distinct word (rule words and exclusion words) across all rules
		AllWordsPY     WordSlice     // pinyin of AllWords, index-aligned with AllWords
		AllRules       map[int]*Rule // parsed rules keyed by their 0-based load order
		WordsMatcher   *Matcher      // matcher built from AllWords
		WordsPYMatcher *Matcher      // matcher built from AllWordsPY
	}
)

// NewMegaloscope creates a detector and loads its rules from the file at
// filepath.
//
// The constructor has no error result, so a failure to load the rules cannot
// be reported to the caller. In that case the returned value has an empty
// AllRules map and nil matchers.
func NewMegaloscope(filepath string) *Megaloscope {
	m := &Megaloscope{AllRules: make(map[int]*Rule)}
	// Deliberately discarded: the signature callers rely on cannot carry the
	// error, and panicking here would turn a best-effort load into a crash.
	_ = m.loadRules(filepath)
	return m
}

// parseRule parses a single rule line.
//
// Rule syntax, with the Chinese sample words translated:
//
//	lottery+website^crack down|ban|eliminate
//	gambling^crack down|ban|eliminate|outlaw
//
// The text before the first '^' is split on '+' into Words. When a '^' is
// present, the text between the first and the second '^' (or the end of the
// line) is split on '|' into ExcludeWords; anything after a second '^' is
// ignored. The pinyin fields are left unset.
func (m *Megaloscope) parseRule(line string) *Rule {
	r := &Rule{Raw: line}
	// Without a '^' the split yields the whole line as its only element, so
	// parts[0] is always the '+'-separated word list.
	parts := strings.Split(line, "^")
	r.Words = strings.Split(parts[0], "+")
	if len(parts) > 1 {
		r.ExcludeWords = strings.Split(parts[1], "|")
	}
	return r
}

// loadRules reads the rule file at filepath, one rule per line, and fills in
// AllRules, AllWords, AllWordsPY and both matchers.
//
// Lines that are empty or contain only whitespace are skipped. Rules are keyed
// 0, 1, 2, ... in the order they appear. AllWords lists each distinct word
// once, in first-seen order.
//
// It returns an error when the file cannot be opened or read, or when a line
// exceeds the maximum line length. On error the receiver is left unmodified.
func (m *Megaloscope) loadRules(filepath string) error {
	// Upper bound on a single rule line. The scanner reports an error for a
	// longer line instead of splitting it into several bogus rules.
	const maxRuleLineBytes = 1 << 20

	fi, err := os.Open(filepath)
	if err != nil {
		return err
	}
	defer fi.Close()

	scanner := bufio.NewScanner(fi)
	scanner.Buffer(make([]byte, 0, 64*1024), maxRuleLineBytes)

	rules := make(map[int]*Rule)
	seen := make(map[string]struct{})
	var words WordSlice
	// collect appends the words that have not been seen yet, preserving order.
	collect := func(ws WordSlice) {
		for _, w := range ws {
			if _, dup := seen[w]; dup {
				continue
			}
			seen[w] = struct{}{}
			words = append(words, w)
		}
	}

	for scanner.Scan() {
		line := scanner.Text()
		if strings.TrimSpace(line) == "" {
			// A blank line would otherwise become a rule whose only word is "".
			continue
		}
		rule := m.parseRule(line)
		// len(rules) keeps the keys dense (0..n-1) even when lines are skipped.
		rules[len(rules)] = rule
		collect(rule.Words)
		collect(rule.ExcludeWords)
	}
	if err := scanner.Err(); err != nil {
		return err
	}

	// Compute the pinyin of every distinct word once.
	pyByWord := make(map[string]string, len(words))
	wordsPY := make(WordSlice, len(words))
	for i, w := range words {
		// The conversion error is discarded, as it always has been; the word's
		// pinyin is then whatever Convert returned alongside the error.
		// NOTE(review): presumably the empty string — confirm against the
		// pinyin package.
		py, _ := pinyin.New(w).Split("").Mode(pinyin.InitialsInCapitals).Convert()
		pyByWord[w] = py
		wordsPY[i] = py
	}

	// toPY maps each word of ws to its pinyin, index-aligned with ws.
	toPY := func(ws WordSlice) WordSlice {
		out := make(WordSlice, len(ws))
		for i, w := range ws {
			out[i] = pyByWord[w]
		}
		return out
	}

	// Fill in the pinyin fields of every rule.
	for _, r := range rules {
		r.WordsPY = toPY(r.Words)
		if len(r.ExcludeWords) > 0 {
			r.ExcludeWordsPY = toPY(r.ExcludeWords)
		}
	}

	// Publish the results only after everything has succeeded.
	m.AllRules = rules
	m.AllWords = words
	m.AllWordsPY = wordsPY
	m.WordsPYMatcher = BuildNewMatcher(m.AllWordsPY)
	m.WordsMatcher = BuildNewMatcher(m.AllWords)
	return nil
}