123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- package main
- import (
- "encoding/json"
- "log"
- "os"
- "sensitiveWords.udp/util"
- "strings"
- "unicode/utf8"
- )
- func dealWithNameScoreRules(name string) (string,bool) {
- new_name,new_score,isok :="",float64(0),false
- old_name := escape(name)
- if old_name=="" {
- return "",false
- }
- query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_2","query":"`+old_name+`"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
- tmp := make(map[string]interface{})
- json.Unmarshal([]byte(query),&tmp)
- searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
- if err != nil {
- log.Println("从ES查询出错",name,old_name)
- return "",false
- }
- resNum := len(searchResult.Hits.Hits)
- res := make([]map[string]interface{}, resNum)
- if searchResult.Hits != nil {
- if resNum < 5000 {
- for i, hit := range searchResult.Hits.Hits {
- data := make(map[string]interface{},0)
- json.Unmarshal(*hit.Source, &data)
- res[i] = map[string]interface{}{
- "name":data["name"],
- "score":*hit.Score,
- }
- }
- } else {
- log.Println("查询结果太多,查询到:", resNum, "条")
- }
- }
- if len(res)>0 && res != nil {
- new_name = util.ObjToString(res[0]["name"])
- new_score = util.Float64All(res[0]["score"])
- }
- if new_name!="" { //分析hit比例
- total,hit := dealWithWordsRules(name,new_name)
- proportion := float64(hit)/float64(total)
- if proportion >=1.0 {
- isok = true
- }else {
- if float64(hit)/float64(total)>=0.8 && new_score> 4.0{
- isok = true
- }
- }
- }
- return new_name,isok
- }
- //击中数量以及比例
- func dealWithWordsRules(info_name string ,source_name string) (int,int){
- total,hit :=0,0
- //字符串处理,替换指定字符
- info_name = strings.ReplaceAll(info_name,"(","")
- info_name = strings.ReplaceAll(info_name,")","")
- info_name = strings.ReplaceAll(info_name,"(","")
- info_name = strings.ReplaceAll(info_name,")","")
- source_name = strings.ReplaceAll(source_name,"(","")
- source_name = strings.ReplaceAll(source_name,")","")
- source_name = strings.ReplaceAll(source_name,"(","")
- source_name = strings.ReplaceAll(source_name,")","")
- nameArr,_ := calculateWordCount(info_name)
- _,total = calculateWordCount(source_name)
- for _,v1 := range nameArr {
- if strings.Contains(source_name,v1) {
- hit++
- }
- }
- return total,hit
- }
// calculateWordCount produces the word-segmentation result for name: every
// overlapping window of two runes, in order ("abcd" -> "ab", "bc", "cd"),
// together with the number of windows.
//
// A name shorter than two runes yields an empty, non-nil slice and 0.
func calculateWordCount(name string) ([]string, int) {
	const window = 2

	count := utf8.RuneCountInString(name) - window + 1
	if count <= 0 {
		return []string{}, 0
	}

	runes := []rune(name)
	words := make([]string, 0, count)
	for start := range runes[:count] {
		words = append(words, string(runes[start:start+window]))
	}
	return words, len(words)
}
// escape removes every space from s and puts a two-rune prefix in front of
// each character from a fixed list of query-syntax and punctuation
// characters. All other characters are copied through unchanged.
//
// The prefix is os.PathSeparator followed by a backslash.
//
// NOTE(review): because of os.PathSeparator the prefix is `\\` only where the
// path separator is a backslash; elsewhere it is `/\`. That looks unintended
// for query escaping — confirm before relying on it on non-Windows hosts.
func escape(s string) string {
	prefix := string([]rune{os.PathSeparator, '\\'})

	var out strings.Builder
	for _, r := range strings.ReplaceAll(s, " ", "") {
		switch r {
		case '\\', '+', '-', '!', '(', ')', ':', '^', '[', ']', '{', '}',
			'~', '*', '?', '|', '&', '/', '#', '@', '>', '<',
			'“', '”', '、', '.':
			out.WriteString(prefix)
		}
		out.WriteRune(r)
	}
	return out.String()
}
|