package util import ( cm "app.yhyue.com/moapp/jybase/common" "encoding/base64" "encoding/json" "io/ioutil" "log" "net/http" "net/url" "regexp" "strings" ) /*hanlp分词工具类*/ var hanlpNormalKindOfWordsReg = regexp.MustCompile("[nvmqa]") var hanlpRegTail = regexp.MustCompile("[??.。!!,,;;]+$") var hanlpWordsFilter = []string{"请问", "如何", "应该", "怎么办", "怎么", "需要", "想", "要", "是", "有", "有何", "可有"} var hanlpWordsFilterMap = make(map[string]bool, 0) //var Encrypt = &SimpleEncrypt{Key: "Smart20180502_!"} func init() { for _, val := range hanlpWordsFilter { hanlpWordsFilterMap[val] = true } } func getResult(str, url string) []map[string]interface{} { var ( bs []byte err error resMap []map[string]interface{} ) str = hanlpRegTail.ReplaceAllString(str, "") resp, _ := http.Post(url, "application/x-www-form-urlencoded", strings.NewReader("content="+str)) bs, err = ioutil.ReadAll(resp.Body) if err != nil { log.Printf("获取hanlp分词结果出错:[%v]", err) return resMap } err = json.Unmarshal(bs, &resMap) if err != nil { log.Printf("获取hanlp分词json解码出错:[%v]", err) } return resMap } // HanlpGetNoun 获取名词分词词组 func HanlpGetNoun(words, url string) []string { var ( data = make([]string, 0) ) baseResult := getResult(words, url) if len(baseResult) > 0 { for _, v := range baseResult { if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 2 { data = append(data, v["word"].(string)) } } } return data } /*获取nvmqa5类分词词组*/ func HanlpGetNormalWords(words, url string) []string { var ( data = make([]string, 0) ) baseResult := getResult(words, url) if len(baseResult) > 0 { for _, v := range baseResult { w, _ := v["word"].(string) if !hanlpWordsFilterMap[w] { n, _ := v["nature"].(string) n = strings.ToLower(n) if hanlpNormalKindOfWordsReg.MatchString(n) && len([]rune(w)) > 0 { data = append(data, w) } } } } return data } func ElasticSmartIK(words, urls string) (res string) { URL, _ := url.Parse(urls) Q := URL.Query() Q.Add("text", words) Q.Add("analyzer", "sik") URL.RawQuery = Q.Encode() resp, err := http.Get(URL.String()) if err != nil { log.Println("ElasticSmartIK error:", err) } else { result, err := ioutil.ReadAll(resp.Body) if err == nil { defer resp.Body.Close() var resmap map[string]interface{} err = json.Unmarshal(result, &resmap) if err != nil { log.Println("ElasticSmartIK json解码 error:", err) } if resmap != nil { log.Println("ik分词结果:", resmap) tokens := cm.ObjArrToMapArr(resmap["tokens"].([]interface{})) for _, v := range tokens { res += v["token"].(string) } } } } return } // HttpDo smartInfo中用到,对问题分词 func HttpDo(ques string, url string) (result string) { var ( kwMap []map[string]interface{} keyWords = "" ) client := &http.Client{} //req, err := http.NewRequest("POST", "http://39.106.145.77:8080/api/segment", strings.NewReader("content="+ques)) req, err := http.NewRequest("POST", url, strings.NewReader("content="+ques)) if err != nil { log.Println("问题分词出错err1:", err) return "" } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") resp, err := client.Do(req) if err != nil { log.Println("问题分词出错err2:", err) return "" } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { log.Println("问题分词出错err3:", err) return "" } err = json.Unmarshal([]byte(body), &kwMap) if err != nil { log.Println("推荐答案获取分词解码出错err4:", err) return "" } if kwMap != nil && len(kwMap) > 0 { for k, v := range kwMap { if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 3 { if k > 0 && len(keyWords) > 1 { keyWords += " " } keyWords += v["word"].(string) } } } return keyWords } type SimpleEncrypt struct { Key string //加解密用到的key(加密key索引)+ } func (s *SimpleEncrypt) EncodeString(str string) string { data := []byte(str) s.doEncode(data) return base64.StdEncoding.EncodeToString(data) } // DecodeString 解密String func (s *SimpleEncrypt) DecodeString(str string) string { data, _ := base64.StdEncoding.DecodeString(str) s.doEncode(data) return string(data) } func (s *SimpleEncrypt) doEncode(bs []byte) { tmp := []byte(s.Key) THEFOR: for i := 0; i < len(bs); { for j := 0; j < len(tmp); j, i = j+1, i+1 { if i >= len(bs) { break THEFOR } bs[i] = bs[i] ^ tmp[j] } } }