123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192 |
- package util
- import (
- cm "app.yhyue.com/moapp/jybase/common"
- "encoding/base64"
- "encoding/json"
- "errors"
- "io/ioutil"
- "log"
- "net/http"
- "net/url"
- "regexp"
- "strings"
- )
// HanLP word-segmentation helpers.

var (
	// hanlpNormalKindOfWordsReg matches any part-of-speech tag that contains
	// at least one of the letters n, v, m, q or a.
	hanlpNormalKindOfWordsReg = regexp.MustCompile("[nvmqa]")

	// hanlpRegTail matches a run of sentence punctuation at the very end of a
	// string so it can be stripped before the text is sent for segmentation.
	// Each mark is listed in its ASCII and its full-width form; the full-width
	// forms are written as \u escapes so they cannot be silently folded back
	// to ASCII by tooling that normalizes the source text.
	hanlpRegTail = regexp.MustCompile("[?\uFF1F.\u3002!\uFF01,\uFF0C;\uFF1B]+$")

	// hanlpWordsFilter lists the words that HanlpGetNormalWords drops from
	// its result regardless of their part of speech.
	hanlpWordsFilter = []string{"请问", "如何", "应该", "怎么办", "怎么", "需要", "想", "要", "是", "有", "有何", "可有"}

	// hanlpWordsFilterMap is the lookup-set form of hanlpWordsFilter; it is
	// filled by init.
	hanlpWordsFilterMap = make(map[string]bool, len(hanlpWordsFilter))
)

//var Encrypt = &SimpleEncrypt{Key: "Smart20180502_!"}
- func init() {
- for _, val := range hanlpWordsFilter {
- hanlpWordsFilterMap[val] = true
- }
- }
- func getResult(str, url string) []map[string]interface{} {
- var (
- bs []byte
- err error
- resMap []map[string]interface{}
- )
- str = hanlpRegTail.ReplaceAllString(str, "")
- resp, _ := http.Post(url, "application/x-www-form-urlencoded", strings.NewReader("content="+str))
- bs, err = ioutil.ReadAll(resp.Body)
- if err != nil {
- log.Printf("获取hanlp分词结果出错:[%v]", err)
- return resMap
- }
- err = json.Unmarshal(bs, &resMap)
- if err != nil {
- log.Printf("获取hanlp分词json解码出错:[%v]", err)
- }
- return resMap
- }
- // HanlpGetNoun 获取名词分词词组
- func HanlpGetNoun(words, url string) []string {
- var (
- data = make([]string, 0)
- )
- baseResult := getResult(words, url)
- if len(baseResult) > 0 {
- for _, v := range baseResult {
- if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 2 {
- data = append(data, v["word"].(string))
- }
- }
- }
- return data
- }
- /*获取nvmqa5类分词词组*/
- func HanlpGetNormalWords(words, url string) []string {
- var (
- data = make([]string, 0)
- )
- baseResult := getResult(words, url)
- if len(baseResult) > 0 {
- for _, v := range baseResult {
- w, _ := v["word"].(string)
- if !hanlpWordsFilterMap[w] {
- n, _ := v["nature"].(string)
- n = strings.ToLower(n)
- if hanlpNormalKindOfWordsReg.MatchString(n) && len([]rune(w)) > 0 {
- data = append(data, w)
- }
- }
- }
- }
- return data
- }
- func ElasticSmartIK(words, urls string) (res string) {
- URL, _ := url.Parse(urls)
- Q := URL.Query()
- Q.Add("text", words)
- Q.Add("analyzer", "sik")
- URL.RawQuery = Q.Encode()
- resp, err := http.Get(URL.String())
- if err != nil {
- log.Println("ElasticSmartIK error:", err)
- } else {
- result, err := ioutil.ReadAll(resp.Body)
- if err == nil {
- defer resp.Body.Close()
- var resmap map[string]interface{}
- err = json.Unmarshal(result, &resmap)
- if err != nil {
- log.Println("ElasticSmartIK json解码 error:", err)
- }
- if resmap != nil {
- //log.Println("ik分词结果:", resmap)
- if value, ok := resmap["tokens"].([]interface{}); ok {
- tokens := cm.ObjArrToMapArr(value)
- for _, v := range tokens {
- res += v["token"].(string)
- }
- } else {
- log.Println("error:", errors.New("分词程序出错"))
- }
- }
- }
- }
- return
- }
// segmentFormBody builds the application/x-www-form-urlencoded body that
// carries content in the "content" field, escaping characters such as '&',
// '+' and '%'. It is a separate function because HttpDo's url parameter
// shadows the net/url package.
func segmentFormBody(content string) string {
	return url.Values{"content": {content}}.Encode()
}

// HttpDo segments the question ques (used by smartInfo). It POSTs ques as the
// "content" form field to url, decodes the JSON array in the response and
// returns, joined by single spaces, the "word" of every token whose "nature"
// contains the letter "n" and whose word is longer than three bytes.
//
// Any request, read or decoding failure is logged and yields an empty string.
// Tokens whose "nature" or "word" is missing or is not a string are skipped
// instead of causing a panic.
func HttpDo(ques string, url string) (result string) {
	//req, err := http.NewRequest("POST", "http://39.106.145.77:8080/api/segment", strings.NewReader("content="+ques))
	req, err := http.NewRequest("POST", url, strings.NewReader(segmentFormBody(ques)))
	if err != nil {
		log.Println("问题分词出错err1:", err)
		return ""
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Println("问题分词出错err2:", err)
		return ""
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Println("问题分词出错err3:", err)
		return ""
	}

	var kwMap []map[string]interface{}
	if err = json.Unmarshal(body, &kwMap); err != nil {
		log.Println("推荐答案获取分词解码出错err4:", err)
		return ""
	}

	keyWords := make([]string, 0, len(kwMap))
	for _, v := range kwMap {
		nature, _ := v["nature"].(string)
		word, _ := v["word"].(string)
		// len counts bytes, not characters.
		if strings.Contains(nature, "n") && len(word) > 3 {
			keyWords = append(keyWords, word)
		}
	}
	return strings.Join(keyWords, " ")
}
// SimpleEncrypt obfuscates strings by XOR-ing their bytes with a repeating
// key and base64-encoding the result. XOR is its own inverse, so the same key
// both encodes and decodes.
type SimpleEncrypt struct {
	Key string // key used for both encryption and decryption (encryption key index)
}

// EncodeString XORs str with the key and returns the standard base64 encoding
// of the result.
func (s *SimpleEncrypt) EncodeString(str string) string {
	data := []byte(str)
	s.doEncode(data)
	return base64.StdEncoding.EncodeToString(data)
}

// DecodeString reverses EncodeString: it base64-decodes str and XORs the bytes
// with the key. It returns an empty string when str is not valid standard
// base64, rather than a string built from a partially decoded prefix.
func (s *SimpleEncrypt) DecodeString(str string) string {
	data, err := base64.StdEncoding.DecodeString(str)
	if err != nil {
		return ""
	}
	s.doEncode(data)
	return string(data)
}

// doEncode XORs bs in place with the key, repeating the key as often as
// needed. With an empty key bs is left untouched; without that guard there
// would be no key byte to consume and the loop could never advance.
func (s *SimpleEncrypt) doEncode(bs []byte) {
	key := []byte(s.Key)
	if len(key) == 0 {
		return
	}
	for i := range bs {
		bs[i] ^= key[i%len(key)]
	}
}
|