hanlp.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. package util
  2. import (
  3. cm "app.yhyue.com/moapp/jybase/common"
  4. "encoding/base64"
  5. "encoding/json"
  6. "io/ioutil"
  7. . "knowledgeBase/rpc/knowledge/init"
  8. "log"
  9. "net/http"
  10. "net/url"
  11. "regexp"
  12. "strings"
  13. )
  14. /*hanlp分词工具类*/
  15. var hanlpNormalKindOfWordsReg = regexp.MustCompile("[nvmqa]")
  16. var hanlpRegTail = regexp.MustCompile("[??.。!!,,;;]+$")
  17. var hanlpWordsFilter = []string{"请问", "如何", "应该", "怎么办", "怎么", "需要", "想", "要", "是", "有", "有何", "可有"}
  18. var hanlpWordsFilterMap = make(map[string]bool, 0)
  19. //var Encrypt = &SimpleEncrypt{Key: "Smart20180502_!"}
  20. func init() {
  21. for _, val := range hanlpWordsFilter {
  22. hanlpWordsFilterMap[val] = true
  23. }
  24. }
  25. func getResult(str, url string) []map[string]interface{} {
  26. var (
  27. bs []byte
  28. err error
  29. resMap []map[string]interface{}
  30. )
  31. str = hanlpRegTail.ReplaceAllString(str, "")
  32. resp, _ := http.Post(url, "application/x-www-form-urlencoded", strings.NewReader("content="+str))
  33. bs, err = ioutil.ReadAll(resp.Body)
  34. if err != nil {
  35. log.Printf("获取hanlp分词结果出错:[%v]", err)
  36. return resMap
  37. }
  38. err = json.Unmarshal(bs, &resMap)
  39. if err != nil {
  40. log.Printf("获取hanlp分词json解码出错:[%v]", err)
  41. }
  42. return resMap
  43. }
  44. // HanlpGetNoun 获取名词分词词组
  45. func HanlpGetNoun(words, url string) []string {
  46. var (
  47. data = make([]string, 0)
  48. )
  49. baseResult := getResult(words, url)
  50. if len(baseResult) > 0 {
  51. for _, v := range baseResult {
  52. if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 2 {
  53. data = append(data, v["word"].(string))
  54. }
  55. }
  56. }
  57. return data
  58. }
  59. /*获取nvmqa5类分词词组*/
  60. func HanlpGetNormalWords(words, url string) []string {
  61. var (
  62. data = make([]string, 0)
  63. )
  64. baseResult := getResult(words, url)
  65. if len(baseResult) > 0 {
  66. for _, v := range baseResult {
  67. w, _ := v["word"].(string)
  68. if !hanlpWordsFilterMap[w] {
  69. n, _ := v["nature"].(string)
  70. n = strings.ToLower(n)
  71. if hanlpNormalKindOfWordsReg.MatchString(n) && len([]rune(w)) > 0 {
  72. data = append(data, w)
  73. }
  74. }
  75. }
  76. }
  77. return data
  78. }
  79. func ElasticSmartIK(words, urls string) (res string) {
  80. URL, _ := url.Parse(urls)
  81. Q := URL.Query()
  82. Q.Add("text", words)
  83. Q.Add("analyzer", "sik")
  84. URL.RawQuery = Q.Encode()
  85. resp, err := http.Get(URL.String())
  86. if err != nil {
  87. log.Println("ElasticSmartIK error:", err)
  88. } else {
  89. result, err := ioutil.ReadAll(resp.Body)
  90. if err == nil {
  91. defer resp.Body.Close()
  92. var resmap map[string]interface{}
  93. err = json.Unmarshal(result, &resmap)
  94. if err != nil {
  95. log.Println("ElasticSmartIK json解码 error:", err)
  96. }
  97. if resmap != nil {
  98. tokens := cm.ObjArrToMapArr(resmap["tokens"].([]interface{}))
  99. for _, v := range tokens {
  100. res += v["token"].(string)
  101. }
  102. }
  103. }
  104. }
  105. return
  106. }
  107. // HttpDo smartInfo中用到,对问题分词
  108. func HttpDo(ques string) (result string) {
  109. client := &http.Client{}
  110. //req, err := http.NewRequest("POST", "http://39.106.145.77:8080/api/segment", strings.NewReader("content="+ques))
  111. req, err := http.NewRequest("POST", C.Segment, strings.NewReader("content="+ques))
  112. if err != nil {
  113. log.Println("err1:")
  114. }
  115. req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
  116. resp, err := client.Do(req)
  117. defer resp.Body.Close()
  118. body, err := ioutil.ReadAll(resp.Body)
  119. if err != nil {
  120. log.Println("err2:")
  121. }
  122. return string(body)
  123. }
  124. type SimpleEncrypt struct {
  125. Key string //加解密用到的key(加密key索引)+
  126. }
  127. func (s *SimpleEncrypt) EncodeString(str string) string {
  128. data := []byte(str)
  129. s.doEncode(data)
  130. return base64.StdEncoding.EncodeToString(data)
  131. }
  132. // DecodeString 解密String
  133. func (s *SimpleEncrypt) DecodeString(str string) string {
  134. data, _ := base64.StdEncoding.DecodeString(str)
  135. s.doEncode(data)
  136. return string(data)
  137. }
  138. func (s *SimpleEncrypt) doEncode(bs []byte) {
  139. tmp := []byte(s.Key)
  140. THEFOR:
  141. for i := 0; i < len(bs); {
  142. for j := 0; j < len(tmp); j, i = j+1, i+1 {
  143. if i >= len(bs) {
  144. break THEFOR
  145. }
  146. bs[i] = bs[i] ^ tmp[j]
  147. }
  148. }
  149. }