hanlp.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. package util
  2. import (
  3. cm "app.yhyue.com/moapp/jybase/common"
  4. "encoding/base64"
  5. "encoding/json"
  6. "io/ioutil"
  7. "log"
  8. "net/http"
  9. "net/url"
  10. "regexp"
  11. "strings"
  12. )
  // Shared configuration for the HanLP word-segmentation helpers in this file.
  var (
  	// hanlpNormalKindOfWordsReg reports whether a (lower-cased) "nature" tag
  	// contains any of the letters n, v, m, q or a.
  	hanlpNormalKindOfWordsReg = regexp.MustCompile("[nvmqa]")

  	// hanlpRegTail matches a run of sentence punctuation at the very end of
  	// the input. Both the ASCII and the full-width (CJK) form of each mark
  	// are listed, so a trailing "?" and a trailing "？" are treated alike.
  	hanlpRegTail = regexp.MustCompile("[?？.。!！,，;；]+$")

  	// hanlpWordsFilter lists the words that HanlpGetNormalWords never returns.
  	hanlpWordsFilter = []string{"请问", "如何", "应该", "怎么办", "怎么", "需要", "想", "要", "是", "有", "有何", "可有"}

  	// hanlpWordsFilterMap is the set form of hanlpWordsFilter; it is filled
  	// by init.
  	hanlpWordsFilterMap = make(map[string]bool, len(hanlpWordsFilter))
  )

  // Disabled in the original source and kept as-is:
  //var Encrypt = &SimpleEncrypt{Key: "Smart20180502_!"}
  19. func init() {
  20. for _, val := range hanlpWordsFilter {
  21. hanlpWordsFilterMap[val] = true
  22. }
  23. }
  24. func getResult(str, url string) []map[string]interface{} {
  25. var (
  26. bs []byte
  27. err error
  28. resMap []map[string]interface{}
  29. )
  30. str = hanlpRegTail.ReplaceAllString(str, "")
  31. resp, _ := http.Post(url, "application/x-www-form-urlencoded", strings.NewReader("content="+str))
  32. bs, err = ioutil.ReadAll(resp.Body)
  33. if err != nil {
  34. log.Printf("获取hanlp分词结果出错:[%v]", err)
  35. return resMap
  36. }
  37. err = json.Unmarshal(bs, &resMap)
  38. if err != nil {
  39. log.Printf("获取hanlp分词json解码出错:[%v]", err)
  40. }
  41. return resMap
  42. }
  43. // HanlpGetNoun 获取名词分词词组
  44. func HanlpGetNoun(words, url string) []string {
  45. var (
  46. data = make([]string, 0)
  47. )
  48. baseResult := getResult(words, url)
  49. if len(baseResult) > 0 {
  50. for _, v := range baseResult {
  51. if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 2 {
  52. data = append(data, v["word"].(string))
  53. }
  54. }
  55. }
  56. return data
  57. }
  58. /*获取nvmqa5类分词词组*/
  59. func HanlpGetNormalWords(words, url string) []string {
  60. var (
  61. data = make([]string, 0)
  62. )
  63. baseResult := getResult(words, url)
  64. if len(baseResult) > 0 {
  65. for _, v := range baseResult {
  66. w, _ := v["word"].(string)
  67. if !hanlpWordsFilterMap[w] {
  68. n, _ := v["nature"].(string)
  69. n = strings.ToLower(n)
  70. if hanlpNormalKindOfWordsReg.MatchString(n) && len([]rune(w)) > 0 {
  71. data = append(data, w)
  72. }
  73. }
  74. }
  75. }
  76. return data
  77. }
  78. func ElasticSmartIK(words, urls string) (res string) {
  79. URL, _ := url.Parse(urls)
  80. Q := URL.Query()
  81. Q.Add("text", words)
  82. Q.Add("analyzer", "sik")
  83. URL.RawQuery = Q.Encode()
  84. resp, err := http.Get(URL.String())
  85. if err != nil {
  86. log.Println("ElasticSmartIK error:", err)
  87. } else {
  88. result, err := ioutil.ReadAll(resp.Body)
  89. if err == nil {
  90. defer resp.Body.Close()
  91. var resmap map[string]interface{}
  92. err = json.Unmarshal(result, &resmap)
  93. if err != nil {
  94. log.Println("ElasticSmartIK json解码 error:", err)
  95. }
  96. if resmap != nil {
  97. log.Println("ik分词结果:", resmap)
  98. tokens := cm.ObjArrToMapArr(resmap["tokens"].([]interface{}))
  99. for _, v := range tokens {
  100. res += v["token"].(string)
  101. }
  102. }
  103. }
  104. }
  105. return
  106. }
  // HttpDo is used by smartInfo to segment a question into keywords.
  //
  // ques is POSTed to url as the url-encoded form field "content". The
  // response is expected to be a JSON array of objects with string members
  // "nature" and "word". Every word whose nature contains the letter "n" and
  // whose length exceeds three bytes (len counts bytes, not characters) is
  // kept; the kept words are returned joined by single spaces.
  //
  // An empty string is returned when the request cannot be built or sent, the
  // body cannot be read, or the body is not valid JSON of that shape; each
  // failure is logged. Array entries whose "nature" or "word" is missing or
  // not a string are skipped.
  func HttpDo(ques string, url string) (result string) {
  	//req, err := http.NewRequest("POST", "http://39.106.145.77:8080/api/segment", strings.NewReader("content="+ques))
  	req, err := http.NewRequest("POST", url, strings.NewReader(segmentFormBody(ques)))
  	if err != nil {
  		log.Println("问题分词出错err1:", err)
  		return ""
  	}
  	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

  	// The shared default client is configured like the zero-value client
  	// used before, without allocating a new one per call.
  	resp, err := http.DefaultClient.Do(req)
  	if err != nil {
  		log.Println("问题分词出错err2:", err)
  		return ""
  	}
  	defer resp.Body.Close()

  	body, err := ioutil.ReadAll(resp.Body)
  	if err != nil {
  		log.Println("问题分词出错err3:", err)
  		return ""
  	}

  	var kwMap []map[string]interface{}
  	if err = json.Unmarshal(body, &kwMap); err != nil {
  		log.Println("推荐答案获取分词解码出错err4:", err)
  		return ""
  	}

  	keyWords := make([]string, 0, len(kwMap))
  	for _, v := range kwMap {
  		nature, ok := v["nature"].(string)
  		if !ok {
  			continue
  		}
  		word, ok := v["word"].(string)
  		if !ok {
  			continue
  		}
  		if strings.Contains(nature, "n") && len(word) > 3 {
  			keyWords = append(keyWords, word)
  		}
  	}
  	return strings.Join(keyWords, " ")
  }

  // segmentFormBody builds the form body "content=<escaped text>". It is a
  // separate function because HttpDo's url parameter shadows the net/url
  // package inside HttpDo.
  func segmentFormBody(content string) string {
  	return "content=" + url.QueryEscape(content)
  }
  // SimpleEncrypt obfuscates text by XOR-ing it with a repeating key and
  // base64-encoding the result. XOR with a repeating key is reversible by
  // applying it again, so the same routine serves both directions.
  //
  // NOTE(review): a repeating-key XOR is obfuscation, not strong encryption.
  type SimpleEncrypt struct {
  	Key string // key used for both encoding and decoding
  }

  // EncodeString XORs the bytes of str with the key and returns the result
  // encoded with standard (padded) base64.
  func (s *SimpleEncrypt) EncodeString(str string) string {
  	data := []byte(str)
  	s.doEncode(data)
  	return base64.StdEncoding.EncodeToString(data)
  }

  // DecodeString reverses EncodeString: it base64-decodes str and XORs the
  // bytes with the key. When str is not valid standard base64 the error is
  // logged and an empty string is returned, rather than the XOR of whatever
  // prefix happened to decode.
  func (s *SimpleEncrypt) DecodeString(str string) string {
  	data, err := base64.StdEncoding.DecodeString(str)
  	if err != nil {
  		log.Println("SimpleEncrypt.DecodeString: invalid base64 input:", err)
  		return ""
  	}
  	s.doEncode(data)
  	return string(data)
  }

  // doEncode XORs bs in place with the key, repeating the key as often as
  // needed. An empty key leaves bs unchanged; without that guard the index
  // would never advance and the loop would not terminate.
  func (s *SimpleEncrypt) doEncode(bs []byte) {
  	key := []byte(s.Key)
  	if len(key) == 0 {
  		return
  	}
  	for i := range bs {
  		bs[i] ^= key[i%len(key)]
  	}
  }