hanlp.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. package util
  2. import (
  3. cm "app.yhyue.com/moapp/jybase/common"
  4. "encoding/base64"
  5. "encoding/json"
  6. "errors"
  7. "io/ioutil"
  8. "log"
  9. "net/http"
  10. "net/url"
  11. "regexp"
  12. "strings"
  13. )
  14. /*hanlp分词工具类*/
  15. var hanlpNormalKindOfWordsReg = regexp.MustCompile("[nvmqa]")
  16. var hanlpRegTail = regexp.MustCompile("[??.。!!,,;;]+$")
  17. var hanlpWordsFilter = []string{"请问", "如何", "应该", "怎么办", "怎么", "需要", "想", "要", "是", "有", "有何", "可有"}
  18. var hanlpWordsFilterMap = make(map[string]bool, 0)
  19. //var Encrypt = &SimpleEncrypt{Key: "Smart20180502_!"}
  20. func init() {
  21. for _, val := range hanlpWordsFilter {
  22. hanlpWordsFilterMap[val] = true
  23. }
  24. }
  25. func getResult(str, url string) []map[string]interface{} {
  26. var (
  27. bs []byte
  28. err error
  29. resMap []map[string]interface{}
  30. )
  31. str = hanlpRegTail.ReplaceAllString(str, "")
  32. resp, _ := http.Post(url, "application/x-www-form-urlencoded", strings.NewReader("content="+str))
  33. bs, err = ioutil.ReadAll(resp.Body)
  34. if err != nil {
  35. log.Printf("获取hanlp分词结果出错:[%v]", err)
  36. return resMap
  37. }
  38. err = json.Unmarshal(bs, &resMap)
  39. if err != nil {
  40. log.Printf("获取hanlp分词json解码出错:[%v]", err)
  41. }
  42. return resMap
  43. }
  44. // HanlpGetNoun 获取名词分词词组
  45. func HanlpGetNoun(words, url string) []string {
  46. var (
  47. data = make([]string, 0)
  48. )
  49. baseResult := getResult(words, url)
  50. if len(baseResult) > 0 {
  51. for _, v := range baseResult {
  52. if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 2 {
  53. data = append(data, v["word"].(string))
  54. }
  55. }
  56. }
  57. return data
  58. }
  59. /*获取nvmqa5类分词词组*/
  60. func HanlpGetNormalWords(words, url string) []string {
  61. var (
  62. data = make([]string, 0)
  63. )
  64. baseResult := getResult(words, url)
  65. if len(baseResult) > 0 {
  66. for _, v := range baseResult {
  67. w, _ := v["word"].(string)
  68. if !hanlpWordsFilterMap[w] {
  69. n, _ := v["nature"].(string)
  70. n = strings.ToLower(n)
  71. if hanlpNormalKindOfWordsReg.MatchString(n) && len([]rune(w)) > 0 {
  72. data = append(data, w)
  73. }
  74. }
  75. }
  76. }
  77. return data
  78. }
  79. func ElasticSmartIK(words, urls string) (res string) {
  80. URL, _ := url.Parse(urls)
  81. Q := URL.Query()
  82. Q.Add("text", words)
  83. Q.Add("analyzer", "sik")
  84. URL.RawQuery = Q.Encode()
  85. resp, err := http.Get(URL.String())
  86. if err != nil {
  87. log.Println("ElasticSmartIK error:", err)
  88. } else {
  89. result, err := ioutil.ReadAll(resp.Body)
  90. if err == nil {
  91. defer resp.Body.Close()
  92. var resmap map[string]interface{}
  93. err = json.Unmarshal(result, &resmap)
  94. if err != nil {
  95. log.Println("ElasticSmartIK json解码 error:", err)
  96. }
  97. if resmap != nil {
  98. //log.Println("ik分词结果:", resmap)
  99. if value, ok := resmap["tokens"].([]interface{}); ok {
  100. tokens := cm.ObjArrToMapArr(value)
  101. for _, v := range tokens {
  102. res += v["token"].(string)
  103. }
  104. } else {
  105. log.Println("error:", errors.New("分词程序出错"))
  106. }
  107. }
  108. }
  109. }
  110. return
  111. }
  112. // HttpDo smartInfo中用到,对问题分词
  113. func HttpDo(ques string, url string) (result string) {
  114. var (
  115. kwMap []map[string]interface{}
  116. keyWords = ""
  117. )
  118. client := &http.Client{}
  119. //req, err := http.NewRequest("POST", "http://39.106.145.77:8080/api/segment", strings.NewReader("content="+ques))
  120. req, err := http.NewRequest("POST", url, strings.NewReader("content="+ques))
  121. if err != nil {
  122. log.Println("问题分词出错err1:", err)
  123. return ""
  124. }
  125. req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
  126. resp, err := client.Do(req)
  127. if err != nil {
  128. log.Println("问题分词出错err2:", err)
  129. return ""
  130. }
  131. defer resp.Body.Close()
  132. body, err := ioutil.ReadAll(resp.Body)
  133. if err != nil {
  134. log.Println("问题分词出错err3:", err)
  135. return ""
  136. }
  137. err = json.Unmarshal([]byte(body), &kwMap)
  138. if err != nil {
  139. log.Println("推荐答案获取分词解码出错err4:", err)
  140. return ""
  141. }
  142. if kwMap != nil && len(kwMap) > 0 {
  143. for k, v := range kwMap {
  144. if strings.Contains(v["nature"].(string), "n") && len(v["word"].(string)) > 3 {
  145. if k > 0 && len(keyWords) > 1 {
  146. keyWords += " "
  147. }
  148. keyWords += v["word"].(string)
  149. }
  150. }
  151. }
  152. return keyWords
  153. }
  154. type SimpleEncrypt struct {
  155. Key string //加解密用到的key(加密key索引)+
  156. }
  157. func (s *SimpleEncrypt) EncodeString(str string) string {
  158. data := []byte(str)
  159. s.doEncode(data)
  160. return base64.StdEncoding.EncodeToString(data)
  161. }
  162. // DecodeString 解密String
  163. func (s *SimpleEncrypt) DecodeString(str string) string {
  164. data, _ := base64.StdEncoding.DecodeString(str)
  165. s.doEncode(data)
  166. return string(data)
  167. }
  168. func (s *SimpleEncrypt) doEncode(bs []byte) {
  169. tmp := []byte(s.Key)
  170. THEFOR:
  171. for i := 0; i < len(bs); {
  172. for j := 0; j < len(tmp); j, i = j+1, i+1 {
  173. if i >= len(bs) {
  174. break THEFOR
  175. }
  176. bs[i] = bs[i] ^ tmp[j]
  177. }
  178. }
  179. }