elasticsearch_dsl.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. package util
  2. import (
  3. "fmt"
  4. . "knowledgeBase/rpc/knowledge/init"
  5. "log"
  6. "strconv"
  7. "strings"
  8. )
var (
	// Analyze is handed to ElasticSmartIK as its second argument by the
	// DSL builders in this file; presumably the URL of the Elasticsearch
	// _analyze endpoint — TODO confirm against the code that sets it.
	Analyze string
	// Segment is not read anywhere in the visible part of this file
	// (DSL4SmartResponse uses C.Segment instead).
	// NOTE(review): presumably the HanLP segmentation endpoint — verify
	// whether this variable is still needed.
	Segment string
)
/* Builders for the several kinds of Elasticsearch query DSL strings used in this project. */
  14. // DSL4SearchByKwsAndTags 管理后台列表查询
  15. func DSL4SearchByKwsAndTags(kws string, tags ...string) string {
  16. var (
  17. sql = `{"query": {"bool": {"must": [%s,%s]}}}`
  18. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"%s","minimum_should_match":"%s"}}}`
  19. tagsTerms = `{"terms":{"tags.code":[%s]}}`
  20. tag = `"%s"`
  21. )
  22. if kws != "" {
  23. }
  24. queryMatch = fmt.Sprintf(queryMatch, kws, "20%")
  25. tmp := ""
  26. for i, val := range tags {
  27. if i < len(tags)-1 {
  28. tmp += fmt.Sprintf(tag, val) + ","
  29. } else {
  30. tmp += fmt.Sprintf(tag, val)
  31. }
  32. }
  33. tagsTerms = fmt.Sprintf(tagsTerms, tmp)
  34. sql = fmt.Sprintf(sql, queryMatch, tagsTerms)
  35. return sql
  36. }
  37. func DSL4SmartResponse(question string, tenantId int64, msgType int) string {
  38. var (
  39. totalQuery = `{"post_filter":{%s},"query":{%s},"_source":["_id"%s],"size":%d}`
  40. postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
  41. query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"tenantId":"%s"}}]}`
  42. )
  43. var typeStr string
  44. /*1.首先将问题使用hanlp分词*/
  45. //hanlpCutWords := HanlpGetNormalWords(question, "http://39.106.145.77:8080/api/segment")
  46. hanlpCutWords := HanlpGetNormalWords(question, C.Segment)
  47. question = strings.Join(hanlpCutWords, "")
  48. lenQuestion := len([]rune(question))
  49. if lenQuestion >= 2 {
  50. queryPercent := "40%"
  51. if lenQuestion < 5 {
  52. queryPercent = "85%"
  53. } else if lenQuestion < 9 {
  54. queryPercent = "60%"
  55. } else if lenQuestion < 12 {
  56. queryPercent = "55%"
  57. }
  58. if msgType == 1 {
  59. typeStr = "knowledgeKeyWords"
  60. } else if msgType == 2 { //百度语音过来的
  61. typeStr = "knowledgeKeyWords.key_pinyin"
  62. }
  63. /*2使用sik分词将问题分词以获取更多查询词语*/
  64. //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
  65. mustque := ElasticSmartIK(question, Analyze)
  66. if mustque != "" {
  67. postFilter = fmt.Sprintf(postFilter, mustque)
  68. }
  69. query = fmt.Sprintf(query, "", typeStr, question, queryPercent, strconv.Itoa(int(tenantId)))
  70. queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `,"answer","questions","_id"`, 1)
  71. //log.Println("queryDSL:", queryDSL)
  72. return queryDSL
  73. }
  74. return ""
  75. }
  76. func DSL4SmartResponseList(question, msgType string, repositoryId []string, must_not string, size int, tags ...string) string {
  77. queryPercent := "20%"
  78. var (
  79. totalQuery = `{"post_filter":{%s},"query":{%s},"_source":["_id"%s],"size":%d}`
  80. postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
  81. query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"repositoryId":%s}},{"terms":{"tags.code":[%s]}}]}`
  82. )
  83. /*2使用sik分词将问题分词以获取更多查询词语*/
  84. //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
  85. mustque := ElasticSmartIK(question, Analyze)
  86. if mustque != "" {
  87. postFilter = fmt.Sprintf(postFilter, mustque)
  88. }
  89. tmp := ""
  90. for i, val := range tags {
  91. if i < len(tags)-1 {
  92. tmp += fmt.Sprintf(`"%s"`, val) + ","
  93. } else {
  94. tmp += fmt.Sprintf(`"%s"`, val)
  95. }
  96. }
  97. query = fmt.Sprintf(query, must_not, msgType, question, queryPercent, repositoryId, tmp)
  98. queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `,"answer","questions.question"`, size)
  99. log.Println("queryDSL:", queryDSL)
  100. return queryDSL
  101. }
  102. func DSL4SearchByKwsOrid(keyWords string, tenantId string) string {
  103. var (
  104. sql = `{"query": {"bool": {"must": [%s%s]}}}`
  105. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"%s","minimum_should_match":"%s"}}}`
  106. //idTerms = `,{"terms":{"smart.id":[%s]}}`
  107. ridTerms = `,{"terms":{"tenantId":%s}}`
  108. //id = `"%s"`
  109. )
  110. queryMatch = fmt.Sprintf(queryMatch, keyWords, "20%")
  111. //fmt.Println("queryMatch:", queryMatch)
  112. //tmp := ""
  113. ridTerms = fmt.Sprintf(ridTerms, tenantId)
  114. sql = fmt.Sprintf(sql, queryMatch, ridTerms)
  115. log.Println("sql", sql)
  116. return sql
  117. }
  118. func GetQueryOT(tags, question, keywords, repositoryId string) (qstr string) {
  119. var query = `{"query":{"bool":{"must":[%s%s%s%s]}}}`
  120. queryMatch := ``
  121. queryTerms := ``
  122. queryId := ``
  123. queryQues := ``
  124. if keywords != "" {
  125. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
  126. //query_match = `{"match":{"questions.question":{"query":"` + keywords + `","minimum_should_match":"20%"}}},`
  127. }
  128. tags = strings.Replace(tags, ` `, `","`, -1)
  129. if tags != "" {
  130. queryTerms = `{"terms":{"tags.code":["` + tags + `"]}},`
  131. }
  132. if repositoryId != "" {
  133. queryId = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
  134. }
  135. if question != "" {
  136. queryQues = `,{"match": {"smart.questions.question": {"query": "` + question + `","fuzziness": "AUTO","operator": "and"}}}`
  137. }
  138. qstr = fmt.Sprintf(query, queryMatch, queryTerms, queryId, queryQues)
  139. return qstr
  140. }
  141. /*func GetFindQuery(keywords, tags, repositoryId string) string {
  142. var query = `{"query":{"bool":{"must":[%s%s%s]}}}`
  143. query_match := ``
  144. query_terms := ``
  145. query_id := ``
  146. if keywords != "" {
  147. query_match = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
  148. }
  149. tags = strings.Replace(tags, ` `, `","`, -1)
  150. if tags != "" {
  151. query_terms = `{"terms":{"tags.code":["` + tags + `"]}},`
  152. }
  153. if repositoryId != "" {
  154. query_id = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
  155. }
  156. qstr := fmt.Sprintf(query, query_match, query_terms, query_id)
  157. return qstr
  158. }*/