elasticsearch_dsl.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. package util
import (
	"encoding/json"
	"fmt"
	"log"
	"strconv"
	"strings"

	. "knowledgeBase/rpc/knowledge/init"
)
  9. var (
  10. Analyze string
  11. Segment string
  12. )
/* Helpers that build the several kinds of Elasticsearch query DSL strings used in this project. */
  14. // DSL4SearchByKwsAndTags 管理后台列表查询
  15. func DSL4SearchByKwsAndTags(kws string, tags ...string) string {
  16. var (
  17. sql = `{"query": {"bool": {"must": [%s,%s]}}}`
  18. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"%s","minimum_should_match":"%s"}}}`
  19. tagsTerms = `{"terms":{"tags.code":[%s]}}`
  20. tag = `"%s"`
  21. )
  22. if kws != "" {
  23. }
  24. queryMatch = fmt.Sprintf(queryMatch, kws, "20%")
  25. tmp := ""
  26. for i, val := range tags {
  27. if i < len(tags)-1 {
  28. tmp += fmt.Sprintf(tag, val) + ","
  29. } else {
  30. tmp += fmt.Sprintf(tag, val)
  31. }
  32. }
  33. tagsTerms = fmt.Sprintf(tagsTerms, tmp)
  34. sql = fmt.Sprintf(sql, queryMatch, tagsTerms)
  35. return sql
  36. }
  37. func DSL4SmartResponse(question string, tenantId int64, msgType int) string {
  38. var (
  39. totalQuery = `{"post_filter":{%s},"query":{%s},"_source":[%s],"size":%d}`
  40. postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
  41. query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"tenantId":"%s"}}]}`
  42. )
  43. var typeStr string
  44. /*1.首先将问题使用hanlp分词*/
  45. //hanlpCutWords := HanlpGetNormalWords(question, "http://39.106.145.77:8080/api/segment")
  46. hanlpCutWords := HanlpGetNormalWords(question, C.Segment)
  47. question = strings.Join(hanlpCutWords, "")
  48. lenQuestion := len([]rune(question))
  49. if lenQuestion >= 2 {
  50. queryPercent := "40%"
  51. if lenQuestion < 5 {
  52. queryPercent = "85%"
  53. } else if lenQuestion < 9 {
  54. queryPercent = "60%"
  55. } else if lenQuestion < 12 {
  56. queryPercent = "55%"
  57. }
  58. log.Println("消息类型:", msgType)
  59. if msgType == 1 {
  60. typeStr = "keywords"
  61. } else if msgType == 2 { //百度语音过来的
  62. typeStr = "keywords.key_pinyin"
  63. }
  64. /*2使用sik分词将问题分词以获取更多查询词语*/
  65. //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
  66. mustque := ElasticSmartIK(question, C.Es.Addr+"/"+C.Es.Index+"/_analyze")
  67. if mustque != "" {
  68. postFilter = fmt.Sprintf(postFilter, mustque)
  69. }
  70. query = fmt.Sprintf(query, "", typeStr, question, queryPercent, strconv.Itoa(int(tenantId)))
  71. queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `"answer","question"`, 1)
  72. log.Println("queryDSL:", queryDSL)
  73. return queryDSL
  74. }
  75. return ""
  76. }
  77. func DSL4SmartResponseList(question, msgType string, repositoryId []string, must_not string, size int, tags ...string) string {
  78. queryPercent := "20%"
  79. var (
  80. totalQuery = `{"post_filter":{%s},"query":{%s},"_source":["_id"%s],"size":%d}`
  81. postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
  82. query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"repositoryId":%s}},{"terms":{"tags.code":[%s]}}]}`
  83. )
  84. /*2使用sik分词将问题分词以获取更多查询词语*/
  85. //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
  86. mustque := ElasticSmartIK(question, Analyze)
  87. if mustque != "" {
  88. postFilter = fmt.Sprintf(postFilter, mustque)
  89. }
  90. tmp := ""
  91. for i, val := range tags {
  92. if i < len(tags)-1 {
  93. tmp += fmt.Sprintf(`"%s"`, val) + ","
  94. } else {
  95. tmp += fmt.Sprintf(`"%s"`, val)
  96. }
  97. }
  98. query = fmt.Sprintf(query, must_not, msgType, question, queryPercent, repositoryId, tmp)
  99. queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `,"answer","questions.question"`, size)
  100. log.Println("queryDSL:", queryDSL)
  101. return queryDSL
  102. }
  103. func DSL4SearchByKwsOrid(keyWords string, tenantId string) string {
  104. var (
  105. sql = `{"query": {"bool": {"must": [%s%s]}}}`
  106. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"%s","minimum_should_match":"%s"}}}`
  107. //idTerms = `,{"terms":{"smart.id":[%s]}}`
  108. ridTerms = `,{"terms":{"tenantId":%s}}`
  109. //id = `"%s"`
  110. )
  111. queryMatch = fmt.Sprintf(queryMatch, keyWords, "20%")
  112. //fmt.Println("queryMatch:", queryMatch)
  113. //tmp := ""
  114. ridTerms = fmt.Sprintf(ridTerms, tenantId)
  115. sql = fmt.Sprintf(sql, queryMatch, ridTerms)
  116. log.Println("sql", sql)
  117. return sql
  118. }
  119. func GetQueryOT(tags, question, keywords, repositoryId string) (qstr string) {
  120. var query = `{"query":{"bool":{"must":[%s%s%s%s]}}}`
  121. queryMatch := ``
  122. queryTerms := ``
  123. queryId := ``
  124. queryQues := ``
  125. if keywords != "" {
  126. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
  127. //query_match = `{"match":{"questions.question":{"query":"` + keywords + `","minimum_should_match":"20%"}}},`
  128. }
  129. tags = strings.Replace(tags, ` `, `","`, -1)
  130. if tags != "" {
  131. queryTerms = `{"terms":{"tags.code":["` + tags + `"]}},`
  132. }
  133. if repositoryId != "" {
  134. queryId = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
  135. }
  136. if question != "" {
  137. queryQues = `,{"match": {"smart.questions.question": {"query": "` + question + `","fuzziness": "AUTO","operator": "and"}}}`
  138. }
  139. qstr = fmt.Sprintf(query, queryMatch, queryTerms, queryId, queryQues)
  140. return qstr
  141. }
  142. /*func GetFindQuery(keywords, tags, repositoryId string) string {
  143. var query = `{"query":{"bool":{"must":[%s%s%s]}}}`
  144. query_match := ``
  145. query_terms := ``
  146. query_id := ``
  147. if keywords != "" {
  148. query_match = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
  149. }
  150. tags = strings.Replace(tags, ` `, `","`, -1)
  151. if tags != "" {
  152. query_terms = `{"terms":{"tags.code":["` + tags + `"]}},`
  153. }
  154. if repositoryId != "" {
  155. query_id = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
  156. }
  157. qstr := fmt.Sprintf(query, query_match, query_terms, query_id)
  158. return qstr
  159. }*/