elasticsearch_dsl.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. package util
import (
	"encoding/json"
	"fmt"
	"log"
	"strings"

	"bp.jydev.jianyu360.cn/SocialPlatform/knowledgeBase/rpc/knowledge/internal/config"
	"github.com/zeromicro/go-zero/core/logx"
)
  9. /*
  10. 项目中所用到的几类查询dsl语句构建工具类
  11. */
  12. func DSL4SmartResponse(question string, entId string, msgType int) string {
  13. var (
  14. totalQuery = `{"post_filter":{%s},"query":{%s},"_source":[%s],"size":%d}`
  15. postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
  16. query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"term":{"entId":"%s"}}]}`
  17. )
  18. var typeStr string
  19. /*1.首先将问题使用hanlp分词*/
  20. //hanlpCutWords := HanlpGetNormalWords(question, "http://39.106.145.77:8080/api/segment")
  21. hanlpCutWords := HanlpGetNormalWords(question, config.C.Segment)
  22. log.Println("hanlp分词结果:", hanlpCutWords)
  23. if len(hanlpCutWords) == 0 {
  24. hanlpCutWords = append(hanlpCutWords, question)
  25. }
  26. question = strings.Join(hanlpCutWords, "")
  27. lenQuestion := len([]rune(question))
  28. //log.Println("len", lenQuestion)
  29. if lenQuestion >= 2 {
  30. queryPercent := "40%"
  31. if lenQuestion < 5 {
  32. queryPercent = "85%"
  33. } else if lenQuestion < 9 {
  34. queryPercent = "60%"
  35. } else if lenQuestion < 12 {
  36. queryPercent = "55%"
  37. }
  38. if msgType == 1 {
  39. typeStr = "keywords"
  40. } else if msgType == 2 { //百度语音过来的
  41. typeStr = "keywords.key_pinyin"
  42. }
  43. /*2使用sik分词将问题分词以获取更多查询词语*/
  44. //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
  45. mustque := ElasticSmartIK(question, config.C.Es.Addr+"/"+config.C.Es.Index+"/_analyze")
  46. if mustque != "" {
  47. postFilter = fmt.Sprintf(postFilter, mustque)
  48. }
  49. query = fmt.Sprintf(query, "", typeStr, question, queryPercent, entId)
  50. queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `"answer","question"`, 1)
  51. logx.Info("queryDSL:", queryDSL)
  52. return queryDSL
  53. }
  54. return ""
  55. }
  56. func DSL4SearchByKwsOrid(keyWords string, entId string) string {
  57. var (
  58. sql = `{"query": {"bool": {"must": [%s%s]}}}`
  59. queryMatch = `{"match":{"keywords":{"query":"%s","minimum_should_match":"%s"}}}`
  60. ridTerms = `,{"term":{"entId":%s}}`
  61. )
  62. queryMatch = fmt.Sprintf(queryMatch, keyWords, "20%")
  63. ridTerms = fmt.Sprintf(ridTerms, entId)
  64. sql = fmt.Sprintf(sql, queryMatch, ridTerms)
  65. //log.Println("sql", sql)
  66. return sql
  67. }
  68. func GetQueryOT(tags, question, keywords, repositoryId string) (qstr string) {
  69. var query = `{"query":{"bool":{"must":[%s%s%s%s]}}}`
  70. queryMatch := ``
  71. queryTerms := ``
  72. queryId := ``
  73. queryQues := ``
  74. if keywords != "" {
  75. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
  76. //query_match = `{"match":{"questions.question":{"query":"` + keywords + `","minimum_should_match":"20%"}}},`
  77. }
  78. tags = strings.Replace(tags, ` `, `","`, -1)
  79. if tags != "" {
  80. queryTerms = `{"terms":{"tags.code":["` + tags + `"]}},`
  81. }
  82. if repositoryId != "" {
  83. queryId = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
  84. }
  85. if question != "" {
  86. queryQues = `,{"match": {"smart.questions.question": {"query": "` + question + `","fuzziness": "AUTO","operator": "and"}}}`
  87. }
  88. qstr = fmt.Sprintf(query, queryMatch, queryTerms, queryId, queryQues)
  89. return qstr
  90. }