elasticsearch_dsl.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. package util
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/zeromicro/go-zero/core/logx"
  6. "strings"
  7. )
// Helpers that build the several kinds of Elasticsearch query DSL strings used in this project.
  9. func DSL4SmartResponse(question string, entId string, msgType int, addr, index, segment string) string {
  10. var (
  11. totalQuery = `{"post_filter":{%s},"query":{%s},"_source":[%s],"size":%d}`
  12. postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
  13. query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"term":{"entId":"%s"}}]}`
  14. )
  15. var typeStr string
  16. /*1.首先将问题使用hanlp分词*/
  17. hanlpCutWords := HanlpGetNormalWords(question, segment)
  18. if len(hanlpCutWords) == 0 {
  19. hanlpCutWords = append(hanlpCutWords, question)
  20. }
  21. question = strings.Join(hanlpCutWords, " ")
  22. lenQuestion := len([]rune(question))
  23. //logx.Info("lenQuestion", lenQuestion)
  24. if lenQuestion >= 2 {
  25. queryPercent := "40%"
  26. if lenQuestion < 5 {
  27. queryPercent = "85%"
  28. } else if lenQuestion < 9 {
  29. queryPercent = "60%"
  30. } else if lenQuestion < 12 {
  31. queryPercent = "55%"
  32. }
  33. if msgType == 1 {
  34. typeStr = "keywords"
  35. } else if msgType == 2 { //百度语音过来的
  36. typeStr = "keywords.key_pinyin"
  37. }
  38. /*2使用sik分词将问题分词以获取更多查询词语*/
  39. mustque := ElasticSmartIK(question, addr+"/"+index+"/_analyze")
  40. if mustque != "" {
  41. postFilter = fmt.Sprintf(postFilter, mustque)
  42. }
  43. query = fmt.Sprintf(query, "", typeStr, question, queryPercent, entId)
  44. queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `"answer","question"`, 1)
  45. //logx.Info("queryDSL:", queryDSL)
  46. return queryDSL
  47. }
  48. return ""
  49. }
  50. func DSL4SearchByKwsOrid(keyWords string, entId string, mark int) string {
  51. var (
  52. sql = `{"query": {"bool": {"must": [%s%s]}}}`
  53. queryMatch = `{"match":{"keywords":{"query":"%s","minimum_should_match":"%s"}}}`
  54. ridTerms = `,{"term":{"entId":%s}}`
  55. )
  56. if mark == 1 {
  57. queryMatch = fmt.Sprintf(queryMatch, keyWords, "30%")
  58. } else {
  59. queryMatch = fmt.Sprintf(queryMatch, keyWords, "20%")
  60. }
  61. ridTerms = fmt.Sprintf(ridTerms, entId)
  62. sql = fmt.Sprintf(sql, queryMatch, ridTerms)
  63. //log.Println("sql", sql)
  64. return sql
  65. }
  66. func GetQueryOT(tags, question, keywords, repositoryId string) (qstr string) {
  67. var query = `{"query":{"bool":{"must":[%s%s%s%s]}}}`
  68. queryMatch := ``
  69. queryTerms := ``
  70. queryId := ``
  71. queryQues := ``
  72. if keywords != "" {
  73. queryMatch = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
  74. //query_match = `{"match":{"questions.question":{"query":"` + keywords + `","minimum_should_match":"20%"}}},`
  75. }
  76. tags = strings.Replace(tags, ` `, `","`, -1)
  77. if tags != "" {
  78. queryTerms = `{"terms":{"tags.code":["` + tags + `"]}},`
  79. }
  80. if repositoryId != "" {
  81. queryId = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
  82. }
  83. if question != "" {
  84. queryQues = `,{"match": {"smart.questions.question": {"query": "` + question + `","fuzziness": "AUTO","operator": "and"}}}`
  85. }
  86. qstr = fmt.Sprintf(query, queryMatch, queryTerms, queryId, queryQues)
  87. return qstr
  88. }
  89. var queryStr = `{"_source": ["question","intention","answer"],"size": %d, "min_score":%v,"query": {"bool": {"must": [{"script_score": {"query": {"term":{"entId":%v}},"script": {"source": "cosine(params.queryVector,doc['questionVector'])+1", "params": {"queryVector": %v}}}}]}}}`
  90. func GetAnswerQueryStr(question string, entId string, size int, minScore float64) string {
  91. qv, _ := EncodeVector(question)
  92. bs, err := json.Marshal(qv)
  93. if err != nil {
  94. logx.Info("向量序列化失败:", err)
  95. }
  96. return fmt.Sprintf(queryStr, size, minScore, entId, string(bs))
  97. }