123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- package util
import (
	"encoding/json"
	"fmt"
	"log"
	"strconv"
	"strings"

	. "knowledgeBase/rpc/knowledge/init"
)
// Analyze is passed as the second argument to ElasticSmartIK by
// DSL4SmartResponse and DSL4SmartResponseList.
// NOTE(review): presumably the URL of the analyzer endpoint — confirm where
// it is assigned.
var Analyze string

// Segment is declared next to Analyze but is not read by the builders visible
// in this file; DSL4SmartResponse reads C.Segment instead.
// NOTE(review): confirm whether this variable is still needed.
var Segment string
- /*项目中所用到的几类查询dsl语句构建工具类*/
// DSL4SearchByKwsAndTags builds the query DSL used by the admin-console list
// search.
//
// The result is a bool/must query with two clauses: a match on
// knowledgeKeyWords for kws (minimum_should_match 20%) and a terms filter on
// tags.code for tags. Both clauses are always emitted, so an empty kws or an
// empty tag list still yields the corresponding (empty) clause.
//
// kws and tags are JSON-escaped before being embedded, so quotes, backslashes
// or control characters in them cannot break or alter the generated document.
func DSL4SearchByKwsAndTags(kws string, tags ...string) string {
	const (
		queryTmpl = `{"query": {"bool": {"must": [%s,%s]}}}`
		matchTmpl = `{"match":{"knowledgeKeyWords":{"query":%s,"minimum_should_match":"%s"}}}`
		termsTmpl = `{"terms":{"tags.code":%s}}`
	)

	// Marshaling a string or a []string cannot fail, so the errors are ignored.
	kwsJSON, _ := json.Marshal(kws)

	// A nil slice would marshal as null; keep the empty-array form instead.
	tagsJSON := []byte("[]")
	if len(tags) > 0 {
		tagsJSON, _ = json.Marshal(tags)
	}

	match := fmt.Sprintf(matchTmpl, kwsJSON, "20%")
	terms := fmt.Sprintf(termsTmpl, tagsJSON)
	return fmt.Sprintf(queryTmpl, match, terms)
}
- func DSL4SmartResponse(question string, tenantId int64, msgType int) string {
- var (
- totalQuery = `{"post_filter":{%s},"query":{%s},"_source":["_id"%s],"size":%d}`
- postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
- query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"tenantId":"%s"}}]}`
- )
- var typeStr string
- /*1.首先将问题使用hanlp分词*/
- //hanlpCutWords := HanlpGetNormalWords(question, "http://39.106.145.77:8080/api/segment")
- hanlpCutWords := HanlpGetNormalWords(question, C.Segment)
- question = strings.Join(hanlpCutWords, "")
- lenQuestion := len([]rune(question))
- if lenQuestion >= 2 {
- queryPercent := "40%"
- if lenQuestion < 5 {
- queryPercent = "85%"
- } else if lenQuestion < 9 {
- queryPercent = "60%"
- } else if lenQuestion < 12 {
- queryPercent = "55%"
- }
- if msgType == 1 {
- typeStr = "knowledgeKeyWords"
- } else if msgType == 2 { //百度语音过来的
- typeStr = "knowledgeKeyWords.key_pinyin"
- }
- /*2使用sik分词将问题分词以获取更多查询词语*/
- //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
- mustque := ElasticSmartIK(question, Analyze)
- if mustque != "" {
- postFilter = fmt.Sprintf(postFilter, mustque)
- }
- query = fmt.Sprintf(query, "", typeStr, question, queryPercent, strconv.Itoa(int(tenantId)))
- queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `,"answer","questions","_id"`, 1)
- //log.Println("queryDSL:", queryDSL)
- return queryDSL
- }
- return ""
- }
- func DSL4SmartResponseList(question, msgType string, repositoryId []string, must_not string, size int, tags ...string) string {
- queryPercent := "20%"
- var (
- totalQuery = `{"post_filter":{%s},"query":{%s},"_source":["_id"%s],"size":%d}`
- postFilter = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
- query = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"repositoryId":%s}},{"terms":{"tags.code":[%s]}}]}`
- )
- /*2使用sik分词将问题分词以获取更多查询词语*/
- //mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
- mustque := ElasticSmartIK(question, Analyze)
- if mustque != "" {
- postFilter = fmt.Sprintf(postFilter, mustque)
- }
- tmp := ""
- for i, val := range tags {
- if i < len(tags)-1 {
- tmp += fmt.Sprintf(`"%s"`, val) + ","
- } else {
- tmp += fmt.Sprintf(`"%s"`, val)
- }
- }
- query = fmt.Sprintf(query, must_not, msgType, question, queryPercent, repositoryId, tmp)
- queryDSL := fmt.Sprintf(totalQuery, postFilter, query, `,"answer","questions.question"`, size)
- log.Println("queryDSL:", queryDSL)
- return queryDSL
- }
// DSL4SearchByKwsOrid builds a bool/must query that matches keyWords against
// knowledgeKeyWords (minimum_should_match 20%) and filters on tenantId.
//
// keyWords is JSON-escaped before being embedded, so quotes or backslashes in
// it cannot break or alter the generated document.
//
// tenantId is inserted verbatim as the value of the tenantId terms filter, so
// the caller must pass an already serialized JSON array such as `[12]`. An
// empty tenantId is deliberately not turned into "no tenant filter".
//
// The finished DSL is written to the standard logger before it is returned.
func DSL4SearchByKwsOrid(keyWords string, tenantId string) string {
	const (
		queryTmpl  = `{"query": {"bool": {"must": [%s%s]}}}`
		matchTmpl  = `{"match":{"knowledgeKeyWords":{"query":%s,"minimum_should_match":"%s"}}}`
		tenantTmpl = `,{"terms":{"tenantId":%s}}`
	)

	// Marshaling a string cannot fail, so the error is ignored.
	keyWordsJSON, _ := json.Marshal(keyWords)

	match := fmt.Sprintf(matchTmpl, keyWordsJSON, "20%")
	tenant := fmt.Sprintf(tenantTmpl, tenantId)
	sql := fmt.Sprintf(queryTmpl, match, tenant)

	log.Println("sql", sql)
	return sql
}
// GetQueryOT builds a bool/must query from whichever of its arguments are
// non-empty. Clauses are emitted in this order:
//
//   - keywords: match on knowledgeKeyWords with minimum_should_match 40%.
//   - tags: whitespace-separated tag codes, emitted as a terms filter on
//     tags.code; empty entries caused by repeated spaces are dropped.
//   - repositoryId: inserted verbatim inside the repositoryId terms array, so
//     it must already be a comma-separated list of JSON values (e.g. `1,2`).
//   - question: fuzzy ("AUTO") match with operator "and" on
//     smart.questions.question.
//
// Clauses are joined with commas only between those actually present, so every
// combination of empty and non-empty arguments yields well-formed JSON; with
// all arguments empty the must array is empty. keywords, tags and question are
// JSON-escaped before being embedded.
func GetQueryOT(tags, question, keywords, repositoryId string) (qstr string) {
	clauses := make([]string, 0, 4)

	// Marshaling a string or a []string cannot fail, so the errors below are
	// ignored.
	if keywords != "" {
		kw, _ := json.Marshal(keywords)
		clauses = append(clauses, `{"match":{"knowledgeKeyWords":{"query":`+string(kw)+`,"minimum_should_match":"40%"}}}`)
	}

	if codes := strings.Fields(tags); len(codes) > 0 {
		list, _ := json.Marshal(codes)
		clauses = append(clauses, `{"terms":{"tags.code":`+string(list)+`}}`)
	}

	if repositoryId != "" {
		clauses = append(clauses, `{"terms":{"repositoryId":[`+repositoryId+`]}}`)
	}

	if question != "" {
		q, _ := json.Marshal(question)
		clauses = append(clauses, `{"match": {"smart.questions.question": {"query": `+string(q)+`,"fuzziness": "AUTO","operator": "and"}}}`)
	}

	return `{"query":{"bool":{"must":[` + strings.Join(clauses, ",") + `]}}}`
}
- /*func GetFindQuery(keywords, tags, repositoryId string) string {
- var query = `{"query":{"bool":{"must":[%s%s%s]}}}`
- query_match := ``
- query_terms := ``
- query_id := ``
- if keywords != "" {
- query_match = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
- }
- tags = strings.Replace(tags, ` `, `","`, -1)
- if tags != "" {
- query_terms = `{"terms":{"tags.code":["` + tags + `"]}},`
- }
- if repositoryId != "" {
- query_id = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
- }
- qstr := fmt.Sprintf(query, query_match, query_terms, query_id)
- return qstr
- }*/
|