package util

import (
	"fmt"
	"log"
	"strings"

	. "knowledgeBase/rpc/knowledge/init"
)

var (
	// Analyze is passed as the second argument to ElasticSmartIK by the
	// smart-response builders below (presumably the URL of an Elasticsearch
	// _analyze endpoint; confirm against the code that assigns it).
	Analyze string
	// Segment is declared here but not read anywhere in this file;
	// DSL4SmartResponse uses C.Segment from the dot-imported package instead.
	Segment string
)

// Helpers for building the several kinds of query DSL statements used in the
// project.
//
// NOTE(review): every builder in this file splices its string arguments into
// the JSON text verbatim. Values containing `"` or `\` are not escaped, so
// callers must pass values that are safe to embed in a JSON string.

const (
	// dslSmartTotalQueryFmt is the outer request body shared by the
	// smart-response builders: post filter, query, extra _source fields, size.
	dslSmartTotalQueryFmt = `{"post_filter":{%s},"query":{%s},"_source":["_id"%s],"size":%d}`
	// dslSmartPostFilterFmt is a script post filter; its single verb is the
	// "que" script parameter that the script searches for each must_keywords item.
	dslSmartPostFilterFmt = `"script":{"script":"def sk=_source.must_keywords;def n=0;for(item in sk){ n++;if(que.indexOf(item)>-1){return true}};if(n==0){ return true}","params":{"que":"%s"}}`
)

// dslQuoteJoin renders vals as a comma-separated list of double-quoted items
// (`"a","b"`), or the empty string when vals is empty.
func dslQuoteJoin(vals []string) string {
	if len(vals) == 0 {
		return ""
	}
	return `"` + strings.Join(vals, `","`) + `"`
}

// dslJSONStringArray renders vals as a JSON array of strings (`["a","b"]`).
// An element that already starts and ends with a double quote is kept as is,
// so callers that pre-quoted their IDs are not quoted twice.
func dslJSONStringArray(vals []string) string {
	items := make([]string, len(vals))
	for i, v := range vals {
		if len(v) >= 2 && strings.HasPrefix(v, `"`) && strings.HasSuffix(v, `"`) {
			items[i] = v
			continue
		}
		items[i] = `"` + v + `"`
	}
	return "[" + strings.Join(items, ",") + "]"
}

// DSL4SearchByKwsAndTags builds the query used by the admin back-office list
// search: a match on knowledgeKeyWords (minimum_should_match 20%) combined
// with a terms filter on tags.code.
func DSL4SearchByKwsAndTags(kws string, tags ...string) string {
	const (
		queryFmt = `{"query": {"bool": {"must": [%s,%s]}}}`
		matchFmt = `{"match":{"knowledgeKeyWords":{"query":"%s","minimum_should_match":"%s"}}}`
		termsFmt = `{"terms":{"tags.code":[%s]}}`
	)

	match := fmt.Sprintf(matchFmt, kws, "20%")
	terms := fmt.Sprintf(termsFmt, dslQuoteJoin(tags))
	return fmt.Sprintf(queryFmt, match, terms)
}

// DSL4SmartResponse builds the single-hit (size 1) smart-response query for
// question within tenantId. It returns "" when the question, after
// segmentation, is shorter than two runes.
//
// msgType selects the matched field: 1 uses knowledgeKeyWords, 2 uses
// knowledgeKeyWords.key_pinyin. Any other value leaves the field name empty,
// exactly as before.
func DSL4SmartResponse(question, tenantId string, msgType int) string {
	// The terms query takes an array of values, so tenantId is wrapped in one.
	const queryFmt = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"tenantId":["%s"]}}]}`

	// 1. Segment the question with HanLP and glue the words back together.
	//hanlpCutWords := HanlpGetNormalWords(question, "http://39.106.145.77:8080/api/segment")
	question = strings.Join(HanlpGetNormalWords(question, C.Segment), "")

	runeCount := len([]rune(question))
	if runeCount < 2 {
		return ""
	}

	// Shorter questions must match a larger share of their terms.
	queryPercent := "40%"
	switch {
	case runeCount < 5:
		queryPercent = "85%"
	case runeCount < 9:
		queryPercent = "60%"
	case runeCount < 12:
		queryPercent = "55%"
	}

	var typeStr string
	switch msgType {
	case 1:
		typeStr = "knowledgeKeyWords"
	case 2: // message came from Baidu voice
		typeStr = "knowledgeKeyWords.key_pinyin"
	}

	// 2. Tokenise the question with the smart IK analyzer to obtain more
	// query terms for the post filter.
	//mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
	mustque := ElasticSmartIK(question, Analyze)

	// Always format the template: leaving it untouched when mustque is empty
	// used to send a literal "%s" as the script parameter.
	postFilter := fmt.Sprintf(dslSmartPostFilterFmt, mustque)
	query := fmt.Sprintf(queryFmt, "", typeStr, question, queryPercent, tenantId)
	return fmt.Sprintf(dslSmartTotalQueryFmt, postFilter, query, `,"answer","questions","_id"`, 1)
}

// DSL4SmartResponseList builds the multi-hit smart-response query.
//
// msgType is used directly as the name of the matched field, repositoryId and
// tags become terms filters, must_not is spliced verbatim into the must_not
// array, and size caps the number of hits. The finished query is logged
// before it is returned.
func DSL4SmartResponseList(question, msgType string, repositoryId []string, must_not string, size int, tags ...string) string {
	const (
		queryPercent = "20%"
		queryFmt     = `"bool":{"must_not":[%s],"must":[{"match":{"%s":{"query":"%s","minimum_should_match":"%s"}}},{"terms":{"repositoryId":%s}},{"terms":{"tags.code":[%s]}}]}`
	)

	// Tokenise the question with the smart IK analyzer to obtain more query
	// terms for the post filter.
	//mustque := ElasticSmartIK(question, "http://39.106.145.77:9201/smart/_analyze")
	mustque := ElasticSmartIK(question, Analyze)

	// Always format the template so an empty mustque yields "que":"" instead
	// of a literal "%s".
	postFilter := fmt.Sprintf(dslSmartPostFilterFmt, mustque)

	// repositoryId must be rendered as a JSON array; formatting the slice
	// with %s produced `[a b]`, which is not valid JSON.
	query := fmt.Sprintf(queryFmt, must_not, msgType, question, queryPercent,
		dslJSONStringArray(repositoryId), dslQuoteJoin(tags))

	queryDSL := fmt.Sprintf(dslSmartTotalQueryFmt, postFilter, query, `,"answer","questions.question"`, size)
	log.Println("queryDSL:", queryDSL)
	return queryDSL
}

// DSL4SearchByKwsOrid builds a knowledgeKeyWords match (minimum_should_match
// 20%) restricted either to the single smart.id _id, when _id is non-empty,
// or otherwise to the repositories in repositoryArr. The finished query is
// logged before it is returned.
func DSL4SearchByKwsOrid(kws, _id string, repositoryArr []string) string {
	const (
		queryFmt    = `{"query": {"bool": {"must": [%s%s]}}}`
		matchFmt    = `{"match":{"knowledgeKeyWords":{"query":"%s","minimum_should_match":"%s"}}}`
		idTermsFmt  = `,{"terms":{"smart.id":["%s"]}}`
		ridTermsFmt = `,{"terms":{"repositoryId":%s}}`
	)

	match := fmt.Sprintf(matchFmt, kws, "20%")

	var filter string
	if _id != "" {
		// Only the ID belongs here; the surplus repositoryArr argument used to
		// append "%!(EXTRA ...)" to the query text.
		filter = fmt.Sprintf(idTermsFmt, _id)
	} else {
		// Render the repository list as a JSON array rather than `[a b]`.
		filter = fmt.Sprintf(ridTermsFmt, dslJSONStringArray(repositoryArr))
	}

	sql := fmt.Sprintf(queryFmt, match, filter)
	log.Println("sql", sql)
	return sql
}

// GetQueryOT builds a bool/must query from whichever of its arguments are
// non-empty:
//
//   - keywords: match on knowledgeKeyWords (minimum_should_match 40%)
//   - tags: space-separated codes, turned into a terms filter on tags.code
//   - repositoryId: spliced verbatim between [ and ] of a terms filter, so it
//     must already be a valid JSON array body (e.g. `"a","b"`)
//   - question: fuzzy "and" match on smart.questions.question
//
// Clauses are joined with commas only between the ones actually present, so
// the result stays valid JSON for every combination of empty arguments.
func GetQueryOT(tags, question, keywords, repositoryId string) (qstr string) {
	clauses := make([]string, 0, 4)

	if keywords != "" {
		clauses = append(clauses, `{"match":{"knowledgeKeyWords":{"query":"`+keywords+`","minimum_should_match":"40%"}}}`)
		//query_match = `{"match":{"questions.question":{"query":"` + keywords + `","minimum_should_match":"20%"}}},`
	}
	if tags != "" {
		codes := strings.Replace(tags, ` `, `","`, -1)
		clauses = append(clauses, `{"terms":{"tags.code":["`+codes+`"]}}`)
	}
	if repositoryId != "" {
		clauses = append(clauses, `{"terms":{"repositoryId":[`+repositoryId+`]}}`)
	}
	if question != "" {
		clauses = append(clauses, `{"match": {"smart.questions.question": {"query": "`+question+`","fuzziness": "AUTO","operator": "and"}}}`)
	}

	qstr = `{"query":{"bool":{"must":[` + strings.Join(clauses, ",") + `]}}}`
	return qstr
}

/*func GetFindQuery(keywords, tags, repositoryId string) string {
	var query = `{"query":{"bool":{"must":[%s%s%s]}}}`
	query_match := ``
	query_terms := ``
	query_id := ``
	if keywords != "" {
		query_match = `{"match":{"knowledgeKeyWords":{"query":"` + keywords + `","minimum_should_match":"40%"}}},`
	}
	tags = strings.Replace(tags, ` `, `","`, -1)
	if tags != "" {
		query_terms = `{"terms":{"tags.code":["` + tags + `"]}},`
	}
	if repositoryId != "" {
		query_id = `{"terms":{"repositoryId":[` + repositoryId + `]}}`
	}
	qstr := fmt.Sprintf(query, query_match, query_terms, query_id)
	return qstr
}*/