|
@@ -9,6 +9,7 @@ import (
|
|
|
util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
"log"
|
|
|
"sort"
|
|
|
+ "strconv"
|
|
|
|
|
|
//"sort"
|
|
|
"strings"
|
|
@@ -30,7 +31,7 @@ func SearchProjectFullScoring(client *elastic.Client, target InputData, projectN
|
|
|
city = util.ObjToString((*res)["city"])
|
|
|
}
|
|
|
|
|
|
- // 1. 精准查询(权重 1.0)
|
|
|
+ // 1. 精准查询
|
|
|
preciseHits, err := searchPrecise(client, projectName, province, city, publish, 20)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
@@ -41,7 +42,7 @@ func SearchProjectFullScoring(client *elastic.Client, target InputData, projectN
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // 2. 分词查询(权重 0.8)
|
|
|
+ // 2. 分词查询
|
|
|
tokenHits, err := searchByToken(client, projectName, province, city, publish, 20)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
@@ -52,7 +53,7 @@ func SearchProjectFullScoring(client *elastic.Client, target InputData, projectN
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // 3. common 查询(权重 0.5)
|
|
|
+ // 3. common 查询
|
|
|
commonHits, err := searchCommon(client, projectName, province, city, publish, 10)
|
|
|
if err != nil {
|
|
|
return nil, err
|
|
@@ -104,28 +105,16 @@ func SearchProjectFullScoring(client *elastic.Client, target InputData, projectN
|
|
|
}
|
|
|
|
|
|
score := calculateConfidenceScore(target, candidate)
|
|
|
- candidate.Score = score
|
|
|
+ //candidate.Score = score
|
|
|
+ candidate.Score, _ = strconv.ParseFloat(fmt.Sprintf("%.2f", score), 64)
|
|
|
allCandidates = append(allCandidates, candidate)
|
|
|
}
|
|
|
|
|
|
- // 打印打分调试
|
|
|
- //for i, c := range allCandidates {
|
|
|
- // fmt.Printf("Candidat 排序前: %d Score: %.4f\n", i, c.Score)
|
|
|
- //}
|
|
|
-
|
|
|
// 排序(降序)
|
|
|
sort.SliceStable(allCandidates, func(i, j int) bool {
|
|
|
return allCandidates[i].Score > allCandidates[j].Score
|
|
|
})
|
|
|
|
|
|
- //for i, c := range allCandidates {
|
|
|
- // fmt.Printf("Candidate 排序后: %d Score: %.4f\n", i, c.Score)
|
|
|
- //}
|
|
|
- //// 5. 排序
|
|
|
- //sort.Slice(allCandidates, func(i, j int) bool {
|
|
|
- // return allCandidates[i].Score > allCandidates[j].Score
|
|
|
- //})
|
|
|
-
|
|
|
for _, doc := range allCandidates {
|
|
|
item := map[string]interface{}{
|
|
|
"id": doc.Id,
|
|
@@ -154,51 +143,6 @@ func SearchProjectFullScoring(client *elastic.Client, target InputData, projectN
|
|
|
return results, nil
|
|
|
}
|
|
|
|
|
|
-// searchPrecise 精准查询
|
|
|
-func searchPrecise22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
- filtersToTry := [][]elastic.Query{
|
|
|
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
|
|
|
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
|
|
|
- {elastic.NewTermsQuery("toptype", "拟建")},
|
|
|
- }
|
|
|
- var allResults []*elastic.SearchHit
|
|
|
- seenIDs := make(map[string]bool)
|
|
|
-
|
|
|
- query := elastic.NewBoolQuery()
|
|
|
-
|
|
|
- for _, field := range fieldsToTry {
|
|
|
- if field == "detail" && len(allResults) > maxResults {
|
|
|
- break
|
|
|
- }
|
|
|
- for _, filter := range filtersToTry {
|
|
|
- //query := elastic.NewBoolQuery().
|
|
|
- query.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
|
|
|
- Filter(filter...)
|
|
|
-
|
|
|
- fetchFields := elastic.NewFetchSourceContext(true).Include("id", "title", "projectname", "projectcode", "bidamount", "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel", "s_winner", "winnertel", "agency", "publishtime")
|
|
|
-
|
|
|
- searchResult, err := client.Search().
|
|
|
- Index("bidding").
|
|
|
- Query(query).
|
|
|
- Size(maxResults).
|
|
|
- FetchSourceContext(fetchFields).
|
|
|
- Do(context.Background())
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
-
|
|
|
- for _, hit := range searchResult.Hits.Hits {
|
|
|
- if !seenIDs[hit.Id] {
|
|
|
- seenIDs[hit.Id] = true
|
|
|
- allResults = append(allResults, hit)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return allResults, nil
|
|
|
-}
|
|
|
-
|
|
|
func searchPrecise(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
filtersToTry := [][]elastic.Query{
|
|
@@ -295,11 +239,6 @@ func searchPrecise(client *elastic.Client, projectName, province, city, publish
|
|
|
return nil, err
|
|
|
}
|
|
|
|
|
|
- // 打印 query JSON(调试用)
|
|
|
- //if sourceQ, err := query.Source(); err == nil {
|
|
|
- // log.Println(printInterfaceAsJSON(sourceQ))
|
|
|
- //}
|
|
|
-
|
|
|
for _, hit := range searchResult.Hits.Hits {
|
|
|
if !seenIDs[hit.Id] {
|
|
|
seenIDs[hit.Id] = true
|
|
@@ -318,66 +257,6 @@ func searchPrecise(client *elastic.Client, projectName, province, city, publish
|
|
|
return allResults, nil
|
|
|
}
|
|
|
|
|
|
-// searchByToken 分词查询
|
|
|
-func searchByToken22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
- filtersToTry := [][]elastic.Query{
|
|
|
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
|
|
|
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
|
|
|
- {elastic.NewTermsQuery("toptype", "拟建")},
|
|
|
- }
|
|
|
- analyzeResp, err := client.IndexAnalyze().
|
|
|
- Index("bidding").
|
|
|
- Analyzer("ik_smart").
|
|
|
- Text(projectName).
|
|
|
- Do(context.Background())
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
-
|
|
|
- var tokens []string
|
|
|
- for _, token := range analyzeResp.Tokens {
|
|
|
- tokens = append(tokens, token.Token)
|
|
|
- }
|
|
|
- if len(tokens) == 0 {
|
|
|
- return nil, fmt.Errorf("no tokens found from ik_smart")
|
|
|
- }
|
|
|
- queryText := strings.Join(tokens, " ")
|
|
|
-
|
|
|
- var allHits []*elastic.SearchHit
|
|
|
- seen := make(map[string]bool)
|
|
|
-
|
|
|
- for _, filter := range filtersToTry {
|
|
|
- query := elastic.NewBoolQuery().
|
|
|
- Must(elastic.NewMultiMatchQuery(queryText, fieldsToTry...).MinimumShouldMatch("100%")).
|
|
|
- Filter(filter...)
|
|
|
-
|
|
|
- searchResult, err := client.Search().
|
|
|
- Index("bidding").
|
|
|
- Query(query).
|
|
|
- Size(maxResults).
|
|
|
- Do(context.Background())
|
|
|
- if err != nil {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- for _, hit := range searchResult.Hits.Hits {
|
|
|
- if !seen[hit.Id] {
|
|
|
- seen[hit.Id] = true
|
|
|
- allHits = append(allHits, hit)
|
|
|
- if len(allHits) >= maxResults {
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if len(allHits) >= maxResults {
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- return allHits, nil
|
|
|
-}
|
|
|
-
|
|
|
func searchByToken(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
filtersToTry := [][]elastic.Query{
|
|
@@ -510,47 +389,6 @@ func searchByToken(client *elastic.Client, projectName, province, city, publish
|
|
|
return allHits, nil
|
|
|
}
|
|
|
|
|
|
-// searchCommon common 查询
|
|
|
-func searchCommon22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
- queryMap := map[string]interface{}{
|
|
|
- "bool": map[string]interface{}{
|
|
|
- "should": []interface{}{
|
|
|
- map[string]interface{}{"common": map[string]interface{}{"projectname.pname": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.2}}},
|
|
|
- map[string]interface{}{"common": map[string]interface{}{"title": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.2}}},
|
|
|
- map[string]interface{}{"common": map[string]interface{}{"detail": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.1}}},
|
|
|
- },
|
|
|
- "minimum_should_match": 1,
|
|
|
- },
|
|
|
- }
|
|
|
- queryBytes, _ := json.Marshal(queryMap)
|
|
|
- queryBase64 := base64.StdEncoding.EncodeToString(queryBytes)
|
|
|
- query := elastic.NewWrapperQuery(queryBase64)
|
|
|
-
|
|
|
- fetchFields := elastic.NewFetchSourceContext(true).Include("id", "title", "projectname", "projectcode", "bidamount", "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel", "s_winner", "winnertel", "agency", "publishtime")
|
|
|
-
|
|
|
- searchResult, err := client.Search().
|
|
|
- Index("bidding").
|
|
|
- Query(query).
|
|
|
- Size(maxResults).
|
|
|
- FetchSourceContext(fetchFields).
|
|
|
- Do(context.Background())
|
|
|
- if err != nil {
|
|
|
- return nil, err
|
|
|
- }
|
|
|
-
|
|
|
- var allHits []*elastic.SearchHit
|
|
|
- seen := make(map[string]bool)
|
|
|
-
|
|
|
- for _, hit := range searchResult.Hits.Hits {
|
|
|
- if !seen[hit.Id] {
|
|
|
- seen[hit.Id] = true
|
|
|
- allHits = append(allHits, hit)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return allHits, nil
|
|
|
-}
|
|
|
-
|
|
|
func searchCommon(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
fields := []string{"projectname.pname", "title", "detail"}
|
|
|
var t time.Time
|