package main

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"log"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/olivere/elastic/v7"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
)

// SearchProjectFullScoring looks up bidding documents that may correspond to
// projectName, scores each one against target and returns them ordered by
// descending score.
//
// The first six bytes of areacode are resolved to a province and city through
// the "address_new_2020" collection; the resolved values overwrite target.Area
// and target.City and restrict the searches. publish is an optional "YYYYMM"
// month used to bound publishtime. Three strategies run in order (exact
// phrase, analyzed tokens, common-terms) and their hits are merged without
// duplicates. When targetScore is greater than zero only candidates scoring at
// least targetScore are returned; otherwise every candidate is returned.
//
// An error from any of the three strategies aborts the call.
func SearchProjectFullScoring(client *elastic.Client, target InputData, projectName, areacode, publish string, targetScore float64) ([]map[string]interface{}, error) {
	province, city := "", ""
	if areacode != "" {
		// Only the first six bytes form the lookup key; shorter codes are used
		// as-is instead of panicking on the slice expression.
		code := areacode
		if len(code) > 6 {
			code = code[:6]
		}
		where := map[string]interface{}{
			"code": code,
		}
		// The second result is ignored: a missing record simply leaves
		// province and city empty.
		res, _ := MgoQY.FindOne("address_new_2020", where)
		if res != nil {
			province = util.ObjToString((*res)["province"])
			city = util.ObjToString((*res)["city"])
		}
	}
	target.Area = province
	target.City = city
	fmt.Println(province, city)

	projectName = RemoveInvisibleChars(projectName)
	projectName = FilterGeneric(projectName)

	// Hits are kept in discovery order (precise, then token, then common) so
	// that the stable sort below gives a reproducible order for equal scores.
	seenIDs := make(map[string]struct{})
	var orderedHits []*elastic.SearchHit
	collect := func(hits []*elastic.SearchHit) {
		for _, hit := range hits {
			if _, dup := seenIDs[hit.Id]; dup {
				continue
			}
			seenIDs[hit.Id] = struct{}{}
			orderedHits = append(orderedHits, hit)
		}
	}

	// 1. Exact phrase search.
	preciseHits, err := searchPrecise(client, projectName, province, city, publish, 20)
	if err != nil {
		return nil, err
	}
	collect(preciseHits)

	// 2. Tokenized search.
	tokenHits, err := searchByToken(client, projectName, province, city, publish, 20)
	if err != nil {
		return nil, err
	}
	collect(tokenHits)

	// 3. Common-terms search.
	commonHits, err := searchCommon(client, projectName, province, city, publish, 10)
	if err != nil {
		return nil, err
	}
	collect(commonHits)

	// 4. Merge and score.
	allCandidates := make([]EsDocument, 0, len(orderedHits))
	for _, hit := range orderedHits {
		var doc map[string]interface{}
		if err := json.Unmarshal(hit.Source, &doc); err != nil {
			// A hit whose source cannot be decoded is skipped.
			continue
		}

		// The detail text is not part of the fetched ES source, so it is read
		// from Mongo. NOTE(review): the original comment says it is meant for
		// buyer filtering during scoring; confirm calculateConfidenceScore
		// expects Detail to be populated.
		detail := ""
		if bidd, _ := MgoB.FindById("bidding", hit.Id, nil); bidd != nil {
			detail = util.ObjToString((*bidd)["detail"])
		}

		candidate := EsDocument{
			Id:          util.ObjToString(doc["id"]),
			Title:       util.ObjToString(doc["title"]),
			Projectname: util.ObjToString(doc["projectname"]),
			ProjectCode: util.ObjToString(doc["projectcode"]),
			Toptype:     util.ObjToString(doc["toptype"]),
			Subtype:     util.ObjToString(doc["subtype"]),
			Area:        util.ObjToString(doc["area"]),
			City:        util.ObjToString(doc["city"]),
			Buyer:       util.ObjToString(doc["buyer"]),
			SWinner:     util.ObjToString(doc["s_winner"]),
			Bidamount:   util.Float64All(doc["bidamount"]),
			Publishtime: util.Int64All(doc["publishtime"]),
			Agency:      util.ObjToString(doc["agency"]),
			WinnerTel:   util.ObjToString(doc["winnertel"]),
			BuyerTel:    util.ObjToString(doc["buyertel"]),
			BuyerPerson: util.ObjToString(doc["buyerperson"]),
			Budget:      util.Float64All(doc["budget"]),
			Detail:      detail,
		}

		score := calculateConfidenceScore(target, candidate)
		// Written as !(score >= targetScore) so a NaN score is rejected exactly
		// as a plain ">=" acceptance test would reject it.
		if targetScore > 0 && !(score >= targetScore) {
			continue
		}
		// Round to two decimals via the textual representation.
		candidate.Score, _ = strconv.ParseFloat(fmt.Sprintf("%.2f", score), 64)
		allCandidates = append(allCandidates, candidate)
	}

	// Highest score first.
	sort.SliceStable(allCandidates, func(i, j int) bool {
		return allCandidates[i].Score > allCandidates[j].Score
	})

	var results []map[string]interface{}
	for _, doc := range allCandidates {
		item := map[string]interface{}{
			"id":          doc.Id,
			"title":       doc.Title,
			"projectname": doc.Projectname,
			"projectcode": doc.ProjectCode,
			"toptype":     doc.Toptype,
			"subtype":     doc.Subtype,
			"area":        doc.Area,
			"city":        doc.City,
			"buyer":       doc.Buyer,
			"budget":      doc.Budget,
			"bidamount":   doc.Bidamount,
			"winner":      doc.Winner,
			"detail":      doc.Detail,
			"publishtime": doc.Publishtime,
			"agency":      doc.Agency,
			"s_winner":    doc.SWinner,
			"winnertel":   doc.WinnerTel,
			"buyertel":    doc.BuyerTel,
			"buyerperson": doc.BuyerPerson,
			"score":       doc.Score,
		}
		results = append(results, item)
	}

	return results, nil
}

// parsePublishMonth parses publish using the "200601" (YYYYMM) layout. It
// returns the zero time when publish is empty or malformed; a parse failure is
// logged and otherwise ignored, which disables the publishtime restriction.
func parsePublishMonth(publish string) time.Time {
	if publish == "" {
		return time.Time{}
	}
	t, err := time.Parse("200601", publish)
	if err != nil {
		log.Println("时间解析失败:", err)
	}
	return t
}

// publishWindow returns the publishtime bounds used when searching field:
// +/-60 months around t for "detail" and +/-36 months for every other field.
// Both bounds are zero when t is the zero time.
func publishWindow(t time.Time, field string) (start, end int64) {
	if t.IsZero() {
		return 0, 0
	}
	if field == "detail" {
		return getYearRange(t, 60)
	}
	return getYearRange(t, 36)
}

// biddingSourceFields returns the _source field selection shared by every
// search against the "bidding" index.
func biddingSourceFields() *elastic.FetchSourceContext {
	return elastic.NewFetchSourceContext(true).Include(
		"id", "title", "projectname", "projectcode", "bidamount", "area", "city",
		"toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
		"s_winner", "winnertel", "agency", "publishtime")
}

// noticeTypeFilters returns the groups of subtype/toptype term filters that
// the precise and token searches try one after another.
func noticeTypeFilters() [][]elastic.Query {
	return [][]elastic.Query{
		{elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
		{elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
		{elastic.NewTermsQuery("toptype", "拟建")},
	}
}

// windowedBoolQuery assembles a bool query from the given must clauses (kept
// in order) and filters, appending a publishtime range as a further must
// clause when t is set.
func windowedBoolQuery(must, filters []elastic.Query, t time.Time, start, end int64) *elastic.BoolQuery {
	q := elastic.NewBoolQuery().Must(must...).Filter(filters...)
	if !t.IsZero() {
		q = q.Must(elastic.NewRangeQuery("publishtime").Gte(start).Lt(end))
	}
	return q
}

// runBiddingSearch executes query against the "bidding" index, returning at
// most size hits restricted to the fields in fetch.
func runBiddingSearch(client *elastic.Client, query elastic.Query, fetch *elastic.FetchSourceContext, size int) (*elastic.SearchResult, error) {
	return client.Search().
		Index("bidding").
		Query(query).
		Size(size).
		FetchSourceContext(fetch).
		Do(context.Background())
}

// searchPrecise runs phrase-match queries for projectName against
// "projectname.pname", "title" and "detail", once per notice-type filter
// group.
//
// For the first two fields a province+city query (when both are set) and a
// province-only query are issued; with an empty province those fields are not
// searched at all. The "detail" field is always searched, restricted to
// province when set. maxResults bounds each individual query, not the merged
// result. The first search error aborts the call and is returned.
func searchPrecise(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
	fieldsToTry := []string{"projectname.pname", "title", "detail"}
	filtersToTry := noticeTypeFilters()
	fetchFields := biddingSourceFields()
	t := parsePublishMonth(publish)

	var allResults []*elastic.SearchHit
	seenIDs := make(map[string]bool)

	for _, field := range fieldsToTry {
		start, end := publishWindow(t, field)

		for _, filters := range filtersToTry {
			var queries []*elastic.BoolQuery

			switch {
			case field == "detail":
				// detail: optional province + phrase + time window + filter.
				var must []elastic.Query
				if province != "" {
					must = append(must, elastic.NewTermQuery("area", province))
				}
				must = append(must, elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
				queries = append(queries, windowedBoolQuery(must, filters, t, start, end))

			case province != "":
				// 1. Province + city.
				if city != "" {
					must := []elastic.Query{
						elastic.NewMultiMatchQuery(projectName, field).Type("phrase"),
						elastic.NewTermQuery("area", province),
						elastic.NewTermQuery("city", city),
					}
					queries = append(queries, windowedBoolQuery(must, filters, t, start, end))
				}
				// 2. Province only (city empty or different).
				must := []elastic.Query{
					elastic.NewMultiMatchQuery(projectName, field).Type("phrase"),
					elastic.NewTermQuery("area", province),
				}
				queries = append(queries, windowedBoolQuery(must, filters, t, start, end))
			}

			for _, query := range queries {
				searchResult, err := runBiddingSearch(client, query, fetchFields, maxResults)
				if err != nil {
					return nil, err
				}
				for _, hit := range searchResult.Hits.Hits {
					if !seenIDs[hit.Id] {
						seenIDs[hit.Id] = true
						allResults = append(allResults, hit)
					}
				}
			}
		}
	}

	return allResults, nil
}

// searchByToken analyzes projectName with the "ik_smart" analyzer of the
// "bidding" index and searches for the resulting tokens (joined by spaces) in
// "projectname.pname", "title" and "detail", once per notice-type filter
// group.
//
// The "detail" query requires every token to match (minimum_should_match
// 100%); the other fields use a plain match query and are only searched when
// province is set. The function returns as soon as maxResults distinct hits
// have been collected. An analyze failure or an empty token list is returned
// as an error; a failure of an individual search is logged and skipped.
func searchByToken(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
	fieldsToTry := []string{"projectname.pname", "title", "detail"}
	filtersToTry := noticeTypeFilters()
	t := parsePublishMonth(publish)

	// Tokenize the project name.
	analyzeResp, err := client.IndexAnalyze().
		Index("bidding").
		Analyzer("ik_smart").
		Text(projectName).
		Do(context.Background())
	if err != nil {
		return nil, err
	}

	tokens := make([]string, 0, len(analyzeResp.Tokens))
	for _, tok := range analyzeResp.Tokens {
		tokens = append(tokens, tok.Token)
	}
	if len(tokens) == 0 {
		return nil, fmt.Errorf("no tokens found from ik_smart")
	}
	queryText := strings.Join(tokens, " ")

	fetchFields := biddingSourceFields()

	var allHits []*elastic.SearchHit
	seen := make(map[string]bool)

	for _, field := range fieldsToTry {
		start, end := publishWindow(t, field)

		for _, filters := range filtersToTry {
			var queries []*elastic.BoolQuery

			switch {
			case field == "detail":
				var must []elastic.Query
				if province != "" {
					must = append(must, elastic.NewTermQuery("area", province))
				}
				must = append(must, elastic.NewMultiMatchQuery(queryText, field).MinimumShouldMatch("100%"))
				queries = append(queries, windowedBoolQuery(must, filters, t, start, end))

			case province != "":
				// Province + city.
				if city != "" {
					must := []elastic.Query{
						elastic.NewMatchQuery(field, queryText),
						elastic.NewTermQuery("area", province),
						elastic.NewTermQuery("city", city),
					}
					queries = append(queries, windowedBoolQuery(must, filters, t, start, end))
				}
				// Province only.
				must := []elastic.Query{
					elastic.NewMatchQuery(field, queryText),
					elastic.NewTermQuery("area", province),
				}
				queries = append(queries, windowedBoolQuery(must, filters, t, start, end))
			}

			for _, query := range queries {
				searchResult, err := runBiddingSearch(client, query, fetchFields, maxResults)
				if err != nil {
					// Best effort: one failing query must not discard the hits
					// gathered so far, but the failure should be visible.
					log.Println("searchByToken 查询失败:", err)
					continue
				}
				for _, hit := range searchResult.Hits.Hits {
					if seen[hit.Id] {
						continue
					}
					seen[hit.Id] = true
					allHits = append(allHits, hit)
					if len(allHits) >= maxResults {
						return allHits, nil
					}
				}
			}
		}
	}

	return allHits, nil
}

// publishRangeClause returns the raw-JSON filter list restricting publishtime
// to the half-open interval [start, end).
func publishRangeClause(start, end int64) []interface{} {
	return []interface{}{
		map[string]interface{}{
			"range": map[string]interface{}{
				"publishtime": map[string]interface{}{
					"gte": start,
					"lt":  end,
				},
			},
		},
	}
}

// searchCommon searches "projectname.pname", "title" and "detail" with an
// Elasticsearch "common" terms query for projectName (cutoff_frequency 0.01,
// low_freq_operator "and"), sent as a base64-encoded wrapper query.
//
// "detail" uses boost 0.1 and is restricted to province when set; the other
// fields use boost 0.2 and are only searched when province is set
// (province+city first, then province only). No notice-type filter is applied.
// The function returns as soon as maxResults distinct hits have been
// collected. Query failures are logged and skipped, so the returned error is
// always nil.
func searchCommon(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
	fields := []string{"projectname.pname", "title", "detail"}
	t := parsePublishMonth(publish)
	fetchFields := biddingSourceFields()

	var allHits []*elastic.SearchHit
	seen := make(map[string]bool)

	commonQuery := func(f string, boost float64) map[string]interface{} {
		return map[string]interface{}{
			"common": map[string]interface{}{
				f: map[string]interface{}{
					"query":             projectName,
					"cutoff_frequency":  0.01,
					"low_freq_operator": "and",
					"boost":             boost,
				},
			},
		}
	}
	areaTerm := func() map[string]interface{} {
		return map[string]interface{}{"term": map[string]interface{}{"area": province}}
	}
	cityTerm := func() map[string]interface{} {
		return map[string]interface{}{"term": map[string]interface{}{"city": city}}
	}

	for _, field := range fields {
		start, end := publishWindow(t, field)

		// boolOf wraps must clauses in a bool query, adding the publishtime
		// filter when a publish month was supplied.
		boolOf := func(must []interface{}) map[string]interface{} {
			body := map[string]interface{}{"must": must}
			if !t.IsZero() {
				body["filter"] = publishRangeClause(start, end)
			}
			return map[string]interface{}{"bool": body}
		}

		var queries []map[string]interface{}
		switch {
		case field == "detail":
			// Plain common match, optional province, time window.
			must := []interface{}{commonQuery(field, 0.1)}
			if province != "" {
				must = append(must, areaTerm())
			}
			queries = append(queries, boolOf(must))

		case province != "":
			// 1. Province + city.
			if city != "" {
				queries = append(queries, boolOf([]interface{}{
					commonQuery(field, 0.2),
					areaTerm(),
					cityTerm(),
				}))
			}
			// 2. Province only.
			queries = append(queries, boolOf([]interface{}{
				commonQuery(field, 0.2),
				areaTerm(),
			}))
		}

		for _, q := range queries {
			// The wrapper query carries the raw query JSON base64-encoded.
			queryBytes, err := json.Marshal(q)
			if err != nil {
				log.Println("searchCommon 查询失败:", err)
				continue
			}
			query := elastic.NewWrapperQuery(base64.StdEncoding.EncodeToString(queryBytes))

			searchResult, err := runBiddingSearch(client, query, fetchFields, maxResults)
			if err != nil {
				log.Println("searchCommon 查询失败:", err)
				continue
			}
			for _, hit := range searchResult.Hits.Hits {
				if seen[hit.Id] {
					continue
				}
				seen[hit.Id] = true
				allHits = append(allHits, hit)
				if len(allHits) >= maxResults {
					return allHits, nil
				}
			}
		}
	}

	return allHits, nil
}

// getYearRange returns, as Unix seconds, the instants m months before and m
// months after baseDate. Despite its name the span is expressed in months,
// not years.
func getYearRange(baseDate time.Time, m int) (start, end int64) {
	startTime := baseDate.AddDate(0, -m, 0)
	endTime := baseDate.AddDate(0, m, 0)
	return startTime.Unix(), endTime.Unix()
}