|
- package main
- import (
- "context"
- "encoding/base64"
- "encoding/json"
- "fmt"
- "github.com/olivere/elastic/v7"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "log"
- "sort"
- //"sort"
- "strings"
- "time"
- )
- func SearchProjectFullScoring(client *elastic.Client, target InputData, projectName, areacode, publish string) ([]map[string]interface{}, error) {
- seenIDs := make(map[string]*elastic.SearchHit)
- province, city := "", ""
- if areacode != "" {
- code := areacode[:6]
- where := map[string]interface{}{
- "code": code,
- }
- res, _ := MgoQY.FindOne("address_new_2020", where)
- province = util.ObjToString((*res)["province"])
- city = util.ObjToString((*res)["city"])
- }
- // 1. 精准查询(权重 1.0)
- preciseHits, err := searchPrecise(client, projectName, province, city, publish, 20)
- if err != nil {
- return nil, err
- }
- for _, hit := range preciseHits {
- if _, exists := seenIDs[hit.Id]; !exists {
- seenIDs[hit.Id] = hit
- }
- }
- // 2. 分词查询(权重 0.8)
- tokenHits, err := searchByToken(client, projectName, province, city, publish, 20)
- if err != nil {
- return nil, err
- }
- for _, hit := range tokenHits {
- if _, exists := seenIDs[hit.Id]; !exists {
- seenIDs[hit.Id] = hit
- }
- }
- // 3. common 查询(权重 0.5)
- commonHits, err := searchCommon(client, projectName, province, city, publish, 10)
- if err != nil {
- return nil, err
- }
- for _, hit := range commonHits {
- if _, exists := seenIDs[hit.Id]; !exists {
- seenIDs[hit.Id] = hit
- }
- }
- // 4. 合并 + 打分增强
- var results []map[string]interface{}
- //nameScore := computeNameScore(projectName)
- var allCandidates = []EsDocument{}
- for id, hit := range seenIDs {
- var doc map[string]interface{}
- if err := json.Unmarshal(hit.Source, &doc); err != nil {
- continue
- }
- // 从 Mongo 读取 detail 字段用于后续 buyer 过滤
- bidd, _ := MgoB.FindById("bidding", id, nil)
- detail := util.ObjToString((*bidd)["detail"])
- doc["detail"] = detail
- /**
- "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
- "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
- "s_winner", "winnertel", "agency", "publishtime"
- */
- candidate := EsDocument{
- Id: util.ObjToString(doc["id"]),
- Title: util.ObjToString(doc["title"]),
- Projectname: util.ObjToString(doc["projectname"]),
- Toptype: util.ObjToString(doc["toptype"]),
- Subtype: util.ObjToString(doc["subtype"]),
- Area: util.ObjToString(doc["area"]),
- City: util.ObjToString(doc["city"]),
- Buyer: util.ObjToString(doc["buyer"]),
- SWinner: util.ObjToString(doc["s_winner"]),
- Bidamount: util.Float64All(doc["bidamount"]),
- Publishtime: util.Int64All(doc["publishtime"]),
- Agency: util.ObjToString(doc["agency"]),
- WinnerTel: util.ObjToString(doc["winnertel"]),
- BuyerTel: util.ObjToString(doc["buyertel"]),
- BuyerPerson: util.ObjToString(doc["buyerperson"]),
- Budget: util.Float64All(doc["budget"]),
- }
- score := calculateConfidenceScore(target, candidate)
- candidate.Score = score
- allCandidates = append(allCandidates, candidate)
- }
- // 打印打分调试
- //for i, c := range allCandidates {
- // fmt.Printf("Candidat 排序前: %d Score: %.4f\n", i, c.Score)
- //}
- // 排序(降序)
- sort.SliceStable(allCandidates, func(i, j int) bool {
- return allCandidates[i].Score > allCandidates[j].Score
- })
- //for i, c := range allCandidates {
- // fmt.Printf("Candidate 排序后: %d Score: %.4f\n", i, c.Score)
- //}
- //// 5. 排序
- //sort.Slice(allCandidates, func(i, j int) bool {
- // return allCandidates[i].Score > allCandidates[j].Score
- //})
- for _, doc := range allCandidates {
- item := map[string]interface{}{
- "id": doc.Id,
- "title": doc.Title,
- "projectname": doc.Projectname,
- "projectcode": doc.ProjectCode,
- "toptype": doc.Toptype,
- "subtype": doc.Subtype,
- "area": doc.Area,
- "city": doc.City,
- "buyer": doc.Buyer,
- "budget": doc.Budget,
- "bidamount": doc.Bidamount,
- "winner": doc.Winner,
- "detail": doc.Detail,
- "publishtime": doc.Publishtime,
- "agency": doc.Agency,
- "s_winner": doc.SWinner,
- "winnertel": doc.WinnerTel,
- "buyertel": doc.BuyerTel,
- "buyerperson": doc.BuyerPerson,
- "score": doc.Score,
- }
- results = append(results, item)
- }
- return results, nil
- }
- // searchPrecise 精准查询
- func searchPrecise22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- var allResults []*elastic.SearchHit
- seenIDs := make(map[string]bool)
- query := elastic.NewBoolQuery()
- for _, field := range fieldsToTry {
- if field == "detail" && len(allResults) > maxResults {
- break
- }
- for _, filter := range filtersToTry {
- //query := elastic.NewBoolQuery().
- query.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
- Filter(filter...)
- fetchFields := elastic.NewFetchSourceContext(true).Include("id", "title", "projectname", "projectcode", "bidamount", "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel", "s_winner", "winnertel", "agency", "publishtime")
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(maxResults).
- FetchSourceContext(fetchFields).
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seenIDs[hit.Id] {
- seenIDs[hit.Id] = true
- allResults = append(allResults, hit)
- }
- }
- }
- }
- return allResults, nil
- }
- func searchPrecise(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- var allResults []*elastic.SearchHit
- seenIDs := make(map[string]bool)
- // 解析发布时间
- var t time.Time
- var err error
- if publish != "" {
- t, err = time.Parse("200601", publish)
- if err != nil {
- log.Println("时间解析失败:", err)
- }
- }
- for _, field := range fieldsToTry {
- var dateRangeStart, dateRangeEnd int64
- if !t.IsZero() {
- if field == "detail" {
- dateRangeStart, dateRangeEnd = getYearRange(t, 60)
- } else {
- dateRangeStart, dateRangeEnd = getYearRange(t, 36)
- }
- }
- for _, filters := range filtersToTry {
- var queries []*elastic.BoolQuery
- if field == "detail" {
- // detail 只加时间 + filter
- query := elastic.NewBoolQuery()
- query = query.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
- query = query.Filter(filters...)
- if !t.IsZero() {
- query = query.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, query)
- } else {
- // 1. 省 + 市 查询
- if province != "" && city != "" {
- query1 := elastic.NewBoolQuery()
- query1 = query1.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
- query1 = query1.Must(elastic.NewTermQuery("province", province))
- query1 = query1.Must(elastic.NewTermQuery("city", city))
- query1 = query1.Filter(filters...)
- if !t.IsZero() {
- query1 = query1.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, query1)
- }
- // 2. 仅省份(城市为空或不同)
- if province != "" {
- query2 := elastic.NewBoolQuery()
- query2 = query2.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
- query2 = query2.Must(elastic.NewTermQuery("province", province))
- query2 = query2.Filter(filters...)
- if !t.IsZero() {
- query2 = query2.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, query2)
- }
- // 3. 不限制省市
- if province != "" {
- query3 := elastic.NewBoolQuery()
- query3 = query3.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
- query3 = query3.Filter(filters...)
- if !t.IsZero() {
- query3 = query3.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, query3)
- }
- }
- for _, query := range queries {
- fetchFields := elastic.NewFetchSourceContext(true).Include(
- "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
- "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
- "s_winner", "winnertel", "agency", "publishtime")
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(maxResults).
- FetchSourceContext(fetchFields).
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- // 打印 query JSON(调试用)
- //if sourceQ, err := query.Source(); err == nil {
- // log.Println(printInterfaceAsJSON(sourceQ))
- //}
- for _, hit := range searchResult.Hits.Hits {
- if !seenIDs[hit.Id] {
- seenIDs[hit.Id] = true
- allResults = append(allResults, hit)
- }
- }
- }
- }
- // detail 的命中足够就提前结束
- if field == "detail" && len(allResults) > maxResults {
- break
- }
- }
- return allResults, nil
- }
- // searchByToken 分词查询
- func searchByToken22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- analyzeResp, err := client.IndexAnalyze().
- Index("bidding").
- Analyzer("ik_smart").
- Text(projectName).
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- var tokens []string
- for _, token := range analyzeResp.Tokens {
- tokens = append(tokens, token.Token)
- }
- if len(tokens) == 0 {
- return nil, fmt.Errorf("no tokens found from ik_smart")
- }
- queryText := strings.Join(tokens, " ")
- var allHits []*elastic.SearchHit
- seen := make(map[string]bool)
- for _, filter := range filtersToTry {
- query := elastic.NewBoolQuery().
- Must(elastic.NewMultiMatchQuery(queryText, fieldsToTry...).MinimumShouldMatch("100%")).
- Filter(filter...)
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(maxResults).
- Do(context.Background())
- if err != nil {
- continue
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seen[hit.Id] {
- seen[hit.Id] = true
- allHits = append(allHits, hit)
- if len(allHits) >= maxResults {
- break
- }
- }
- }
- if len(allHits) >= maxResults {
- break
- }
- }
- return allHits, nil
- }
- func searchByToken(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- // 解析时间
- var t time.Time
- var err error
- if publish != "" {
- t, err = time.Parse("200601", publish)
- if err != nil {
- log.Println("时间解析失败:", err)
- }
- }
- // 分词处理
- analyzeResp, err := client.IndexAnalyze().
- Index("bidding").
- Analyzer("ik_smart").
- Text(projectName).
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- var tokens []string
- for _, token := range analyzeResp.Tokens {
- tokens = append(tokens, token.Token)
- }
- if len(tokens) == 0 {
- return nil, fmt.Errorf("no tokens found from ik_smart")
- }
- queryText := strings.Join(tokens, " ")
- // 指定返回字段
- fetchFields := elastic.NewFetchSourceContext(true).Include(
- "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
- "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
- "s_winner", "winnertel", "agency", "publishtime")
- var allHits []*elastic.SearchHit
- seen := make(map[string]bool)
- for _, field := range fieldsToTry {
- var dateRangeStart, dateRangeEnd int64
- if !t.IsZero() {
- if field == "detail" {
- dateRangeStart, dateRangeEnd = getYearRange(t, 60)
- } else {
- dateRangeStart, dateRangeEnd = getYearRange(t, 36)
- }
- }
- for _, filters := range filtersToTry {
- var queries []*elastic.BoolQuery
- if field == "detail" {
- query := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery(field, queryText)).
- Filter(filters...)
- if !t.IsZero() {
- query = query.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, query)
- } else {
- // 省+市
- if province != "" && city != "" {
- q := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery(field, queryText)).
- Must(elastic.NewTermQuery("province", province)).
- Must(elastic.NewTermQuery("city", city)).
- Filter(filters...)
- if !t.IsZero() {
- q = q.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, q)
- }
- // 仅省
- if province != "" {
- q := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery(field, queryText)).
- Must(elastic.NewTermQuery("province", province)).
- Filter(filters...)
- if !t.IsZero() {
- q = q.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, q)
- }
- // 无省市
- q := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery(field, queryText)).
- Filter(filters...)
- if !t.IsZero() {
- q = q.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
- }
- queries = append(queries, q)
- }
- for _, query := range queries {
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(maxResults).
- FetchSourceContext(fetchFields).
- Do(context.Background())
- if err != nil {
- continue
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seen[hit.Id] {
- seen[hit.Id] = true
- allHits = append(allHits, hit)
- if len(allHits) >= maxResults {
- return allHits, nil
- }
- }
- }
- }
- }
- if field == "detail" && len(allHits) >= maxResults {
- break
- }
- }
- return allHits, nil
- }
- // searchCommon common 查询
- func searchCommon22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
- queryMap := map[string]interface{}{
- "bool": map[string]interface{}{
- "should": []interface{}{
- map[string]interface{}{"common": map[string]interface{}{"projectname.pname": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.2}}},
- map[string]interface{}{"common": map[string]interface{}{"title": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.2}}},
- map[string]interface{}{"common": map[string]interface{}{"detail": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.1}}},
- },
- "minimum_should_match": 1,
- },
- }
- queryBytes, _ := json.Marshal(queryMap)
- queryBase64 := base64.StdEncoding.EncodeToString(queryBytes)
- query := elastic.NewWrapperQuery(queryBase64)
- fetchFields := elastic.NewFetchSourceContext(true).Include("id", "title", "projectname", "projectcode", "bidamount", "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel", "s_winner", "winnertel", "agency", "publishtime")
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(maxResults).
- FetchSourceContext(fetchFields).
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- var allHits []*elastic.SearchHit
- seen := make(map[string]bool)
- for _, hit := range searchResult.Hits.Hits {
- if !seen[hit.Id] {
- seen[hit.Id] = true
- allHits = append(allHits, hit)
- }
- }
- return allHits, nil
- }
- func searchCommon(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
- fields := []string{"projectname.pname", "title", "detail"}
- var t time.Time
- var err error
- if publish != "" {
- t, err = time.Parse("200601", publish)
- if err != nil {
- log.Println("时间解析失败:", err)
- }
- }
- var allHits []*elastic.SearchHit
- seen := make(map[string]bool)
- fetchFields := elastic.NewFetchSourceContext(true).Include(
- "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
- "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
- "s_winner", "winnertel", "agency", "publishtime")
- for _, field := range fields {
- var dateRangeStart, dateRangeEnd int64
- if !t.IsZero() {
- if field == "detail" {
- dateRangeStart, dateRangeEnd = getYearRange(t, 60)
- } else {
- dateRangeStart, dateRangeEnd = getYearRange(t, 36)
- }
- }
- var queries []map[string]interface{}
- commonQuery := func(f string, boost float64) map[string]interface{} {
- return map[string]interface{}{
- "common": map[string]interface{}{
- f: map[string]interface{}{
- "query": projectName,
- "cutoff_frequency": 0.01,
- "low_freq_operator": "and",
- "boost": boost,
- },
- },
- }
- }
- if field == "detail" {
- // 只做普通匹配 + 时间
- boolQuery := map[string]interface{}{
- "bool": map[string]interface{}{
- "must": []interface{}{
- commonQuery(field, 0.1),
- },
- },
- }
- if !t.IsZero() {
- boolQuery["bool"].(map[string]interface{})["filter"] = []interface{}{
- map[string]interface{}{
- "range": map[string]interface{}{
- "publishtime": map[string]interface{}{
- "gte": dateRangeStart,
- "lt": dateRangeEnd,
- },
- },
- },
- }
- }
- queries = append(queries, boolQuery)
- } else {
- // 1. 省+市
- if province != "" && city != "" {
- q := map[string]interface{}{
- "bool": map[string]interface{}{
- "must": []interface{}{
- commonQuery(field, 0.2),
- map[string]interface{}{"term": map[string]interface{}{"province": province}},
- map[string]interface{}{"term": map[string]interface{}{"city": city}},
- },
- },
- }
- if !t.IsZero() {
- q["bool"].(map[string]interface{})["filter"] = []interface{}{
- map[string]interface{}{
- "range": map[string]interface{}{
- "publishtime": map[string]interface{}{
- "gte": dateRangeStart,
- "lt": dateRangeEnd,
- },
- },
- },
- }
- }
- queries = append(queries, q)
- }
- // 2. 仅省
- if province != "" {
- q := map[string]interface{}{
- "bool": map[string]interface{}{
- "must": []interface{}{
- commonQuery(field, 0.2),
- map[string]interface{}{"term": map[string]interface{}{"province": province}},
- },
- },
- }
- if !t.IsZero() {
- q["bool"].(map[string]interface{})["filter"] = []interface{}{
- map[string]interface{}{
- "range": map[string]interface{}{
- "publishtime": map[string]interface{}{
- "gte": dateRangeStart,
- "lt": dateRangeEnd,
- },
- },
- },
- }
- }
- queries = append(queries, q)
- }
- // 3. 不加省市
- q := map[string]interface{}{
- "bool": map[string]interface{}{
- "must": []interface{}{
- commonQuery(field, 0.2),
- },
- },
- }
- if !t.IsZero() {
- q["bool"].(map[string]interface{})["filter"] = []interface{}{
- map[string]interface{}{
- "range": map[string]interface{}{
- "publishtime": map[string]interface{}{
- "gte": dateRangeStart,
- "lt": dateRangeEnd,
- },
- },
- },
- }
- }
- queries = append(queries, q)
- }
- for _, q := range queries {
- // 编码 query 为 base64
- queryBytes, _ := json.Marshal(q)
- queryBase64 := base64.StdEncoding.EncodeToString(queryBytes)
- query := elastic.NewWrapperQuery(queryBase64)
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(maxResults).
- FetchSourceContext(fetchFields).
- Do(context.Background())
- if err != nil {
- log.Println("searchCommon 查询失败:", err)
- continue
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seen[hit.Id] {
- seen[hit.Id] = true
- allHits = append(allHits, hit)
- if len(allHits) >= maxResults {
- return allHits, nil
- }
- }
- }
- }
- }
- return allHits, nil
- }
// getYearRange returns, as Unix seconds, the bounds of a window around
// baseDate: start is baseDate shifted back by m calendar months and end is
// baseDate shifted forward by m calendar months.
func getYearRange(baseDate time.Time, m int) (start, end int64) {
	start = baseDate.AddDate(0, -m, 0).Unix()
	end = baseDate.AddDate(0, m, 0).Unix()
	return start, end
}
|