|
@@ -0,0 +1,733 @@
|
|
|
+package main
|
|
|
+
|
|
|
+import (
|
|
|
+ "context"
|
|
|
+ "encoding/base64"
|
|
|
+ "encoding/json"
|
|
|
+ "fmt"
|
|
|
+ "github.com/olivere/elastic/v7"
|
|
|
+ util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
+ "log"
|
|
|
+ "sort"
|
|
|
+
|
|
|
+ //"sort"
|
|
|
+ "strings"
|
|
|
+ "time"
|
|
|
+)
|
|
|
+
|
|
|
+func SearchProjectFullScoring(client *elastic.Client, target InputData, projectName, areacode, publish string) ([]map[string]interface{}, error) {
|
|
|
+ seenIDs := make(map[string]*elastic.SearchHit)
|
|
|
+
|
|
|
+ province, city := "", ""
|
|
|
+ if areacode != "" {
|
|
|
+ code := areacode[:6]
|
|
|
+ where := map[string]interface{}{
|
|
|
+ "code": code,
|
|
|
+ }
|
|
|
+
|
|
|
+ res, _ := MgoQY.FindOne("address_new_2020", where)
|
|
|
+ province = util.ObjToString((*res)["province"])
|
|
|
+ city = util.ObjToString((*res)["city"])
|
|
|
+ }
|
|
|
+
|
|
|
+ // 1. 精准查询(权重 1.0)
|
|
|
+ preciseHits, err := searchPrecise(client, projectName, province, city, publish, 20)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for _, hit := range preciseHits {
|
|
|
+ if _, exists := seenIDs[hit.Id]; !exists {
|
|
|
+ seenIDs[hit.Id] = hit
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 分词查询(权重 0.8)
|
|
|
+ tokenHits, err := searchByToken(client, projectName, province, city, publish, 20)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for _, hit := range tokenHits {
|
|
|
+ if _, exists := seenIDs[hit.Id]; !exists {
|
|
|
+ seenIDs[hit.Id] = hit
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. common 查询(权重 0.5)
|
|
|
+ commonHits, err := searchCommon(client, projectName, province, city, publish, 10)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for _, hit := range commonHits {
|
|
|
+ if _, exists := seenIDs[hit.Id]; !exists {
|
|
|
+ seenIDs[hit.Id] = hit
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 4. 合并 + 打分增强
|
|
|
+ var results []map[string]interface{}
|
|
|
+ //nameScore := computeNameScore(projectName)
|
|
|
+ var allCandidates = []EsDocument{}
|
|
|
+
|
|
|
+ for id, hit := range seenIDs {
|
|
|
+ var doc map[string]interface{}
|
|
|
+ if err := json.Unmarshal(hit.Source, &doc); err != nil {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ // 从 Mongo 读取 detail 字段用于后续 buyer 过滤
|
|
|
+ bidd, _ := MgoB.FindById("bidding", id, nil)
|
|
|
+ detail := util.ObjToString((*bidd)["detail"])
|
|
|
+
|
|
|
+ doc["detail"] = detail
|
|
|
+ /**
|
|
|
+ "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
|
|
|
+ "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
|
|
|
+ "s_winner", "winnertel", "agency", "publishtime"
|
|
|
+ */
|
|
|
+ candidate := EsDocument{
|
|
|
+ Id: util.ObjToString(doc["id"]),
|
|
|
+ Title: util.ObjToString(doc["title"]),
|
|
|
+ Projectname: util.ObjToString(doc["projectname"]),
|
|
|
+ Toptype: util.ObjToString(doc["toptype"]),
|
|
|
+ Subtype: util.ObjToString(doc["subtype"]),
|
|
|
+ Area: util.ObjToString(doc["area"]),
|
|
|
+ City: util.ObjToString(doc["city"]),
|
|
|
+ Buyer: util.ObjToString(doc["buyer"]),
|
|
|
+ SWinner: util.ObjToString(doc["s_winner"]),
|
|
|
+ Bidamount: util.Float64All(doc["bidamount"]),
|
|
|
+ Publishtime: util.Int64All(doc["publishtime"]),
|
|
|
+ Agency: util.ObjToString(doc["agency"]),
|
|
|
+ WinnerTel: util.ObjToString(doc["winnertel"]),
|
|
|
+ BuyerTel: util.ObjToString(doc["buyertel"]),
|
|
|
+ BuyerPerson: util.ObjToString(doc["buyerperson"]),
|
|
|
+ Budget: util.Float64All(doc["budget"]),
|
|
|
+ }
|
|
|
+
|
|
|
+ score := calculateConfidenceScore(target, candidate)
|
|
|
+ candidate.Score = score
|
|
|
+ allCandidates = append(allCandidates, candidate)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 打印打分调试
|
|
|
+ //for i, c := range allCandidates {
|
|
|
+ // fmt.Printf("Candidat 排序前: %d Score: %.4f\n", i, c.Score)
|
|
|
+ //}
|
|
|
+
|
|
|
+ // 排序(降序)
|
|
|
+ sort.SliceStable(allCandidates, func(i, j int) bool {
|
|
|
+ return allCandidates[i].Score > allCandidates[j].Score
|
|
|
+ })
|
|
|
+
|
|
|
+ //for i, c := range allCandidates {
|
|
|
+ // fmt.Printf("Candidate 排序后: %d Score: %.4f\n", i, c.Score)
|
|
|
+ //}
|
|
|
+ //// 5. 排序
|
|
|
+ //sort.Slice(allCandidates, func(i, j int) bool {
|
|
|
+ // return allCandidates[i].Score > allCandidates[j].Score
|
|
|
+ //})
|
|
|
+
|
|
|
+ for _, doc := range allCandidates {
|
|
|
+ item := map[string]interface{}{
|
|
|
+ "id": doc.Id,
|
|
|
+ "title": doc.Title,
|
|
|
+ "projectname": doc.Projectname,
|
|
|
+ "projectcode": doc.ProjectCode,
|
|
|
+ "toptype": doc.Toptype,
|
|
|
+ "subtype": doc.Subtype,
|
|
|
+ "area": doc.Area,
|
|
|
+ "city": doc.City,
|
|
|
+ "buyer": doc.Buyer,
|
|
|
+ "budget": doc.Budget,
|
|
|
+ "bidamount": doc.Bidamount,
|
|
|
+ "winner": doc.Winner,
|
|
|
+ "detail": doc.Detail,
|
|
|
+ "publishtime": doc.Publishtime,
|
|
|
+ "agency": doc.Agency,
|
|
|
+ "s_winner": doc.SWinner,
|
|
|
+ "winnertel": doc.WinnerTel,
|
|
|
+ "buyertel": doc.BuyerTel,
|
|
|
+ "buyerperson": doc.BuyerPerson,
|
|
|
+ "score": doc.Score,
|
|
|
+ }
|
|
|
+ results = append(results, item)
|
|
|
+ }
|
|
|
+ return results, nil
|
|
|
+}
|
|
|
+
|
|
|
+// searchPrecise 精准查询
|
|
|
+func searchPrecise22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
+ fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
+ filtersToTry := [][]elastic.Query{
|
|
|
+ {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "拟建")},
|
|
|
+ }
|
|
|
+ var allResults []*elastic.SearchHit
|
|
|
+ seenIDs := make(map[string]bool)
|
|
|
+
|
|
|
+ query := elastic.NewBoolQuery()
|
|
|
+
|
|
|
+ for _, field := range fieldsToTry {
|
|
|
+ if field == "detail" && len(allResults) > maxResults {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ for _, filter := range filtersToTry {
|
|
|
+ //query := elastic.NewBoolQuery().
|
|
|
+ query.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
|
|
|
+ Filter(filter...)
|
|
|
+
|
|
|
+ fetchFields := elastic.NewFetchSourceContext(true).Include("id", "title", "projectname", "projectcode", "bidamount", "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel", "s_winner", "winnertel", "agency", "publishtime")
|
|
|
+
|
|
|
+ searchResult, err := client.Search().
|
|
|
+ Index("bidding").
|
|
|
+ Query(query).
|
|
|
+ Size(maxResults).
|
|
|
+ FetchSourceContext(fetchFields).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, hit := range searchResult.Hits.Hits {
|
|
|
+ if !seenIDs[hit.Id] {
|
|
|
+ seenIDs[hit.Id] = true
|
|
|
+ allResults = append(allResults, hit)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return allResults, nil
|
|
|
+}
|
|
|
+
|
|
|
+func searchPrecise(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
+ fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
+ filtersToTry := [][]elastic.Query{
|
|
|
+ {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "拟建")},
|
|
|
+ }
|
|
|
+ var allResults []*elastic.SearchHit
|
|
|
+ seenIDs := make(map[string]bool)
|
|
|
+
|
|
|
+ // 解析发布时间
|
|
|
+ var t time.Time
|
|
|
+ var err error
|
|
|
+ if publish != "" {
|
|
|
+ t, err = time.Parse("200601", publish)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("时间解析失败:", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, field := range fieldsToTry {
|
|
|
+ var dateRangeStart, dateRangeEnd int64
|
|
|
+ if !t.IsZero() {
|
|
|
+ if field == "detail" {
|
|
|
+ dateRangeStart, dateRangeEnd = getYearRange(t, 60)
|
|
|
+ } else {
|
|
|
+ dateRangeStart, dateRangeEnd = getYearRange(t, 36)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, filters := range filtersToTry {
|
|
|
+ var queries []*elastic.BoolQuery
|
|
|
+
|
|
|
+ if field == "detail" {
|
|
|
+ // detail 只加时间 + filter
|
|
|
+ query := elastic.NewBoolQuery()
|
|
|
+ query = query.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
|
|
|
+ query = query.Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ query = query.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, query)
|
|
|
+
|
|
|
+ } else {
|
|
|
+ // 1. 省 + 市 查询
|
|
|
+ if province != "" && city != "" {
|
|
|
+ query1 := elastic.NewBoolQuery()
|
|
|
+ query1 = query1.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
|
|
|
+ query1 = query1.Must(elastic.NewTermQuery("province", province))
|
|
|
+ query1 = query1.Must(elastic.NewTermQuery("city", city))
|
|
|
+ query1 = query1.Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ query1 = query1.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, query1)
|
|
|
+ }
|
|
|
+ // 2. 仅省份(城市为空或不同)
|
|
|
+ if province != "" {
|
|
|
+ query2 := elastic.NewBoolQuery()
|
|
|
+ query2 = query2.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
|
|
|
+ query2 = query2.Must(elastic.NewTermQuery("province", province))
|
|
|
+ query2 = query2.Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ query2 = query2.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, query2)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 不限制省市
|
|
|
+ if province != "" {
|
|
|
+ query3 := elastic.NewBoolQuery()
|
|
|
+ query3 = query3.Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase"))
|
|
|
+ query3 = query3.Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ query3 = query3.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, query3)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, query := range queries {
|
|
|
+ fetchFields := elastic.NewFetchSourceContext(true).Include(
|
|
|
+ "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
|
|
|
+ "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
|
|
|
+ "s_winner", "winnertel", "agency", "publishtime")
|
|
|
+
|
|
|
+ searchResult, err := client.Search().
|
|
|
+ Index("bidding").
|
|
|
+ Query(query).
|
|
|
+ Size(maxResults).
|
|
|
+ FetchSourceContext(fetchFields).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ // 打印 query JSON(调试用)
|
|
|
+ //if sourceQ, err := query.Source(); err == nil {
|
|
|
+ // log.Println(printInterfaceAsJSON(sourceQ))
|
|
|
+ //}
|
|
|
+
|
|
|
+ for _, hit := range searchResult.Hits.Hits {
|
|
|
+ if !seenIDs[hit.Id] {
|
|
|
+ seenIDs[hit.Id] = true
|
|
|
+ allResults = append(allResults, hit)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // detail 的命中足够就提前结束
|
|
|
+ if field == "detail" && len(allResults) > maxResults {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return allResults, nil
|
|
|
+}
|
|
|
+
|
|
|
+// searchByToken 分词查询
|
|
|
+func searchByToken22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
+ fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
+ filtersToTry := [][]elastic.Query{
|
|
|
+ {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "拟建")},
|
|
|
+ }
|
|
|
+ analyzeResp, err := client.IndexAnalyze().
|
|
|
+ Index("bidding").
|
|
|
+ Analyzer("ik_smart").
|
|
|
+ Text(projectName).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ var tokens []string
|
|
|
+ for _, token := range analyzeResp.Tokens {
|
|
|
+ tokens = append(tokens, token.Token)
|
|
|
+ }
|
|
|
+ if len(tokens) == 0 {
|
|
|
+ return nil, fmt.Errorf("no tokens found from ik_smart")
|
|
|
+ }
|
|
|
+ queryText := strings.Join(tokens, " ")
|
|
|
+
|
|
|
+ var allHits []*elastic.SearchHit
|
|
|
+ seen := make(map[string]bool)
|
|
|
+
|
|
|
+ for _, filter := range filtersToTry {
|
|
|
+ query := elastic.NewBoolQuery().
|
|
|
+ Must(elastic.NewMultiMatchQuery(queryText, fieldsToTry...).MinimumShouldMatch("100%")).
|
|
|
+ Filter(filter...)
|
|
|
+
|
|
|
+ searchResult, err := client.Search().
|
|
|
+ Index("bidding").
|
|
|
+ Query(query).
|
|
|
+ Size(maxResults).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, hit := range searchResult.Hits.Hits {
|
|
|
+ if !seen[hit.Id] {
|
|
|
+ seen[hit.Id] = true
|
|
|
+ allHits = append(allHits, hit)
|
|
|
+ if len(allHits) >= maxResults {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if len(allHits) >= maxResults {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return allHits, nil
|
|
|
+}
|
|
|
+
|
|
|
+func searchByToken(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
+ fieldsToTry := []string{"projectname.pname", "title", "detail"}
|
|
|
+ filtersToTry := [][]elastic.Query{
|
|
|
+ {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
|
|
|
+ {elastic.NewTermsQuery("toptype", "拟建")},
|
|
|
+ }
|
|
|
+
|
|
|
+ // 解析时间
|
|
|
+ var t time.Time
|
|
|
+ var err error
|
|
|
+ if publish != "" {
|
|
|
+ t, err = time.Parse("200601", publish)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("时间解析失败:", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 分词处理
|
|
|
+ analyzeResp, err := client.IndexAnalyze().
|
|
|
+ Index("bidding").
|
|
|
+ Analyzer("ik_smart").
|
|
|
+ Text(projectName).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ var tokens []string
|
|
|
+ for _, token := range analyzeResp.Tokens {
|
|
|
+ tokens = append(tokens, token.Token)
|
|
|
+ }
|
|
|
+ if len(tokens) == 0 {
|
|
|
+ return nil, fmt.Errorf("no tokens found from ik_smart")
|
|
|
+ }
|
|
|
+ queryText := strings.Join(tokens, " ")
|
|
|
+
|
|
|
+ // 指定返回字段
|
|
|
+ fetchFields := elastic.NewFetchSourceContext(true).Include(
|
|
|
+ "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
|
|
|
+ "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
|
|
|
+ "s_winner", "winnertel", "agency", "publishtime")
|
|
|
+
|
|
|
+ var allHits []*elastic.SearchHit
|
|
|
+ seen := make(map[string]bool)
|
|
|
+
|
|
|
+ for _, field := range fieldsToTry {
|
|
|
+ var dateRangeStart, dateRangeEnd int64
|
|
|
+ if !t.IsZero() {
|
|
|
+ if field == "detail" {
|
|
|
+ dateRangeStart, dateRangeEnd = getYearRange(t, 60)
|
|
|
+ } else {
|
|
|
+ dateRangeStart, dateRangeEnd = getYearRange(t, 36)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, filters := range filtersToTry {
|
|
|
+ var queries []*elastic.BoolQuery
|
|
|
+
|
|
|
+ if field == "detail" {
|
|
|
+ query := elastic.NewBoolQuery().
|
|
|
+ Must(elastic.NewMatchQuery(field, queryText)).
|
|
|
+ Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ query = query.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, query)
|
|
|
+ } else {
|
|
|
+ // 省+市
|
|
|
+ if province != "" && city != "" {
|
|
|
+ q := elastic.NewBoolQuery().
|
|
|
+ Must(elastic.NewMatchQuery(field, queryText)).
|
|
|
+ Must(elastic.NewTermQuery("province", province)).
|
|
|
+ Must(elastic.NewTermQuery("city", city)).
|
|
|
+ Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ q = q.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, q)
|
|
|
+ }
|
|
|
+ // 仅省
|
|
|
+ if province != "" {
|
|
|
+ q := elastic.NewBoolQuery().
|
|
|
+ Must(elastic.NewMatchQuery(field, queryText)).
|
|
|
+ Must(elastic.NewTermQuery("province", province)).
|
|
|
+ Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ q = q.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, q)
|
|
|
+ }
|
|
|
+ // 无省市
|
|
|
+ q := elastic.NewBoolQuery().
|
|
|
+ Must(elastic.NewMatchQuery(field, queryText)).
|
|
|
+ Filter(filters...)
|
|
|
+ if !t.IsZero() {
|
|
|
+ q = q.Must(elastic.NewRangeQuery("publishtime").Gte(dateRangeStart).Lt(dateRangeEnd))
|
|
|
+ }
|
|
|
+ queries = append(queries, q)
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, query := range queries {
|
|
|
+ searchResult, err := client.Search().
|
|
|
+ Index("bidding").
|
|
|
+ Query(query).
|
|
|
+ Size(maxResults).
|
|
|
+ FetchSourceContext(fetchFields).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, hit := range searchResult.Hits.Hits {
|
|
|
+ if !seen[hit.Id] {
|
|
|
+ seen[hit.Id] = true
|
|
|
+ allHits = append(allHits, hit)
|
|
|
+ if len(allHits) >= maxResults {
|
|
|
+ return allHits, nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if field == "detail" && len(allHits) >= maxResults {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return allHits, nil
|
|
|
+}
|
|
|
+
|
|
|
+// searchCommon common 查询
|
|
|
+func searchCommon22(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
+ queryMap := map[string]interface{}{
|
|
|
+ "bool": map[string]interface{}{
|
|
|
+ "should": []interface{}{
|
|
|
+ map[string]interface{}{"common": map[string]interface{}{"projectname.pname": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.2}}},
|
|
|
+ map[string]interface{}{"common": map[string]interface{}{"title": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.2}}},
|
|
|
+ map[string]interface{}{"common": map[string]interface{}{"detail": map[string]interface{}{"query": projectName, "cutoff_frequency": 0.01, "low_freq_operator": "and", "boost": 0.1}}},
|
|
|
+ },
|
|
|
+ "minimum_should_match": 1,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ queryBytes, _ := json.Marshal(queryMap)
|
|
|
+ queryBase64 := base64.StdEncoding.EncodeToString(queryBytes)
|
|
|
+ query := elastic.NewWrapperQuery(queryBase64)
|
|
|
+
|
|
|
+ fetchFields := elastic.NewFetchSourceContext(true).Include("id", "title", "projectname", "projectcode", "bidamount", "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel", "s_winner", "winnertel", "agency", "publishtime")
|
|
|
+
|
|
|
+ searchResult, err := client.Search().
|
|
|
+ Index("bidding").
|
|
|
+ Query(query).
|
|
|
+ Size(maxResults).
|
|
|
+ FetchSourceContext(fetchFields).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ var allHits []*elastic.SearchHit
|
|
|
+ seen := make(map[string]bool)
|
|
|
+
|
|
|
+ for _, hit := range searchResult.Hits.Hits {
|
|
|
+ if !seen[hit.Id] {
|
|
|
+ seen[hit.Id] = true
|
|
|
+ allHits = append(allHits, hit)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return allHits, nil
|
|
|
+}
|
|
|
+
|
|
|
+func searchCommon(client *elastic.Client, projectName, province, city, publish string, maxResults int) ([]*elastic.SearchHit, error) {
|
|
|
+ fields := []string{"projectname.pname", "title", "detail"}
|
|
|
+ var t time.Time
|
|
|
+ var err error
|
|
|
+ if publish != "" {
|
|
|
+ t, err = time.Parse("200601", publish)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("时间解析失败:", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var allHits []*elastic.SearchHit
|
|
|
+ seen := make(map[string]bool)
|
|
|
+
|
|
|
+ fetchFields := elastic.NewFetchSourceContext(true).Include(
|
|
|
+ "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
|
|
|
+ "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
|
|
|
+ "s_winner", "winnertel", "agency", "publishtime")
|
|
|
+
|
|
|
+ for _, field := range fields {
|
|
|
+ var dateRangeStart, dateRangeEnd int64
|
|
|
+ if !t.IsZero() {
|
|
|
+ if field == "detail" {
|
|
|
+ dateRangeStart, dateRangeEnd = getYearRange(t, 60)
|
|
|
+ } else {
|
|
|
+ dateRangeStart, dateRangeEnd = getYearRange(t, 36)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ var queries []map[string]interface{}
|
|
|
+
|
|
|
+ commonQuery := func(f string, boost float64) map[string]interface{} {
|
|
|
+ return map[string]interface{}{
|
|
|
+ "common": map[string]interface{}{
|
|
|
+ f: map[string]interface{}{
|
|
|
+ "query": projectName,
|
|
|
+ "cutoff_frequency": 0.01,
|
|
|
+ "low_freq_operator": "and",
|
|
|
+ "boost": boost,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if field == "detail" {
|
|
|
+ // 只做普通匹配 + 时间
|
|
|
+ boolQuery := map[string]interface{}{
|
|
|
+ "bool": map[string]interface{}{
|
|
|
+ "must": []interface{}{
|
|
|
+ commonQuery(field, 0.1),
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ if !t.IsZero() {
|
|
|
+ boolQuery["bool"].(map[string]interface{})["filter"] = []interface{}{
|
|
|
+ map[string]interface{}{
|
|
|
+ "range": map[string]interface{}{
|
|
|
+ "publishtime": map[string]interface{}{
|
|
|
+ "gte": dateRangeStart,
|
|
|
+ "lt": dateRangeEnd,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ }
|
|
|
+ queries = append(queries, boolQuery)
|
|
|
+
|
|
|
+ } else {
|
|
|
+ // 1. 省+市
|
|
|
+ if province != "" && city != "" {
|
|
|
+ q := map[string]interface{}{
|
|
|
+ "bool": map[string]interface{}{
|
|
|
+ "must": []interface{}{
|
|
|
+ commonQuery(field, 0.2),
|
|
|
+ map[string]interface{}{"term": map[string]interface{}{"province": province}},
|
|
|
+ map[string]interface{}{"term": map[string]interface{}{"city": city}},
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ if !t.IsZero() {
|
|
|
+ q["bool"].(map[string]interface{})["filter"] = []interface{}{
|
|
|
+ map[string]interface{}{
|
|
|
+ "range": map[string]interface{}{
|
|
|
+ "publishtime": map[string]interface{}{
|
|
|
+ "gte": dateRangeStart,
|
|
|
+ "lt": dateRangeEnd,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ }
|
|
|
+ queries = append(queries, q)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 仅省
|
|
|
+ if province != "" {
|
|
|
+ q := map[string]interface{}{
|
|
|
+ "bool": map[string]interface{}{
|
|
|
+ "must": []interface{}{
|
|
|
+ commonQuery(field, 0.2),
|
|
|
+ map[string]interface{}{"term": map[string]interface{}{"province": province}},
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ if !t.IsZero() {
|
|
|
+ q["bool"].(map[string]interface{})["filter"] = []interface{}{
|
|
|
+ map[string]interface{}{
|
|
|
+ "range": map[string]interface{}{
|
|
|
+ "publishtime": map[string]interface{}{
|
|
|
+ "gte": dateRangeStart,
|
|
|
+ "lt": dateRangeEnd,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ }
|
|
|
+ queries = append(queries, q)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 不加省市
|
|
|
+ q := map[string]interface{}{
|
|
|
+ "bool": map[string]interface{}{
|
|
|
+ "must": []interface{}{
|
|
|
+ commonQuery(field, 0.2),
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ if !t.IsZero() {
|
|
|
+ q["bool"].(map[string]interface{})["filter"] = []interface{}{
|
|
|
+ map[string]interface{}{
|
|
|
+ "range": map[string]interface{}{
|
|
|
+ "publishtime": map[string]interface{}{
|
|
|
+ "gte": dateRangeStart,
|
|
|
+ "lt": dateRangeEnd,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ }
|
|
|
+ queries = append(queries, q)
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, q := range queries {
|
|
|
+ // 编码 query 为 base64
|
|
|
+ queryBytes, _ := json.Marshal(q)
|
|
|
+ queryBase64 := base64.StdEncoding.EncodeToString(queryBytes)
|
|
|
+ query := elastic.NewWrapperQuery(queryBase64)
|
|
|
+
|
|
|
+ searchResult, err := client.Search().
|
|
|
+ Index("bidding").
|
|
|
+ Query(query).
|
|
|
+ Size(maxResults).
|
|
|
+ FetchSourceContext(fetchFields).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ log.Println("searchCommon 查询失败:", err)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, hit := range searchResult.Hits.Hits {
|
|
|
+ if !seen[hit.Id] {
|
|
|
+ seen[hit.Id] = true
|
|
|
+ allHits = append(allHits, hit)
|
|
|
+ if len(allHits) >= maxResults {
|
|
|
+ return allHits, nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return allHits, nil
|
|
|
+}
|
|
|
+
|
|
|
// getYearRange returns the Unix-second timestamps m months before and m
// months after baseDate. Month arithmetic is done with time.Time.AddDate, so
// out-of-range days are normalised the way AddDate normalises them.
func getYearRange(baseDate time.Time, m int) (start, end int64) {
	start = baseDate.AddDate(0, -m, 0).Unix()
	end = baseDate.AddDate(0, m, 0).Unix()
	return start, end
}
|