12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170 |
- package main
- import (
- "context"
- "encoding/json"
- "fmt"
- "github.com/olivere/elastic/v7"
- "go.uber.org/zap"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "regexp"
- "sort"
- "strings"
- "sync"
- "time"
- )
- func dealXlsxTest() {
- // 1. 初始化 ES 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(GF.Es.URL),
- elastic.SetBasicAuth(GF.Es.Username, GF.Es.Password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatal("创建 Elasticsearch 客户端失败", zap.Error(err))
- }
- // 2. 初始化 MongoDB 连接
- sess := MgoP.GetMgoConn()
- defer MgoP.DestoryMongoConn(sess)
- coll := sess.DB("qfw").C("wcc_dealXlsxData_0524")
- iter := coll.Find(nil).Select(nil).Iter()
- // 3. 并发控制
- const maxWorkers = 2
- taskCh := make(chan map[string]interface{}, 2000)
- var wg sync.WaitGroup
- // 4. 启动 worker 处理任务
- for i := 0; i < maxWorkers; i++ {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for doc := range taskCh {
- if len(doc) == 0 {
- log.Info("aaa", zap.Any("client", client))
- }
- processOneProposedTest(doc, client)
- }
- }()
- }
- // 5. 逐条读取数据并派发任务
- log.Info("111111", zap.String("222222", "开始处理数据"))
- count := 0
- for doc := make(map[string]interface{}); iter.Next(doc); {
- count++
- if count%1000 == 0 {
- log.Info("dealProposed", zap.Int("current", count), zap.Any("projectname", doc["p1_project_name"]))
- }
- taskCh <- cloneMap(doc) // 防止 map 重用
- }
- close(taskCh)
- wg.Wait()
- }
- func processOneProposedTest(tmp map[string]interface{}, client *elastic.Client) {
- defer func() {
- if r := recover(); r != nil {
- log.Warn("panic in processOneProposed", zap.Any("recover", r))
- }
- }()
- id := mongodb.BsonIdToSId(tmp["_id"])
- //proposedID := mongodb.BsonIdToSId(tmp["_id"])
- projectName := util.ObjToString(tmp["p1_project_name"])
- buyer := util.ObjToString(tmp["p1_project_owner"])
- //proposed_number := util.ObjToString(tmp["proposed_number"])
- //log.Info("processOneProposed", zap.String("开始查询es", projectName))
- results, err := searchES23(client, projectName, buyer, 20, 50)
- if err != nil {
- log.Warn("searchES22 error", zap.Error(err))
- return
- }
- //log.Info("processOneProposed", zap.String("结束查询es", projectName))
- biddings := []map[string]interface{}{}
- update := map[string]interface{}{}
- // 标讯信息
- for _, re := range results {
- biddingID := util.ObjToString(re["id"])
- da := map[string]interface{}{
- "id": re["id"],
- "title": re["title"],
- "area": re["area"],
- "city": re["city"],
- "projectname": re["projectname"],
- "score": re["score"],
- "toptype": re["toptype"],
- "subtype": re["subtype"],
- "buyer": re["buyer"],
- "budget": re["budget"],
- "buyerperson": re["buyerperson"],
- "buyertel": re["buyertel"],
- "s_winner": re["s_winner"],
- "bidamount": re["bidamount"],
- "winnertel": re["winnertel"],
- "agency": re["agency"],
- "publishtime": re["publishtime"],
- }
- //项目信息
- where2 := map[string]interface{}{"ids": biddingID}
- if util.ObjToString(re["toptype"]) == "拟建" {
- projectset, _ := MgoP.FindOne("projectset_proposed", where2)
- if projectset != nil && len((*projectset)) > 0 {
- v3 := map[string]interface{}{
- "project_id": mongodb.BsonIdToSId((*projectset)["_id"]),
- "projectname": (*projectset)["projectname"],
- "bidamount": (*projectset)["bidamount"],
- "area": (*projectset)["area"],
- "city": (*projectset)["city"],
- "district": (*projectset)["district"],
- "owner": (*projectset)["owner"],
- "approvecode": (*projectset)["approvecode"],
- }
- if (*projectset)["owner"] != "" {
- where11 := map[string]interface{}{
- "company_name": (*projectset)["owner"],
- }
- std, _ := MgoQY.FindOne("qyxy_std", where11)
- v3["credit_no"] = (*std)["credit_no"]
- }
- da["project"] = v3
- }
- biddings = append(biddings, da)
- } else {
- projectset, _ := MgoP.FindOne("projectset_20230904", where2)
- if projectset != nil && len((*projectset)) > 0 {
- v3 := map[string]interface{}{
- "project_id": mongodb.BsonIdToSId((*projectset)["_id"]),
- "projectname": (*projectset)["projectname"],
- "bidamount": (*projectset)["bidamount"],
- "area": (*projectset)["area"],
- "city": (*projectset)["city"],
- "district": (*projectset)["district"],
- "firsttime": (*projectset)["firsttime"],
- "bidtype": (*projectset)["bidtype"],
- "bidstatus": (*projectset)["bidstatus"],
- "sortprice": (*projectset)["sortprice"],
- "buyer": (*projectset)["buyer"],
- }
- if (*projectset)["buyer"] != "" {
- where11 := map[string]interface{}{
- "company_name": (*projectset)["buyer"],
- }
- std, _ := MgoQY.FindOne("qyxy_std", where11)
- v3["credit_no"] = (*std)["credit_no"]
- }
- da["project"] = v3
- }
- biddings = append(biddings, da)
- }
- }
- if len(biddings) > 0 {
- update["bidding"] = biddings
- MgoP.UpdateById("wcc_dealXlsxData_0524", id, map[string]interface{}{"$set": update})
- }
- }
- // dealProposed22Concurrent 多协程处理,拟建存量数据
- func dealProposed22Concurrent() {
- // 1. 初始化 ES 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(GF.Es.URL),
- elastic.SetBasicAuth(GF.Es.Username, GF.Es.Password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatal("创建 Elasticsearch 客户端失败", zap.Error(err))
- }
- // 2. 初始化 MongoDB 连接
- sess := MgoP.GetMgoConn()
- defer MgoP.DestoryMongoConn(sess)
- coll := sess.DB("qfw").C("projectset_proposed")
- query := map[string]interface{}{
- //"firsttime": map[string]interface{}{
- // "$gte": 1735660800,
- // "$lte": 1748102400,
- //},
- //"firsttime": map[string]interface{}{
- // "$lte": 1735660800,
- //},
- "_id": map[string]interface{}{
- "$lte": mongodb.StringTOBsonId("62b6fbc9fa39106bd5e599fc"),
- },
- }
- iter := coll.Find(query).Select(nil).Sort("-_id").Iter()
- // 3. 并发控制
- const maxWorkers = 1
- taskCh := make(chan map[string]interface{}, 2000)
- var wg sync.WaitGroup
- // 4. 启动 worker 处理任务
- for i := 0; i < maxWorkers; i++ {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for doc := range taskCh {
- if len(doc) == 0 {
- log.Info("aaa", zap.Any("client", client))
- }
- processOneProposed(doc, client)
- }
- }()
- }
- // 5. 逐条读取数据并派发任务
- log.Info("111111", zap.String("222222", "开始处理数据"))
- count := 0
- for doc := make(map[string]interface{}); iter.Next(doc); {
- count++
- if count%1000 == 0 {
- log.Info("dealProposed", zap.Int("current", count), zap.Any("projectname", doc["projectname"]), zap.Any("_id", doc["_id"]))
- }
- //if util.ObjToString(doc["area"]) == "甘肃" {
- // continue
- //}
- taskCh <- cloneMap(doc) // 防止 map 重用
- }
- close(taskCh)
- wg.Wait()
- }
- // processOneProposed 处理存量数据
- func processOneProposed(tmp map[string]interface{}, client *elastic.Client) {
- defer func() {
- if r := recover(); r != nil {
- log.Warn("panic in processOneProposed", zap.Any("recover", r))
- }
- }()
- proposedID := mongodb.BsonIdToSId(tmp["_id"])
- projectName := util.ObjToString(tmp["projectname"])
- buyer := util.ObjToString(tmp["owner"])
- proposed_number := util.ObjToString(tmp["proposed_number"])
- //log.Info("processOneProposed", zap.String("开始查询es", projectName))
- results, err := searchES23(client, projectName, buyer, 20, 50)
- if err != nil {
- log.Warn("searchES22 error", zap.Error(err))
- return
- }
- //log.Info("processOneProposed", zap.String("结束查询es", projectName))
- biddings := []map[string]interface{}{}
- // 标讯信息
- for _, re := range results {
- biddingID := util.ObjToString(re["id"])
- da := map[string]interface{}{
- "id": re["id"],
- "title": re["title"],
- "area": re["area"],
- "city": re["city"],
- "projectname": re["projectname"],
- "score": re["score"],
- "toptype": re["toptype"],
- "subtype": re["subtype"],
- "buyer": re["buyer"],
- "budget": re["budget"],
- "buyerperson": re["buyerperson"],
- "buyertel": re["buyertel"],
- "s_winner": re["s_winner"],
- "bidamount": re["bidamount"],
- "winnertel": re["winnertel"],
- "agency": re["agency"],
- "publishtime": re["publishtime"],
- }
- //项目信息
- where2 := map[string]interface{}{"ids": biddingID}
- if util.ObjToString(re["toptype"]) == "拟建" {
- projectset, _ := MgoP.FindOne("projectset_proposed", where2)
- if projectset != nil && len((*projectset)) > 0 {
- v3 := map[string]interface{}{
- "project_id": mongodb.BsonIdToSId((*projectset)["_id"]),
- "projectname": (*projectset)["projectname"],
- "bidamount": (*projectset)["bidamount"],
- "area": (*projectset)["area"],
- "city": (*projectset)["city"],
- "district": (*projectset)["district"],
- "owner": (*projectset)["owner"],
- "approvecode": (*projectset)["approvecode"],
- "approvestatus": (*projectset)["approvestatus"],
- "sourceinfourl": (*projectset)["sourceinfourl"],
- }
- if (*projectset)["owner"] != "" {
- where11 := map[string]interface{}{
- "company_name": (*projectset)["owner"],
- }
- std, _ := MgoQY.FindOne("qyxy_std", where11)
- v3["credit_no"] = (*std)["credit_no"]
- }
- da["project"] = v3
- }
- biddings = append(biddings, da)
- } else {
- projectset, _ := MgoP.FindOne("projectset_20230904", where2)
- if projectset != nil && len((*projectset)) > 0 {
- v3 := map[string]interface{}{
- "project_id": mongodb.BsonIdToSId((*projectset)["_id"]),
- "projectname": (*projectset)["projectname"],
- "bidamount": (*projectset)["bidamount"],
- "area": (*projectset)["area"],
- "city": (*projectset)["city"],
- "firsttime": (*projectset)["firsttime"],
- "bidtype": (*projectset)["bidtype"],
- "bidstatus": (*projectset)["bidstatus"],
- "sortprice": (*projectset)["sortprice"],
- "buyer": (*projectset)["buyer"],
- }
- da["project"] = v3
- }
- biddings = append(biddings, da)
- }
- }
- insert := map[string]interface{}{
- "proposed_id": proposedID,
- "stype": 1, //代表从拟建数据-> 匹配在建数据
- "proposed_number": proposed_number,
- "buyer": buyer,
- "projectname": tmp["projectname"],
- "area": tmp["area"],
- "city": tmp["city"],
- "district": tmp["district"],
- "bidding": biddings,
- "updatetime": time.Now().Unix(),
- }
- if isValidCodeFormat(util.ObjToString(tmp["approvecode"])) {
- insert["approvecode"] = tmp["approvecode"]
- }
- if buyer != "" {
- where11 := map[string]interface{}{
- "company_name": buyer,
- }
- std, _ := MgoQY.FindOne("qyxy_std", where11)
- insert["credit_no"] = (*std)["credit_no"]
- }
- whereExist := map[string]interface{}{
- "proposed_id": proposedID,
- }
- if GF.Env.Savecoll == "" {
- GF.Env.Savecoll = "wcc_nj_zj_bidding"
- }
- exist, _ := MgoP.FindOne(GF.Env.Savecoll, whereExist)
- // 存在就更新
- if exist != nil && len(*exist) > 0 {
- exitsid := mongodb.BsonIdToSId((*exist)["_id"])
- MgoP.UpdateById(GF.Env.Savecoll, exitsid, map[string]interface{}{"$set": insert})
- } else {
- insert["comeintime"] = time.Now().Unix()
- MgoP.InsertOrUpdate("qfw", GF.Env.Savecoll, insert)
- }
- }
// cloneMap returns a shallow copy of src: a new map holding the same keys and
// the same (uncopied) values. A nil src yields an empty, non-nil map.
func cloneMap(src map[string]interface{}) map[string]interface{} {
	copied := make(map[string]interface{}, len(src))
	for key := range src {
		copied[key] = src[key]
	}
	return copied
}
- func dealProposed22() {
- url := GF.Es.URL
- //url := "http://127.0.0.1:19908"
- username := GF.Es.Username
- password := GF.Es.Password
- //index := "bidding" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Info("创建 Elasticsearch 客户端失败", zap.Error(err))
- }
- //
- sess := MgoP.GetMgoConn()
- defer MgoP.DestoryMongoConn(sess)
- log.Info("dealProposed", zap.Any("开始处理:拟建数据表", "projectset_proposed"))
- where := map[string]interface{}{
- "firsttime": map[string]interface{}{
- "$gte": 1735660800,
- },
- }
- queryMgo := sess.DB("qfw").C("projectset_proposed").Find(&where).Select(nil).Iter()
- count := 0
- for tmp := make(map[string]interface{}); queryMgo.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Info("dealProposed", zap.Any("current", count), zap.Any("projectname", tmp["projectname"]))
- }
- proposed_id := mongodb.BsonIdToSId(tmp["_id"])
- project_name := util.ObjToString(tmp["projectname"])
- buyer := util.ObjToString(tmp["owner"])
- results, err := searchES22(client, project_name, buyer, 60, 10)
- if err != nil {
- log.Info("searchES22", zap.Error(err))
- }
- projectIds := make([]string, 0) //拟建对应的在建项目ID
- biddingIds := make([]string, 0) //拟建项目对在建项目中的标讯ids
- biddings := make([]map[string]interface{}, 0)
- for _, re := range results {
- bidding_id := util.ObjToString(re["id"])
- biddingIds = append(biddingIds, bidding_id)
- bidding := map[string]interface{}{
- "id": re["id"],
- "title": re["title"],
- "projectname": re["projectname"],
- "score": re["score"],
- "toptype": re["toptype"],
- "subtype": re["subtype"],
- }
- biddings = append(biddings, bidding)
- }
- for _, bid := range biddingIds {
- where2 := map[string]interface{}{
- "ids": bid,
- }
- projectset, _ := MgoP.FindOne("projectset_20230904", where2)
- if projectset != nil && len((*projectset)) > 0 {
- projectIds = append(projectIds, mongodb.BsonIdToSId((*projectset)["_id"]))
- }
- }
- insert := map[string]interface{}{
- "proposed_id": proposed_id,
- "bidding_ids": removeDuplicates(biddingIds),
- "project_ids": removeDuplicates(projectIds),
- "biddings": biddings,
- "project_name": tmp["projectname"],
- }
- MgoP.InsertOrUpdate("qfw", "wcc_dealProposed22", insert)
- }
- }
- // dealProposed 处理拟建数据表
- func dealProposed() {
- sess := MgoP.GetMgoConn()
- defer MgoP.DestoryMongoConn(sess)
- log.Info("dealProposed", zap.Any("开始处理:拟建数据表", "projectset_proposed"))
- where := map[string]interface{}{
- "firsttime": map[string]interface{}{
- "$gte": 1735660800,
- },
- }
- queryMgo := sess.DB("qfw").C("projectset_proposed").Find(&where).Select(nil).Iter()
- count := 0
- for tmp := make(map[string]interface{}); queryMgo.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Info("dealProposed", zap.Any("current", count), zap.Any("projectname", tmp["projectname"]))
- }
- proposed_id := mongodb.BsonIdToSId(tmp["_id"])
- insert := make(map[string]interface{})
- insert["proposed_id"] = proposed_id
- var nzj_follw_records = make([]DwdFnzjFollowRecord, 0)
- err := JianyuSubjectDB.Where("proposed_id = ? ", proposed_id).Find(&nzj_follw_records).Error
- if err != nil {
- log.Info("dealProposed", zap.Error(err))
- }
- //拟建标讯,没有找到对应的在建项目数据
- if len(nzj_follw_records) == 0 {
- insert["has_bidding"] = false
- MgoP.InsertOrUpdate("qfw", "wcc_ok_project_proposed", insert)
- continue
- }
- projectIds := make([]string, 0) //拟建对应的在建项目ID
- biddingIds := make([]string, 0) //拟建项目对在建项目中的标讯ids
- for _, v := range nzj_follw_records {
- biddingIds = append(biddingIds, v.InfoID)
- }
- biddingIds = removeDuplicates(biddingIds)
- insert["bidding_ids"] = biddingIds
- for _, bid := range biddingIds {
- where2 := map[string]interface{}{
- "ids": bid,
- }
- projectset, _ := MgoP.FindOne("projectset_20230904", where2)
- if projectset != nil && len((*projectset)) > 0 {
- projectIds = append(projectIds, mongodb.BsonIdToSId((*projectset)["_id"]))
- }
- }
- if len(projectIds) > 0 {
- insert["project_ids"] = projectIds
- } else {
- insert["has_project"] = false
- }
- MgoP.InsertOrUpdate("qfw", "wcc_ok_project_proposed", insert)
- }
- log.Info("dealProposed", zap.Any("数据处理完毕:拟建数据表", "projectset_proposed"))
- }
// removeDuplicates returns the strings of arr with repeated values dropped,
// keeping the first occurrence of each value in its original order. The
// result is nil when arr is empty.
func removeDuplicates(arr []string) []string {
	var unique []string
	seen := make(map[string]struct{})
	for i := range arr {
		if _, dup := seen[arr[i]]; dup {
			continue
		}
		seen[arr[i]] = struct{}{}
		unique = append(unique, arr[i])
	}
	return unique
}
- // searchES24 添加分词查询
- func searchES24(client *elastic.Client, projectName, buyer2 string, scoreThreshold float64, maxResults int) ([]map[string]interface{}, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- var allResults []*elastic.SearchHit
- seenIDs := make(map[string]bool)
- // ✅ Step 1: 精准查询
- for _, field := range fieldsToTry {
- if field == "detail" && len(allResults) > 0 {
- break
- }
- for _, filter := range filtersToTry {
- query := elastic.NewBoolQuery().
- Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
- Filter(filter...)
- fetchFields := elastic.NewFetchSourceContext(true).Include(
- "id", "title", "projectname", "projectcode", "bidamount", "score",
- "area", "city", "toptype", "subtype", "buyer", "budget", "buyerperson",
- "buyertel", "s_winner", "winnertel", "agency", "publishtime")
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(70).
- FetchSourceContext(fetchFields).
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seenIDs[hit.Id] {
- seenIDs[hit.Id] = true
- allResults = append(allResults, hit)
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- // ✅ Step 2: 如果没结果,用分词兜底查询
- if len(allResults) == 0 {
- // 分词
- analyzeResp, err := client.IndexAnalyze().
- Index("bidding").
- Analyzer("ik_smart").
- Text(projectName).
- Do(context.Background())
- if err != nil {
- return nil, fmt.Errorf("ik_smart analyze failed: %v", err)
- }
- var tokens []string
- for _, token := range analyzeResp.Tokens {
- tokens = append(tokens, token.Token)
- }
- if len(tokens) == 0 {
- return nil, fmt.Errorf("no tokens found from ik_smart")
- }
- // 用所有分词一次性查询
- queryText := strings.Join(tokens, " ")
- for _, filter := range filtersToTry {
- query := elastic.NewBoolQuery().
- Must(
- elastic.NewMultiMatchQuery(queryText, fieldsToTry...).
- MinimumShouldMatch("100%"), // 必须包含所有分词,可根据需求改成 80%、50%
- ).
- Filter(filter...)
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(10). // 根据需要调整
- Do(context.Background())
- if err != nil {
- log.Warn("multi token query failed", zap.Error(err))
- continue
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seenIDs[hit.Id] {
- seenIDs[hit.Id] = true
- allResults = append(allResults, hit)
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- }
- // ✅ Step 3: 后处理
- var results []map[string]interface{}
- seenProjectNames := make(map[string]bool)
- seenProjectCodes := make(map[string]bool)
- bidamountMap := make(map[float64]bool)
- for _, hit := range allResults {
- var doc map[string]interface{}
- if err := json.Unmarshal(hit.Source, &doc); err != nil {
- log.Info("解析文档失败", zap.Error(err))
- continue
- }
- projectNameValue := util.ObjToString(doc["projectname"])
- if projectNameValue == "" {
- continue
- }
- projectCode := util.ObjToString(doc["projectcode"])
- if projectCode != "" {
- if seenProjectCodes[projectCode] {
- continue
- }
- seenProjectCodes[projectCode] = true
- }
- bidamount := util.Float64All(doc["bidamount"])
- if bidamount != 0 {
- if bidamountMap[bidamount] {
- continue
- }
- bidamountMap[bidamount] = true
- }
- score := *hit.Score
- if score < scoreThreshold {
- continue
- }
- id := util.ObjToString(doc["id"])
- bidd, _ := MgoB.FindById("bidding", id, nil)
- detail := util.ObjToString((*bidd)["detail"])
- if detail != "" && !strings.Contains(detail, projectName) {
- continue
- }
- if buyer2 != "" && !strings.Contains(detail, buyer2) {
- continue
- }
- if seenProjectNames[projectNameValue] {
- continue
- }
- seenProjectNames[projectNameValue] = true
- doc["detail"] = detail
- doc["score"] = score
- results = append(results, doc)
- if len(results) >= maxResults {
- break
- }
- }
- // ✅ Step 4: 排序
- sort.Slice(results, func(i, j int) bool {
- return util.Float64All(results[i]["score"]) > util.Float64All(results[j]["score"])
- })
- return results, nil
- }
- func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThreshold float64, maxResults int) ([]map[string]interface{}, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- var allResults []*elastic.SearchHit
- seenIDs := make(map[string]bool)
- for _, field := range fieldsToTry {
- if field == "detail" && len(allResults) > 0 {
- break
- }
- for _, filter := range filtersToTry {
- // 构建查询:使用 MultiMatchQuery + phrase
- query := elastic.NewBoolQuery().
- Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
- Filter(filter...)
- fetchFields := elastic.NewFetchSourceContext(true).Include("id",
- "title", "projectname", "projectcode", "bidamount", "score", "area",
- "city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
- "s_winner", "winnertel", "agency", "publishtime")
- // 执行查询
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(70).
- FetchSourceContext(fetchFields). // 添加这一行,查询部分字段
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- // 去重处理
- for _, hit := range searchResult.Hits.Hits {
- if !seenIDs[hit.Id] {
- seenIDs[hit.Id] = true
- allResults = append(allResults, hit)
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- var results []map[string]interface{}
- seenProjectNames := make(map[string]bool)
- seenProjectCodes := make(map[string]bool)
- bidamountMap := make(map[float64]bool)
- //subtypeMap := make(map[string]bool)
- for _, hit := range allResults {
- var doc map[string]interface{}
- if err := json.Unmarshal(hit.Source, &doc); err != nil {
- log.Info("解析文档失败", zap.Error(err))
- continue
- }
- projectNameValue := util.ObjToString(doc["projectname"])
- if projectNameValue == "" {
- continue
- }
- projectCode := util.ObjToString(doc["projectcode"])
- if projectCode != "" {
- if seenProjectCodes[projectCode] {
- continue
- }
- seenProjectCodes[projectCode] = true
- }
- bidamount := util.Float64All(doc["bidamount"])
- if bidamount != 0 {
- if bidamountMap[bidamount] {
- continue
- }
- bidamountMap[bidamount] = true
- }
- // 相似度筛选
- score := *hit.Score
- doc["score"] = score //相似度
- if score < scoreThreshold {
- continue
- }
- doc["score"] = score
- //detail := util.ObjToString(doc["detail"])
- id := util.ObjToString(doc["id"])
- bidd, _ := MgoB.FindById("bidding", id, nil)
- detail := util.ObjToString((*bidd)["detail"])
- // 字段中必须包含 projectName
- if detail != "" {
- if !strings.Contains(detail, projectName) {
- continue
- }
- }
- if buyer2 != "" {
- if !strings.Contains(detail, buyer2) {
- continue
- }
- }
- if seenProjectNames[projectNameValue] {
- continue
- }
- seenProjectNames[projectNameValue] = true
- doc["detail"] = detail
- results = append(results, doc)
- if len(results) >= maxResults {
- break
- }
- }
- // 排序:按 score 降序
- sort.Slice(results, func(i, j int) bool {
- si := util.Float64All(results[i]["score"])
- sj := util.Float64All(results[j]["score"])
- return si > sj
- })
- return results, nil
- }
- func searchES22(client *elastic.Client, projectName, buyer2 string, scoreThreshold float64, maxResults int) ([]map[string]interface{}, error) {
- fieldsToTry := []string{"projectname.pname", "title", "detail"}
- filtersToTry := [][]elastic.Query{
- {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
- {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向")},
- {elastic.NewTermsQuery("toptype", "拟建")},
- }
- var allResults []*elastic.SearchHit
- seenIDs := make(map[string]bool)
- for _, field := range fieldsToTry {
- for _, filter := range filtersToTry {
- // 构建查询
- query := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery(field, projectName)).
- Filter(filter...)
- // 执行查询
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(70). // 多取一些,后面做筛选和去重
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- for _, hit := range searchResult.Hits.Hits {
- if !seenIDs[hit.Id] {
- allResults = append(allResults, hit)
- seenIDs[hit.Id] = true
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- if len(allResults) >= maxResults {
- break
- }
- }
- var results []map[string]interface{}
- seenProjectNames := make(map[string]bool)
- seenProjectCodes := make(map[string]bool)
- bidamountMap := make(map[float64]bool)
- for _, hit := range allResults {
- var doc map[string]interface{}
- if err := json.Unmarshal(hit.Source, &doc); err != nil {
- log.Info("解析文档失败", zap.Error(err))
- continue
- }
- projectNameValue := util.ObjToString(doc["projectname"])
- if projectNameValue == "" {
- continue
- }
- projectCode := util.ObjToString(doc["projectcode"])
- if seenProjectCodes[projectCode] {
- continue
- }
- seenProjectCodes[projectCode] = true
- bidamount := util.Float64All(doc["bidamount"])
- if bidamountMap[bidamount] {
- continue
- }
- bidamountMap[bidamount] = true
- // 相似度筛选
- score := *hit.Score
- doc["score"] = score //相似度
- if score < scoreThreshold {
- continue
- }
- //id := util.ObjToString(doc["id"])
- //doc["jyhref"] = GetJyURLByID(id)
- //if site := util.ObjToString(doc["site"]); site == "中华人民共和国自然资源部" {
- // doc["title"] = "土地出让" + "-" + util.ObjToString(doc["title"])
- //}
- // enrich: total_investment
- //if bidData, _ := MgoB.FindById("bidding", id, nil); bidData != nil {
- // if util.Float64All((*bidData)["total_investment"]) > 0 {
- // doc["total_investment"] = (*bidData)["total_investment"]
- // }
- //}
- doc["score"] = score
- detail := util.ObjToString(doc["detail"])
- // 字段中必须包含 projectName
- if buyer2 != "" {
- if !strings.Contains(detail, projectName) && !strings.Contains(detail, buyer2) {
- continue
- }
- }
- if seenProjectNames[projectNameValue] {
- continue
- }
- seenProjectNames[projectNameValue] = true
- results = append(results, doc)
- if len(results) >= maxResults {
- break
- }
- }
- return results, nil
- }
- func searchES(client *elastic.Client, projectName, buyer2 string) ([]map[string]interface{}, error) {
- query := elastic.NewBoolQuery().
- Must(
- //elastic.NewMatchQuery("projectname.pname", projectName), // 模糊匹配 projectname
- //elastic.NewMatchQuery("title", projectName), // 模糊匹配 projectname
- elastic.NewMatchQuery("detail", projectName), // 模糊匹配 projectname
- //elastic.NewTermQuery("area", "安徽"), // 过滤区域
- elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一"), // 过滤 subtype
- //elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向"), // 过滤 subtype
- //elastic.NewTermsQuery("toptype", "拟建"), // 过滤 subtype
- )
- searchResult, err := client.Search().
- Index("bidding").
- Query(query).
- Size(70). // 先取 12 条,确保足够数据
- Do(context.Background())
- if err != nil {
- return nil, err
- }
- // 结果集
- var results []map[string]interface{}
- seenProjectNames := make(map[string]bool) // 用于去重
- seenProjectCode := make(map[string]bool) // 用于去重
- bidamountMap := make(map[float64]bool)
- for _, hit := range searchResult.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Info("解析文档失败", zap.Error(err))
- continue
- }
- // 获取 `projectname`,防止 key 不存在时的错误
- projectNameValue, ok := doc["projectname"].(string)
- bidamount := util.Float64All(doc["bidamount"])
- if !ok {
- log.Info("⚠️ 缺少 projectname 字段,跳过:", zap.Any("projectname", doc["projectname"]))
- continue
- }
- projectCodeValue := util.ObjToString(doc["projectcode"])
- if seenProjectCode[projectCodeValue] {
- continue
- }
- if projectCodeValue != "" {
- seenProjectCode[projectCodeValue] = true
- }
- // **处理额外字段**
- id := util.ObjToString(doc["id"])
- bidData, _ := MgoB.FindById("bidding", id, nil)
- if util.Float64All((*bidData)["total_investment"]) > 0 {
- doc["total_investment"] = (*bidData)["total_investment"]
- }
- doc["jyhref"] = GetJyURLByID(id)
- score := *hit.Score
- site := util.ObjToString(doc["site"])
- if site == "中华人民共和国自然资源部" {
- doc["title"] = "土地出让" + "-" + util.ObjToString(doc["title"])
- }
- doc["score"] = score //相似度
- detail := util.ObjToString(doc["detail"])
- if !strings.Contains(detail, projectName) {
- continue
- }
- // **去重逻辑**:如果 `projectname` 已经出现过,则跳过
- if seenProjectNames[projectNameValue] {
- continue
- }
- if bidamountMap[bidamount] {
- continue
- }
- // **记录该 `projectname`,避免重复**
- seenProjectNames[projectNameValue] = true
- bidamountMap[bidamount] = true
- // **加入结果集**
- results = append(results, doc)
- // **如果已经找到 6 条不同 `projectname`,就跳出循环**
- if len(results) >= 10 {
- break
- }
- }
- //2、判断正文包含采购单位
- for _, hit := range searchResult.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Info("解析文档失败:", zap.Error(err))
- continue
- }
- // 获取 `projectname`,防止 key 不存在时的错误
- projectNameValue, ok := doc["projectname"].(string)
- bidamount := util.Float64All(doc["bidamount"])
- if !ok {
- log.Info("⚠️ 缺少 projectname 字段,跳过:", zap.Any("projectname", doc["projectname"]))
- continue
- }
- // **处理额外字段**
- id := util.ObjToString(doc["id"])
- doc["jyhref"] = GetJyURLByID(id)
- score := *hit.Score
- doc["score"] = score //相似度
- site := util.ObjToString(doc["site"])
- if site == "中华人民共和国自然资源部" {
- doc["title"] = "土地出让" + "-" + util.ObjToString(doc["title"])
- }
- //判断正文包含采购单位
- detail := util.ObjToString(doc["detail"])
- if !strings.Contains(detail, buyer2) {
- continue
- }
- // **去重逻辑**:如果 `projectname` 已经出现过,则跳过
- if seenProjectNames[projectNameValue] {
- continue
- }
- if bidamountMap[bidamount] {
- continue
- }
- // **记录该 `projectname`,避免重复**
- seenProjectNames[projectNameValue] = true
- bidamountMap[bidamount] = true
- // **加入结果集**
- results = append(results, doc)
- // **如果已经找到 6 条不同 `projectname`,就跳出循环**
- if len(results) >= 10 {
- break
- }
- }
- return results, nil
- }
- // GetJyURLByID 获取剑鱼地址
- func GetJyURLByID(id string) string {
- var Url = "https://www.jianyu360.com/article/content/%s.html"
- url := fmt.Sprintf(Url, util.CommonEncodeArticle("content", id))
- return url
- }
- // GetIdByURL 解密url,获取bidding ID
- func GetIdByURL(url string) string {
- if strings.Contains(url, "work-bench") {
- return ""
- }
- if strings.Contains(url, "/article/content") {
- urls := strings.Split(url, "content/")
- res := strings.Split(urls[1], ".html")
- ids := util.CommonDecodeArticle("content", res[0])
- return ids[0]
- }
- if strings.HasSuffix(url, "appid") {
- urls := strings.Split(url, "entservice/")
- res := strings.Split(urls[1], ".html")
- se := util.SimpleEncrypt{Key: "entservice"}
- id := se.DecodeString(res[0])
- return id
- }
- return ""
- }
// approveCodeRe matches a proposed-project approval code: five dash-separated
// groups of 4, 6, 2, 2 and 6 digits. It is compiled once at package
// initialisation instead of on every call.
var approveCodeRe = regexp.MustCompile(`^\d{4}-\d{6}-\d{2}-\d{2}-\d{6}$`)

// isValidCodeFormat reports whether s has the proposed-project code format
// dddd-dddddd-dd-dd-dddddd (digits only, nothing before or after).
func isValidCodeFormat(s string) bool {
	return approveCodeRe.MatchString(s)
}
|