bidding.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/olivere/elastic/v7"
  7. "github.com/xuri/excelize/v2"
  8. "io"
  9. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  11. "log"
  12. "strings"
  13. )
  14. // getBidding2 获取bidding数据
  15. func getBidding2() {
  16. url := "http://172.17.4.184:19908"
  17. //url := "http://127.0.0.1:19908"
  18. username := "jybid"
  19. password := "Top2023_JEB01i@31"
  20. index := "bidding" //索引名称
  21. // 创建 Elasticsearch 客户端
  22. client, err := elastic.NewClient(
  23. elastic.SetURL(url),
  24. elastic.SetBasicAuth(username, password),
  25. elastic.SetSniff(false),
  26. )
  27. if err != nil {
  28. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  29. }
  30. //85 抽取库
  31. //Mgo := &mongodb.MongodbSim{
  32. // //MongodbAddr: "127.0.0.1:27080",
  33. // MongodbAddr: "172.17.4.85:27080",
  34. // DbName: "top",
  35. // Size: 10,
  36. // //Direct: true,
  37. //}
  38. //Mgo.InitPool()
  39. //2023年01-01 2023-10-01,,1-3季度
  40. //2024-1 - 2024-4;1704038400-1711900800
  41. //2023-10-1 2024-1-1;1696089600-1704038400
  42. //城市范围
  43. //areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市")
  44. //rangeQuery := elastic.NewRangeQuery("publishtime").Gte(1704038400).Lt(1735660800)
  45. //query := elastic.NewBoolQuery().
  46. // Must(areaTermsQuery).
  47. // Must(rangeQuery)
  48. //---------------------------//
  49. //query := elastic.NewBoolQuery()
  50. //query.Must(elastic.NewMatchQuery("company_area", "广东"))
  51. ////query.Must(elastic.NewTermQuery("company_type", "北京市"))
  52. // 构建查询条件
  53. //query := elastic.NewBoolQuery().
  54. // MustNot(elastic.NewTermQuery("company_type", "个体工商户")). // 排除 company_type 为 "个体工商户"
  55. // Filter(elastic.NewTermQuery("company_area", "广东")) // 过滤 company_area 为 "广东"
  56. //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
  57. query := elastic.NewBoolQuery().
  58. Should(
  59. elastic.NewMatchQuery("subtype", "合同"),
  60. elastic.NewMatchQuery("toptype", "结果"),
  61. elastic.NewMatchQuery("toptype", "招标"),
  62. ).
  63. MinimumShouldMatch("1"). // 至少满足一个Should条件 // buyer = "中国建筑股份有限公司"
  64. Must(elastic.NewRangeQuery("publishtime").Gte(1704038400).Lt(1735660800))
  65. ctx := context.Background()
  66. //开始滚动搜索
  67. scrollID := ""
  68. scroll := "10m"
  69. searchSource := elastic.NewSearchSource().
  70. Query(query).
  71. Size(10000).
  72. Sort("_doc", true) //升序排序
  73. //Sort("_doc", false) //降序排序
  74. searchService := client.Scroll(index).
  75. Size(10000).
  76. Scroll(scroll).
  77. SearchSource(searchSource)
  78. res, err := searchService.Do(ctx)
  79. if err != nil {
  80. if err == io.EOF {
  81. fmt.Println("没有数据")
  82. } else {
  83. panic(err)
  84. }
  85. }
  86. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  87. fmt.Println("总数是:", res.TotalHits())
  88. total := 0
  89. for len(res.Hits.Hits) > 0 {
  90. for _, hit := range res.Hits.Hits {
  91. var doc map[string]interface{}
  92. err := json.Unmarshal(hit.Source, &doc)
  93. if err != nil {
  94. log.Printf("解析文档失败:%s", err)
  95. continue
  96. }
  97. //存入新表
  98. insert := map[string]interface{}{
  99. "buyer": doc["buyer"],
  100. "id": doc["id"],
  101. "toptype": doc["toptype"],
  102. "subtype": doc["subtype"],
  103. }
  104. err = MgoB.InsertOrUpdate("qfw", "wcc_20250311_bidding", insert)
  105. if err != nil {
  106. log.Println("error", doc["id"])
  107. }
  108. }
  109. total = total + len(res.Hits.Hits)
  110. scrollID = res.ScrollId
  111. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  112. log.Println("current count:", total)
  113. if err != nil {
  114. if err == io.EOF {
  115. // 滚动到最后一批数据,退出循环
  116. break
  117. }
  118. log.Println("滚动搜索失败:", err, res)
  119. break // 处理错误时退出循环
  120. }
  121. }
  122. // 在循环外调用 ClearScroll
  123. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  124. if err != nil {
  125. log.Printf("清理滚动搜索失败:%s", err)
  126. }
  127. fmt.Println("结束~~~~~~~~~~~~~~~")
  128. }
  129. func getBidding() {
  130. url := "http://172.17.4.184:19908"
  131. //url := "http://127.0.0.1:19908"
  132. username := "jybid"
  133. password := "Top2023_JEB01i@31"
  134. index := "bidding" //索引名称
  135. // 创建 Elasticsearch 客户端
  136. client, err := elastic.NewClient(
  137. elastic.SetURL(url),
  138. elastic.SetBasicAuth(username, password),
  139. elastic.SetSniff(false),
  140. )
  141. if err != nil {
  142. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  143. }
  144. //85 抽取库
  145. //Mgo := &mongodb.MongodbSim{
  146. // //MongodbAddr: "127.0.0.1:27080",
  147. // MongodbAddr: "172.17.4.85:27080",
  148. // DbName: "top",
  149. // Size: 10,
  150. // //Direct: true,
  151. //}
  152. //Mgo.InitPool()
  153. MgoB := &mongodb.MongodbSim{
  154. MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080",
  155. //MongodbAddr: "127.0.0.1:27083",
  156. Size: 10,
  157. DbName: "qfw",
  158. UserName: "SJZY_RWbid_ES",
  159. Password: "SJZY@B4i4D5e6S",
  160. //Direct: true,
  161. }
  162. MgoB.InitPool()
  163. //2023年01-01 2023-10-01,,1-3季度
  164. //2024-1 - 2024-4;1704038400-1711900800
  165. //2023-10-1 2024-1-1;1696089600-1704038400
  166. //城市范围
  167. //areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市")
  168. //rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400)
  169. //query := elastic.NewBoolQuery().
  170. // Must(areaTermsQuery).
  171. // Must(rangeQuery)
  172. //---------------------------//
  173. //query := elastic.NewBoolQuery()
  174. //query.Must(elastic.NewMatchQuery("company_area", "广东"))
  175. ////query.Must(elastic.NewTermQuery("company_type", "北京市"))
  176. // 构建查询条件
  177. query := elastic.NewBoolQuery().
  178. MustNot(elastic.NewTermQuery("company_type", "个体工商户")). // 排除 company_type 为 "个体工商户"
  179. Filter(elastic.NewTermQuery("company_area", "广东")) // 过滤 company_area 为 "广东"
  180. //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
  181. //query := elastic.NewBoolQuery().
  182. // //北京,天津,河北,上海,江苏,浙江,安徽
  183. // //Must(elastic.NewTermQuery("area", "北京市")).
  184. // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  185. // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
  186. // Must(rangeQuery)
  187. ctx := context.Background()
  188. //开始滚动搜索
  189. scrollID := ""
  190. scroll := "10m"
  191. searchSource := elastic.NewSearchSource().
  192. Query(query).
  193. Size(10000).
  194. Sort("_doc", true) //升序排序
  195. //Sort("_doc", false) //降序排序
  196. searchService := client.Scroll(index).
  197. Size(10000).
  198. Scroll(scroll).
  199. SearchSource(searchSource)
  200. res, err := searchService.Do(ctx)
  201. if err != nil {
  202. if err == io.EOF {
  203. fmt.Println("没有数据")
  204. } else {
  205. panic(err)
  206. }
  207. }
  208. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  209. fmt.Println("总数是:", res.TotalHits())
  210. total := 0
  211. for len(res.Hits.Hits) > 0 {
  212. for _, hit := range res.Hits.Hits {
  213. var doc map[string]interface{}
  214. err := json.Unmarshal(hit.Source, &doc)
  215. if err != nil {
  216. log.Printf("解析文档失败:%s", err)
  217. continue
  218. }
  219. //存入新表
  220. insert := map[string]interface{}{
  221. "companyaaaaaaaaaaaaaaaaaaaaaaaaaaa_name": doc["company_name"],
  222. "id": doc["id"],
  223. "credit_no": doc["credit_no"],
  224. "company_code": doc["company_code"],
  225. }
  226. if strings.Contains(util.ObjToString(doc["company_name"]), "银行") || strings.Contains(util.ObjToString(doc["company_name"]), "保险") || strings.Contains(util.ObjToString(doc["company_name"]), "证券") {
  227. insert["wcc_type"] = 1
  228. }
  229. err = MgoB.InsertOrUpdate("qfw", "wcc_2025_guangdong_qyxy", insert)
  230. if err != nil {
  231. log.Println("error", doc["id"])
  232. }
  233. }
  234. total = total + len(res.Hits.Hits)
  235. scrollID = res.ScrollId
  236. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  237. log.Println("current count:", total)
  238. if err != nil {
  239. if err == io.EOF {
  240. // 滚动到最后一批数据,退出循环
  241. break
  242. }
  243. log.Println("滚动搜索失败:", err, res)
  244. break // 处理错误时退出循环
  245. }
  246. }
  247. // 在循环外调用 ClearScroll
  248. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  249. if err != nil {
  250. log.Printf("清理滚动搜索失败:%s", err)
  251. }
  252. fmt.Println("结束~~~~~~~~~~~~~~~")
  253. }
  254. // updateHrefByEs 更新mgo by es
  255. func updateHrefByEs() {
  256. url := "http://172.17.4.184:19908"
  257. //url := "http://127.0.0.1:19908"
  258. //url := "http://127.0.0.1:19905"
  259. username := "jybid"
  260. password := "Top2023_JEB01i@31"
  261. //index := "bidding" //索引名称
  262. index := "biddingall" //索引名称
  263. // 创建 Elasticsearch 客户端
  264. client, err := elastic.NewClient(
  265. elastic.SetURL(url),
  266. elastic.SetBasicAuth(username, password),
  267. elastic.SetSniff(false),
  268. )
  269. if err != nil {
  270. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  271. }
  272. //MgoB = &mongodb.MongodbSim{
  273. // //MongodbAddr: "172.17.189.140:27080",
  274. // MongodbAddr: "127.0.0.1:27083",
  275. // Size: 10,
  276. // DbName: "qfw",
  277. // UserName: "SJZY_RWbid_ES",
  278. // Password: "SJZY@B4i4D5e6S",
  279. // Direct: true,
  280. //}
  281. //MgoB.InitPool()
  282. defer util.Catch()
  283. sess := MgoB.GetMgoConn()
  284. defer MgoB.DestoryMongoConn(sess)
  285. where := map[string]interface{}{
  286. "itype": 0,
  287. }
  288. it := sess.DB("qfw").C("bidding_es_update_id").Find(where).Select(nil).Iter()
  289. log.Println("taskRun 开始")
  290. count := 0
  291. for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
  292. if count%100 == 0 {
  293. log.Println("current:", count)
  294. }
  295. id := mongodb.BsonIdToSId(tmp["_id"])
  296. res, _ := GetByID(client, index, id)
  297. if res == nil {
  298. //没有找到
  299. update := map[string]interface{}{
  300. "itype": 0,
  301. }
  302. MgoB.UpdateById("bidding_es_update_id", id, map[string]interface{}{"$set": update})
  303. } else {
  304. // 找到对应数据了
  305. update := map[string]interface{}{
  306. "href": res["href"],
  307. "itype": 1,
  308. }
  309. update2 := map[string]interface{}{
  310. "href": res["href"],
  311. }
  312. MgoB.UpdateById("bidding_es_update_id", id, map[string]interface{}{"$set": update})
  313. MgoB.UpdateById("bidding", id, map[string]interface{}{"$set": update2})
  314. }
  315. }
  316. log.Println("over", count)
  317. }
  318. // GetByID 根据索引和 ID 获取数据
  319. func GetByID(ec *elastic.Client, index string, id string) (map[string]interface{}, error) {
  320. ctx := context.Background()
  321. // 执行 Get 请求
  322. res, err := ec.Get().
  323. Index(index).
  324. Id(id).
  325. Do(ctx)
  326. if err != nil {
  327. if elastic.IsNotFound(err) {
  328. return nil, fmt.Errorf("document not found for index '%s' and id '%s'", index, id)
  329. }
  330. return nil, fmt.Errorf("failed to get document: %w", err)
  331. }
  332. // 解析文档源
  333. if !res.Found {
  334. return nil, fmt.Errorf("document not found for index '%s' and id '%s'", index, id)
  335. }
  336. // 解析 JSON 数据到 map[string]interface{}
  337. var source map[string]interface{}
  338. if err := json.Unmarshal(res.Source, &source); err != nil {
  339. return nil, fmt.Errorf("failed to unmarshal document source: %w", err)
  340. }
  341. return source, nil
  342. }
  343. // updateXlsxDa 更新中标金额
  344. func updateXlsxDa() {
  345. f, err := excelize.OpenFile("202502项目2.xlsx")
  346. if err != nil {
  347. log.Fatal("❌ 无法打开 Excel 文件:", err)
  348. }
  349. defer func() {
  350. f.Save()
  351. if err := f.Close(); err != nil {
  352. fmt.Println(err)
  353. }
  354. }()
  355. rows, err := f.GetRows("Sheet1") // 替换为正确的 Sheet 名称
  356. if err != nil {
  357. log.Fatal("❌ 无法读取 Excel:", err)
  358. }
  359. //
  360. for i := 1; i < len(rows); i++ {
  361. row := rows[i]
  362. // 确保 E 列(索引 4)不为空
  363. if len(row) < 5 || row[4] == "" {
  364. continue
  365. }
  366. //
  367. log.Println("项目名称-------------", row[3])
  368. // 从 F 列(索引 5)开始,每 4 列为一组
  369. for colIndex := 5; colIndex+1 < len(row); colIndex += 4 {
  370. // 提取中标金额(第 1 列)和标题(第 2 列)
  371. //bidAmount := row[colIndex] // F、J、N...列
  372. title := row[colIndex+1] // G、K、O...列
  373. if title == "" {
  374. continue
  375. }
  376. where := map[string]interface{}{
  377. "title": title,
  378. }
  379. bidd, _ := MgoB.FindOne("wcc_biddind_20250318", where)
  380. if len(*bidd) > 0 {
  381. bidamount := (*bidd)["bidamount"]
  382. sss := getColumnLetter(colIndex + 1)
  383. f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", sss, i+1), bidamount) // 中标金额
  384. }
  385. // 只打印非空数据
  386. //if bidAmount != "" && title != "" {
  387. // fmt.Printf("中标金额: %s, 标题: %s\n", bidAmount, title)
  388. //}
  389. }
  390. }
  391. }
  392. // getDataFromFile getDataFromFile
  393. func getDataFromFile() {
  394. url := "http://172.17.4.184:19908"
  395. //url := "http://127.0.0.1:19908"
  396. username := "jybid"
  397. password := "Top2023_JEB01i@31"
  398. //index := "bidding" //索引名称
  399. // 创建 Elasticsearch 客户端
  400. client, err := elastic.NewClient(
  401. elastic.SetURL(url),
  402. elastic.SetBasicAuth(username, password),
  403. elastic.SetSniff(false),
  404. )
  405. if err != nil {
  406. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  407. }
  408. // 2. 读取 Excel
  409. f, err := excelize.OpenFile("202502项目3_人工验证版.xlsx")
  410. if err != nil {
  411. log.Fatal("❌ 无法打开 Excel 文件:", err)
  412. }
  413. defer func() {
  414. f.Save()
  415. if err := f.Close(); err != nil {
  416. fmt.Println(err)
  417. }
  418. }()
  419. // 3. 读取 B 列(项目名称)
  420. rows, err := f.GetRows("Sheet1") // 替换为正确的 Sheet 名称
  421. if err != nil {
  422. log.Fatal("❌ 无法读取 Excel:", err)
  423. }
  424. for i := 1; i < len(rows); i++ {
  425. row := rows[i]
  426. if i == 0 || len(row) < 4 { // 跳过表头或无效数据
  427. continue
  428. }
  429. //已经有数据,跳过
  430. if len(row) > 7 {
  431. continue
  432. }
  433. projectName := row[3] // B 列数据
  434. if projectName == "" {
  435. continue
  436. }
  437. buyer2 := row[2] //单位明细
  438. log.Println("projectName", projectName)
  439. // 查询 ES,获取最相似的前 6 个完整文档
  440. results, err := searchES(client, projectName, buyer2)
  441. if err != nil {
  442. log.Printf("⚠ 查询失败: %s, 错误: %v\n", projectName, err)
  443. continue
  444. }
  445. //
  446. if len(results) > 0 {
  447. startCol := 8 // C列对应索引3 := 3 // C 列开始
  448. for _, result := range results {
  449. //f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", getColumnLetter(startCol), i+1), result["bidamount"]) // 中标金额
  450. f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", getColumnLetter(startCol), i+1), result["budget"]) // 中标金额
  451. //f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", getColumnLetter(startCol), i+1), result["total_investment"]) // 预算金额
  452. f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", getColumnLetter(startCol+1), i+1), result["title"]) // 标题
  453. f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", getColumnLetter(startCol+2), i+1), result["jyhref"]) // 标讯链接
  454. f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", getColumnLetter(startCol+3), i+1), result["score"]) // 相似度
  455. startCol += 4 // **向右移动 4 列,防止数据覆盖**
  456. }
  457. f.SetCellValue("Sheet1", fmt.Sprintf("%s%d", "G", i+1), "拟建") // 相似度
  458. }
  459. }
  460. log.Println("over over")
  461. }
  462. func searchES(client *elastic.Client, projectName, buyer2 string) ([]map[string]interface{}, error) {
  463. query := elastic.NewBoolQuery().
  464. Must(
  465. elastic.NewMatchQuery("projectname.pname", projectName), // 模糊匹配 projectname
  466. //elastic.NewMatchQuery("title", projectName), // 模糊匹配 projectname
  467. //elastic.NewMatchQuery("detail", projectName), // 模糊匹配 projectname
  468. elastic.NewTermQuery("area", "安徽"), // 过滤区域
  469. //elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一"), // 过滤 subtype
  470. //elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向"), // 过滤 subtype
  471. elastic.NewTermsQuery("toptype", "拟建"), // 过滤 subtype
  472. )
  473. searchResult, err := client.Search().
  474. Index("bidding").
  475. Query(query).
  476. Size(70). // 先取 12 条,确保足够数据
  477. Do(context.Background())
  478. if err != nil {
  479. return nil, err
  480. }
  481. // 结果集
  482. var results []map[string]interface{}
  483. seenProjectNames := make(map[string]bool) // 用于去重
  484. seenProjectCode := make(map[string]bool) // 用于去重
  485. bidamountMap := make(map[string]float64)
  486. for _, hit := range searchResult.Hits.Hits {
  487. var doc map[string]interface{}
  488. err := json.Unmarshal(hit.Source, &doc)
  489. if err != nil {
  490. log.Printf("解析文档失败:%s", err)
  491. continue
  492. }
  493. // 获取 `projectname`,防止 key 不存在时的错误
  494. projectNameValue, ok := doc["projectname"].(string)
  495. bidamount := util.Float64All(doc["bidamount"])
  496. if !ok {
  497. log.Printf("⚠️ 缺少 projectname 字段,跳过:%v", doc)
  498. continue
  499. }
  500. projectCodeValue := util.ObjToString(doc["projectcode"])
  501. if seenProjectCode[projectCodeValue] {
  502. continue
  503. }
  504. if projectCodeValue != "" {
  505. seenProjectCode[projectCodeValue] = true
  506. }
  507. // **处理额外字段**
  508. id := util.ObjToString(doc["id"])
  509. bidData, _ := MgoB.FindById("bidding", id, nil)
  510. if util.Float64All((*bidData)["total_investment"]) > 0 {
  511. doc["total_investment"] = (*bidData)["total_investment"]
  512. }
  513. doc["jyhref"] = GetJyURLByID(id)
  514. score := *hit.Score
  515. site := util.ObjToString(doc["site"])
  516. if site == "中华人民共和国自然资源部" {
  517. doc["title"] = "土地出让" + "-" + util.ObjToString(doc["title"])
  518. }
  519. doc["score"] = score //相似度
  520. detail := util.ObjToString(doc["detail"])
  521. if !strings.Contains(detail, projectName) {
  522. continue
  523. }
  524. //detail := util.ObjToString(doc["detail"])
  525. //if !strings.Contains(detail, buyer2) {
  526. // continue
  527. //}
  528. // **去重逻辑**:如果 `projectname` 已经出现过,则跳过
  529. if seenProjectNames[projectNameValue] && bidamount == bidamountMap[projectNameValue] {
  530. continue
  531. }
  532. // **记录该 `projectname`,避免重复**
  533. seenProjectNames[projectNameValue] = true
  534. bidamountMap[projectNameValue] = bidamount
  535. // **存入 MongoDB**
  536. //err = MgoB.InsertOrUpdate("qfw", "wcc_20250312_bidding", doc)
  537. //if err != nil {
  538. // log.Println("MongoDB 插入失败:", doc["id"])
  539. //}
  540. // **加入结果集**
  541. results = append(results, doc)
  542. // **如果已经找到 6 条不同 `projectname`,就跳出循环**
  543. if len(results) >= 10 {
  544. break
  545. }
  546. }
  547. //2、判断正文包含采购单位
  548. for _, hit := range searchResult.Hits.Hits {
  549. var doc map[string]interface{}
  550. err := json.Unmarshal(hit.Source, &doc)
  551. if err != nil {
  552. log.Printf("解析文档失败:%s", err)
  553. continue
  554. }
  555. // 获取 `projectname`,防止 key 不存在时的错误
  556. projectNameValue, ok := doc["projectname"].(string)
  557. bidamount := util.Float64All(doc["bidamount"])
  558. if !ok {
  559. log.Printf("⚠️ 缺少 projectname 字段,跳过:%v", doc)
  560. continue
  561. }
  562. // **处理额外字段**
  563. id := util.ObjToString(doc["id"])
  564. doc["jyhref"] = GetJyURLByID(id)
  565. score := *hit.Score
  566. doc["score"] = score //相似度
  567. site := util.ObjToString(doc["site"])
  568. if site == "中华人民共和国自然资源部" {
  569. doc["title"] = "土地出让" + "-" + util.ObjToString(doc["title"])
  570. }
  571. //detail := util.ObjToString(doc["detail"])
  572. //if !strings.Contains(detail, projectName) {
  573. // continue
  574. //}
  575. //判断正文包含采购单位
  576. detail := util.ObjToString(doc["detail"])
  577. if !strings.Contains(detail, buyer2) {
  578. continue
  579. }
  580. // **去重逻辑**:如果 `projectname` 已经出现过,则跳过
  581. if seenProjectNames[projectNameValue] && bidamount == bidamountMap[projectNameValue] {
  582. continue
  583. }
  584. // **记录该 `projectname`,避免重复**
  585. seenProjectNames[projectNameValue] = true
  586. bidamountMap[projectNameValue] = bidamount
  587. // **存入 MongoDB**
  588. //err = MgoB.InsertOrUpdate("qfw", "wcc_20250312_bidding", doc)
  589. //if err != nil {
  590. // log.Println("MongoDB 插入失败:", doc["id"])
  591. //}
  592. // **加入结果集**
  593. results = append(results, doc)
  594. // **如果已经找到 6 条不同 `projectname`,就跳出循环**
  595. if len(results) >= 10 {
  596. break
  597. }
  598. }
  599. //for _, vv := range results {
  600. // id := util.ObjToString(vv["id"])
  601. // bidData, _ := MgoB.FindById("bidding", id, nil)
  602. // MgoB.SaveByOriID("wcc_biddind_20250318", bidData)
  603. //}
  604. return results, nil
  605. }
  606. // searchES 查询 ES,返回前 6 个相似 `projectname`
  607. func searchES222(client *elastic.Client, projectName string) ([]map[string]interface{}, error) {
  608. //query := elastic.NewMatchQuery("projectname", projectName)
  609. query := elastic.NewBoolQuery().
  610. Must(
  611. elastic.NewMatchQuery("projectname.pname", projectName), // 匹配 projectname
  612. elastic.NewMatchQuery("area", "安徽"), // 匹配 projectname
  613. elastic.NewTermsQuery("subtype", "中标", "成交", "合同"), // 过滤 subtype
  614. )
  615. searchResult, err := client.Search().
  616. Index("bidding").
  617. Query(query).
  618. Size(12). // 取前 6 条数据
  619. Do(context.Background())
  620. if err != nil {
  621. return nil, err
  622. }
  623. var results []map[string]interface{}
  624. for _, hit := range searchResult.Hits.Hits {
  625. var doc map[string]interface{}
  626. err := json.Unmarshal(hit.Source, &doc)
  627. if err != nil {
  628. log.Printf("解析文档失败:%s", err)
  629. continue
  630. }
  631. id := util.ObjToString(doc["id"])
  632. doc["jyhref"] = GetJyURLByID(id)
  633. score := *hit.Score
  634. doc["score"] = score //相似度
  635. err = MgoB.InsertOrUpdate("qfw", "wcc_20250312_bidding", doc)
  636. //if err != nil {
  637. // log.Println("error", doc["id"])
  638. //}
  639. results = append(results, doc)
  640. }
  641. return results, nil
  642. }
  643. func getColumnLetter(index int) string {
  644. col := ""
  645. for index > 0 {
  646. index--
  647. col = string(rune('A'+index%26)) + col
  648. index /= 26
  649. }
  650. return col
  651. }