project.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/olivere/elastic/v7"
  7. "io"
  8. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  9. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  10. "log"
  11. "regexp"
  12. "strings"
  13. )
  14. // getProject 获取项目数据
  15. func getProject() {
  16. MgoB := &mongodb.MongodbSim{
  17. MongodbAddr: "172.17.189.140:27080",
  18. //MongodbAddr: "127.0.0.1:27083",
  19. Size: 10,
  20. DbName: "qfw",
  21. UserName: "SJZY_RWbid_ES",
  22. Password: "SJZY@B4i4D5e6S",
  23. //Direct: true,
  24. }
  25. MgoB.InitPool()
  26. url := "http://172.17.4.184:19805"
  27. //url := "http://127.0.0.1:19805"
  28. username := "es_all"
  29. password := "TopJkO2E_d1x"
  30. index := "projectset" //索引名称
  31. // 创建 Elasticsearch 客户端
  32. client, err := elastic.NewClient(
  33. elastic.SetURL(url),
  34. elastic.SetBasicAuth(username, password),
  35. elastic.SetSniff(false),
  36. )
  37. if err != nil {
  38. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  39. }
  40. rangeQuery := elastic.NewRangeQuery("pici").Gt("1685548800").Lte("1704038400")
  41. //termQ := elastic.NewTermQuery("multipackage", 0)
  42. //rangeQuery := elastic.NewRangeQuery("id").Gt("657b08556977356f5578cb25").Lte("657b08556977356f5578cb26")
  43. query := elastic.NewBoolQuery().Must(rangeQuery)
  44. ctx := context.Background()
  45. //开始滚动搜索
  46. scrollID := ""
  47. scroll := "1m"
  48. searchSource := elastic.NewSearchSource().
  49. Query(query).
  50. Size(100).
  51. Sort("_doc", true) //升序排序
  52. //Sort("_doc", false) //降序排序
  53. searchService := client.Scroll(index).
  54. Size(100).
  55. Scroll(scroll).
  56. SearchSource(searchSource)
  57. res, err := searchService.Do(ctx)
  58. if err != nil {
  59. if err == io.EOF {
  60. fmt.Println("没有数据")
  61. } else {
  62. panic(err)
  63. }
  64. }
  65. defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  66. fmt.Println("总数是:", res.TotalHits())
  67. total := 0
  68. for len(res.Hits.Hits) > 0 {
  69. for _, hit := range res.Hits.Hits {
  70. var doc map[string]interface{}
  71. err := json.Unmarshal(hit.Source, &doc)
  72. if err != nil {
  73. log.Printf("解析文档失败:%s", err)
  74. continue
  75. }
  76. //id := util.ObjToString(doc["id"])
  77. //log.Println(id)
  78. var matchWords = make([]string, 0)
  79. var matchList = make([]interface{}, 0)
  80. if list, ok := doc["list"].([]interface{}); ok {
  81. for _, v := range list {
  82. if da, ok := v.(map[string]interface{}); ok {
  83. if util.ObjToString(da["toptype"]) != "招标" {
  84. continue
  85. }
  86. title := util.ObjToString(da["title"])
  87. // 使用正则表达式进行匹配
  88. matches := GetMatches(title)
  89. if len(matches) > 0 {
  90. matchList = append(matchList, da)
  91. }
  92. matchWords = append(matchWords, matches...)
  93. }
  94. }
  95. }
  96. insert := make(map[string]interface{})
  97. insert["project_id"] = doc["id"]
  98. insert["_id"] = doc["id"]
  99. insert["multipackage"] = doc["multipackage"]
  100. insert["list"] = doc["list"]
  101. insert["projectname"] = doc["projectname"]
  102. insert["sourceinfourl"] = doc["sourceinfourl"]
  103. if len(matchWords) > 0 {
  104. insert["matchList"] = matchList
  105. insert["package_name"] = util.ObjToString(doc["projectname"]) + "-" + strings.Join(matchWords, "、")
  106. insert["type"] = 1
  107. }
  108. MgoB.SaveByOriID("wcc_project_20240304", insert)
  109. }
  110. total = total + len(res.Hits.Hits)
  111. scrollID = res.ScrollId
  112. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  113. log.Println("current count:", total)
  114. if err != nil {
  115. if err == io.EOF {
  116. // 滚动到最后一批数据,退出循环
  117. break
  118. }
  119. log.Printf("滚动搜索失败:%s", err)
  120. break // 处理错误时退出循环
  121. }
  122. }
  123. fmt.Println("结束~~~~~~~~~~~~~~~")
  124. }
  125. func GetMatches(title string) (res []string) {
  126. // 编译正则表达式
  127. re := regexp.MustCompile(`包\d+`) // 标题只有一个包2
  128. // 定义正则表达式
  129. re2 := regexp.MustCompile(`包(\d+(?:、\d+)*)`) //标题含有多个包;包10、12、14、17、18、19
  130. re3 := regexp.MustCompile(`\d+包`) //冀中股份2023年12月标准件计划框架协议采购2包12.7
  131. re4 := regexp.MustCompile(`标包\d+`) //2024一季度水火电一般工序类中小金工件(标包1)-招标公告
  132. //re4 := regexp.MustCompile(`\d+标段`)
  133. //text := "中国绿发投资集团有限公司直属项目公司2023年第20批集中采购非招标项目(包10、12、14、17、18、19"
  134. //matches := re3.FindAllString(util.ObjToString(text), -1)
  135. matches := re2.FindAllString(util.ObjToString(title), -1)
  136. if len(matches) > 0 {
  137. return matches
  138. }
  139. matches = re4.FindAllString(util.ObjToString(title), -1)
  140. if len(matches) > 0 {
  141. return matches
  142. }
  143. matches = re3.FindAllString(util.ObjToString(title), -1)
  144. if len(matches) > 0 {
  145. return matches
  146. }
  147. matches = re.FindAllString(util.ObjToString(title), -1)
  148. return matches
  149. }