main.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. package main
  2. import (
  3. "fmt"
  4. utils "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  6. "sort"
  7. "time"
  8. )
  9. var (
  10. MgoB *mongodb.MongodbSim
  11. MgoC *mongodb.MongodbSim
  12. channels = []string{"招标公告", "重新招标", "意见征集", "招标预告", "信息变更", "答疑公告", "废标公告", "流标公告",
  13. "开标公示", "候选人公示", "中标通知", "合同公告", "验收合同", "违规公告", "其他公告", "预告", "公告", "变更", "结果", "其他"}
  14. )
  15. func main() {
  16. MgoB = &mongodb.MongodbSim{
  17. MongodbAddr: "172.17.189.140:27080",
  18. //MongodbAddr: "127.0.0.1:27083",
  19. DbName: "qfw",
  20. Size: 10,
  21. UserName: "SJZY_RWbid_ES",
  22. Password: "SJZY@B4i4D5e6S",
  23. //Direct: true,
  24. }
  25. MgoB.InitPool()
  26. MgoC = &mongodb.MongodbSim{
  27. MongodbAddr: "172.17.4.87:27080",
  28. //MongodbAddr: "127.0.0.1:27081",
  29. DbName: "qlm",
  30. Size: 10,
  31. UserName: "",
  32. Password: "",
  33. //Direct: true,
  34. }
  35. MgoC.InitPool()
  36. dealData()
  37. fmt.Println("over")
  38. }
  39. func dealData() {
  40. //5.竞品覆盖率,每周4统计上周的数据
  41. sessC := MgoC.GetMgoConn()
  42. defer MgoC.DestoryMongoConn(sessC)
  43. //获取上周3,千里马的招标数据;然后获取标讯前后个3天,一共7天的所有数据,对比看标题或者项目名称是否存在
  44. lastWednesday := time.Date(2023, 12, 2, 0, 0, 0, 0, time.Local)
  45. whereQlm := map[string]interface{}{
  46. "publishtime": lastWednesday.Format("2006-01-02"),
  47. "site": "千里马",
  48. }
  49. query := sessC.DB("qlm").C("data_merge").Find(whereQlm).Select(map[string]interface{}{"title": 1, "projectname": 1, "channel": 1, "href": 1}).Iter()
  50. count := 0
  51. qlmData := make([]map[string]interface{}, 0) //标讯所有数据
  52. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  53. data := map[string]interface{}{
  54. "title": tmp["title"],
  55. "projectname": tmp["projectname"],
  56. "href": tmp["href"],
  57. }
  58. channel := utils.ObjToString(tmp["channel"])
  59. //标讯所有数据
  60. if IsInStringArray(channel, channels) {
  61. qlmData = append(qlmData, data)
  62. }
  63. }
  64. biddingWhere := map[string]interface{}{
  65. "publishtime": map[string]interface{}{
  66. "$gte": lastWednesday.AddDate(0, 0, -3).Unix(),
  67. "$lte": lastWednesday.AddDate(0, 0, 3).Unix(),
  68. },
  69. }
  70. biddingDatas, _ := MgoB.Find("bidding", biddingWhere, nil, map[string]interface{}{"title": 1, "projectname": 1}, false, -1, -1)
  71. //log.Info("coverageA", zap.Int("标讯一周总数", len(*biddingDatas)))
  72. fmt.Println("bbbbb", len(*biddingDatas))
  73. // 将切片B中的标题和项目名称分别存储在哈希表中
  74. titlesInB, projectsInB := getUniqueFields(*biddingDatas)
  75. //5.1.1 统计 标讯-整体 数据
  76. matches := countMatches(qlmData, titlesInB, projectsInB)
  77. fmt.Println("matches", matches)
  78. for _, itemA := range qlmData {
  79. title, titleExists := itemA["title"].(string)
  80. project, projectExists := itemA["projectname"].(string)
  81. insert := map[string]interface{}{
  82. "title": itemA["title"],
  83. "projectname": itemA["projectname"],
  84. "href": itemA["href"],
  85. }
  86. if titleExists && titlesInB[title] {
  87. insert["matched"] = true
  88. insert["match_field"] = "title"
  89. } else if projectExists && projectsInB[project] {
  90. insert["matched"] = true
  91. insert["match_field"] = "projectname"
  92. } else {
  93. insert["matched"] = false
  94. }
  95. MgoB.Save("wcc_cover2", insert)
  96. }
  97. }
  98. // IsInStringArray 判断数组中是否存在字符串
  99. func IsInStringArray(str string, arr []string) bool {
  100. // 先对字符串数组进行排序
  101. sort.Strings(arr)
  102. // 使用二分查找算法查找字符串
  103. pos := sort.SearchStrings(arr, str)
  104. // 如果找到了则返回 true,否则返回 false
  105. return pos < len(arr) && arr[pos] == str
  106. }
  107. // getUniqueFields 获取切片中标题和项目名称的唯一值
  108. func getUniqueFields(slice []map[string]interface{}) (map[string]bool, map[string]bool) {
  109. titles := make(map[string]bool)
  110. projects := make(map[string]bool)
  111. for _, item := range slice {
  112. title, titleExists := item["title"].(string)
  113. project, projectExists := item["projectname"].(string)
  114. if titleExists {
  115. titles[title] = true
  116. }
  117. if projectExists {
  118. projects[project] = true
  119. }
  120. }
  121. return titles, projects
  122. }
  123. // countMatches 统计切片A中的元素在切片B中存在的数量和总数量
  124. func countMatches(sliceA []map[string]interface{}, titlesInB, projectsInB map[string]bool) int {
  125. count := 0
  126. for _, itemA := range sliceA {
  127. title, titleExists := itemA["title"].(string)
  128. project, projectExists := itemA["projectname"].(string)
  129. if titleExists && titlesInB[title] {
  130. count++
  131. } else if projectExists && projectsInB[project] {
  132. count++
  133. }
  134. }
  135. return count
  136. }