main.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. package main
  2. import (
  3. util "app.yhyue.com/moapp/jybase/common"
  4. "app.yhyue.com/moapp/jybase/encrypt"
  5. "github.com/gogf/gf/v2/os/gfile"
  6. "net/url"
  7. //"app.yhyue.com/moapp/jybase/mongodb"
  8. "context"
  9. "fmt"
  10. _ "github.com/gogf/gf/contrib/drivers/clickhouse/v2"
  11. _ "github.com/gogf/gf/contrib/drivers/mysql/v2"
  12. "github.com/gogf/gf/v2/frame/g"
  13. "github.com/gogf/gf/v2/util/gconv"
  14. "go.mongodb.org/mongo-driver/bson/primitive"
  15. "regexp"
  16. "sync"
  17. "time"
  18. "workTasks/common"
  19. "workTasks/common/mongodb"
  20. )
  21. var (
  22. allPayMap = map[MogUserId]bool{} //所有付费用户
  23. idVisited = map[BidId]bool{} //已访问的三级页
  24. fullData = &VisitDetail{
  25. IdTotal: map[BidId]int64{},
  26. IdUserOnce: map[BidId]int64{},
  27. VisitedEq: map[BidId]map[MogUserId]bool{},
  28. }
  29. size = 50
  30. pool = make(chan bool, 5)
  31. rg = regexp.MustCompile(`/content/(.*).html`)
  32. lock sync.Mutex
  33. biddingGroup = map[string][]BidId{}
  34. )
  35. type (
  36. BidId string
  37. MogUserId string
  38. VisitDetail struct {
  39. IdTotal map[BidId]int64 //不去重
  40. IdUserOnce map[BidId]int64 //去重
  41. VisitedEq map[BidId]map[MogUserId]bool
  42. }
  43. )
  44. func init() {
  45. allPayMap = GetPayUser()
  46. }
  47. func main() {
  48. loadWxPc()
  49. loadApp()
  50. loadBiddingType()
  51. analysis()
  52. }
  53. func loadWxPc() {
  54. sess := common.MG.DB("log").GetMgoConn()
  55. defer common.MG.DB("log").DestoryMongoConn(sess)
  56. it := sess.DB("qfw").C("jy_logs").Find(g.Map{"date": g.Map{"$gt": g.Cfg().MustGet(context.TODO(), "start", 1720540800).Int64()}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
  57. var index int64
  58. for m := make(map[string]interface{}); it.Next(&m); {
  59. index++
  60. if index%10e4 == 0 {
  61. g.Log().Infof(context.TODO(), "loadWxPc %d", index)
  62. }
  63. var (
  64. userid = MogUserId(gconv.String(m["userid"]))
  65. url = gconv.String(m["url"])
  66. )
  67. if len(userid) != 24 {
  68. continue
  69. }
  70. if _, ok := allPayMap[userid]; ok {
  71. continue
  72. }
  73. matchArr := rg.FindStringSubmatch(url)
  74. if len(matchArr) < 2 {
  75. continue
  76. }
  77. if matchArr[1] == "" {
  78. continue
  79. }
  80. bidId, err := GetDetailId(matchArr[1])
  81. if err != nil {
  82. g.Log().Debugf(context.TODO(), "解密三级页id失败 %s %s", url, matchArr[1])
  83. continue
  84. }
  85. //判断是否是三级页地址
  86. idVisited[bidId] = true
  87. fullData.IdTotal[bidId]++
  88. if !fullData.VisitedEq[bidId][userid] {
  89. if fullData.VisitedEq[bidId] == nil {
  90. fullData.VisitedEq[bidId] = map[MogUserId]bool{}
  91. }
  92. fullData.IdUserOnce[bidId]++
  93. fullData.VisitedEq[bidId][userid] = true
  94. }
  95. m = make(map[string]interface{})
  96. }
  97. }
  98. func loadApp() {
  99. sess := common.MG.DB("log").GetMgoConn()
  100. defer common.MG.DB("log").DestoryMongoConn(sess)
  101. it := sess.DB("qfw").C("jyapp_logs").Find(g.Map{"date": g.Map{"$gt": g.Cfg().MustGet(context.TODO(), "start", 1720540800).Int64()}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
  102. var index int64
  103. for m := make(map[string]interface{}); it.Next(&m); {
  104. index++
  105. if index%10e4 == 0 {
  106. g.Log().Infof(context.TODO(), "loadApp %d", index)
  107. }
  108. var (
  109. userid = MogUserId(gconv.String(m["userid"]))
  110. url = gconv.String(m["url"])
  111. )
  112. if len(userid) != 24 {
  113. continue
  114. }
  115. if _, ok := allPayMap[userid]; ok {
  116. continue
  117. }
  118. matchArr := rg.FindStringSubmatch(url)
  119. if len(matchArr) < 2 {
  120. continue
  121. }
  122. if matchArr[1] == "" {
  123. continue
  124. }
  125. bidId, err := GetDetailId(matchArr[1])
  126. if err != nil {
  127. g.Log().Debugf(context.TODO(), "解密三级页id失败 %s %s", url, matchArr[1])
  128. continue
  129. }
  130. //判断是否是三级页地址
  131. idVisited[bidId] = true
  132. fullData.IdTotal[bidId]++
  133. if !fullData.VisitedEq[bidId][userid] {
  134. if fullData.VisitedEq[bidId] == nil {
  135. fullData.VisitedEq[bidId] = map[MogUserId]bool{}
  136. }
  137. fullData.IdUserOnce[bidId]++
  138. fullData.VisitedEq[bidId][userid] = true
  139. }
  140. m = make(map[string]interface{})
  141. }
  142. }
  143. func loadBiddingType() {
  144. var (
  145. ctx = context.TODO()
  146. index int64
  147. now = time.Now()
  148. )
  149. g.Log().Infof(ctx, "开始读取标讯类型 %d", len(idVisited))
  150. groupArr := make([]primitive.ObjectID, 0, size)
  151. for id, _ := range idVisited {
  152. if index%1e4 == 0 {
  153. g.Log().Infof(ctx, "当前已加载%d个", index)
  154. }
  155. if len(groupArr) == size {
  156. pool <- true
  157. go loadBidding(groupArr)
  158. groupArr = make([]primitive.ObjectID, 0, size)
  159. } else {
  160. groupArr = append(groupArr, mongodb.StringTOBsonId(string(id)))
  161. }
  162. index++
  163. }
  164. if len(groupArr) > 0 {
  165. pool <- true
  166. loadBidding(groupArr)
  167. } else {
  168. time.Sleep(time.Second * 10)
  169. }
  170. g.Log().Infof(ctx, "开始读取标讯类型结束 耗时%f秒", time.Now().Sub(now).Seconds())
  171. }
  172. func loadBidding(arr []primitive.ObjectID) {
  173. defer func() { <-pool }()
  174. rData := map[string][]string{}
  175. list, _ := common.MG.DB("bidding").Find("bidding", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
  176. if list != nil && len(*list) >= 0 {
  177. for _, m := range *list {
  178. toptype := gconv.String(m["toptype"])
  179. if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
  180. rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
  181. }
  182. }
  183. }
  184. if len(*list) != size {
  185. list2, _ := common.MG.DB("bidding").Find("bidding_back", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
  186. if list2 != nil && len(*list2) >= 0 {
  187. for _, m := range *list2 {
  188. toptype := gconv.String(m["toptype"])
  189. if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
  190. rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
  191. }
  192. }
  193. }
  194. }
  195. if len(rData) > 0 {
  196. lock.Lock()
  197. defer lock.Unlock()
  198. for toptype, bidArr := range rData {
  199. for _, bid := range bidArr {
  200. biddingGroup[toptype] = append(biddingGroup[toptype], BidId(bid))
  201. }
  202. }
  203. }
  204. }
  205. func analysis() {
  206. gfile.PutContents("./bidding.json", gconv.String(biddingGroup))
  207. gfile.PutContents("./user.json", gconv.String(fullData))
  208. for top, ids := range biddingGroup {
  209. var bidTotal, bidCqTotal int64
  210. u := map[MogUserId]bool{}
  211. for _, id := range ids {
  212. bidCqTotal += fullData.IdUserOnce[id] //用户访问量
  213. bidTotal += fullData.IdTotal[id] //用户访问量
  214. for uId, _ := range fullData.VisitedEq[id] {
  215. u[uId] = true
  216. }
  217. }
  218. g.Log().Infof(context.TODO(), "%s 用户浏览量:%d 标讯浏览总量:%d 标讯浏览去重量:%d", top, len(u), bidTotal, bidCqTotal)
  219. }
  220. }
  221. func GetDetailId(path string) (BidId, error) {
  222. defer util.Catch()
  223. path, _ = url.QueryUnescape(path)
  224. arr := encrypt.CommonDecodeArticle("content", path)
  225. if len(arr) == 0 {
  226. return "", fmt.Errorf("解密失败%s", path)
  227. }
  228. if arr[0] == "" {
  229. return "", fmt.Errorf("解密失败2%s", path)
  230. }
  231. return BidId(arr[0]), nil
  232. }
  233. func GetPayUser() map[MogUserId]bool {
  234. var (
  235. pay = make(map[MogUserId]bool)
  236. ctx = context.Background()
  237. )
  238. res, err := g.DB("subjectdb").Query(ctx, `SELECT id,userid FROM dwd_f_data_equity_info WHERE endtime>?`, time.Now().Format(time.DateTime))
  239. if err == nil && !res.IsEmpty() {
  240. for _, m := range res.List() {
  241. pay[MogUserId(gconv.String(m["userid"]))] = true
  242. }
  243. }
  244. g.Log().Infof(ctx, "加载%d个付费用户", len(pay))
  245. return pay
  246. }