123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- package main
- import (
- util "app.yhyue.com/moapp/jybase/common"
- "app.yhyue.com/moapp/jybase/encrypt"
- "github.com/gogf/gf/v2/os/gfile"
- "net/url"
- //"app.yhyue.com/moapp/jybase/mongodb"
- "context"
- "fmt"
- _ "github.com/gogf/gf/contrib/drivers/clickhouse/v2"
- _ "github.com/gogf/gf/contrib/drivers/mysql/v2"
- "github.com/gogf/gf/v2/frame/g"
- "github.com/gogf/gf/v2/util/gconv"
- "go.mongodb.org/mongo-driver/bson/primitive"
- "regexp"
- "sync"
- "time"
- "workTasks/common"
- "workTasks/common/mongodb"
- )
- var (
- allPayMap = map[MogUserId]bool{} //所有付费用户
- idVisited = map[BidId]bool{} //已访问的三级页
- fullData = &VisitDetail{
- IdTotal: map[BidId]int64{},
- IdUserOnce: map[BidId]int64{},
- VisitedEq: map[BidId]map[MogUserId]bool{},
- }
- size = 50
- pool = make(chan bool, 5)
- rg = regexp.MustCompile(`/content/(.*).html`)
- lock sync.Mutex
- biddingGroup = map[string][]BidId{}
- )
// BidId is the string id of a detail page (bidding document).
type BidId string

// MogUserId is the string id of a user as it appears in the visit logs.
type MogUserId string

// VisitDetail groups the visit counters collected per detail page.
type VisitDetail struct {
	// IdTotal counts every visit of an id, repeated visits included.
	IdTotal map[BidId]int64
	// IdUserOnce counts visits of an id with each user counted once.
	IdUserOnce map[BidId]int64
	// VisitedEq records, per id, the users that have already been counted.
	VisitedEq map[BidId]map[MogUserId]bool
}
- func init() {
- allPayMap = GetPayUser()
- }
- func main() {
- loadWxPc()
- loadApp()
- loadBiddingType()
- analysis()
- }
- func loadWxPc() {
- sess := common.MG.DB("log").GetMgoConn()
- defer common.MG.DB("log").DestoryMongoConn(sess)
- it := sess.DB("qfw").C("jy_logs").Find(g.Map{"date": g.Map{"$gt": g.Cfg().MustGet(context.TODO(), "start", 1720540800).Int64()}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
- var index int64
- for m := make(map[string]interface{}); it.Next(&m); {
- index++
- if index%10e4 == 0 {
- g.Log().Infof(context.TODO(), "loadWxPc %d", index)
- }
- var (
- userid = MogUserId(gconv.String(m["userid"]))
- url = gconv.String(m["url"])
- )
- if len(userid) != 24 {
- continue
- }
- if _, ok := allPayMap[userid]; ok {
- continue
- }
- matchArr := rg.FindStringSubmatch(url)
- if len(matchArr) < 2 {
- continue
- }
- if matchArr[1] == "" {
- continue
- }
- bidId, err := GetDetailId(matchArr[1])
- if err != nil {
- g.Log().Debugf(context.TODO(), "解密三级页id失败 %s %s", url, matchArr[1])
- continue
- }
- //判断是否是三级页地址
- idVisited[bidId] = true
- fullData.IdTotal[bidId]++
- if !fullData.VisitedEq[bidId][userid] {
- if fullData.VisitedEq[bidId] == nil {
- fullData.VisitedEq[bidId] = map[MogUserId]bool{}
- }
- fullData.IdUserOnce[bidId]++
- fullData.VisitedEq[bidId][userid] = true
- }
- m = make(map[string]interface{})
- }
- }
- func loadApp() {
- sess := common.MG.DB("log").GetMgoConn()
- defer common.MG.DB("log").DestoryMongoConn(sess)
- it := sess.DB("qfw").C("jyapp_logs").Find(g.Map{"date": g.Map{"$gt": g.Cfg().MustGet(context.TODO(), "start", 1720540800).Int64()}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
- var index int64
- for m := make(map[string]interface{}); it.Next(&m); {
- index++
- if index%10e4 == 0 {
- g.Log().Infof(context.TODO(), "loadApp %d", index)
- }
- var (
- userid = MogUserId(gconv.String(m["userid"]))
- url = gconv.String(m["url"])
- )
- if len(userid) != 24 {
- continue
- }
- if _, ok := allPayMap[userid]; ok {
- continue
- }
- matchArr := rg.FindStringSubmatch(url)
- if len(matchArr) < 2 {
- continue
- }
- if matchArr[1] == "" {
- continue
- }
- bidId, err := GetDetailId(matchArr[1])
- if err != nil {
- g.Log().Debugf(context.TODO(), "解密三级页id失败 %s %s", url, matchArr[1])
- continue
- }
- //判断是否是三级页地址
- idVisited[bidId] = true
- fullData.IdTotal[bidId]++
- if !fullData.VisitedEq[bidId][userid] {
- if fullData.VisitedEq[bidId] == nil {
- fullData.VisitedEq[bidId] = map[MogUserId]bool{}
- }
- fullData.IdUserOnce[bidId]++
- fullData.VisitedEq[bidId][userid] = true
- }
- m = make(map[string]interface{})
- }
- }
- func loadBiddingType() {
- var (
- ctx = context.TODO()
- index int64
- now = time.Now()
- )
- g.Log().Infof(ctx, "开始读取标讯类型 %d", len(idVisited))
- groupArr := make([]primitive.ObjectID, 0, size)
- for id, _ := range idVisited {
- if index%1e4 == 0 {
- g.Log().Infof(ctx, "当前已加载%d个", index)
- }
- if len(groupArr) == size {
- pool <- true
- go loadBidding(groupArr)
- groupArr = make([]primitive.ObjectID, 0, size)
- } else {
- groupArr = append(groupArr, mongodb.StringTOBsonId(string(id)))
- }
- index++
- }
- if len(groupArr) > 0 {
- pool <- true
- loadBidding(groupArr)
- } else {
- time.Sleep(time.Second * 10)
- }
- g.Log().Infof(ctx, "开始读取标讯类型结束 耗时%f秒", time.Now().Sub(now).Seconds())
- }
- func loadBidding(arr []primitive.ObjectID) {
- defer func() { <-pool }()
- rData := map[string][]string{}
- list, _ := common.MG.DB("bidding").Find("bidding", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
- if list != nil && len(*list) >= 0 {
- for _, m := range *list {
- toptype := gconv.String(m["toptype"])
- if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
- rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
- }
- }
- }
- if len(*list) != size {
- list2, _ := common.MG.DB("bidding").Find("bidding_back", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
- if list2 != nil && len(*list2) >= 0 {
- for _, m := range *list2 {
- toptype := gconv.String(m["toptype"])
- if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
- rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
- }
- }
- }
- }
- if len(rData) > 0 {
- lock.Lock()
- defer lock.Unlock()
- for toptype, bidArr := range rData {
- for _, bid := range bidArr {
- biddingGroup[toptype] = append(biddingGroup[toptype], BidId(bid))
- }
- }
- }
- }
- func analysis() {
- gfile.PutContents("./bidding.json", gconv.String(biddingGroup))
- gfile.PutContents("./user.json", gconv.String(fullData))
- for top, ids := range biddingGroup {
- var bidTotal, bidCqTotal int64
- u := map[MogUserId]bool{}
- for _, id := range ids {
- bidCqTotal += fullData.IdUserOnce[id] //用户访问量
- bidTotal += fullData.IdTotal[id] //用户访问量
- for uId, _ := range fullData.VisitedEq[id] {
- u[uId] = true
- }
- }
- g.Log().Infof(context.TODO(), "%s 用户浏览量:%d 标讯浏览总量:%d 标讯浏览去重量:%d", top, len(u), bidTotal, bidCqTotal)
- }
- }
- func GetDetailId(path string) (BidId, error) {
- defer util.Catch()
- path, _ = url.QueryUnescape(path)
- arr := encrypt.CommonDecodeArticle("content", path)
- if len(arr) == 0 {
- return "", fmt.Errorf("解密失败%s", path)
- }
- if arr[0] == "" {
- return "", fmt.Errorf("解密失败2%s", path)
- }
- return BidId(arr[0]), nil
- }
- func GetPayUser() map[MogUserId]bool {
- var (
- pay = make(map[MogUserId]bool)
- ctx = context.Background()
- )
- res, err := g.DB("subjectdb").Query(ctx, `SELECT id,userid FROM dwd_f_data_equity_info WHERE endtime>?`, time.Now().Format(time.DateTime))
- if err == nil && !res.IsEmpty() {
- for _, m := range res.List() {
- pay[MogUserId(gconv.String(m["userid"]))] = true
- }
- }
- g.Log().Infof(ctx, "加载%d个付费用户", len(pay))
- return pay
- }
|