|
@@ -0,0 +1,262 @@
|
|
|
|
+package main
|
|
|
|
+
|
|
|
|
+import (
|
|
|
|
+ util "app.yhyue.com/moapp/jybase/common"
|
|
|
|
+ "app.yhyue.com/moapp/jybase/encrypt"
|
|
|
|
+ "github.com/gogf/gf/v2/os/gfile"
|
|
|
|
+ "net/url"
|
|
|
|
+
|
|
|
|
+ //"app.yhyue.com/moapp/jybase/mongodb"
|
|
|
|
+ "context"
|
|
|
|
+ "fmt"
|
|
|
|
+ _ "github.com/gogf/gf/contrib/drivers/clickhouse/v2"
|
|
|
|
+ _ "github.com/gogf/gf/contrib/drivers/mysql/v2"
|
|
|
|
+ "github.com/gogf/gf/v2/frame/g"
|
|
|
|
+ "github.com/gogf/gf/v2/util/gconv"
|
|
|
|
+ "go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
|
+ "regexp"
|
|
|
|
+ "sync"
|
|
|
|
+ "time"
|
|
|
|
+ "workTasks/common"
|
|
|
|
+ "workTasks/common/mongodb"
|
|
|
|
+)
|
|
|
|
+
|
|
|
|
+var (
|
|
|
|
+ allPayMap = map[MogUserId]bool{} //所有付费用户
|
|
|
|
+ idVisited = map[BidId]bool{} //已访问的三级页
|
|
|
|
+ fullData = &VisitDetail{
|
|
|
|
+ IdTotal: map[BidId]int64{},
|
|
|
|
+ IdUserOnce: map[BidId]int64{},
|
|
|
|
+ VisitedEq: map[BidId]map[MogUserId]bool{},
|
|
|
|
+ }
|
|
|
|
+ size = 50
|
|
|
|
+ pool = make(chan bool, 5)
|
|
|
|
+ rg = regexp.MustCompile(`/content/(.*).html`)
|
|
|
|
+
|
|
|
|
+ lock sync.Mutex
|
|
|
|
+ biddingGroup = map[string][]BidId{}
|
|
|
|
+)
|
|
|
|
+
|
|
|
|
// BidId is the string id of a bidding detail page.
type BidId string

// MogUserId is the string id of a user as recorded in the access logs.
type MogUserId string

// VisitDetail holds the visit statistics gathered per detail page.
type VisitDetail struct {
	// IdTotal counts every visit of a page (not deduplicated).
	IdTotal map[BidId]int64
	// IdUserOnce counts visits of a page with each user counted once
	// (deduplicated).
	IdUserOnce map[BidId]int64
	// VisitedEq records, per page, which users have already been counted.
	VisitedEq map[BidId]map[MogUserId]bool
}
|
|
|
|
+
|
|
|
|
+func init() {
|
|
|
|
+ allPayMap = GetPayUser()
|
|
|
|
+}
|
|
|
|
+func main() {
|
|
|
|
+ loadWxPc()
|
|
|
|
+ loadApp()
|
|
|
|
+ loadBiddingType()
|
|
|
|
+ analysis()
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func loadWxPc() {
|
|
|
|
+ sess := common.MG.DB("log").GetMgoConn()
|
|
|
|
+ defer common.MG.DB("log").DestoryMongoConn(sess)
|
|
|
|
+ it := sess.DB("qfw").C("jy_logs").Find(g.Map{"date": g.Map{"$gt": g.Cfg().MustGet(context.TODO(), "start", 1720540800).Int64()}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
|
|
|
|
+ var index int64
|
|
|
|
+ for m := make(map[string]interface{}); it.Next(&m); {
|
|
|
|
+ index++
|
|
|
|
+ if index%10e4 == 0 {
|
|
|
|
+ g.Log().Infof(context.TODO(), "loadWxPc %d", index)
|
|
|
|
+ }
|
|
|
|
+ var (
|
|
|
|
+ userid = MogUserId(gconv.String(m["userid"]))
|
|
|
|
+ url = gconv.String(m["url"])
|
|
|
|
+ )
|
|
|
|
+ if len(userid) != 24 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if _, ok := allPayMap[userid]; ok {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ matchArr := rg.FindStringSubmatch(url)
|
|
|
|
+ if len(matchArr) < 2 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if matchArr[1] == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ bidId, err := GetDetailId(matchArr[1])
|
|
|
|
+ if err != nil {
|
|
|
|
+ g.Log().Debugf(context.TODO(), "解密三级页id失败 %s %s", url, matchArr[1])
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ //判断是否是三级页地址
|
|
|
|
+ idVisited[bidId] = true
|
|
|
|
+ fullData.IdTotal[bidId]++
|
|
|
|
+ if !fullData.VisitedEq[bidId][userid] {
|
|
|
|
+ if fullData.VisitedEq[bidId] == nil {
|
|
|
|
+ fullData.VisitedEq[bidId] = map[MogUserId]bool{}
|
|
|
|
+ }
|
|
|
|
+ fullData.IdUserOnce[bidId]++
|
|
|
|
+ fullData.VisitedEq[bidId][userid] = true
|
|
|
|
+ }
|
|
|
|
+ m = make(map[string]interface{})
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func loadApp() {
|
|
|
|
+ sess := common.MG.DB("log").GetMgoConn()
|
|
|
|
+ defer common.MG.DB("log").DestoryMongoConn(sess)
|
|
|
|
+ it := sess.DB("qfw").C("jyapp_logs").Find(g.Map{"date": g.Map{"$gt": g.Cfg().MustGet(context.TODO(), "start", 1720540800).Int64()}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
|
|
|
|
+ var index int64
|
|
|
|
+ for m := make(map[string]interface{}); it.Next(&m); {
|
|
|
|
+ index++
|
|
|
|
+ if index%10e4 == 0 {
|
|
|
|
+ g.Log().Infof(context.TODO(), "loadApp %d", index)
|
|
|
|
+ }
|
|
|
|
+ var (
|
|
|
|
+ userid = MogUserId(gconv.String(m["userid"]))
|
|
|
|
+ url = gconv.String(m["url"])
|
|
|
|
+ )
|
|
|
|
+ if len(userid) != 24 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if _, ok := allPayMap[userid]; ok {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ matchArr := rg.FindStringSubmatch(url)
|
|
|
|
+ if len(matchArr) < 2 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if matchArr[1] == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ bidId, err := GetDetailId(matchArr[1])
|
|
|
|
+ if err != nil {
|
|
|
|
+ g.Log().Debugf(context.TODO(), "解密三级页id失败 %s %s", url, matchArr[1])
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ //判断是否是三级页地址
|
|
|
|
+ idVisited[bidId] = true
|
|
|
|
+ fullData.IdTotal[bidId]++
|
|
|
|
+ if !fullData.VisitedEq[bidId][userid] {
|
|
|
|
+ if fullData.VisitedEq[bidId] == nil {
|
|
|
|
+ fullData.VisitedEq[bidId] = map[MogUserId]bool{}
|
|
|
|
+ }
|
|
|
|
+ fullData.IdUserOnce[bidId]++
|
|
|
|
+ fullData.VisitedEq[bidId][userid] = true
|
|
|
|
+ }
|
|
|
|
+ m = make(map[string]interface{})
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+func loadBiddingType() {
|
|
|
|
+ var (
|
|
|
|
+ ctx = context.TODO()
|
|
|
|
+
|
|
|
|
+ index int64
|
|
|
|
+ now = time.Now()
|
|
|
|
+ )
|
|
|
|
+ g.Log().Infof(ctx, "开始读取标讯类型 %d", len(idVisited))
|
|
|
|
+
|
|
|
|
+ groupArr := make([]primitive.ObjectID, 0, size)
|
|
|
|
+ for id, _ := range idVisited {
|
|
|
|
+ if index%1e4 == 0 {
|
|
|
|
+ g.Log().Infof(ctx, "当前已加载%d个", index)
|
|
|
|
+ }
|
|
|
|
+ if len(groupArr) == size {
|
|
|
|
+ pool <- true
|
|
|
|
+ go loadBidding(groupArr)
|
|
|
|
+ groupArr = make([]primitive.ObjectID, 0, size)
|
|
|
|
+ } else {
|
|
|
|
+ groupArr = append(groupArr, mongodb.StringTOBsonId(string(id)))
|
|
|
|
+ }
|
|
|
|
+ index++
|
|
|
|
+ }
|
|
|
|
+ if len(groupArr) > 0 {
|
|
|
|
+ pool <- true
|
|
|
|
+ loadBidding(groupArr)
|
|
|
|
+ } else {
|
|
|
|
+ time.Sleep(time.Second * 10)
|
|
|
|
+ }
|
|
|
|
+ g.Log().Infof(ctx, "开始读取标讯类型结束 耗时%f秒", time.Now().Sub(now).Seconds())
|
|
|
|
+
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func loadBidding(arr []primitive.ObjectID) {
|
|
|
|
+ defer func() { <-pool }()
|
|
|
|
+ rData := map[string][]string{}
|
|
|
|
+ list, _ := common.MG.DB("bidding").Find("bidding", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
|
|
|
|
+ if list != nil && len(*list) >= 0 {
|
|
|
|
+ for _, m := range *list {
|
|
|
|
+ toptype := gconv.String(m["toptype"])
|
|
|
|
+ if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
|
|
|
|
+ rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(*list) != size {
|
|
|
|
+ list2, _ := common.MG.DB("bidding").Find("bidding_back", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
|
|
|
|
+ if list2 != nil && len(*list2) >= 0 {
|
|
|
|
+ for _, m := range *list2 {
|
|
|
|
+ toptype := gconv.String(m["toptype"])
|
|
|
|
+ if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
|
|
|
|
+ rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(rData) > 0 {
|
|
|
|
+ lock.Lock()
|
|
|
|
+ defer lock.Unlock()
|
|
|
|
+ for toptype, bidArr := range rData {
|
|
|
|
+ for _, bid := range bidArr {
|
|
|
|
+ biddingGroup[toptype] = append(biddingGroup[toptype], BidId(bid))
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func analysis() {
|
|
|
|
+ gfile.PutContents("./bidding.json", gconv.String(biddingGroup))
|
|
|
|
+ gfile.PutContents("./user.json", gconv.String(fullData))
|
|
|
|
+ for top, ids := range biddingGroup {
|
|
|
|
+ var bidTotal, bidCqTotal int64
|
|
|
|
+ u := map[MogUserId]bool{}
|
|
|
|
+ for _, id := range ids {
|
|
|
|
+ bidCqTotal += fullData.IdUserOnce[id] //用户访问量
|
|
|
|
+ bidTotal += fullData.IdTotal[id] //用户访问量
|
|
|
|
+ for uId, _ := range fullData.VisitedEq[id] {
|
|
|
|
+ u[uId] = true
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ g.Log().Infof(context.TODO(), "%s 用户浏览量:%d 标讯浏览总量:%d 标讯浏览去重量:%d", top, len(u), bidTotal, bidCqTotal)
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func GetDetailId(path string) (BidId, error) {
|
|
|
|
+ defer util.Catch()
|
|
|
|
+ path, _ = url.QueryUnescape(path)
|
|
|
|
+ arr := encrypt.CommonDecodeArticle("content", path)
|
|
|
|
+ if len(arr) == 0 {
|
|
|
|
+ return "", fmt.Errorf("解密失败%s", path)
|
|
|
|
+ }
|
|
|
|
+ if arr[0] == "" {
|
|
|
|
+ return "", fmt.Errorf("解密失败2%s", path)
|
|
|
|
+ }
|
|
|
|
+ return BidId(arr[0]), nil
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func GetPayUser() map[MogUserId]bool {
|
|
|
|
+ var (
|
|
|
|
+ pay = make(map[MogUserId]bool)
|
|
|
|
+ ctx = context.Background()
|
|
|
|
+ )
|
|
|
|
+ res, err := g.DB("subjectdb").Query(ctx, `SELECT id,userid FROM dwd_f_data_equity_info WHERE endtime>?`, time.Now().Format(time.DateTime))
|
|
|
|
+ if err == nil && !res.IsEmpty() {
|
|
|
|
+ for _, m := range res.List() {
|
|
|
|
+ pay[MogUserId(gconv.String(m["userid"]))] = true
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ g.Log().Infof(ctx, "加载%d个付费用户", len(pay))
|
|
|
|
+ return pay
|
|
|
|
+}
|