wangkaiyue 9 kuukautta sitten
vanhempi
commit
288431445b
4 muutettua tiedostoa jossa 314 lisäystä ja 0 poistoa
  1. 14 0
      freeUserVisit/README.MD
  2. 18 0
      freeUserVisit/config.yaml_release
  3. 20 0
      freeUserVisit/config.yaml_test
  4. 262 0
      freeUserVisit/main.go

+ 14 - 0
freeUserVisit/README.MD

@@ -0,0 +1,14 @@
+# 统计免费用户访问各类标讯信息
++ 规则:免费登录用户一周内浏览的标讯类型(一级类型分布)
++ 统计身份:个人身份
++ 统计端:PC、APP、微信公众号
++ 备注:拟在建和采购意向免费用户不能查看,不需要统计。
+
+| 一级标讯类型   | 浏览用户量 | 标讯浏览总量 | 标讯浏览去重量 |
+|--------|-----|-----|--------|
+| 招标预告   | 20595 |65390   | 53827     |
+| 招标公告   | 107317 | 1627221 | 1339382   |
+| 招标结果   | 112853 |2002644| 1685498|
+|招标信用信息| 57712 | 410371 | 358397   |
+		
+		

+ 18 - 0
freeUserVisit/config.yaml_release

@@ -0,0 +1,18 @@
+database:
+  subjectdb: #正式环境库名为Jianyu_subjectdb
+    link: "mysql:jianyu:TopMysql@123@tcp(web95.jy360.cn:14000)/Jianyu_subjectdb"
+
+mongodb:
+  log:
+    address: "172.17.4.187:27090,172.17.189.141:27092"
+    size: 5
+    dbName: "qfw"
+    replSet: ""
+    userName: "jianyu"
+    password: "jylog2020_123"
+  bidding:
+    address: "172.17.189.140:27080,172.17.189.141:27081"
+    size: 10
+    dbName: "qfw"
+    userName: "JS2Z_Rbid_ProG"
+    password: "JS2z@S1e3aR5Ch"

+ 20 - 0
freeUserVisit/config.yaml_test

@@ -0,0 +1,20 @@
+database:
+  subjectdb: #正式环境库名为Jianyu_subjectdb
+    link: "mysql:readuser:jyTi_R202403@tcp(192.168.3.71:4003)/Jianyu_subjectdb_test"
+
+mongodb:
+  log:
+    address: "127.0.0.1:27090"
+    size: 5
+    dbName: "qfw"
+    replSet: ""
+    userName: "jianyu"
+    password: "jylog2020_123"
+  bidding:
+    address: "127.0.0.1:27099"
+    size: 5
+    dbName: "qfw"
+    userName: "JS2Z_Rbid_ProG"
+    password: "JS2z@S1e3aR5Ch"
+
+start: 1720596600

+ 262 - 0
freeUserVisit/main.go

@@ -0,0 +1,262 @@
+package main
+
+import (
+	util "app.yhyue.com/moapp/jybase/common"
+	"app.yhyue.com/moapp/jybase/encrypt"
+	"github.com/gogf/gf/v2/os/gfile"
+	"net/url"
+
+	//"app.yhyue.com/moapp/jybase/mongodb"
+	"context"
+	"fmt"
+	_ "github.com/gogf/gf/contrib/drivers/clickhouse/v2"
+	_ "github.com/gogf/gf/contrib/drivers/mysql/v2"
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/util/gconv"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"regexp"
+	"sync"
+	"time"
+	"workTasks/common"
+	"workTasks/common/mongodb"
+)
+
+// Package-level state shared by the log loaders, the bidding-type lookup and
+// the final analysis step.
+var (
+	allPayMap = map[MogUserId]bool{} // all paying users; replaced in init() with the result of GetPayUser
+	idVisited = map[BidId]bool{}     // detail-page (bidding) ids that appeared in the access logs
+	// fullData accumulates per-bidding visit counters across every log source.
+	fullData = &VisitDetail{
+		IdTotal:    map[BidId]int64{},
+		IdUserOnce: map[BidId]int64{},
+		VisitedEq:  map[BidId]map[MogUserId]bool{},
+	}
+	size = 50                 // batch size for bidding lookups ($in list length and query limit)
+	pool = make(chan bool, 5) // semaphore bounding the number of concurrent loadBidding calls
+	// rg captures the encrypted id segment of a detail-page URL. The dot in
+	// front of "html" is escaped so that only a literal ".html" suffix matches
+	// (the unescaped form accepted any character in that position).
+	rg = regexp.MustCompile(`/content/(.*)\.html`)
+
+	lock         sync.Mutex             // guards biddingGroup
+	biddingGroup = map[string][]BidId{} // toptype -> ids of the visited biddings of that type
+)
+
+type ( // domain types used as map keys and the visit-counter aggregate
+	BidId     string // id of a bidding detail page, as returned by GetDetailId
+	MogUserId string // value of the "userid" log field; presumably a Mongo ObjectID hex string (loaders require len 24) — confirm
+
+	VisitDetail struct { // per-bidding visit statistics filled by the log loaders
+		IdTotal    map[BidId]int64 // total visits per bidding, not de-duplicated
+		IdUserOnce map[BidId]int64 // visits per bidding, counting each user at most once
+		VisitedEq  map[BidId]map[MogUserId]bool // set of users that visited each bidding; used for de-duplication
+	}
+)
+
+func init() { // runs before main and performs the paid-user query as a side effect
+	allPayMap = GetPayUser() // cache paying users so the log loaders can skip them
+}
+func main() { // pipeline: load both access logs, resolve bidding types, then report
+	loadWxPc() // tally visits from the "jy_logs" collection
+	loadApp() // tally visits from the "jyapp_logs" collection
+	loadBiddingType() // look up the toptype of every id collected in idVisited
+	analysis() // dump the raw data to JSON files and log per-toptype totals
+}
+
+// loadWxPc tallies free-user detail-page visits recorded in the "jy_logs"
+// collection (the PC / WeChat log, judging by the function name).
+func loadWxPc() {
+	loadVisitLogs("jy_logs", "loadWxPc")
+}
+
+// loadApp tallies free-user detail-page visits recorded in the "jyapp_logs"
+// collection.
+func loadApp() {
+	loadVisitLogs("jyapp_logs", "loadApp")
+}
+
+// loadVisitLogs scans one access-log collection of the "qfw" database and
+// folds every detail-page visit made by a non-paying user into idVisited and
+// fullData.
+//
+// collection is the Mongo collection to read; tag prefixes the progress log
+// lines. Only records whose "date" is greater than the "start" config value
+// (default 1720540800) are read.
+func loadVisitLogs(collection, tag string) {
+	ctx := context.TODO()
+	sess := common.MG.DB("log").GetMgoConn()
+	defer common.MG.DB("log").DestoryMongoConn(sess)
+
+	start := g.Cfg().MustGet(ctx, "start", 1720540800).Int64()
+	it := sess.DB("qfw").C(collection).Find(g.Map{"date": g.Map{"$gt": start}}).Select(g.Map{"userid": 1, "url": 1}).Iter()
+
+	var index int64
+	// The post statement hands the decoder a fresh map on every iteration,
+	// including the ones that bail out early with continue.
+	for m := make(map[string]interface{}); it.Next(&m); m = make(map[string]interface{}) {
+		index++
+		if index%10e4 == 0 {
+			g.Log().Infof(ctx, "%s %d", tag, index)
+		}
+		var (
+			userid = MogUserId(gconv.String(m["userid"]))
+			rawURL = gconv.String(m["url"])
+		)
+		// Skip records without a 24-character user id.
+		if len(userid) != 24 {
+			continue
+		}
+		// Paying users are excluded from the statistics.
+		if _, ok := allPayMap[userid]; ok {
+			continue
+		}
+		// Only detail-page URLs carry an encrypted bidding id.
+		matchArr := rg.FindStringSubmatch(rawURL)
+		if len(matchArr) < 2 || matchArr[1] == "" {
+			continue
+		}
+		bidId, err := GetDetailId(matchArr[1])
+		if err != nil {
+			g.Log().Debugf(ctx, "解密三级页id失败 %s %s", rawURL, matchArr[1])
+			continue
+		}
+
+		idVisited[bidId] = true
+		fullData.IdTotal[bidId]++
+		visitors := fullData.VisitedEq[bidId]
+		if visitors == nil {
+			visitors = map[MogUserId]bool{}
+			fullData.VisitedEq[bidId] = visitors
+		}
+		// Count each user at most once per bidding.
+		if !visitors[userid] {
+			visitors[userid] = true
+			fullData.IdUserOnce[bidId]++
+		}
+	}
+}
+// loadBiddingType resolves the toptype of every id in idVisited by handing
+// batches of at most size ids to loadBidding. Concurrency is bounded by pool,
+// and the function returns only after every batch has been processed.
+func loadBiddingType() {
+	var (
+		ctx = context.TODO()
+
+		index int64
+		now   = time.Now()
+		wg    sync.WaitGroup
+	)
+	g.Log().Infof(ctx, "开始读取标讯类型 %d", len(idVisited))
+
+	// dispatch takes a pool slot (released by loadBidding itself when it
+	// returns) and runs the batch in its own goroutine.
+	dispatch := func(batch []primitive.ObjectID) {
+		pool <- true
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			loadBidding(batch)
+		}()
+	}
+
+	groupArr := make([]primitive.ObjectID, 0, size)
+	for id := range idVisited {
+		if index%1e4 == 0 {
+			g.Log().Infof(ctx, "当前已加载%d个", index)
+		}
+		// Append first, then flush: flushing before appending dropped the id
+		// that happened to arrive while the batch was full.
+		groupArr = append(groupArr, mongodb.StringTOBsonId(string(id)))
+		if len(groupArr) == size {
+			dispatch(groupArr)
+			groupArr = make([]primitive.ObjectID, 0, size)
+		}
+		index++
+	}
+	if len(groupArr) > 0 {
+		dispatch(groupArr)
+	}
+	// Wait for every worker instead of sleeping, so that analysis never reads
+	// biddingGroup while batches are still in flight.
+	wg.Wait()
+	g.Log().Infof(ctx, "开始读取标讯类型结束 耗时%f秒", time.Since(now).Seconds())
+
+}
+
+func loadBidding(arr []primitive.ObjectID) {
+	defer func() { <-pool }()
+	rData := map[string][]string{}
+	list, _ := common.MG.DB("bidding").Find("bidding", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
+	if list != nil && len(*list) >= 0 {
+		for _, m := range *list {
+			toptype := gconv.String(m["toptype"])
+			if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
+				rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
+			}
+		}
+	}
+	if len(*list) != size {
+		list2, _ := common.MG.DB("bidding").Find("bidding_back", g.Map{"_id": g.Map{"$in": arr}}, nil, `{"toptype":1,"_id":1}`, false, 0, size)
+		if list2 != nil && len(*list2) >= 0 {
+			for _, m := range *list2 {
+				toptype := gconv.String(m["toptype"])
+				if toptype == "招标" || toptype == "预告" || toptype == "结果" || toptype == "其它" {
+					rData[toptype] = append(rData[toptype], mongodb.BsonIdToSId(m["_id"]))
+				}
+			}
+		}
+	}
+	if len(rData) > 0 {
+		lock.Lock()
+		defer lock.Unlock()
+		for toptype, bidArr := range rData {
+			for _, bid := range bidArr {
+				biddingGroup[toptype] = append(biddingGroup[toptype], BidId(bid))
+			}
+		}
+	}
+}
+
+// analysis dumps the collected data to ./bidding.json and ./user.json and
+// logs, for every toptype in biddingGroup, the number of distinct visiting
+// users, the total number of visits and the per-user de-duplicated visits.
+func analysis() {
+	ctx := context.TODO()
+	// The dumps are best-effort: a failed write is reported but does not stop
+	// the summary from being logged.
+	if err := gfile.PutContents("./bidding.json", gconv.String(biddingGroup)); err != nil {
+		g.Log().Errorf(ctx, "write ./bidding.json: %v", err)
+	}
+	if err := gfile.PutContents("./user.json", gconv.String(fullData)); err != nil {
+		g.Log().Errorf(ctx, "write ./user.json: %v", err)
+	}
+	for top, ids := range biddingGroup {
+		var bidTotal, bidCqTotal int64
+		users := map[MogUserId]bool{} // distinct users across all biddings of this toptype
+		for _, id := range ids {
+			bidCqTotal += fullData.IdUserOnce[id] // visits, each user counted once per bidding
+			bidTotal += fullData.IdTotal[id]      // visits, not de-duplicated
+			for uId := range fullData.VisitedEq[id] {
+				users[uId] = true
+			}
+		}
+		g.Log().Infof(ctx, "%s 用户浏览量:%d 标讯浏览总量:%d 标讯浏览去重量:%d", top, len(users), bidTotal, bidCqTotal)
+	}
+}
+
+// GetDetailId decodes the encrypted id segment taken from a detail-page URL.
+//
+// path is the raw (possibly percent-encoded) segment. It returns the decoded
+// bidding id, or a non-nil error when the segment cannot be unescaped or
+// decoded, or when decoding panicked.
+func GetDetailId(path string) (id BidId, err error) {
+	// Deferred calls run last-in-first-out, so util.Catch gets the first
+	// chance at a panic (presumably it recovers and logs it — confirm). Normal
+	// returns never produce an empty id together with a nil error, so seeing
+	// that combination here means the body was cut short; report it as an
+	// error instead of handing the caller an empty id.
+	defer func() {
+		if id == "" && err == nil {
+			err = fmt.Errorf("decode detail id %q: aborted by panic", path)
+		}
+	}()
+	defer util.Catch()
+
+	unescaped, uerr := url.QueryUnescape(path)
+	if uerr != nil {
+		return "", fmt.Errorf("unescape detail path %q: %w", path, uerr)
+	}
+	path = unescaped
+	arr := encrypt.CommonDecodeArticle("content", path)
+	if len(arr) == 0 {
+		return "", fmt.Errorf("解密失败%s", path)
+	}
+	if arr[0] == "" {
+		return "", fmt.Errorf("解密失败2%s", path)
+	}
+	return BidId(arr[0]), nil
+}
+
+// GetPayUser returns the set of user ids that have a row in
+// dwd_f_data_equity_info whose endtime is later than the current time.
+//
+// A query failure is logged and yields an empty set; callers then treat every
+// user as non-paying, so the error line must not be ignored when reading the
+// resulting statistics.
+func GetPayUser() map[MogUserId]bool {
+	var (
+		pay = make(map[MogUserId]bool)
+		ctx = context.Background()
+	)
+	res, err := g.DB("subjectdb").Query(ctx, `SELECT id,userid FROM dwd_f_data_equity_info WHERE endtime>?`, time.Now().Format(time.DateTime))
+	switch {
+	case err != nil:
+		g.Log().Errorf(ctx, "query paying users: %v", err)
+	case !res.IsEmpty():
+		for _, m := range res.List() {
+			pay[MogUserId(gconv.String(m["userid"]))] = true
+		}
+	}
+	g.Log().Infof(ctx, "加载%d个付费用户", len(pay))
+	return pay
+}