123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- package main
- import (
- qu "app.yhyue.com/moapp/jybase/common"
- "app.yhyue.com/moapp/jybase/mongodb"
- "context"
- "encoding/json"
- "fmt"
- _ "github.com/gogf/gf/contrib/drivers/clickhouse/v2"
- "github.com/gogf/gf/v2/util/gconv"
- "go.mongodb.org/mongo-driver/bson/primitive"
- "log"
- "strings"
- "time"
- // "github.com/gogf/gf/v2/os/gctx"
- )
- func main() {
- //正式环境
- go InitInsertDataStrategy()
- go Run()
- select {}
- }
- func dwd_f_personnel_behavior(st, et int64) {
- pageCodeMap := LoadPageCode()
- s := time.Unix(st, 0).Format(time.DateTime)
- e := time.Unix(et, 0).Format(time.DateTime)
- modelMap := MatchRegexModule()
- q := fmt.Sprintf(`select jy_trusted_id,user_id,position_id,phone,ip,platform,os,os_version,browser,browser_version,date,action_id,action_type,breaker_id,breaker_name,click_time,order_id,order_time,pay_time,pay_way,price,product,product_name,page_id,page_name,bidding_id,desc,url,source,user_agent,port,refer,search_word,filter,break_data,app_id,app_version,mini_program_code from data_analysis.dwd_f_personnel_behavior
- where click_time >= '%s'
- and click_time < '%s' order by click_time `, s, e)
- log.Println(q)
- ctx := context.Background()
- rows, err := Ch_analysis.Query(ctx, q)
- if err != nil {
- log.Fatal(err)
- }
- defer rows.Close()
- acitonMap := GetActionMap()
- filterCount := 0
- for rows.Next() {
- m := &Personnel_behavior{}
- if err := rows.Scan(m); err != nil {
- if err := rows.Scan(
- &m.Jy_trusted_id,
- &m.User_id,
- &m.Position_id,
- &m.Phone,
- &m.Ip,
- &m.Platform,
- &m.Os,
- &m.Os_version,
- &m.Browser,
- &m.Browser_version,
- &m.Date,
- &m.Action_id,
- &m.Action_type,
- &m.Breaker_id,
- &m.Breaker_name,
- &m.Click_time,
- &m.Order_id,
- &m.Order_time,
- &m.Pay_time,
- &m.Pay_way,
- &m.Price,
- &m.Product,
- &m.Product_name,
- &m.Page_id,
- &m.Page_name,
- &m.Bidding_id,
- &m.Desc,
- &m.Url,
- &m.Source,
- &m.User_agent,
- &m.Port,
- &m.Refer,
- &m.Search_word,
- &m.Filter,
- &m.Break_data,
- &m.App_id,
- &m.App_version,
- &m.Mini_program_code,
- ); err != nil {
- log.Fatal(err)
- }
- //过滤策略 过滤三级页||搜索||
- if (strings.Contains(m.Url, "/content/") ||
- strings.Contains(m.Url, "/jylab/supsearch/index.html") ||
- strings.Contains(m.Url, "%2Fcontent%2F")) && m.Action_id == "c_jy_open_page" ||
- strings.Contains(m.Url, "/qmx/") {
- filterCount++
- continue
- }
- //获取动作id
- action := ""
- if m.Action_id == "c_jy_open_page" {
- action = "访问页面"
- } else if m.Action_id == "c_jyclick" {
- action = "点击页面元素"
- }
- if m.Product_name == "下单" || m.Product_name == "支付" || m.Product_name == "绑定手机号" {
- action = m.Product_name
- }
- if strings.Contains(m.Product_name, "登录") || strings.Contains(m.Breaker_name, "登录") {
- action = "登录"
- }
- if strings.Contains(m.Breaker_name, "注册") {
- action = "注册"
- }
- actionid := acitonMap[action]
- //获取ip
- area, city := GetIpSource(m.Ip)
- urlinfo, _ := modelMap.MatchRegexMap(Unescape(m.Url))
- module := ""
- if urlinfo != nil {
- module = urlinfo.Module
- }
- referinfo, _ := modelMap.MatchRegexMap(Unescape(m.Refer))
- refername := ""
- if referinfo != nil {
- refername = referinfo.Page_name
- }
- //TODO 获取子系统
- subsystem := ""
- //TODO 获取附加词
- fields := ""
- if ordercode := ExtractOrderCode(m.Url); ordercode != "" {
- filterMap := map[string]interface{}{
- "ordercode": ordercode,
- }
- filterStr, _ := json.Marshal(filterMap)
- fields = string(filterStr)
- }
- //Macintosh Windows iPhone Linux
- platform := ""
- if strings.Contains(m.Url, "wx.") {
- platform = "wx"
- } else if strings.Contains(m.Url, "app-") {
- platform = "android"
- } else if strings.Contains(m.Url, "ios-") {
- platform = "ios"
- } else {
- platform = "pc"
- }
- //来源域内域外
- referType := GetReferType(m.Refer)
- //pagecodeid
- pagecodeid := int64(0)
- if m.Breaker_name == "" && m.Action_id == "c_jy_open_page" {
- pagecodeid = getPageCodeId(platform, m.Url, m.Breaker_name, module, m.Page_name, actionid, pageCodeMap, modelMap)
- }
- StrategyInfo.CacheData <- &UserBehaviorLog{
- UserID: m.User_id,
- ActionID: actionid, // 动作ID
- TrustedID: m.Jy_trusted_id, // 浏览器指纹
- IP: m.Ip, // 用户IP地址
- Area: area, // 省份
- City: city, // 城市
- OS: m.Os, // 操作系统
- Browser: m.Browser, // 浏览器类型
- BrowserVersion: m.Browser_version, // 浏览器版本
- Date: m.Date, // 行为发生时间
- Platform: platform, // 平台(如移动端、PC端)
- Subsystem: subsystem, // 子系统
- Module: module, // 模块
- URL: m.Url, // 访问的URL
- URLName: m.Page_name, // URL名称
- URLElement: m.Breaker_id, // 页面元素
- AddField: fields, // 附加数据
- TimeStamp: int32(m.Date.Unix()), // 时间戳
- Refer: m.Refer, // 引用来源
- ReferName: refername, // 引用名称
- ReferType: referType, // 引用类型
- SessionID: "", // 会话ID
- //SessionStartTime: nil, // 会话开始时间 //TODO 会话开始时间x
- Phone: m.Phone, // 手机号码
- PageName: m.Page_name, // 页面名称
- Desc: m.Desc, // 描述
- BreakData: m.Break_data, // 额外数据
- BreakerName: m.Breaker_name, //埋点名称
- DataType: m.Action_id,
- PositionId: gconv.String(m.Position_id),
- PageCodeId: uint64(pagecodeid),
- }
- }
- }
- log.Println("clickhouse end")
- log.Println("~~", filterCount)
- }
- func getPageCodeId(platform, url, breaker_name, module, url_name string, actionid int64, pageCodeMap map[string]int64, moduleMap *CompiledRegexMap) int64 {
- urlinfo, _ := moduleMap.MatchRegexMap(Unescape(url))
- url1 := Unescape(url)
- if urlinfo != nil {
- url1 = urlinfo.Url
- }
- breaker_name = ""
- key := fmt.Sprintf("%s_%s_%s", platform, url1, breaker_name)
- id := int64(0)
- if code := pageCodeMap[fmt.Sprintf("%s_%s_%s", platform, url1, breaker_name)]; code <= 0 {
- id = AddPathCodeId()
- q := `INSERT INTO path_page_code (id,url_name,url_element,action_id,platform,module,url) VALUES (?,?,?,?,?,?,?) `
- Ch.InsertBySql(q, id, url_name, breaker_name, actionid, platform, module, url1)
- pageCodeMap[key] = id
- } else {
- id = code
- }
- return id
- }
- // jy_mongodb_log 剑鱼日志
- func jy_mongodb_log(st, et int64) {
- userPhoneMap := GetPhone()
- modelMap := MatchRegexModule()
- dbname := []string{
- "jy_logs",
- "jyapp_logs",
- "subscribepay_logs",
- "debris_product_logs",
- }
- pageCodeMap := LoadPageCode()
- for _, v := range dbname {
- q := GetQuery(v, st, et)
- sess := Mgo_Log.GetMgoConn()
- defer Mgo_Log.DestoryMongoConn(sess)
- //data := []map[string]interface{}{}
- it := sess.DB("qfw").C(v).Find(q).Sort("-_id").Select(map[string]interface{}{
- "userid": 1,
- "ip": 1,
- "refer": 1,
- "client": 1,
- "url": 1,
- "createtime": 1,
- "date": 1,
- "trustedId": 1,
- "os": 1,
- "positionid": 1,
- "browse": 1,
- }).Iter()
- total := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%50000 == 0 {
- log.Println("cur index ", total)
- }
- userid := qu.ObjToString(tmp["userid"])
- client := qu.ObjToString(tmp["client"])
- href := qu.ObjToString(tmp["url"])
- if !mongodb.IsObjectIdHex(userid) {
- userid = GetUserId(userid)
- }
- trustedId := qu.ObjToString(tmp["trustedId"])
- ip := qu.ObjToString(tmp["ip"])
- area, city := GetIpSource(ip)
- os := qu.ObjToString(tmp["os"])
- browse := qu.ObjToString(tmp["browse"])
- positionid := qu.ObjToString(tmp["positionid"])
- refer := qu.ObjToString(tmp["refer"])
- //TODO
- actionid := int64(0)
- if strings.Contains(href, "/login") {
- actionid = int64(3)
- }
- if strings.Contains(href, "/register") {
- actionid = int64(4)
- }
- i_createtime := qu.Int64All(tmp["date"])
- createtime := time.Unix(i_createtime, 0)
- if v == "subscribepay_logs" {
- ct, _ := tmp["createtime"].(primitive.DateTime)
- createtime = ct.Time()
- i_createtime = createtime.Unix()
- actionid = int64(7)
- if strings.Contains(href, "Pay") {
- actionid = int64(8)
- }
- }
- platform := GetPlatform(v, client, refer)
- subsystem := ""
- hrefinfo, _ := modelMap.MatchRegexMap(Unescape(href))
- module := ""
- if hrefinfo != nil {
- module = hrefinfo.Module
- }
- referinfo, _ := modelMap.MatchRegexMap(Unescape(refer))
- refermodule := ""
- urlname := ""
- urldesc := ""
- if referinfo != nil {
- refermodule = referinfo.Module
- urlname = referinfo.Page_name
- urldesc = referinfo.Desc
- }
- referType := GetReferType(refer)
- fields := ""
- if bid := GetArticleId(href); bid != "" {
- filterMap := map[string]interface{}{
- "bidding": bid,
- }
- filterStr, _ := json.Marshal(filterMap)
- fields = string(filterStr)
- }
- pagecodeid := getPageCodeId(platform, href, "", module, "", actionid, pageCodeMap, modelMap)
- StrategyInfo.CacheData <- &UserBehaviorLog{
- UserID: userid,
- ActionID: actionid, // 动作ID
- TrustedID: trustedId, // 浏览器指纹
- IP: ip, // 用户IP地址
- Area: area, // 省份
- City: city, // 城市
- OS: os, // 操作系统
- Browser: browse, // 浏览器类型
- BrowserVersion: "", // 浏览器版本
- Date: createtime, // 行为发生时间
- Platform: platform, // 平台(如移动端、PC端)
- Subsystem: subsystem, // 子系统
- Module: module, // 模块
- URL: href, // 访问的URL
- URLName: urlname, // URL名称
- URLElement: "", // 页面元素
- AddField: fields, // 附加数据
- TimeStamp: int32(i_createtime), // 时间戳
- Refer: refer, // 引用来源
- ReferName: refermodule, // 引用名称
- ReferType: referType, // 引用类型
- SessionID: "", // 会话ID
- //SessionStartTime: time.Now(), // 会话开始时间 //TODO 会话开始时间
- Phone: userPhoneMap[userid], // 手机号码
- PageName: urlname, // 页面名称
- Desc: urldesc, // 描述
- BreakData: "", // 额外数据
- PageCodeId: uint64(pagecodeid),
- PositionId: positionid,
- }
- tmp = make(map[string]interface{})
- }
- }
- log.Println("mongodb end")
- }
|