main.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. package main
  2. import (
  3. qu "app.yhyue.com/moapp/jybase/common"
  4. "app.yhyue.com/moapp/jybase/mongodb"
  5. "context"
  6. "encoding/json"
  7. "fmt"
  8. _ "github.com/gogf/gf/contrib/drivers/clickhouse/v2"
  9. "github.com/gogf/gf/v2/util/gconv"
  10. "go.mongodb.org/mongo-driver/bson/primitive"
  11. "log"
  12. "strings"
  13. "time"
  14. // "github.com/gogf/gf/v2/os/gctx"
  15. )
  16. func main() {
  17. //正式环境
  18. go InitInsertDataStrategy()
  19. go Run()
  20. select {}
  21. }
  22. func dwd_f_personnel_behavior(st, et int64) {
  23. pageCodeMap := LoadPageCode()
  24. s := time.Unix(st, 0).Format(time.DateTime)
  25. e := time.Unix(et, 0).Format(time.DateTime)
  26. modelMap := MatchRegexModule()
  27. q := fmt.Sprintf(`select jy_trusted_id,user_id,position_id,phone,ip,platform,os,os_version,browser,browser_version,date,action_id,action_type,breaker_id,breaker_name,click_time,order_id,order_time,pay_time,pay_way,price,product,product_name,page_id,page_name,bidding_id,desc,url,source,user_agent,port,refer,search_word,filter,break_data,app_id,app_version,mini_program_code from data_analysis.dwd_f_personnel_behavior
  28. where click_time >= '%s'
  29. and click_time < '%s' order by click_time `, s, e)
  30. log.Println(q)
  31. ctx := context.Background()
  32. rows, err := Ch_analysis.Query(ctx, q)
  33. if err != nil {
  34. log.Fatal(err)
  35. }
  36. defer rows.Close()
  37. acitonMap := GetActionMap()
  38. filterCount := 0
  39. for rows.Next() {
  40. m := &Personnel_behavior{}
  41. if err := rows.Scan(m); err != nil {
  42. if err := rows.Scan(
  43. &m.Jy_trusted_id,
  44. &m.User_id,
  45. &m.Position_id,
  46. &m.Phone,
  47. &m.Ip,
  48. &m.Platform,
  49. &m.Os,
  50. &m.Os_version,
  51. &m.Browser,
  52. &m.Browser_version,
  53. &m.Date,
  54. &m.Action_id,
  55. &m.Action_type,
  56. &m.Breaker_id,
  57. &m.Breaker_name,
  58. &m.Click_time,
  59. &m.Order_id,
  60. &m.Order_time,
  61. &m.Pay_time,
  62. &m.Pay_way,
  63. &m.Price,
  64. &m.Product,
  65. &m.Product_name,
  66. &m.Page_id,
  67. &m.Page_name,
  68. &m.Bidding_id,
  69. &m.Desc,
  70. &m.Url,
  71. &m.Source,
  72. &m.User_agent,
  73. &m.Port,
  74. &m.Refer,
  75. &m.Search_word,
  76. &m.Filter,
  77. &m.Break_data,
  78. &m.App_id,
  79. &m.App_version,
  80. &m.Mini_program_code,
  81. ); err != nil {
  82. log.Fatal(err)
  83. }
  84. //过滤策略 过滤三级页||搜索||
  85. if (strings.Contains(m.Url, "/content/") ||
  86. strings.Contains(m.Url, "/jylab/supsearch/index.html") ||
  87. strings.Contains(m.Url, "%2Fcontent%2F")) && m.Action_id == "c_jy_open_page" ||
  88. strings.Contains(m.Url, "/qmx/") {
  89. filterCount++
  90. continue
  91. }
  92. //获取动作id
  93. action := ""
  94. if m.Action_id == "c_jy_open_page" {
  95. action = "访问页面"
  96. } else if m.Action_id == "c_jyclick" {
  97. action = "点击页面元素"
  98. }
  99. if m.Product_name == "下单" || m.Product_name == "支付" || m.Product_name == "绑定手机号" {
  100. action = m.Product_name
  101. }
  102. if strings.Contains(m.Product_name, "登录") || strings.Contains(m.Breaker_name, "登录") {
  103. action = "登录"
  104. }
  105. if strings.Contains(m.Breaker_name, "注册") {
  106. action = "注册"
  107. }
  108. actionid := acitonMap[action]
  109. //获取ip
  110. area, city := GetIpSource(m.Ip)
  111. urlinfo, _ := modelMap.MatchRegexMap(Unescape(m.Url))
  112. module := ""
  113. if urlinfo != nil {
  114. module = urlinfo.Module
  115. }
  116. referinfo, _ := modelMap.MatchRegexMap(Unescape(m.Refer))
  117. refername := ""
  118. if referinfo != nil {
  119. refername = referinfo.Page_name
  120. }
  121. //TODO 获取子系统
  122. subsystem := ""
  123. //TODO 获取附加词
  124. fields := ""
  125. if ordercode := ExtractOrderCode(m.Url); ordercode != "" {
  126. filterMap := map[string]interface{}{
  127. "ordercode": ordercode,
  128. }
  129. filterStr, _ := json.Marshal(filterMap)
  130. fields = string(filterStr)
  131. }
  132. //Macintosh Windows iPhone Linux
  133. platform := ""
  134. if strings.Contains(m.Url, "wx.") {
  135. platform = "wx"
  136. } else if strings.Contains(m.Url, "app-") {
  137. platform = "android"
  138. } else if strings.Contains(m.Url, "ios-") {
  139. platform = "ios"
  140. } else {
  141. platform = "pc"
  142. }
  143. //来源域内域外
  144. referType := GetReferType(m.Refer)
  145. //pagecodeid
  146. pagecodeid := int64(0)
  147. if m.Breaker_name == "" && m.Action_id == "c_jy_open_page" {
  148. pagecodeid = getPageCodeId(platform, m.Url, m.Breaker_name, module, m.Page_name, actionid, pageCodeMap, modelMap)
  149. }
  150. StrategyInfo.CacheData <- &UserBehaviorLog{
  151. UserID: m.User_id,
  152. ActionID: actionid, // 动作ID
  153. TrustedID: m.Jy_trusted_id, // 浏览器指纹
  154. IP: m.Ip, // 用户IP地址
  155. Area: area, // 省份
  156. City: city, // 城市
  157. OS: m.Os, // 操作系统
  158. Browser: m.Browser, // 浏览器类型
  159. BrowserVersion: m.Browser_version, // 浏览器版本
  160. Date: m.Date, // 行为发生时间
  161. Platform: platform, // 平台(如移动端、PC端)
  162. Subsystem: subsystem, // 子系统
  163. Module: module, // 模块
  164. URL: m.Url, // 访问的URL
  165. URLName: m.Page_name, // URL名称
  166. URLElement: m.Breaker_id, // 页面元素
  167. AddField: fields, // 附加数据
  168. TimeStamp: int32(m.Date.Unix()), // 时间戳
  169. Refer: m.Refer, // 引用来源
  170. ReferName: refername, // 引用名称
  171. ReferType: referType, // 引用类型
  172. SessionID: "", // 会话ID
  173. //SessionStartTime: nil, // 会话开始时间 //TODO 会话开始时间x
  174. Phone: m.Phone, // 手机号码
  175. PageName: m.Page_name, // 页面名称
  176. Desc: m.Desc, // 描述
  177. BreakData: m.Break_data, // 额外数据
  178. BreakerName: m.Breaker_name, //埋点名称
  179. DataType: m.Action_id,
  180. PositionId: gconv.String(m.Position_id),
  181. PageCodeId: uint64(pagecodeid),
  182. }
  183. }
  184. }
  185. log.Println("clickhouse end")
  186. log.Println("~~", filterCount)
  187. }
  188. func getPageCodeId(platform, url, breaker_name, module, url_name string, actionid int64, pageCodeMap map[string]int64, moduleMap *CompiledRegexMap) int64 {
  189. urlinfo, _ := moduleMap.MatchRegexMap(Unescape(url))
  190. url1 := Unescape(url)
  191. if urlinfo != nil {
  192. url1 = urlinfo.Url
  193. }
  194. breaker_name = ""
  195. key := fmt.Sprintf("%s_%s_%s", platform, url1, breaker_name)
  196. id := int64(0)
  197. if code := pageCodeMap[fmt.Sprintf("%s_%s_%s", platform, url1, breaker_name)]; code <= 0 {
  198. id = AddPathCodeId()
  199. q := `INSERT INTO path_page_code (id,url_name,url_element,action_id,platform,module,url) VALUES (?,?,?,?,?,?,?) `
  200. Ch.InsertBySql(q, id, url_name, breaker_name, actionid, platform, module, url1)
  201. pageCodeMap[key] = id
  202. } else {
  203. id = code
  204. }
  205. return id
  206. }
  207. // jy_mongodb_log 剑鱼日志
  208. func jy_mongodb_log(st, et int64) {
  209. userPhoneMap := GetPhone()
  210. modelMap := MatchRegexModule()
  211. dbname := []string{
  212. "jy_logs",
  213. "jyapp_logs",
  214. "subscribepay_logs",
  215. "debris_product_logs",
  216. }
  217. pageCodeMap := LoadPageCode()
  218. for _, v := range dbname {
  219. q := GetQuery(v, st, et)
  220. sess := Mgo_Log.GetMgoConn()
  221. defer Mgo_Log.DestoryMongoConn(sess)
  222. //data := []map[string]interface{}{}
  223. it := sess.DB("qfw").C(v).Find(q).Sort("-_id").Select(map[string]interface{}{
  224. "userid": 1,
  225. "ip": 1,
  226. "refer": 1,
  227. "client": 1,
  228. "url": 1,
  229. "createtime": 1,
  230. "date": 1,
  231. "trustedId": 1,
  232. "os": 1,
  233. "positionid": 1,
  234. "browse": 1,
  235. }).Iter()
  236. total := 0
  237. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  238. if total%50000 == 0 {
  239. log.Println("cur index ", total)
  240. }
  241. userid := qu.ObjToString(tmp["userid"])
  242. client := qu.ObjToString(tmp["client"])
  243. href := qu.ObjToString(tmp["url"])
  244. if !mongodb.IsObjectIdHex(userid) {
  245. userid = GetUserId(userid)
  246. }
  247. trustedId := qu.ObjToString(tmp["trustedId"])
  248. ip := qu.ObjToString(tmp["ip"])
  249. area, city := GetIpSource(ip)
  250. os := qu.ObjToString(tmp["os"])
  251. browse := qu.ObjToString(tmp["browse"])
  252. positionid := qu.ObjToString(tmp["positionid"])
  253. refer := qu.ObjToString(tmp["refer"])
  254. //TODO
  255. actionid := int64(0)
  256. if strings.Contains(href, "/login") {
  257. actionid = int64(3)
  258. }
  259. if strings.Contains(href, "/register") {
  260. actionid = int64(4)
  261. }
  262. i_createtime := qu.Int64All(tmp["date"])
  263. createtime := time.Unix(i_createtime, 0)
  264. if v == "subscribepay_logs" {
  265. ct, _ := tmp["createtime"].(primitive.DateTime)
  266. createtime = ct.Time()
  267. i_createtime = createtime.Unix()
  268. actionid = int64(7)
  269. if strings.Contains(href, "Pay") {
  270. actionid = int64(8)
  271. }
  272. }
  273. platform := GetPlatform(v, client, refer)
  274. subsystem := ""
  275. hrefinfo, _ := modelMap.MatchRegexMap(Unescape(href))
  276. module := ""
  277. if hrefinfo != nil {
  278. module = hrefinfo.Module
  279. }
  280. referinfo, _ := modelMap.MatchRegexMap(Unescape(refer))
  281. refermodule := ""
  282. urlname := ""
  283. urldesc := ""
  284. if referinfo != nil {
  285. refermodule = referinfo.Module
  286. urlname = referinfo.Page_name
  287. urldesc = referinfo.Desc
  288. }
  289. referType := GetReferType(refer)
  290. fields := ""
  291. if bid := GetArticleId(href); bid != "" {
  292. filterMap := map[string]interface{}{
  293. "bidding": bid,
  294. }
  295. filterStr, _ := json.Marshal(filterMap)
  296. fields = string(filterStr)
  297. }
  298. pagecodeid := getPageCodeId(platform, href, "", module, "", actionid, pageCodeMap, modelMap)
  299. StrategyInfo.CacheData <- &UserBehaviorLog{
  300. UserID: userid,
  301. ActionID: actionid, // 动作ID
  302. TrustedID: trustedId, // 浏览器指纹
  303. IP: ip, // 用户IP地址
  304. Area: area, // 省份
  305. City: city, // 城市
  306. OS: os, // 操作系统
  307. Browser: browse, // 浏览器类型
  308. BrowserVersion: "", // 浏览器版本
  309. Date: createtime, // 行为发生时间
  310. Platform: platform, // 平台(如移动端、PC端)
  311. Subsystem: subsystem, // 子系统
  312. Module: module, // 模块
  313. URL: href, // 访问的URL
  314. URLName: urlname, // URL名称
  315. URLElement: "", // 页面元素
  316. AddField: fields, // 附加数据
  317. TimeStamp: int32(i_createtime), // 时间戳
  318. Refer: refer, // 引用来源
  319. ReferName: refermodule, // 引用名称
  320. ReferType: referType, // 引用类型
  321. SessionID: "", // 会话ID
  322. //SessionStartTime: time.Now(), // 会话开始时间 //TODO 会话开始时间
  323. Phone: userPhoneMap[userid], // 手机号码
  324. PageName: urlname, // 页面名称
  325. Desc: urldesc, // 描述
  326. BreakData: "", // 额外数据
  327. PageCodeId: uint64(pagecodeid),
  328. PositionId: positionid,
  329. }
  330. tmp = make(map[string]interface{})
  331. }
  332. }
  333. log.Println("mongodb end")
  334. }