dwd_f_userbase_search_info.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. package entity
  2. import (
  3. "log"
  4. "strconv"
  5. "strings"
  6. "sync"
  7. "telemarketingEtl/config"
  8. "telemarketingEtl/util"
  9. "time"
  10. "app.yhyue.com/moapp/jybase/common"
  11. "app.yhyue.com/moapp/jybase/date"
  12. "app.yhyue.com/moapp/jybase/mongodb"
  13. "github.com/gogf/gf/v2/frame/g"
  14. "github.com/gogf/gf/v2/util/gconv"
  15. )
  16. var (
  17. sPool chan bool
  18. sWait = &sync.WaitGroup{}
  19. )
  20. func init() {
  21. poolSize := g.Cfg().MustGet(ctx, "poolSize").Int()
  22. sPool = make(chan bool, poolSize)
  23. }
  24. // 加载地区代码表
  25. func AreaCode() map[string]string {
  26. areaCodeMap := map[string]string{}
  27. data := config.JianyuSubjectdb.SelectBySql(`SELECT code,name FROM d_area_code WHERE level =1`)
  28. if data != nil && len(*data) > 0 {
  29. for _, v := range *data {
  30. areaCodeMap[gconv.String(v["name"])] = gconv.String(v["code"])
  31. }
  32. }
  33. return areaCodeMap
  34. }
  35. // 加载信息类型代码表
  36. func SubTypeCode() map[string]string {
  37. subTypeCodeMap := map[string]string{}
  38. data := config.JianyuSubjectdb.SelectBySql(`SELECT code,pcode,level,name FROM d_topsubtype_code`)
  39. if data != nil && len(*data) > 0 {
  40. for _, v := range *data {
  41. name := gconv.String(v["name"])
  42. level := gconv.Int(v["level"])
  43. code := gconv.String(v["code"])
  44. //拟建重名过滤
  45. if name == "拟建" && level == 2 {
  46. continue
  47. }
  48. //统一名称
  49. if level == 1 {
  50. if name == "预告" {
  51. name = "招标预告"
  52. } else if name == "招标" {
  53. name = "招标公告"
  54. } else if name == "结果" {
  55. name = "招标结果"
  56. } else if name == "其它" {
  57. name = "招标信用信息"
  58. }
  59. }
  60. subTypeCodeMap[name] = code
  61. }
  62. }
  63. return subTypeCodeMap
  64. }
  65. // 加载采购单位代码表
  66. func BuyerClassCode() map[string]string {
  67. buyerClassCodeMap := map[string]string{}
  68. data := config.JianyuSubjectdb.SelectBySql(`SELECT code,name FROM d_buyerclass_code`)
  69. if data != nil && len(*data) > 0 {
  70. for _, v := range *data {
  71. buyerClassCodeMap[gconv.String(v["name"])] = gconv.String(v["code"])
  72. }
  73. }
  74. return buyerClassCodeMap
  75. }
  76. // 历史数据刷库
  77. func SearchInfoAdd(start, end int64) {
  78. s_id := util.GetObjectId(start)
  79. e_id := util.GetObjectId(end)
  80. index := g.Cfg().MustGet(ctx, "index").Int()
  81. query := map[string]interface{}{
  82. "_id": map[string]interface{}{
  83. "$gte": s_id,
  84. "$lt": e_id,
  85. },
  86. }
  87. //加载地区代码表
  88. areaCodeMap := AreaCode()
  89. //加载信息类型代码表
  90. subTypeCodeMap := SubTypeCode()
  91. //加载采购单位类型代码表
  92. buyerClassCodeMap := BuyerClassCode()
  93. //
  94. session := config.MgoLog.GetMgoConn()
  95. iter := session.DB("qfw").C("jy_search_log").Find(query).Iter()
  96. count := 0
  97. values := []interface{}{}
  98. fieids := []string{"userid", "search_word", "exclude_word", "search_area", "search_model", "matchtype", "search_industry", "max_price", "min_price", "search_publishtime_start", "search_publishtime_end", "search_type", "filetext", "search_buyerclass", "platform", "search_time"}
  99. for thisData := map[string]interface{}{}; iter.Next(&thisData); {
  100. func(thisData map[string]interface{}) {
  101. userid := gconv.String(thisData["s_userid"])
  102. if userid == "" {
  103. return
  104. }
  105. if !mongodb.IsObjectIdHex(userid) {
  106. userid, _ = GetUserIdByPositionId(userid)
  107. }
  108. if userid == "" {
  109. return
  110. }
  111. //搜索词
  112. search_word := gconv.String(thisData["search_word"])
  113. //排除词
  114. exclude_word := gconv.String(thisData["exclusionWords"])
  115. //地区
  116. search_area := gconv.String(thisData["search_area"])
  117. area := ""
  118. if search_area != "" {
  119. arr := strings.Split(search_area, ",")
  120. for k, v := range arr {
  121. area += areaCodeMap[v]
  122. if k != len(arr)-1 {
  123. area += ","
  124. }
  125. }
  126. }
  127. searchMode := gconv.String(thisData["searchMode"]) //搜索日志库里存的 搜索模式:0:精准搜索;1:模糊搜索
  128. search_mode := 2
  129. if searchMode == "精准搜索" {
  130. search_mode = 1 //tidb的搜索信息表存 1-精准查询,2-模糊查询
  131. }
  132. //行业
  133. search_industry := gconv.String(thisData["search_industry"])
  134. //价格
  135. max_price := int64(0)
  136. min_price := int64(0)
  137. searchPriceStr, ok := thisData["search_price"].(string)
  138. if !ok {
  139. min_price, max_price = SearchPriceOld(gconv.Strings(thisData["search_price"]))
  140. } else {
  141. min_price, max_price = SearchPriceNew(searchPriceStr)
  142. }
  143. //发布时间开始时间
  144. search_publishtime_start := ""
  145. //发布时间结束时间
  146. search_publishtime_end := ""
  147. search_publishtime := gconv.String(thisData["search_publishtime"])
  148. createtime := gconv.Int64(thisData["createtime"])
  149. search_time := time.Unix(createtime, 0).Format(date.Date_Full_Layout)
  150. search_publishtime_start, search_publishtime_end = PublishTime(createtime, search_publishtime)
  151. //信息类型
  152. search_type := gconv.String(thisData["search_subType"])
  153. subType := ""
  154. if search_type == "" {
  155. arr := strings.Split(search_type, ",")
  156. for k, v := range arr {
  157. subType += subTypeCodeMap[v]
  158. if k != len(arr)-1 {
  159. subType += ","
  160. }
  161. }
  162. }
  163. //搜索范围 todo 是否附件 标题全文 都得看他 //1-标题,2-全文
  164. /*
  165. 免费 标题(title) 正文(content) 老用户【中标企业(winner)】
  166. 付费用户 全部(all)、标题(title) 正文(content) 会员: 采购单位(buyer) 中标企业(winner) 招标代理机构(agency) 附件(file)
  167. 项目名称projectname和标的物purchasing(ppa)
  168. */
  169. search_selectType := gconv.String(thisData["search_selectType"])
  170. //有无附件
  171. filetext := 0
  172. matchtype := 1 //1-标题,2-全文,3-全部
  173. if strings.Contains(search_selectType, "filetext") {
  174. filetext = 1 //附件
  175. }
  176. //全文
  177. if !strings.Contains(search_selectType, "title") && (strings.Contains(search_selectType, "content") || strings.Contains(search_selectType, "detail")) {
  178. matchtype = 2
  179. }
  180. //标题+全文
  181. if strings.Contains(search_selectType, "title") && (strings.Contains(search_selectType, "content") || strings.Contains(search_selectType, "detail")) {
  182. matchtype = 3
  183. }
  184. //采购单位类型
  185. search_buyerclass := gconv.String(thisData["search_buyerClass"])
  186. buyerclass := ""
  187. if search_buyerclass == "" {
  188. arr := strings.Split(search_buyerclass, ",")
  189. for k, v := range arr {
  190. buyerclass += buyerClassCodeMap[v]
  191. if k != len(arr)-1 {
  192. buyerclass += ","
  193. }
  194. }
  195. }
  196. //平台
  197. platform := gconv.String(thisData["platform"])
  198. values = append(values, userid, search_word, exclude_word, area, search_mode, matchtype, search_industry, max_price, min_price, common.If(search_publishtime_start == "", nil, search_publishtime_start), common.If(search_publishtime_end == "", nil, search_publishtime_end), subType, filetext, buyerclass, platform, search_time)
  199. //存库
  200. // sql := `INSERT INTO dwd_f_userbase_search_info
  201. // (userid,search_word, exclude_word, search_area,search_model,matchtype,
  202. // search_industry,max_price, min_price,
  203. // search_publishtime_start,search_publishtime_end,search_type,
  204. // filetext,search_buyerclass, platform,search_time)
  205. // VALUES
  206. // (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`
  207. // id := config.JianyuSubjectdb.InsertBySql(sql, userid, search_word, exclude_word, area, search_mode, matchtype,
  208. // search_industry, max_price, min_price, search_publishtime_start, search_publishtime_end, subType, filetext, buyerclass, platform, search_time)
  209. // if id <= 0 {
  210. // log.Println("插入失败:", userid, search_word, exclude_word, area, search_mode, matchtype,
  211. // search_industry, max_price, min_price, search_publishtime_start, search_publishtime_end, subType, filetext, buyerclass, platform, search_time)
  212. // }
  213. if count%index == 0 {
  214. if len(values) > 0 {
  215. config.JianyuSubjectdb.InsertBatch("dwd_f_userbase_search_info", fieids, values)
  216. values = []interface{}{}
  217. }
  218. }
  219. }(thisData)
  220. count++
  221. if count%5000 == 0 {
  222. log.Printf("已完成%d条数据\n", count)
  223. }
  224. thisData = map[string]interface{}{}
  225. }
  226. if len(values) > 0 {
  227. config.JianyuSubjectdb.InsertBatch("dwd_f_userbase_search_info", fieids, values)
  228. values = []interface{}{}
  229. }
  230. log.Println("end")
  231. }
  232. // 价格区间计算处理 新
  233. func SearchPriceNew(search_price string) (min, max int64) {
  234. if search_price == "" || search_price == "-" {
  235. return
  236. }
  237. result := strings.Split(search_price, "-")
  238. if len(result) > 1 {
  239. return gconv.Int64(result[0]), gconv.Int64(result[1])
  240. }
  241. return
  242. }
  243. // 价格区间计算处理 老
  244. func SearchPriceOld(search_price []string) (min, max int64) {
  245. if len(search_price) == 0 {
  246. return
  247. }
  248. result := gconv.Strings(search_price)
  249. if len(result) > 1 {
  250. return gconv.Int64(result[0]), gconv.Int64(result[1])
  251. }
  252. return
  253. }
  254. // 发布时间
  255. // 根据入库时间反推 开始结束时间
  256. func PublishTime(createtime int64, publishTime string) (startTime, endTime string) {
  257. if publishTime == "" {
  258. return "", ""
  259. }
  260. now := time.Unix(createtime, 0)
  261. endTime = time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
  262. if publishTime == "lately-7" { //最近7天
  263. startTime = time.Date(now.Year(), now.Month(), now.Day()-7, 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
  264. } else if publishTime == "lately-30" { //最近30天
  265. startTime = time.Date(now.Year(), now.Month(), now.Day()-30, 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
  266. } else if publishTime == "thisyear" { //最近一年
  267. startTime = time.Date(now.Year()-1, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Format(date.Date_Full_Layout)
  268. } else if publishTime == "threeyear" { //最近三年
  269. startTime = time.Date(now.Year()-3, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Format(date.Date_Full_Layout)
  270. } else if publishTime == "fiveyear" { //最近五年
  271. startTime = time.Date(now.Year()-5, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Format(date.Date_Full_Layout)
  272. } else if len(strings.Split(publishTime, "_")) > 1 {
  273. startTimeStr := strings.Split(publishTime, "_")[0]
  274. endTimeStr := strings.Split(publishTime, "_")[1]
  275. if endTimeStr != "" {
  276. et, _ := strconv.ParseInt(endTimeStr, 0, 64)
  277. etTime := time.Unix(et, 0)
  278. endTime = time.Date(etTime.Year(), etTime.Month(), etTime.Day()+1, 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
  279. }
  280. if startTimeStr != "" {
  281. st, _ := strconv.ParseInt(startTimeStr, 0, 64)
  282. startTime = time.Unix(st, 0).Format(date.Date_Full_Layout)
  283. }
  284. }
  285. return
  286. }