123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302 |
- package entity
- import (
- "log"
- "strconv"
- "strings"
- "sync"
- "telemarketingEtl/config"
- "telemarketingEtl/util"
- "time"
- "app.yhyue.com/moapp/jybase/common"
- "app.yhyue.com/moapp/jybase/date"
- "app.yhyue.com/moapp/jybase/mongodb"
- "github.com/gogf/gf/v2/frame/g"
- "github.com/gogf/gf/v2/util/gconv"
- )
// Package-level concurrency primitives.
var (
	// sPool is a bool channel; init allocates it with the capacity read from
	// the "poolSize" configuration entry.
	// NOTE(review): presumably used as a concurrency limiter; confirm against callers.
	sPool chan bool

	// sWait is a shared WaitGroup.
	// NOTE(review): no user is visible in this file section; confirm against callers.
	sWait = new(sync.WaitGroup)
)
- func init() {
- poolSize := g.Cfg().MustGet(ctx, "poolSize").Int()
- sPool = make(chan bool, poolSize)
- }
- // 加载地区代码表
- func AreaCode() map[string]string {
- areaCodeMap := map[string]string{}
- data := config.JianyuSubjectdb.SelectBySql(`SELECT code,name FROM d_area_code WHERE level =1`)
- if data != nil && len(*data) > 0 {
- for _, v := range *data {
- areaCodeMap[gconv.String(v["name"])] = gconv.String(v["code"])
- }
- }
- return areaCodeMap
- }
- // 加载信息类型代码表
- func SubTypeCode() map[string]string {
- subTypeCodeMap := map[string]string{}
- data := config.JianyuSubjectdb.SelectBySql(`SELECT code,pcode,level,name FROM d_topsubtype_code`)
- if data != nil && len(*data) > 0 {
- for _, v := range *data {
- name := gconv.String(v["name"])
- level := gconv.Int(v["level"])
- code := gconv.String(v["code"])
- //拟建重名过滤
- if name == "拟建" && level == 2 {
- continue
- }
- //统一名称
- if level == 1 {
- if name == "预告" {
- name = "招标预告"
- } else if name == "招标" {
- name = "招标公告"
- } else if name == "结果" {
- name = "招标结果"
- } else if name == "其它" {
- name = "招标信用信息"
- }
- }
- subTypeCodeMap[name] = code
- }
- }
- return subTypeCodeMap
- }
- // 加载采购单位代码表
- func BuyerClassCode() map[string]string {
- buyerClassCodeMap := map[string]string{}
- data := config.JianyuSubjectdb.SelectBySql(`SELECT code,name FROM d_buyerclass_code`)
- if data != nil && len(*data) > 0 {
- for _, v := range *data {
- buyerClassCodeMap[gconv.String(v["name"])] = gconv.String(v["code"])
- }
- }
- return buyerClassCodeMap
- }
- // 历史数据刷库
- func SearchInfoAdd(start, end int64) {
- s_id := util.GetObjectId(start)
- e_id := util.GetObjectId(end)
- index := g.Cfg().MustGet(ctx, "index").Int()
- query := map[string]interface{}{
- "_id": map[string]interface{}{
- "$gte": s_id,
- "$lt": e_id,
- },
- }
- //加载地区代码表
- areaCodeMap := AreaCode()
- //加载信息类型代码表
- subTypeCodeMap := SubTypeCode()
- //加载采购单位类型代码表
- buyerClassCodeMap := BuyerClassCode()
- //
- session := config.MgoLog.GetMgoConn()
- iter := session.DB("qfw").C("jy_search_log").Find(query).Iter()
- count := 0
- values := []interface{}{}
- fieids := []string{"userid", "search_word", "exclude_word", "search_area", "search_model", "matchtype", "search_industry", "max_price", "min_price", "search_publishtime_start", "search_publishtime_end", "search_type", "filetext", "search_buyerclass", "platform", "search_time"}
- for thisData := map[string]interface{}{}; iter.Next(&thisData); {
- func(thisData map[string]interface{}) {
- userid := gconv.String(thisData["s_userid"])
- if userid == "" {
- return
- }
- if !mongodb.IsObjectIdHex(userid) {
- userid, _ = GetUserIdByPositionId(userid)
- }
- if userid == "" {
- return
- }
- //搜索词
- search_word := gconv.String(thisData["search_word"])
- //排除词
- exclude_word := gconv.String(thisData["exclusionWords"])
- //地区
- search_area := gconv.String(thisData["search_area"])
- area := ""
- if search_area != "" {
- arr := strings.Split(search_area, ",")
- for k, v := range arr {
- area += areaCodeMap[v]
- if k != len(arr)-1 {
- area += ","
- }
- }
- }
- searchMode := gconv.String(thisData["searchMode"]) //搜索日志库里存的 搜索模式:0:精准搜索;1:模糊搜索
- search_mode := 2
- if searchMode == "精准搜索" {
- search_mode = 1 //tidb的搜索信息表存 1-精准查询,2-模糊查询
- }
- //行业
- search_industry := gconv.String(thisData["search_industry"])
- //价格
- max_price := int64(0)
- min_price := int64(0)
- searchPriceStr, ok := thisData["search_price"].(string)
- if !ok {
- min_price, max_price = SearchPriceOld(gconv.Strings(thisData["search_price"]))
- } else {
- min_price, max_price = SearchPriceNew(searchPriceStr)
- }
- //发布时间开始时间
- search_publishtime_start := ""
- //发布时间结束时间
- search_publishtime_end := ""
- search_publishtime := gconv.String(thisData["search_publishtime"])
- createtime := gconv.Int64(thisData["createtime"])
- search_time := time.Unix(createtime, 0).Format(date.Date_Full_Layout)
- search_publishtime_start, search_publishtime_end = PublishTime(createtime, search_publishtime)
- //信息类型
- search_type := gconv.String(thisData["search_subType"])
- subType := ""
- if search_type == "" {
- arr := strings.Split(search_type, ",")
- for k, v := range arr {
- subType += subTypeCodeMap[v]
- if k != len(arr)-1 {
- subType += ","
- }
- }
- }
- //搜索范围 todo 是否附件 标题全文 都得看他 //1-标题,2-全文
- /*
- 免费 标题(title) 正文(content) 老用户【中标企业(winner)】
- 付费用户 全部(all)、标题(title) 正文(content) 会员: 采购单位(buyer) 中标企业(winner) 招标代理机构(agency) 附件(file)
- 项目名称projectname和标的物purchasing(ppa)
- */
- search_selectType := gconv.String(thisData["search_selectType"])
- //有无附件
- filetext := 0
- matchtype := 1 //1-标题,2-全文,3-全部
- if strings.Contains(search_selectType, "filetext") {
- filetext = 1 //附件
- }
- //全文
- if !strings.Contains(search_selectType, "title") && (strings.Contains(search_selectType, "content") || strings.Contains(search_selectType, "detail")) {
- matchtype = 2
- }
- //标题+全文
- if strings.Contains(search_selectType, "title") && (strings.Contains(search_selectType, "content") || strings.Contains(search_selectType, "detail")) {
- matchtype = 3
- }
- //采购单位类型
- search_buyerclass := gconv.String(thisData["search_buyerClass"])
- buyerclass := ""
- if search_buyerclass == "" {
- arr := strings.Split(search_buyerclass, ",")
- for k, v := range arr {
- buyerclass += buyerClassCodeMap[v]
- if k != len(arr)-1 {
- buyerclass += ","
- }
- }
- }
- //平台
- platform := gconv.String(thisData["platform"])
- values = append(values, userid, search_word, exclude_word, area, search_mode, matchtype, search_industry, max_price, min_price, common.If(search_publishtime_start == "", nil, search_publishtime_start), common.If(search_publishtime_end == "", nil, search_publishtime_end), subType, filetext, buyerclass, platform, search_time)
- //存库
- // sql := `INSERT INTO dwd_f_userbase_search_info
- // (userid,search_word, exclude_word, search_area,search_model,matchtype,
- // search_industry,max_price, min_price,
- // search_publishtime_start,search_publishtime_end,search_type,
- // filetext,search_buyerclass, platform,search_time)
- // VALUES
- // (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)`
- // id := config.JianyuSubjectdb.InsertBySql(sql, userid, search_word, exclude_word, area, search_mode, matchtype,
- // search_industry, max_price, min_price, search_publishtime_start, search_publishtime_end, subType, filetext, buyerclass, platform, search_time)
- // if id <= 0 {
- // log.Println("插入失败:", userid, search_word, exclude_word, area, search_mode, matchtype,
- // search_industry, max_price, min_price, search_publishtime_start, search_publishtime_end, subType, filetext, buyerclass, platform, search_time)
- // }
- if count%index == 0 {
- if len(values) > 0 {
- config.JianyuSubjectdb.InsertBatch("dwd_f_userbase_search_info", fieids, values)
- values = []interface{}{}
- }
- }
- }(thisData)
- count++
- if count%5000 == 0 {
- log.Printf("已完成%d条数据\n", count)
- }
- thisData = map[string]interface{}{}
- }
- if len(values) > 0 {
- config.JianyuSubjectdb.InsertBatch("dwd_f_userbase_search_info", fieids, values)
- values = []interface{}{}
- }
- log.Println("end")
- }
- // 价格区间计算处理 新
- func SearchPriceNew(search_price string) (min, max int64) {
- if search_price == "" || search_price == "-" {
- return
- }
- result := strings.Split(search_price, "-")
- if len(result) > 1 {
- return gconv.Int64(result[0]), gconv.Int64(result[1])
- }
- return
- }
- // 价格区间计算处理 老
- func SearchPriceOld(search_price []string) (min, max int64) {
- if len(search_price) == 0 {
- return
- }
- result := gconv.Strings(search_price)
- if len(result) > 1 {
- return gconv.Int64(result[0]), gconv.Int64(result[1])
- }
- return
- }
- // 发布时间
- // 根据入库时间反推 开始结束时间
- func PublishTime(createtime int64, publishTime string) (startTime, endTime string) {
- if publishTime == "" {
- return "", ""
- }
- now := time.Unix(createtime, 0)
- endTime = time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
- if publishTime == "lately-7" { //最近7天
- startTime = time.Date(now.Year(), now.Month(), now.Day()-7, 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
- } else if publishTime == "lately-30" { //最近30天
- startTime = time.Date(now.Year(), now.Month(), now.Day()-30, 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
- } else if publishTime == "thisyear" { //最近一年
- startTime = time.Date(now.Year()-1, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Format(date.Date_Full_Layout)
- } else if publishTime == "threeyear" { //最近三年
- startTime = time.Date(now.Year()-3, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Format(date.Date_Full_Layout)
- } else if publishTime == "fiveyear" { //最近五年
- startTime = time.Date(now.Year()-5, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Format(date.Date_Full_Layout)
- } else if len(strings.Split(publishTime, "_")) > 1 {
- startTimeStr := strings.Split(publishTime, "_")[0]
- endTimeStr := strings.Split(publishTime, "_")[1]
- if endTimeStr != "" {
- et, _ := strconv.ParseInt(endTimeStr, 0, 64)
- etTime := time.Unix(et, 0)
- endTime = time.Date(etTime.Year(), etTime.Month(), etTime.Day()+1, 0, 0, 0, 0, time.Local).Format(date.Date_Full_Layout)
- }
- if startTimeStr != "" {
- st, _ := strconv.ParseInt(startTimeStr, 0, 64)
- startTime = time.Unix(st, 0).Format(date.Date_Full_Layout)
- }
- }
- return
- }
|