package main

import (
	"encoding/json"
	"strings"
	"sync"
	"time"

	"go.uber.org/zap"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
)

// loadData performs the initial load of project records whose "lasttime" is
// greater than or equal to starttime and builds the in-memory lookup tables
// (mapPn, mapPc, mapPb, mapHref, AllIdsMap) from them. Each record is also
// written to redis under the "project" code.
//
// Per the original comment, callers load the most recent six months by
// default; the window itself is decided entirely by the starttime argument.
//
// findLock is held for the whole load.
func (p *ProjectTask) loadData(starttime int64) {
	log.Info("load project start..", zap.Int64("starttime", starttime))
	begin := time.Now()
	p.findLock.Lock()
	defer p.findLock.Unlock()
	sess := MgoP.GetMgoConn()
	defer MgoP.DestoryMongoConn(sess)

	q := map[string]interface{}{
		"lasttime": map[string]interface{}{"$gte": starttime},
	}
	// The "list" field is excluded from the projection.
	field := map[string]interface{}{"list": 0}
	// NOTE(review): the iterator is never closed and its error is never
	// inspected — confirm against the driver whether a Close call is needed.
	it := sess.DB(MgoP.DbName).C(p.coll).Find(&q).Select(field).Iter()

	n := 0
	pool := make(chan *ProjectCache, 1000)
	done := make(chan struct{})

	// index appends id to the entry stored under key, creating the entry on
	// first use. Empty keys are ignored.
	index := func(m map[string]*Key, key, id string) {
		if key == "" {
			return
		}
		if k := m[key]; k != nil {
			k.Arr = append(k.Arr, id)
			return
		}
		m[key] = &Key{Arr: []string{id}}
	}

	// Single consumer: it is the only goroutine touching mapPn/mapPc/mapPb,
	// so those maps need no lock here. It exits once pool is closed and
	// drained, which guarantees no buffered record is dropped.
	go func() {
		defer close(done)
		for tmp := range pool {
			n++
			if n%10000 == 0 {
				util.Debug("current", n, "mapPn", len(p.mapPn), "mapPc", len(p.mapPc), "mapPb", len(p.mapPb), "mapHref", len(p.mapHref))
			}
			if tmp == nil {
				continue
			}
			id := tmp.Id.Hex()

			// Project name plus its alternative names.
			index(p.mapPn, tmp.ProjectName, id)
			for _, v := range tmp.MPN {
				index(p.mapPn, v, id)
			}

			// Project code plus its alternative codes.
			index(p.mapPc, tmp.ProjectCode, id)
			for _, v := range tmp.MPC {
				index(p.mapPc, v, id)
			}

			// Buyers of two runes or fewer are not indexed.
			if len([]rune(tmp.Buyer)) > 2 {
				index(p.mapPb, tmp.Buyer, id)
			}

			p.AllIdsMapLock.Lock()
			p.AllIdsMap[id] = &ID{Id: id, P: tmp}
			p.AllIdsMapLock.Unlock()
		}
	}()

	// At most three records are converted and written to redis concurrently.
	sem := make(chan bool, 3)
	wg := &sync.WaitGroup{}
	for tmp := make(map[string]interface{}); it.Next(tmp); tmp = make(map[string]interface{}) {
		sem <- true
		wg.Add(1)
		go func(doc map[string]interface{}) {
			defer func() {
				<-sem
				wg.Done()
			}()
			// Round-trip through JSON to turn the raw document into a
			// ProjectCache. A decode error that still yields a populated
			// value is tolerated (best effort, as before); only a nil
			// result is unusable and would otherwise panic below.
			bys, err := json.Marshal(doc)
			var pc *ProjectCache
			if err == nil {
				err = json.Unmarshal(bys, &pc)
			}
			if pc == nil {
				log.Info("load project decode failed, skip..", zap.Error(err))
				return
			}
			saveFiled(p, doc, pc)
			redis.PutCKV("project", pc.Id.Hex(), doc)
			pool <- pc
		}(tmp)
	}

	// All producers finished -> close the pool -> wait for the consumer to
	// drain it. This replaces the former fixed two-second sleep.
	wg.Wait()
	close(pool)
	<-done
	log.Info("load project over..", zap.Int("n", n), zap.Duration("cost", time.Since(begin)))
}

// loadSite reads every document of the site collection and stores the sites
// whose Status equals 5 in p.mapSite, keyed by their Site field.
//
// findLock and mapSiteLock are held for the whole load.
func (p *ProjectTask) loadSite() {
	log.Info("load site start..")
	begin := time.Now()
	p.findLock.Lock()
	defer p.findLock.Unlock()
	p.mapSiteLock.Lock()
	defer p.mapSiteLock.Unlock()
	sess := MgoS.GetMgoConn()
	defer MgoS.DestoryMongoConn(sess)

	q := map[string]interface{}{}
	// NOTE(review): the iterator is never closed and its error is never
	// inspected — confirm against the driver whether a Close call is needed.
	it := sess.DB(MgoS.DbName).C(SiteColl).Find(&q).Iter()

	// Records are handled inline: the per-record work is a small JSON
	// round-trip, and doing it here avoids spawning one goroutine per
	// document and the need to guess when they have all finished.
	n := 0
	for {
		result := make(map[string]interface{})
		if !it.Next(&result) {
			break
		}
		n++

		bys, err := json.Marshal(result)
		var site *Site
		if err == nil {
			err = json.Unmarshal(bys, &site)
		}
		if site == nil {
			log.Info("load site decode failed, skip..", zap.Error(err))
			continue
		}
		// Status 5 is the value the original comment describes as the
		// site's "valid" state flag.
		if site.Status == 5 {
			p.mapSite[site.Site] = site
		}
	}
	log.Info("load site over..", zap.Int("n", n), zap.Duration("cost", time.Since(begin)))
}

// saveFiled records auxiliary lookups for one project document:
//
//   - when res["projecthref"] is non-empty, it maps that href to the project
//     id in p.mapHref (guarded by mapHrefLock);
//   - when res["entidlist"] is a list with the same length as the
//     comma-separated res["s_winner"] and res["buyer"] is non-empty, it
//     stores "buyer,winner" -> project id in redis under RedisCode for every
//     position whose entid is not "-", unless the key already exists.
//
// A nil tmp is ignored.
func saveFiled(p *ProjectTask, res map[string]interface{}, tmp *ProjectCache) {
	if tmp == nil {
		return
	}
	id := tmp.Id.Hex()

	if proHref := util.ObjToString(res["projecthref"]); proHref != "" {
		p.mapHrefLock.Lock()
		p.mapHref[proHref] = id
		p.mapHrefLock.Unlock()
	}

	// Comma-ok assertion: a missing or unexpectedly typed entidlist is
	// skipped instead of panicking.
	if raw, ok := res["entidlist"].([]interface{}); ok {
		elist := util.ObjArrToStringArr(raw)
		wlist := strings.Split(util.ObjToString(res["s_winner"]), ",")
		buyer := util.ObjToString(res["buyer"])
		if len(elist) == len(wlist) && buyer != "" {
			for i, eid := range elist {
				if eid == "-" {
					continue
				}
				text := buyer + "," + wlist[i]
				// The second result of redis.Exists is discarded, as
				// before; a false answer leads to a write.
				if ex, _ := redis.Exists(RedisCode, text); !ex {
					redis.PutCKV(RedisCode, text, id)
				}
			}
		}
	}

	// Disabled: population of tmp.InfoFiled from res["infofield"].
	//tmpMap := make(map[string]InfoField)
	//infoMap := res["infofield"].(map[string]interface{})
	//for _, v := range infoMap {
	//	var field InfoField
	//	b, _ := json.Marshal(v)
	//	_ = json.Unmarshal(b, &field)
	//	tmpMap[tmp.Id.Hex()] = field
	//}
	//tmp.InfoFiled = tmpMap
}

// loadSpiderCode loads the "code" and "isflow" fields of every document in
// the "luaconfig" collection and stores isflow (converted with util.IntAll)
// in p.mapSpider, keyed by code.
//
// findLock and mapSpiderLock are held for the whole load.
func (p *ProjectTask) loadSpiderCode() {
	log.Info("load spider code start..")
	begin := time.Now()
	p.findLock.Lock()
	defer p.findLock.Unlock()
	p.mapSpiderLock.Lock()
	defer p.mapSpiderLock.Unlock()
	sess := MgoS.GetMgoConn()
	defer MgoS.DestoryMongoConn(sess)

	q := map[string]interface{}{}
	field := map[string]interface{}{"code": 1, "isflow": 1}
	// NOTE(review): the iterator is never closed and its error is never
	// inspected — confirm against the driver whether a Close call is needed.
	it := sess.DB(MgoS.DbName).C("luaconfig").Find(&q).Select(field).Iter()

	// Records are handled inline; there is no per-record work worth handing
	// to another goroutine.
	n := 0
	for {
		result := make(map[string]interface{})
		if !it.Next(&result) {
			break
		}
		n++
		code := util.ObjToString(result["code"])
		p.mapSpider[code] = util.IntAll(result["isflow"])
	}
	log.Info("load spider over..", zap.Int("n", n), zap.Duration("cost", time.Since(begin)))
}