123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- package spider
- import (
- "github.com/donnie4w/go-logger/logger"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- qu "qfw/util"
- "sync"
- "time"
- )
- // var SpaceReg = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
- // var HtmlReg = regexp.MustCompile("<[^>]*?>")
- // var HanReg = regexp.MustCompile("[\u4e00-\u9fa5]+")
var ErrFid = "a6879f0a8570256aa21fb978e6dabb50429a30dfacff697cf0b898abbc5c262e" // attachment with restricted access
- // 初始化延迟采集站点集合
- func InitOther() {
- defer qu.Catch()
- DelaySiteMap = map[string]*DelaySite{}
- DelaySiteLock = &sync.Mutex{}
- go func() {
- for {
- list, _ := MgoS.Find("spider_compete", nil, nil, nil, false, -1, -1)
- for _, l := range *list {
- site := qu.ObjToString(l["site"])
- delayTime := qu.IntAll(l["delaytime"])
- compete, _ := l["compete"].(bool)
- DelaySiteLock.Lock()
- DelaySiteMap[site] = &DelaySite{
- DelayTime: delayTime,
- Compete: compete,
- }
- DelaySiteLock.Unlock()
- }
- logger.Info("重载延时采集配置...")
- time.Sleep(12 * time.Hour)
- }
- }()
- }
- // 获取第day天凌晨的时间戳
- func GetTime(day int) int64 {
- nowTime := time.Now().AddDate(0, 0, day)
- timeStr := util.FormatDate(&nowTime, util.Date_Short_Layout)
- t, _ := time.ParseInLocation(util.Date_Short_Layout, timeStr, time.Local)
- return t.Unix()
- }
- // 获取第day天凌晨的时间字符串
- func GetStrTime(day int) string {
- nowTime := time.Now().AddDate(0, 0, day)
- timeStr := util.FormatDate(&nowTime, util.Date_Short_Layout)
- return timeStr
- }
|