util.go 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. package spider
  2. import (
  3. "github.com/donnie4w/go-logger/logger"
  4. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. qu "qfw/util"
  6. "sync"
  7. "time"
  8. )
  9. // var SpaceReg = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
  10. // var HtmlReg = regexp.MustCompile("<[^>]*?>")
  11. // var HanReg = regexp.MustCompile("[\u4e00-\u9fa5]+")
  12. var ErrFid = "a6879f0a8570256aa21fb978e6dabb50429a30dfacff697cf0b898abbc5c262e" // attachment whose access is restricted
  13. // 初始化延迟采集站点集合
  14. func InitOther() {
  15. defer qu.Catch()
  16. DelaySiteMap = map[string]*DelaySite{}
  17. DelaySiteLock = &sync.Mutex{}
  18. go func() {
  19. for {
  20. list, _ := MgoS.Find("spider_compete", nil, nil, nil, false, -1, -1)
  21. for _, l := range *list {
  22. site := qu.ObjToString(l["site"])
  23. delayTime := qu.IntAll(l["delaytime"])
  24. compete, _ := l["compete"].(bool)
  25. DelaySiteLock.Lock()
  26. DelaySiteMap[site] = &DelaySite{
  27. DelayTime: delayTime,
  28. Compete: compete,
  29. }
  30. DelaySiteLock.Unlock()
  31. }
  32. logger.Info("重载延时采集配置...")
  33. time.Sleep(12 * time.Hour)
  34. }
  35. }()
  36. }
  37. // 获取第day天凌晨的时间戳
  38. func GetTime(day int) int64 {
  39. nowTime := time.Now().AddDate(0, 0, day)
  40. timeStr := util.FormatDate(&nowTime, util.Date_Short_Layout)
  41. t, _ := time.ParseInLocation(util.Date_Short_Layout, timeStr, time.Local)
  42. return t.Unix()
  43. }
  44. // 获取第day天凌晨的时间字符串
  45. func GetStrTime(day int) string {
  46. nowTime := time.Now().AddDate(0, 0, day)
  47. timeStr := util.FormatDate(&nowTime, util.Date_Short_Layout)
  48. return timeStr
  49. }