main.go 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/donnie4w/go-logger/logger"
  6. "io/ioutil"
  7. "os"
  8. "regexp"
  9. util "spider_chromedp/chromedp/mfw"
  10. "time"
  11. )
  12. var (
  13. timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
  14. client *util.Client
  15. Channel chan bool
  16. )
  17. func InitChromedpConfig() {
  18. //初始浏览器实例
  19. BrowserGroup = make(chan *Browser, BrowserSize)
  20. Channel = make(chan bool, BrowserSize)
  21. }
  22. func main() {
  23. //日志相关
  24. logger.SetConsole(false)
  25. logger.SetLevel(logger.LEVEL_DEBUG)
  26. logger.SetRollingDaily("./logs", "chrome.log")
  27. go clearLogs()
  28. //
  29. InitChromedpConfig() //初始化参数
  30. //InitRedisClient(ChromedpConfig["redisaddr"].(string)) //初始化Redis
  31. InitBrowserGroup() //初始化浏览器实例组
  32. InitServer() //初始化消息服务
  33. go sendIdleMsg()
  34. ch := make(chan bool)
  35. <-ch
  36. }
  37. func sendIdleMsg() {
  38. for {
  39. time.Sleep(5 * time.Second)
  40. if chlen := len(Channel); chlen < BrowserSize {
  41. myid := client.GetMyclient()
  42. client.WriteObj(myid, "", util.SENDTO_TYPE_IDLE_SERVER, -1, map[string]interface{}{myid: BrowserSize - chlen})
  43. }
  44. }
  45. }
  46. func clearLogs() {
  47. fmt.Println("=======clearLogs========")
  48. timeInt := time.Now().AddDate(0, 0, -30).Unix()
  49. dirs, err := ioutil.ReadDir("./logs")
  50. if err == nil {
  51. for _, f := range dirs {
  52. fname := f.Name()
  53. logTimeStr := timeReg.FindString(fname)
  54. if logTimeStr == "" {
  55. continue
  56. }
  57. logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
  58. if logTimeInt.Unix() < timeInt {
  59. os.Remove("./logs/" + fname)
  60. }
  61. }
  62. }
  63. time.AfterFunc(24*time.Hour, clearLogs)
  64. }
  65. func InitServer() {
  66. cf := &util.ClientConfig{
  67. ClientName: ServerName,
  68. EventHandler: processevent,
  69. MsgServerAddr: ServerAddr,
  70. CanHandleEvents: []int{util.SERVICE_DOWNLOAD},
  71. //OnRequestConnect: func() {},
  72. OnConnectSuccess: func() {
  73. fmt.Println("join...")
  74. },
  75. ReadBufferSize: 200,
  76. WriteBufferSize: 200,
  77. }
  78. client, _ = util.NewClient(cf)
  79. }
  80. func processevent(p *util.Packet) {
  81. defer Catch()
  82. Channel <- true
  83. event := int(p.Event)
  84. switch event {
  85. case util.SERVICE_DOWNLOAD: //监听下载服务
  86. task := &ChromedpTask{}
  87. json.Unmarshal(p.GetBusinessData(), &task)
  88. var ret []string
  89. fmt.Println(*task)
  90. if task.TimeOut <= 0 {
  91. task.TimeOut = ChromeTaskTimeOut
  92. }
  93. if task.Flow {
  94. ret = DownloadHtmlByChromedpForFlow(task) //chromedp下载页面(列表页、详情页顺序采集)
  95. } else {
  96. ret = DownloadHtmlByChromedp(task) //chromedp下载页面(列表页、详情页分开采集)
  97. }
  98. //if len(ret) == 0 { //给默认值
  99. // ret = []string{""}
  100. //}
  101. client.WriteObj(p.From, p.Msgid, util.EVENT_RECIVE_CALLBACK, util.SENDTO_TYPE_P2P, ret)
  102. //写入,返回
  103. <-Channel
  104. default:
  105. <-Channel
  106. }
  107. }