123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- package main
- import (
- "encoding/json"
- "fmt"
- "github.com/donnie4w/go-logger/logger"
- "io/ioutil"
- "os"
- "regexp"
- util "spider_chromedp/chromedp/mfw"
- "time"
- )
- var (
- timeReg = regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2}")
- client *util.Client
- Channel chan bool
- )
- func InitChromedpConfig() {
- //初始浏览器实例
- BrowserGroup = make(chan *Browser, BrowserSize)
- Channel = make(chan bool, BrowserSize)
- }
- func main() {
- //日志相关
- logger.SetConsole(false)
- logger.SetLevel(logger.LEVEL_DEBUG)
- logger.SetRollingDaily("./logs", "chrome.log")
- go clearLogs()
- //
- InitChromedpConfig() //初始化参数
- //InitRedisClient(ChromedpConfig["redisaddr"].(string)) //初始化Redis
- InitBrowserGroup() //初始化浏览器实例组
- InitServer() //初始化消息服务
- go sendIdleMsg()
- ch := make(chan bool)
- <-ch
- }
- func sendIdleMsg() {
- for {
- time.Sleep(5 * time.Second)
- if chlen := len(Channel); chlen < BrowserSize {
- myid := client.GetMyclient()
- client.WriteObj(myid, "", util.SENDTO_TYPE_IDLE_SERVER, -1, map[string]interface{}{myid: BrowserSize - chlen})
- }
- }
- }
- func clearLogs() {
- fmt.Println("=======clearLogs========")
- timeInt := time.Now().AddDate(0, 0, -30).Unix()
- dirs, err := ioutil.ReadDir("./logs")
- if err == nil {
- for _, f := range dirs {
- fname := f.Name()
- logTimeStr := timeReg.FindString(fname)
- if logTimeStr == "" {
- continue
- }
- logTimeInt, _ := time.ParseInLocation("2006-01-02", logTimeStr, time.Local)
- if logTimeInt.Unix() < timeInt {
- os.Remove("./logs/" + fname)
- }
- }
- }
- time.AfterFunc(24*time.Hour, clearLogs)
- }
- func InitServer() {
- cf := &util.ClientConfig{
- ClientName: ServerName,
- EventHandler: processevent,
- MsgServerAddr: ServerAddr,
- CanHandleEvents: []int{util.SERVICE_DOWNLOAD},
- //OnRequestConnect: func() {},
- OnConnectSuccess: func() {
- fmt.Println("join...")
- },
- ReadBufferSize: 200,
- WriteBufferSize: 200,
- }
- client, _ = util.NewClient(cf)
- }
- func processevent(p *util.Packet) {
- defer Catch()
- Channel <- true
- event := int(p.Event)
- switch event {
- case util.SERVICE_DOWNLOAD: //监听下载服务
- task := &ChromedpTask{}
- json.Unmarshal(p.GetBusinessData(), &task)
- var ret []string
- fmt.Println(*task)
- if task.TimeOut <= 0 {
- task.TimeOut = ChromeTaskTimeOut
- }
- if task.Flow {
- ret = DownloadHtmlByChromedpForFlow(task) //chromedp下载页面(列表页、详情页顺序采集)
- } else {
- ret = DownloadHtmlByChromedp(task) //chromedp下载页面(列表页、详情页分开采集)
- }
- //if len(ret) == 0 { //给默认值
- // ret = []string{""}
- //}
- client.WriteObj(p.From, p.Msgid, util.EVENT_RECIVE_CALLBACK, util.SENDTO_TYPE_P2P, ret)
- //写入,返回
- <-Channel
- default:
- <-Channel
- }
- }
|