@@ -1,573 +0,0 @@
-package main
-
-import (
-	"bufio"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"net"
-	"net/http"
-	"qfw/util"
-	"qfw/util/mail"
-	"qfw/util/mongodb"
-	"sort"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/influxdata/influxdb-client"
-)
-
-var (
-	config map[string]interface{} //global configuration
-	mgo mongodb.MongodbSim //MongoDB connection
-	Spiders []*spiderobj //all spiders loaded in the current round
-	Requestthread chan bool //semaphore bounding concurrent requests
-	Brequestbody int //whether to read the response body
-	wg = &sync.WaitGroup{}
-	msave = []map[string]interface{}{}
-	lock *sync.Mutex = new(sync.Mutex)
-	checkcoll string
-	totalcoll string
-	Client *http.Client
-	Retry = 2 //request retry count
-	reqduration = 2 //request delay, seconds
-	taskduration = 20 //interval between check rounds, minutes
-	dayreport = -1 //hour of day to report the previous day's results; -1 disables reporting
-	alarmmode = 0 //alarm mode: 0 alert on every error; 1 a spider alerts only once per day; 2 consecutive errors alert once, errors with gaps alert each time
-	MAP_STATUS map[int][]*spiderobj //response code -> spiders
-	MAP_ALARM = map[string]bool{} //codes of spiders already alarmed
-	stopspidercount int
-	excludecode = map[int]bool{}
-	alarmtime = []int{}
-	Mail *mail.MailAuth
-	useremail = []string{}
-	MAP_site_all map[string]int
-	MAP_site_run map[string]int
-	MAP_site_stop map[string]int
-	MAP_site_error map[string][]*spiderobj
-)
-
-type spiderobj struct {
-	Id string //spider id
-	Site string //site
-	Channel string //channel
-	Code string //spider code
-	ListUrl string //list URL
-	Cuser string //creator
-	Muser string //last modifier
-	Mtime int64 //modification time
-	Status int //spider status
-	I_old int //whether this is an old spider {luacontent:{$exists:1}}
-	ResponseCode int //response status code
-	ResponseStr string //response status text
-	ResponseBody string //response body
-	Requesttime int64 //request time
-	I_err int //1 means the check failed
-}
-
-func init() {
-	//load configuration
-	util.ReadConfig(&config)
-	//initialize InfluxDB
-	InitInfluxdb(fmt.Sprint(config["influxdb"]))
-	mgo = mongodb.MongodbSim{
-		MongodbAddr: config["mongodbServers"].(string),
-		Size:        util.IntAll(config["mongodbPoolSize"]),
-		DbName:      config["mongodbName"].(string),
-	}
-	mgo.InitPool()
-	Requestthread = make(chan bool, util.IntAllDef(config["requestthread"], 20))
-	Brequestbody = util.IntAll(config["brequestbody"])
-	checkcoll = util.ObjToString(config["checkcoll"])
-	totalcoll = util.ObjToString(config["totalcoll"])
-	reqduration = util.IntAll(config["reqduration"])
-	Retry = util.IntAll(config["requestretry"])
-	taskduration = util.IntAll(config["taskduration"])
-	dayreport = util.IntAll(config["dayreport"])
-	alarmmode = util.IntAll(config["alarmmode"])
-	excludecodearr := config["excludecode"].([]interface{})
-	for _, cv := range excludecodearr {
-		excludecode[util.IntAll(cv)] = true
-	}
-
-	useremailarr := config["useremail"].([]interface{})
-	for _, cv := range useremailarr {
-		useremail = append(useremail, cv.(string))
-	}
-	for _, cv := range config["alarmtime"].([]interface{}) {
-		alarmtime = append(alarmtime, util.IntAll(cv))
-	}
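-	// The custom transport below gives every probe its own connection
-	// deadline and disables keep-alives, so a hung site cannot pin a
-	// checker goroutine or hold sockets open between rounds.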
-	Client = &http.Client{Transport: &http.Transport{
-		Dial: func(netw, addr string) (net.Conn, error) {
-			deadline := time.Now().Add(time.Duration(reqduration) * time.Second)
-			c, err := net.DialTimeout(netw, addr, time.Duration(reqduration*2)*time.Second)
-			if err != nil {
-				return nil, err
-			}
-			tcp_conn := c.(*net.TCPConn)
-			tcp_conn.SetKeepAlive(false)
-			tcp_conn.SetDeadline(deadline)
-			return tcp_conn, nil
-		},
-		DisableKeepAlives: true,
-	}}
-	M := config["smtp"].(map[string]interface{})
-	Mail = &mail.MailAuth{M["addr"].(string),
-		util.IntAll(M["port"]),
-		util.ObjToString(M["user"]),
-		util.ObjToString(M["pwd"]),
-	}
-
- log.Println("dayreport:", dayreport, config)
|
|
|
|
- if dayreport > 0 {
|
|
|
|
- //启动线程,报告每天监测情况
|
|
|
|
- go func() {
|
|
|
|
- for {
|
|
|
|
- tn := time.Now()
|
|
|
|
- nowh := tn.Hour()
|
|
|
|
- var tnext time.Time
|
|
|
|
- if nowh >= dayreport {
|
|
|
|
- tnext = time.Date(tn.Year(), tn.Month(), tn.Day()+1, dayreport, 0, 0, 0, tn.Location())
|
|
|
|
- } else {
|
|
|
|
- tnext = time.Date(tn.Year(), tn.Month(), tn.Day(), dayreport, 0, 0, 0, tn.Location())
|
|
|
|
- }
|
|
|
|
- t := time.NewTimer(tnext.Sub(tn))
|
|
|
|
- select {
|
|
|
|
- case <-t.C:
|
|
|
|
- log.Println("定时报告任务")
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }()
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
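-//checktask reschedules itself with time.AfterFunc, so main only needs to
-//start the first round and then block.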
-func main() {
-	go checktask()
-	go ClearMap()
-	select {} //block forever; all work happens in the goroutines above
-}
-
-//checktask runs one full health-check round over all enabled spiders
-func checktask() {
-	func() {
-		defer util.Catch()
-		MAP_site_all = map[string]int{}
-		MAP_site_run = map[string]int{}
-		MAP_site_stop = map[string]int{}
-		MAP_site_error = map[string][]*spiderobj{}
-		//load all spiders: site name, code, list URL, status, author, modification time
-		res, b := mgo.Find("luaconfig", nil, nil, `{"param_common":1,"modifytime":1,"createuser":1,"modifyuser":1,"code":1,"iupload":1,"luacontent":1}`, false, -1, -1)
-		Spiders = []*spiderobj{}
-		stopspidercount = 0
-		if b && res != nil && len(*res) > 0 {
-			for _, spider := range *res {
-				sp := &spiderobj{}
-				sp.Status = util.IntAll(spider["iupload"])
-				sp.Cuser = util.ObjToString(spider["createuser"])
-				if spider["param_common"] != nil {
-					pc := spider["param_common"].([]interface{})
-					if len(pc) > 1 && len(pc) < 10 {
-						sp.Site = util.ObjToString(pc[1])
-						if len(pc) > 2 {
-							sp.Channel = util.ObjToString(pc[2])
-						}
-					} else if len(pc) > 12 {
-						sp.ListUrl = util.ObjToString(pc[11])
-						sp.Channel = util.ObjToString(pc[2])
-						sp.Site = util.ObjToString(pc[1])
-					} else {
-						continue
-					}
-				}
-				if sp.Status == 1 {
-					sp.Id = util.BsonIdToSId(spider["_id"])
-					if spider["luacontent"] != nil {
-						sp.I_old = 1
-						//old spider: pull the list URL out of the Lua script itself
-						con := spider["luacontent"].(string)
-						sr := strings.NewReader(con)
-						br := bufio.NewReader(sr)
-						n := 0
-						siteUrl := ""
-						for n < 150 {
-							n++
-							str, e := br.ReadString('\n')
-							if e != nil {
-								break
-							}
-							if strings.HasPrefix(str, "local siteUrl") {
-								siteUrl = str[strings.Index(str, `"`)+1 : strings.LastIndex(str, `"`)]
-							} else if strings.HasPrefix(str, "spiderTargetChannelUrl") {
-								if strings.Index(str, "siteUrl") > 0 {
-									sp.ListUrl = siteUrl
-								} else {
-									s1, s2 := strings.Index(str, `"`), strings.LastIndex(str, `"`)
-									sp.ListUrl = str[s1+1 : s2]
-								}
-								break
-							}
-						}
-					}
-					sp.Mtime = util.Int64All(spider["modifytime"])
-					sp.Muser = util.ObjToString(spider["modifyuser"])
-					sp.Code = util.ObjToString(spider["code"])
-					if sp.ListUrl != "" {
-						if !strings.HasPrefix(sp.ListUrl, "http") {
-							sp.ListUrl = "http://" + sp.ListUrl
-						}
-						Spiders = append(Spiders, sp)
-					}
-					MAP_site_run[sp.Site]++
-					MAP_site_all[sp.Site]++
-				} else {
-					stopspidercount++
-					MAP_site_stop[sp.Site]++
-					MAP_site_all[sp.Site]++
-				}
-			}
-		}
-		log.Println("load url size:", len(Spiders), "stopped spider count:", stopspidercount)
-		tn := time.Now()
-		now := tn.Unix()
-		year := tn.Year()
-		mon := tn.Month()
-		day := tn.Day()
-		hour := tn.Hour()
-		minute := tn.Minute()
-		reqn := 0
-		MAP_STATUS = map[int][]*spiderobj{}
-		//interleave the spiders site by site so requests rotate across sites
-		NewSP := make(map[string]chan *spiderobj)
-		for _, sp1 := range Spiders {
-			chansp := NewSP[sp1.Site]
-			if chansp == nil {
-				chansp = make(chan *spiderobj, MAP_site_run[sp1.Site])
-			}
-			chansp <- sp1
-			NewSP[sp1.Site] = chansp
-		}
-		Newspiders := []*spiderobj{}
-		for len(NewSP) > 0 {
-			for site, chansp := range NewSP {
-				sp := <-chansp
-				Newspiders = append(Newspiders, sp)
-				MAP_site_run[site]--
-				if MAP_site_run[site] == 0 {
-					delete(NewSP, site)
-				}
-			}
-		}
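-		// Note: the interleave drains MAP_site_run's counters to zero; only
-		// len(MAP_site_run) is read after this point, so the summary email's
-		// site counts are unaffected.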
-		for _, sp1 := range Newspiders {
-			Requestthread <- true
-			wg.Add(1)
-			go func(sp *spiderobj) {
-				defer func() {
-					<-Requestthread
-					wg.Done()
-				}()
-				res, err := Client.Get(sp.ListUrl)
-				//retry failed requests, backing off briefly before each attempt
-				for i := 0; i < Retry && err != nil; i++ {
-					time.Sleep(time.Duration(reqduration/5) * time.Second)
-					res, err = Client.Get(sp.ListUrl)
-				}
- restr := ""
|
|
|
|
- if err != nil {
|
|
|
|
- sp.I_err = 1
|
|
|
|
- restr = err.Error()
|
|
|
|
- if res != nil {
|
|
|
|
- if res.Body != nil {
|
|
|
|
- defer res.Body.Close()
|
|
|
|
- }
|
|
|
|
- sp.ResponseCode = res.StatusCode
|
|
|
|
- sp.ResponseStr = res.Status
|
|
|
|
- } else {
|
|
|
|
- sp.ResponseCode = 600
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- defer res.Body.Close()
|
|
|
|
- sp.ResponseCode = res.StatusCode
|
|
|
|
- sp.ResponseStr = res.Status
|
|
|
|
- if Brequestbody == 1 {
|
|
|
|
- bs, e := ioutil.ReadAll(res.Body)
|
|
|
|
- if e == nil {
|
|
|
|
- restr = string(bs)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- sp.Requesttime = time.Now().Unix()
|
|
|
|
- sp.ResponseBody = restr
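-				// Field prefixes follow the collection's apparent convention:
-				// s_ string, l_ int64, i_ int.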
-				m := map[string]interface{}{
-					"s_spiderid":    sp.Id,
-					"l_time":        now,
-					"l_modifytime":  sp.Mtime,
-					"s_modifyuser":  sp.Muser,
-					"s_listurl":     sp.ListUrl,
-					"s_site":        sp.Site,
-					"s_channel":     sp.Channel,
-					"i_res_code":    sp.ResponseCode,
-					"s_res_codestr": sp.ResponseStr,
-					"s_res_body":    sp.ResponseBody,
-					"s_code":        sp.Code,
-					"l_requesttime": sp.Requesttime,
-					"i_oldspider":   sp.I_old,
-					"i_err":         sp.I_err,
-					"year":          year,
-					"month":         mon,
-					"day":           day,
-					"hour":          hour,
-					"minute":        minute,
-				}
-				lock.Lock()
-				ss := MAP_STATUS[sp.ResponseCode]
-				if ss == nil {
-					ss = []*spiderobj{}
-				}
-				ss = append(ss, sp)
-				MAP_STATUS[sp.ResponseCode] = ss
-				msave = append(msave, m)
-				if len(msave) >= 100 {
-					reqn += len(msave)
-					//go mgo.SaveBulk(checkcoll, msave...)
-					msave = []map[string]interface{}{}
-					log.Println("save...", reqn)
-				}
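-				// mgo.SaveBulk is commented out above, so batches are only
-				// counted and logged rather than persisted to checkcoll.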
-				if sp.ResponseCode != 200 {
-					if sp.Channel == "" {
-						sp.Channel = sp.Site
-					}
-					InsertInto(
-						"jy_logs",
-						"sp_healthcheck",
-						[]influxdb.Tag{
-							{Key: "s_site", Value: sp.Site},
-							{Key: "s_errcode", Value: "s_" + fmt.Sprint(sp.ResponseCode)},
-							{Key: "s_code", Value: sp.Code},
-						},
-						map[string]interface{}{
-							"channel":  "<a href='" + sp.ListUrl + "' target='_blank'>" + sp.Channel + "</a>",
-							"code":     sp.Code,
-							"code_del": "<a href='http://test.qmx.top:6123/delete/bycode?code=" + sp.Code + "' target='_blank'>delete</a>",
-							"cuser":    sp.Cuser,
-							"muser":    sp.Muser,
-							"repcode":  sp.ResponseCode,
-							"reqtime":  sp.Requesttime,
-							"i_old":    sp.I_old,
-						},
-						time.Now(),
-						"7d",
-					)
-				}
-				lock.Unlock()
-			}(sp1)
-			//stagger goroutine launches slightly
-			time.Sleep(150 * time.Millisecond)
-		}
-		wg.Wait()
-		lock.Lock()
-		if len(msave) > 0 {
-			reqn += len(msave)
-			//go mgo.SaveBulk(checkcoll, msave...)
-			msave = []map[string]interface{}{}
-			log.Println("save...", reqn)
-		}
-		lock.Unlock()
-		log.Println("request over...")
-		//send alarms
-		alarmtask()
-	}()
-	time.AfterFunc(time.Duration(taskduration)*time.Minute, checktask)
-}
-
-var (
-	html = `<style>td{border-width: 1px;padding: 1px;border-style: solid;border-color: #666666;background-color: #ffffff;} table{margin:5px;border-collapse: collapse;border-width: 1px;border-color: #666666;} div{font-size:16px;line-height:1.5em} .sp{font-size:14px}</style>` +
-		`<div class="row"><table><tr><td>Total spiders: %d</td><td>Running: %d</td><td>Paused: %d</td><td>Running with errors: %d</td></tr><tr><td>Total sites: %d</td><td>Running: %d</td><td>Paused: %d</td><td>Running with errors: %d</td></tr></table></div>` +
-		`<div class="row">%s</div>`
-	as = `<div class='mode'>Alarm mode: %s</div>`
-	//errsite = `<div class='cls'>Failing site detail:<table><tr style='font-weight:bold'><td style='width:230px'>Site</td><td style='width:480px'>Failing spiders</td></tr>%s</table></div>`
-	//errsitetr = `<tr><td>%s</td><td>%s</td></tr>`
-	//cs is filled in two Sprintf passes: the first injects the status code and
-	//counts while passing "%s" through as the last argument, leaving a verb
-	//that the row markup is substituted into later.
-	cs = `<div class='cls'><div style='font-weight:bold;margin:5px;color:red'>Error status code: %d | failing spiders: %d | failing sites: %d</div><table><tr style='font-weight:bold'><td style='width:230px'>Site</td><td style='width:195px'>Code</td><td style='width:65px'>Maintainer</td><td style='width:180px'>Channel</td><td style='width:380px'>URL</td></tr>%s</table></div>`
-	ss = `<tr>%s<td>%s</td><td>%s</td><td>%s</td><td>`
-)
-
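-//alarmLock serializes alarmtask: rounds are rescheduled with time.AfterFunc,
-//and a slow round could otherwise overlap the next one.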
-var alarmLock = sync.Mutex{}
-
-//alarmtask aggregates the current round's failures and emails the alarm report
-func alarmtask() {
-	alarmLock.Lock()
-	defer alarmLock.Unlock()
-	statuscode := []int{}
-	for c := range MAP_STATUS {
-		statuscode = append(statuscode, c)
-	}
-	sort.Ints(statuscode)
-	//only alarm inside the configured time-of-day window
-	tn := time.Now().Hour()
-	if tn >= alarmtime[0] && tn <= alarmtime[1] {
-		if len(MAP_STATUS[200]) == len(Spiders) {
-			//every spider returned 200 this round; nothing to report
-		} else {
-			as1 := ""
-			allcs := ""
-			cs1 := ""
-			ss1 := ""
-			errcount := 0
-			switch alarmmode {
-			case 0: //alert on every occurrence
-				as1 = fmt.Sprintf(as, "alert on every error")
-				for _, kc := range statuscode {
-					kv := MAP_STATUS[kc]
-					if !excludecode[kc] {
-						errcount += len(kv)
-						msite := map[string]bool{}
-						//group the failing spiders by site
-						minMap := map[string][]*spiderobj{}
-						for _, sp := range kv {
-							sp1 := minMap[sp.Site]
-							if sp1 == nil {
-								sp1 = []*spiderobj{}
-							}
-							sp1 = append(sp1, sp)
-							minMap[sp.Site] = sp1
-						}
-						for _, sp1 := range minMap {
-							for n, sp := range sp1 {
-								MAP_ALARM[sp.Code] = true
-								if n == 0 {
-									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								} else {
-									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								}
-								asp := MAP_site_error[sp.Site]
-								if asp == nil {
-									asp = []*spiderobj{}
-								}
-								asp = append(asp, sp)
-								MAP_site_error[sp.Site] = asp
-								msite[sp.Site] = true
-							}
-						}
-						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
-						allcs += fmt.Sprintf(cs1, ss1)
-						ss1 = ""
-					}
-				}
-			case 1: //one alert per spider per day
-				as1 = fmt.Sprintf(as, "a failing spider is alarmed only once per day")
-				for _, kc := range statuscode {
-					kv := MAP_STATUS[kc]
-					if !excludecode[kc] {
-						msite := map[string]bool{}
-						minMap := map[string][]*spiderobj{}
-						for _, sp := range kv {
-							if !MAP_ALARM[sp.Code] {
-								errcount++
-								sp1 := minMap[sp.Site]
-								if sp1 == nil {
-									sp1 = []*spiderobj{}
-								}
-								sp1 = append(sp1, sp)
-								minMap[sp.Site] = sp1
-							}
-						}
-						for _, sp1 := range minMap {
-							for n, sp := range sp1 {
-								MAP_ALARM[sp.Code] = true
-								if n == 0 {
-									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								} else {
-									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								}
-								asp := MAP_site_error[sp.Site]
-								if asp == nil {
-									asp = []*spiderobj{}
-								}
-								asp = append(asp, sp)
-								MAP_site_error[sp.Site] = asp
-								msite[sp.Site] = true
-							}
-						}
-						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
-						allcs += fmt.Sprintf(cs1, ss1)
-						ss1 = ""
-					}
-				}
-			case 2: //consecutive errors alert once; errors with gaps alert again
-				as1 = fmt.Sprintf(as, "consecutive errors on the same spider are alarmed only once")
-				for _, kc := range statuscode {
-					kv := MAP_STATUS[kc]
-					if !excludecode[kc] {
-						msite := map[string]bool{}
-						minMap := map[string][]*spiderobj{}
-						for _, sp := range kv {
-							if !MAP_ALARM[sp.Code] {
-								errcount++
-								MAP_ALARM[sp.Code] = true
-								sp1 := minMap[sp.Site]
-								if sp1 == nil {
-									sp1 = []*spiderobj{}
-								}
-								sp1 = append(sp1, sp)
-								minMap[sp.Site] = sp1
-							}
-						}
-						for _, sp1 := range minMap {
-							for n, sp := range sp1 {
-								MAP_ALARM[sp.Code] = true
-								if n == 0 {
-									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								} else {
-									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								}
-								asp := MAP_site_error[sp.Site]
-								if asp == nil {
-									asp = []*spiderobj{}
-								}
-								asp = append(asp, sp)
-								MAP_site_error[sp.Site] = asp
-								msite[sp.Site] = true
-							}
-						}
-						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
-						allcs += fmt.Sprintf(cs1, ss1)
-						ss1 = ""
-					} else {
-						//an excluded (healthy) code clears the alarm flag, so a
-						//spider that recovers and later fails again alarms once more
-						for _, sp := range kv {
-							if MAP_ALARM[sp.Code] {
-								delete(MAP_ALARM, sp.Code)
-							}
-						}
-					}
-				}
-			}
-
-			if allcs != "" {
-				// errstr := ""
-				// for k1, v1 := range MAP_site_error {
-				// 	strsp1 := ""
-				// 	for _, sp1 := range v1 {
-				// 		strsp1 += sp1.Code + ":" + sp1.ListUrl + "<br/>"
-				// 	}
-				// 	errstr += fmt.Sprintf(errsitetr, k1, strsp1)
-				// }
-				str := fmt.Sprintf(html, len(Spiders)+stopspidercount, len(Spiders), stopspidercount, errcount, len(MAP_site_all), len(MAP_site_run), len(MAP_site_stop), len(MAP_site_error), as1+allcs)
-				for _, email := range useremail {
-					SendEmail(email, "<div>"+str+"</div>", fmt.Sprintf("Spider alarm - %s", time.Now().Format(util.Date_Full_Layout)))
-				}
-			}
-		}
-	}
-}
-
-//SendEmail sends the alarm report to a single recipient
-func SendEmail(email, str, title string) {
-	mail.SendMail(Mail, &mail.Message{title, "Spider Site Monitoring System", []string{email}, str})
-}