maxiaoshan 4 years ago
parent
commit
aea3bb2774
4 changed files with 979 additions and 596 deletions
  1. src/spidersitecheck/config.json  +14 -9
  2. src/spidersitecheck/main.go  +558 -558
  3. src/spidersitecheck/main_new.go  +378 -0
  4. src/spidersitecheck/task.go  +29 -29

+ 14 - 9
src/spidersitecheck/config.json

@@ -1,7 +1,7 @@
 {
-    "mongodbServers": "192.168.3.207:27080",
+    "mongodbServers": "192.168.3.207:27092",
     "mongodbPoolSize": "5",
-    "mongodbName": "spider",
+    "mongodbName": "editor",
     "influxdb": "https://jianyu:Topnet@20150501@baobiao.jianyu360.cn",
     "checkcoll": "spidersitecheck",
     "totalcoll": "spidersitetotal",
@@ -10,25 +10,30 @@
         23
     ],
     "useremail": [
-        "renzheng@topnet.net.cn"
+        "maxiaoshan@topnet.net.cn"
     ],
     "smtp": {
         "addr": "smtp.exmail.qq.com",
         "port": 465,
-        "user": "qyfw@topnet.net.cn",
-        "pwd": "QYfw@123456",
+        "user": "public03@topnet.net.cn",
+        "pwd": "ue9Rg9Sf4CVtdm5a",
         "fromuser": "爬虫监控",
         "title": "爬虫监控预警"
     },
-    "taskduration": 40,
+    "taskduration": 1440,
     "maxalarmcount": 1,
-    "requestthread": 65,
+    "requestthread": 60,
     "brequestbody": 0,
     "requestretry": 2,
-    "reqduration": 16,
+    "reqduration": 1,
     "dayreport": 8,
-    "alarmmode": 1,
+    "alarmmode": 0,
     "excludecode": [
         200
+    ],
+    "requestime":[
+    	"8",
+		"12",
+		"17"
     ]
 }

+ 558 - 558
src/spidersitecheck/main.go

@@ -1,573 +1,573 @@
 package main
 
-import (
-	"bufio"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"net"
-	"net/http"
-	"qfw/util"
-	"qfw/util/mail"
-	"qfw/util/mongodb"
-	"sort"
-	"strings"
-	"sync"
-	"time"
+// import (
+// 	"bufio"
+// 	"fmt"
+// 	"io/ioutil"
+// 	"log"
+// 	"net"
+// 	"net/http"
+// 	"qfw/util"
+// 	"qfw/util/mail"
+// 	"qfw/util/mongodb"
+// 	"sort"
+// 	"strings"
+// 	"sync"
+// 	"time"
 
-	"github.com/influxdata/influxdb-client"
-)
+// 	"github.com/influxdata/influxdb-client"
+// )
 
-var (
-	config          map[string]interface{} //全局配置文件
-	mgo             mongodb.MongodbSim     //mongodb连接
-	Spiders         []*spiderobj           //每次所有加载爬虫
-	Requestthread   chan bool              //请求线程数
-	Brequestbody    int                    //是否解析请求正文
-	wg                                     = &sync.WaitGroup{}
-	msave                                  = []map[string]interface{}{}
-	lock            *sync.Mutex            = new(sync.Mutex)
-	checkcoll       string
-	totalcoll       string
-	Client          *http.Client
-	Retry           = 2                  //请求重试次数
-	reqduration     = 2                  //请求延时
-	taskduration    = 20                 //每次检测间隔,分钟
-	dayreport       = -1                 //每天整点报告前一天的结果 -1不报告
-	alarmmode       = 0                  //报警模式,0 每次出错每次报警 1天一个爬虫只报一次 2 连续出错只报一次,间隔出错报多次
-	MAP_STATUS      map[int][]*spiderobj //返回代码-爬虫
-	MAP_ALARM       = map[string]bool{}  //报警MAP
-	stopspidercount int
-	excludecode     = map[int]bool{}
-	alarmtime       = []int{}
-	Mail            *mail.MailAuth
-	useremail       = []string{}
-	MAP_site_all    map[string]int
-	MAP_site_run    map[string]int
-	MAP_site_stop   map[string]int
-	MAP_site_error  map[string][]*spiderobj
-)
+// var (
+// 	config          map[string]interface{} //全局配置文件
+// 	mgo             mongodb.MongodbSim     //mongodb连接
+// 	Spiders         []*spiderobj           //每次所有加载爬虫
+// 	Requestthread   chan bool              //请求线程数
+// 	Brequestbody    int                    //是否解析请求正文
+// 	wg                                     = &sync.WaitGroup{}
+// 	msave                                  = []map[string]interface{}{}
+// 	lock            *sync.Mutex            = new(sync.Mutex)
+// 	checkcoll       string
+// 	totalcoll       string
+// 	Client          *http.Client
+// 	Retry           = 2                  //请求重试次数
+// 	reqduration     = 2                  //请求延时
+// 	taskduration    = 20                 //每次检测间隔,分钟
+// 	dayreport       = -1                 //每天整点报告前一天的结果 -1不报告
+// 	alarmmode       = 0                  //报警模式,0 每次出错每次报警 1天一个爬虫只报一次 2 连续出错只报一次,间隔出错报多次
+// 	MAP_STATUS      map[int][]*spiderobj //返回代码-爬虫
+// 	MAP_ALARM       = map[string]bool{}  //报警MAP
+// 	stopspidercount int
+// 	excludecode     = map[int]bool{}
+// 	alarmtime       = []int{}
+// 	Mail            *mail.MailAuth
+// 	useremail       = []string{}
+// 	MAP_site_all    map[string]int
+// 	MAP_site_run    map[string]int
+// 	MAP_site_stop   map[string]int
+// 	MAP_site_error  map[string][]*spiderobj
+// )
 
-type spiderobj struct {
-	Id           string //爬虫id
-	Site         string //站点
-	Channel      string //爬虫栏目
-	Code         string //爬虫代码
-	ListUrl      string //列表url
-	Cuser        string
-	Muser        string //修改人
-	Mtime        int64  //修改时间
-	Status       int    //爬虫状态
-	I_old        int    //是否是老爬虫{luacontent:{$exists:1}}
-	ResponseCode int    //响应码
-	ResponseStr  string //响应码串
-	ResponseBody string //响应内容
-	Requesttime  int64  //请求时间
-	I_err        int    //1,有错
-}
+// type spiderobj struct {
+// 	Id           string //爬虫id
+// 	Site         string //站点
+// 	Channel      string //爬虫栏目
+// 	Code         string //爬虫代码
+// 	ListUrl      string //列表url
+// 	Cuser        string
+// 	Muser        string //修改人
+// 	Mtime        int64  //修改时间
+// 	Status       int    //爬虫状态
+// 	I_old        int    //是否是老爬虫{luacontent:{$exists:1}}
+// 	ResponseCode int    //响应码
+// 	ResponseStr  string //响应码串
+// 	ResponseBody string //响应内容
+// 	Requesttime  int64  //请求时间
+// 	I_err        int    //1,有错
+// }
 
-func init() {
-	//加载配置
-	util.ReadConfig(&config)
-	//初始化
-	InitInfluxdb(fmt.Sprint(config["influxdb"]))
-	mgo = mongodb.MongodbSim{
-		MongodbAddr: config["mongodbServers"].(string),
-		Size:        util.IntAll(config["mongodbPoolSize"]),
-		DbName:      config["mongodbName"].(string),
-	}
-	mgo.InitPool()
-	Requestthread = make(chan bool, util.IntAllDef(config["requestthread"], 20))
-	Brequestbody = util.IntAll(config["brequestbody"])
-	checkcoll = util.ObjToString(config["checkcoll"])
-	totalcoll = util.ObjToString(config["totalcoll"])
-	reqduration = util.IntAll(config["reqduration"])
-	Retry = util.IntAll(config["requestretry"])
-	taskduration = util.IntAll(config["taskduration"])
-	dayreport = util.IntAll(config["dayreport"])
-	alarmmode = util.IntAll(config["alarmmode"])
-	excludecodearr := config["excludecode"].([]interface{})
-	for _, cv := range excludecodearr {
-		excludecode[util.IntAll(cv)] = true
-	}
+// func init() {
+// 	//加载配置
+// 	util.ReadConfig(&config)
+// 	//初始化
+// 	InitInfluxdb(fmt.Sprint(config["influxdb"]))
+// 	mgo = mongodb.MongodbSim{
+// 		MongodbAddr: config["mongodbServers"].(string),
+// 		Size:        util.IntAll(config["mongodbPoolSize"]),
+// 		DbName:      config["mongodbName"].(string),
+// 	}
+// 	mgo.InitPool()
+// 	Requestthread = make(chan bool, util.IntAllDef(config["requestthread"], 20))
+// 	Brequestbody = util.IntAll(config["brequestbody"])
+// 	checkcoll = util.ObjToString(config["checkcoll"])
+// 	totalcoll = util.ObjToString(config["totalcoll"])
+// 	reqduration = util.IntAll(config["reqduration"])
+// 	Retry = util.IntAll(config["requestretry"])
+// 	taskduration = util.IntAll(config["taskduration"])
+// 	dayreport = util.IntAll(config["dayreport"])
+// 	alarmmode = util.IntAll(config["alarmmode"])
+// 	excludecodearr := config["excludecode"].([]interface{})
+// 	for _, cv := range excludecodearr {
+// 		excludecode[util.IntAll(cv)] = true
+// 	}
 
-	useremailarr := config["useremail"].([]interface{})
-	for _, cv := range useremailarr {
-		useremail = append(useremail, cv.(string))
-	}
-	for _, cv := range config["alarmtime"].([]interface{}) {
-		alarmtime = append(alarmtime, util.IntAll(cv))
-	}
-	Client = &http.Client{Transport: &http.Transport{
-		Dial: func(netw, addr string) (net.Conn, error) {
-			deadline := time.Now().Add(time.Duration(reqduration) * time.Second)
-			c, err := net.DialTimeout(netw, addr, time.Duration(reqduration*2)*time.Second)
-			if err != nil {
-				return nil, err
-			}
-			tcp_conn := c.(*net.TCPConn)
-			tcp_conn.SetKeepAlive(false)
-			tcp_conn.SetDeadline(deadline)
-			return tcp_conn, nil
-		},
-		DisableKeepAlives: true,
-	}}
-	M := config["smtp"].(map[string]interface{})
-	Mail = &mail.MailAuth{M["addr"].(string),
-		util.IntAll(M["port"]),
-		util.ObjToString(M["user"]),
-		util.ObjToString(M["pwd"]),
-	}
+// 	useremailarr := config["useremail"].([]interface{})
+// 	for _, cv := range useremailarr {
+// 		useremail = append(useremail, cv.(string))
+// 	}
+// 	for _, cv := range config["alarmtime"].([]interface{}) {
+// 		alarmtime = append(alarmtime, util.IntAll(cv))
+// 	}
+// 	Client = &http.Client{Transport: &http.Transport{
+// 		Dial: func(netw, addr string) (net.Conn, error) {
+// 			deadline := time.Now().Add(time.Duration(reqduration) * time.Second)
+// 			c, err := net.DialTimeout(netw, addr, time.Duration(reqduration*2)*time.Second)
+// 			if err != nil {
+// 				return nil, err
+// 			}
+// 			tcp_conn := c.(*net.TCPConn)
+// 			tcp_conn.SetKeepAlive(false)
+// 			tcp_conn.SetDeadline(deadline)
+// 			return tcp_conn, nil
+// 		},
+// 		DisableKeepAlives: true,
+// 	}}
+// 	M := config["smtp"].(map[string]interface{})
+// 	Mail = &mail.MailAuth{M["addr"].(string),
+// 		util.IntAll(M["port"]),
+// 		util.ObjToString(M["user"]),
+// 		util.ObjToString(M["pwd"]),
+// 	}
 
-	log.Println("dayreport:", dayreport, config)
-	if dayreport > 0 {
-		//启动线程,报告每天监测情况
-		go func() {
-			for {
-				tn := time.Now()
-				nowh := tn.Hour()
-				var tnext time.Time
-				if nowh >= dayreport {
-					tnext = time.Date(tn.Year(), tn.Month(), tn.Day()+1, dayreport, 0, 0, 0, tn.Location())
-				} else {
-					tnext = time.Date(tn.Year(), tn.Month(), tn.Day(), dayreport, 0, 0, 0, tn.Location())
-				}
-				t := time.NewTimer(tnext.Sub(tn))
-				select {
-				case <-t.C:
-					log.Println("定时报告任务")
-				}
-			}
-		}()
-	}
-}
+// 	log.Println("dayreport:", dayreport, config)
+// 	if dayreport > 0 {
+// 		//启动线程,报告每天监测情况
+// 		go func() {
+// 			for {
+// 				tn := time.Now()
+// 				nowh := tn.Hour()
+// 				var tnext time.Time
+// 				if nowh >= dayreport {
+// 					tnext = time.Date(tn.Year(), tn.Month(), tn.Day()+1, dayreport, 0, 0, 0, tn.Location())
+// 				} else {
+// 					tnext = time.Date(tn.Year(), tn.Month(), tn.Day(), dayreport, 0, 0, 0, tn.Location())
+// 				}
+// 				t := time.NewTimer(tnext.Sub(tn))
+// 				select {
+// 				case <-t.C:
+// 					log.Println("定时报告任务")
+// 				}
+// 			}
+// 		}()
+// 	}
+// }
 
-func main() {
-	go checktask()
-	go ClearMap()
-	time.Sleep(999999 * time.Hour)
-}
+// func main() {
+// 	go checktask()
+// 	go ClearMap()
+// 	time.Sleep(999999 * time.Hour)
+// }
 
-//检测任务
-func checktask() {
-	func() {
-		defer util.Catch()
-		MAP_site_all = map[string]int{}
-		MAP_site_run = map[string]int{}
-		MAP_site_stop = map[string]int{}
-		MAP_site_error = map[string][]*spiderobj{}
-		//加载所有爬虫代码,站点名称、代码、列表url、状态、作者、修改时间
-		res, b := mgo.Find("luaconfig", nil, nil, `{"param_common":1,"modifytime":1,"createuser":1,"modifyuser":1,"code":1,"iupload":1,"luacontent":1}`, false, -1, -1)
-		Spiders = []*spiderobj{}
-		stopspidercount = 0
-		if b && res != nil && (*res) != nil && len(*res) > 0 {
-			for _, spider := range *res {
-				defer util.Catch()
-				sp := &spiderobj{}
-				sp.Status = util.IntAll(spider["iupload"])
-				sp.Cuser = util.ObjToString(spider["createuser"])
-				if spider["param_common"] != nil {
-					pc := spider["param_common"].([]interface{})
-					if len(pc) > 1 && len(pc) < 10 {
-						sp.Site = util.ObjToString(pc[1])
-						if len(pc) > 2 {
-							sp.Channel = util.ObjToString(pc[2])
-						}
-					} else if len(pc) > 12 {
-						sp.ListUrl = util.ObjToString(pc[11])
-						sp.Channel = util.ObjToString(pc[2])
-						sp.Site = util.ObjToString(pc[1])
-					} else {
-						continue
-					}
-				}
-				if sp.Status == 1 {
-					sp.Id = util.BsonIdToSId(spider["_id"])
-					if spider["luacontent"] != nil {
-						sp.I_old = 1
-						//从脚本中取
-						con := spider["luacontent"].(string)
-						sr := strings.NewReader(con)
-						br := bufio.NewReader(sr)
-						n := 0
-						siteUrl := ""
-						for n < 150 {
-							n++
-							str, e := br.ReadString('\n')
-							if e == nil {
-								if strings.HasPrefix(str, "local siteUrl") {
-									siteUrl = str[strings.Index(str, `"`)+1 : strings.LastIndex(str, `"`)]
-								} else if strings.HasPrefix(str, "spiderTargetChannelUrl") {
-									if strings.Index(str, "siteUrl") > 0 {
-										sp.ListUrl = siteUrl
-									} else {
-										s1, s2 := strings.Index(str, `"`), strings.LastIndex(str, `"`)
-										sp.ListUrl = str[s1+1 : s2]
-									}
-									break
-								}
-							} else if e != nil {
-								break
-							}
-						}
-					}
-					sp.Mtime = util.Int64All(spider["modifytime"])
-					sp.Muser = util.ObjToString(spider["modifyuser"])
-					sp.Code = util.ObjToString(spider["code"])
-					if sp.ListUrl != "" {
-						if !strings.HasPrefix(sp.ListUrl, "http") {
-							sp.ListUrl = "http://" + sp.ListUrl
-						}
-						Spiders = append(Spiders, sp)
-					}
-					MAP_site_run[sp.Site]++
-					MAP_site_all[sp.Site]++
-				} else {
-					stopspidercount++
-					MAP_site_stop[sp.Site]++
-					MAP_site_all[sp.Site]++
-				}
-			}
-		}
-		log.Println("load url size:", len(Spiders), "stopped spider count:", stopspidercount)
-		tn := time.Now()
-		now := tn.Unix()
-		year := tn.Year()
-		mon := tn.Month()
-		day := tn.Day()
-		hour := tn.Hour()
-		minute := tn.Minute()
-		reqn := 0
-		MAP_STATUS = map[int][]*spiderobj{}
-		//根据站点打乱爬虫顺序
-		NewSP := make(map[string]chan *spiderobj)
-		for _, sp1 := range Spiders {
-			chansp := NewSP[sp1.Site]
-			if chansp == nil {
-				chansp = make(chan *spiderobj, MAP_site_run[sp1.Site])
-			}
-			chansp <- sp1
-			NewSP[sp1.Site] = chansp
-		}
-		Newspiders := []*spiderobj{}
-		for {
-			if len(NewSP) == 0 {
-				break
-			}
-			for site, chansp := range NewSP {
-				sp := <-chansp
-				Newspiders = append(Newspiders, sp)
-				MAP_site_run[site]--
-				if MAP_site_run[site] == 0 {
-					delete(NewSP, site)
-				}
-			}
-		}
-		for _, sp1 := range Newspiders {
-			Requestthread <- true
-			wg.Add(1)
-			go func(sp *spiderobj) {
-				defer func() {
-					<-Requestthread
-					wg.Done()
-				}()
-				res, err := Client.Get(sp.ListUrl)
-				for i := 0; i < Retry; i++ {
-					if err != nil {
-						res, err = Client.Get(sp.ListUrl)
-						time.Sleep(time.Duration(reqduration/5) * time.Second)
-					}
-					if err == nil {
-						break
-					}
-				}
-				restr := ""
-				if err != nil {
-					sp.I_err = 1
-					restr = err.Error()
-					if res != nil {
-						if res.Body != nil {
-							defer res.Body.Close()
-						}
-						sp.ResponseCode = res.StatusCode
-						sp.ResponseStr = res.Status
-					} else {
-						sp.ResponseCode = 600
-					}
-				} else {
-					defer res.Body.Close()
-					sp.ResponseCode = res.StatusCode
-					sp.ResponseStr = res.Status
-					if Brequestbody == 1 {
-						bs, e := ioutil.ReadAll(res.Body)
-						if e == nil {
-							restr = string(bs)
-						}
-					}
-				}
-				sp.Requesttime = time.Now().Unix()
-				sp.ResponseBody = restr
-				m := map[string]interface{}{
-					"s_spiderid":    sp.Id,
-					"l_time":        now,
-					"l_modifytime":  sp.Mtime,
-					"s_modifyuser":  sp.Muser,
-					"s_listurl":     sp.ListUrl,
-					"s_site":        sp.Site,
-					"s_channel":     sp.Channel,
-					"i_res_code":    sp.ResponseCode,
-					"s_res_codestr": sp.ResponseStr,
-					"s_res_body":    sp.ResponseBody,
-					"s_code":        sp.Code,
-					"l_requesttime": sp.Requesttime,
-					"i_oldspider":   sp.I_old,
-					"i_err":         sp.I_err,
-					"year":          year,
-					"month":         mon,
-					"day":           day,
-					"hour":          hour,
-					"minute":        minute,
-				}
-				lock.Lock()
-				ss := MAP_STATUS[sp.ResponseCode]
-				if ss == nil {
-					ss = []*spiderobj{}
-				}
-				ss = append(ss, sp)
-				MAP_STATUS[sp.ResponseCode] = ss
-				msave = append(msave, m)
-				if len(msave) >= 100 {
-					reqn += len(msave)
-					//go mgo.SaveBulk(checkcoll, msave...)
-					msave = []map[string]interface{}{}
-					log.Println("save...", reqn)
-				}
-				if sp.ResponseCode != 200 {
-					if sp.Channel == "" {
-						sp.Channel = sp.Site
-					}
-					InsertInto(
-						"jy_logs",
-						"sp_healthcheck",
-						[]influxdb.Tag{
-							{Key: "s_site", Value: sp.Site},
-							{Key: "s_errcode", Value: "s_" + fmt.Sprint(sp.ResponseCode)},
-							{Key: "s_code", Value: sp.Code},
-						},
-						map[string]interface{}{
-							"channel":  "<a href='" + sp.ListUrl + "' target='_blank'>" + sp.Channel + "</a>",
-							"code":     sp.Code,
-							"code_del": "<a href='http://test.qmx.top:6123/delete/bycode?code=" + sp.Code + "' target='_blank'>删除</a>",
-							"cuser":    sp.Cuser,
-							"muser":    sp.Muser,
-							"repcode":  sp.ResponseCode,
-							"reqtime":  sp.Requesttime,
-							"i_old":    sp.I_old,
-						},
-						time.Now(),
-						"7d",
-					)
-				}
-				lock.Unlock()
-			}(sp1)
-			time.Sleep(150 * time.Millisecond)
-		}
-		wg.Wait()
-		lock.Lock()
-		if len(msave) > 0 {
-			reqn += len(msave)
-			//go mgo.SaveBulk(checkcoll, msave...)
-			msave = []map[string]interface{}{}
-			log.Println("save...", reqn)
-		}
-		lock.Unlock()
-		log.Println("request over...")
-		//报警
-		alarmtask()
-	}()
-	time.AfterFunc(time.Duration(taskduration)*time.Minute, checktask)
-}
+// //检测任务
+// func checktask() {
+// 	func() {
+// 		defer util.Catch()
+// 		MAP_site_all = map[string]int{}
+// 		MAP_site_run = map[string]int{}
+// 		MAP_site_stop = map[string]int{}
+// 		MAP_site_error = map[string][]*spiderobj{}
+// 		//加载所有爬虫代码,站点名称、代码、列表url、状态、作者、修改时间
+// 		res, b := mgo.Find("luaconfig", nil, nil, `{"param_common":1,"modifytime":1,"createuser":1,"modifyuser":1,"code":1,"iupload":1,"luacontent":1}`, false, -1, -1)
+// 		Spiders = []*spiderobj{}
+// 		stopspidercount = 0
+// 		if b && res != nil && (*res) != nil && len(*res) > 0 {
+// 			for _, spider := range *res {
+// 				defer util.Catch()
+// 				sp := &spiderobj{}
+// 				sp.Status = util.IntAll(spider["iupload"])
+// 				sp.Cuser = util.ObjToString(spider["createuser"])
+// 				if spider["param_common"] != nil {
+// 					pc := spider["param_common"].([]interface{})
+// 					if len(pc) > 1 && len(pc) < 10 {
+// 						sp.Site = util.ObjToString(pc[1])
+// 						if len(pc) > 2 {
+// 							sp.Channel = util.ObjToString(pc[2])
+// 						}
+// 					} else if len(pc) > 12 {
+// 						sp.ListUrl = util.ObjToString(pc[11])
+// 						sp.Channel = util.ObjToString(pc[2])
+// 						sp.Site = util.ObjToString(pc[1])
+// 					} else {
+// 						continue
+// 					}
+// 				}
+// 				if sp.Status == 1 {
+// 					sp.Id = util.BsonIdToSId(spider["_id"])
+// 					if spider["luacontent"] != nil {
+// 						sp.I_old = 1
+// 						//从脚本中取
+// 						con := spider["luacontent"].(string)
+// 						sr := strings.NewReader(con)
+// 						br := bufio.NewReader(sr)
+// 						n := 0
+// 						siteUrl := ""
+// 						for n < 150 {
+// 							n++
+// 							str, e := br.ReadString('\n')
+// 							if e == nil {
+// 								if strings.HasPrefix(str, "local siteUrl") {
+// 									siteUrl = str[strings.Index(str, `"`)+1 : strings.LastIndex(str, `"`)]
+// 								} else if strings.HasPrefix(str, "spiderTargetChannelUrl") {
+// 									if strings.Index(str, "siteUrl") > 0 {
+// 										sp.ListUrl = siteUrl
+// 									} else {
+// 										s1, s2 := strings.Index(str, `"`), strings.LastIndex(str, `"`)
+// 										sp.ListUrl = str[s1+1 : s2]
+// 									}
+// 									break
+// 								}
+// 							} else if e != nil {
+// 								break
+// 							}
+// 						}
+// 					}
+// 					sp.Mtime = util.Int64All(spider["modifytime"])
+// 					sp.Muser = util.ObjToString(spider["modifyuser"])
+// 					sp.Code = util.ObjToString(spider["code"])
+// 					if sp.ListUrl != "" {
+// 						if !strings.HasPrefix(sp.ListUrl, "http") {
+// 							sp.ListUrl = "http://" + sp.ListUrl
+// 						}
+// 						Spiders = append(Spiders, sp)
+// 					}
+// 					MAP_site_run[sp.Site]++
+// 					MAP_site_all[sp.Site]++
+// 				} else {
+// 					stopspidercount++
+// 					MAP_site_stop[sp.Site]++
+// 					MAP_site_all[sp.Site]++
+// 				}
+// 			}
+// 		}
+// 		log.Println("load url size:", len(Spiders), "stopped spider count:", stopspidercount)
+// 		tn := time.Now()
+// 		now := tn.Unix()
+// 		year := tn.Year()
+// 		mon := tn.Month()
+// 		day := tn.Day()
+// 		hour := tn.Hour()
+// 		minute := tn.Minute()
+// 		reqn := 0
+// 		MAP_STATUS = map[int][]*spiderobj{}
+// 		//根据站点打乱爬虫顺序
+// 		NewSP := make(map[string]chan *spiderobj)
+// 		for _, sp1 := range Spiders {
+// 			chansp := NewSP[sp1.Site]
+// 			if chansp == nil {
+// 				chansp = make(chan *spiderobj, MAP_site_run[sp1.Site])
+// 			}
+// 			chansp <- sp1
+// 			NewSP[sp1.Site] = chansp
+// 		}
+// 		Newspiders := []*spiderobj{}
+// 		for {
+// 			if len(NewSP) == 0 {
+// 				break
+// 			}
+// 			for site, chansp := range NewSP {
+// 				sp := <-chansp
+// 				Newspiders = append(Newspiders, sp)
+// 				MAP_site_run[site]--
+// 				if MAP_site_run[site] == 0 {
+// 					delete(NewSP, site)
+// 				}
+// 			}
+// 		}
+// 		for _, sp1 := range Newspiders {
+// 			Requestthread <- true
+// 			wg.Add(1)
+// 			go func(sp *spiderobj) {
+// 				defer func() {
+// 					<-Requestthread
+// 					wg.Done()
+// 				}()
+// 				res, err := Client.Get(sp.ListUrl)
+// 				for i := 0; i < Retry; i++ {
+// 					if err != nil {
+// 						res, err = Client.Get(sp.ListUrl)
+// 						time.Sleep(time.Duration(reqduration/5) * time.Second)
+// 					}
+// 					if err == nil {
+// 						break
+// 					}
+// 				}
+// 				restr := ""
+// 				if err != nil {
+// 					sp.I_err = 1
+// 					restr = err.Error()
+// 					if res != nil {
+// 						if res.Body != nil {
+// 							defer res.Body.Close()
+// 						}
+// 						sp.ResponseCode = res.StatusCode
+// 						sp.ResponseStr = res.Status
+// 					} else {
+// 						sp.ResponseCode = 600
+// 					}
+// 				} else {
+// 					defer res.Body.Close()
+// 					sp.ResponseCode = res.StatusCode
+// 					sp.ResponseStr = res.Status
+// 					if Brequestbody == 1 {
+// 						bs, e := ioutil.ReadAll(res.Body)
+// 						if e == nil {
+// 							restr = string(bs)
+// 						}
+// 					}
+// 				}
+// 				sp.Requesttime = time.Now().Unix()
+// 				sp.ResponseBody = restr
+// 				m := map[string]interface{}{
+// 					"s_spiderid":    sp.Id,
+// 					"l_time":        now,
+// 					"l_modifytime":  sp.Mtime,
+// 					"s_modifyuser":  sp.Muser,
+// 					"s_listurl":     sp.ListUrl,
+// 					"s_site":        sp.Site,
+// 					"s_channel":     sp.Channel,
+// 					"i_res_code":    sp.ResponseCode,
+// 					"s_res_codestr": sp.ResponseStr,
+// 					"s_res_body":    sp.ResponseBody,
+// 					"s_code":        sp.Code,
+// 					"l_requesttime": sp.Requesttime,
+// 					"i_oldspider":   sp.I_old,
+// 					"i_err":         sp.I_err,
+// 					"year":          year,
+// 					"month":         mon,
+// 					"day":           day,
+// 					"hour":          hour,
+// 					"minute":        minute,
+// 				}
+// 				lock.Lock()
+// 				ss := MAP_STATUS[sp.ResponseCode]
+// 				if ss == nil {
+// 					ss = []*spiderobj{}
+// 				}
+// 				ss = append(ss, sp)
+// 				MAP_STATUS[sp.ResponseCode] = ss
+// 				msave = append(msave, m)
+// 				if len(msave) >= 100 {
+// 					reqn += len(msave)
+// 					//go mgo.SaveBulk(checkcoll, msave...)
+// 					msave = []map[string]interface{}{}
+// 					log.Println("save...", reqn)
+// 				}
+// 				if sp.ResponseCode != 200 {
+// 					if sp.Channel == "" {
+// 						sp.Channel = sp.Site
+// 					}
+// 					InsertInto(
+// 						"jy_logs",
+// 						"sp_healthcheck",
+// 						[]influxdb.Tag{
+// 							{Key: "s_site", Value: sp.Site},
+// 							{Key: "s_errcode", Value: "s_" + fmt.Sprint(sp.ResponseCode)},
+// 							{Key: "s_code", Value: sp.Code},
+// 						},
+// 						map[string]interface{}{
+// 							"channel":  "<a href='" + sp.ListUrl + "' target='_blank'>" + sp.Channel + "</a>",
+// 							"code":     sp.Code,
+// 							"code_del": "<a href='http://test.qmx.top:6123/delete/bycode?code=" + sp.Code + "' target='_blank'>删除</a>",
+// 							"cuser":    sp.Cuser,
+// 							"muser":    sp.Muser,
+// 							"repcode":  sp.ResponseCode,
+// 							"reqtime":  sp.Requesttime,
+// 							"i_old":    sp.I_old,
+// 						},
+// 						time.Now(),
+// 						"7d",
+// 					)
+// 				}
+// 				lock.Unlock()
+// 			}(sp1)
+// 			time.Sleep(150 * time.Millisecond)
+// 		}
+// 		wg.Wait()
+// 		lock.Lock()
+// 		if len(msave) > 0 {
+// 			reqn += len(msave)
+// 			//go mgo.SaveBulk(checkcoll, msave...)
+// 			msave = []map[string]interface{}{}
+// 			log.Println("save...", reqn)
+// 		}
+// 		lock.Unlock()
+// 		log.Println("request over...")
+// 		//报警
+// 		alarmtask()
+// 	}()
+// 	time.AfterFunc(time.Duration(taskduration)*time.Minute, checktask)
+// }
 
-var (
-	html = `<style>td{border-width: 1px;padding: 1px;border-style: solid;border-color: #666666;background-color: #ffffff;} table{margin:5px;border-collapse: collapse;border-width: 1px;border-color: #666666;} div{font-size:16px;line-height:1.5em} .sp{font-size:14px}</style>` +
-		`<div class="row"><table><tr><td>爬虫总量:%d</td><td>运行爬虫量:%d</td><td>暂停爬虫量:%d</td><td>运行爬虫出错量:%d</td></tr><tr><td>站点总量:%d</td><td>运行站点量:%d</td><td>暂停站点量:%d</td><td>运行站点出错量:%d</td></tr></table></div>` +
-		`<div class="row">%s</div>`
-	as = `<div class='mode'>报警模式:%s</div>`
-	//errsite   = `<div class='cls'>出错站点明细:<table><tr style='font-weight:bold'><td style='width:230px'>站点</td><td style='width:480px'>出错爬虫</td></tr>%s</table></div>`
-	//errsitetr = `<tr><td>%s</td><td>%s</td></tr>`
-	cs = `<div class='cls'><div style='font-weight:bold;margin:5px;color:red'>出错状态码:%d | 爬虫出错总量:%d | 站点出错总量:%d</div><table><tr style='font-weight:bold'><td style='width:230px'>站点</td><td style='width:195px'>代码</td><td style='width:65px'>维护人</td><td style='width:180px'>栏目</td><td style='width:380px'>URL</td></tr>%s</table></div>`
-	ss = `<tr>%s<td>%s</td><td>%s</td><td>%s</td><td>`
-)
+// var (
+// 	html = `<style>td{border-width: 1px;padding: 1px;border-style: solid;border-color: #666666;background-color: #ffffff;} table{margin:5px;border-collapse: collapse;border-width: 1px;border-color: #666666;} div{font-size:16px;line-height:1.5em} .sp{font-size:14px}</style>` +
+// 		`<div class="row"><table><tr><td>爬虫总量:%d</td><td>运行爬虫量:%d</td><td>暂停爬虫量:%d</td><td>运行爬虫出错量:%d</td></tr><tr><td>站点总量:%d</td><td>运行站点量:%d</td><td>暂停站点量:%d</td><td>运行站点出错量:%d</td></tr></table></div>` +
+// 		`<div class="row">%s</div>`
+// 	as = `<div class='mode'>报警模式:%s</div>`
+// 	//errsite   = `<div class='cls'>出错站点明细:<table><tr style='font-weight:bold'><td style='width:230px'>站点</td><td style='width:480px'>出错爬虫</td></tr>%s</table></div>`
+// 	//errsitetr = `<tr><td>%s</td><td>%s</td></tr>`
+// 	cs = `<div class='cls'><div style='font-weight:bold;margin:5px;color:red'>出错状态码:%d | 爬虫出错总量:%d | 站点出错总量:%d</div><table><tr style='font-weight:bold'><td style='width:230px'>站点</td><td style='width:195px'>代码</td><td style='width:65px'>维护人</td><td style='width:180px'>栏目</td><td style='width:380px'>URL</td></tr>%s</table></div>`
+// 	ss = `<tr>%s<td>%s</td><td>%s</td><td>%s</td><td>`
+// )
 
-var alarmLock = sync.Mutex{}
+// var alarmLock = sync.Mutex{}
 
-//报警任务
-func alarmtask() {
-	//看时间点
-	alarmLock.Lock()
-	defer alarmLock.Unlock()
-	statuscode := []int{}
-	for c, _ := range MAP_STATUS {
-		statuscode = append(statuscode, c)
-	}
-	sort.Ints(statuscode)
-	tn := time.Now().Hour()
-	if tn >= alarmtime[0] && tn <= alarmtime[1] {
-		if len(MAP_STATUS[200]) == len(Spiders) {
-			//本轮次没有出错脚本
-		} else {
-			as1 := ""
-			allcs := ""
-			cs1 := ""
-			ss1 := ""
-			errcount := 0
-			switch alarmmode {
-			case 0: //每次报
-				as1 = fmt.Sprintf(as, "每次出错报警")
-				for _, kc := range statuscode {
-					kv := MAP_STATUS[kc]
-					if !excludecode[kc] {
-						errcount += len(kv)
-						msite := map[string]bool{}
-						minMap := map[string][]*spiderobj{}
-						for _, sp := range kv {
-							sp1 := minMap[sp.Site]
-							if sp1 == nil {
-								sp1 = []*spiderobj{}
-							}
-							sp1 = append(sp1, sp)
-							minMap[sp.Site] = sp1
-						}
-						for _, sp1 := range minMap {
-							for n, sp := range sp1 {
-								MAP_ALARM[sp.Code] = true
-								if n == 0 {
-									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								} else {
-									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								}
-								asp := MAP_site_error[sp.Site]
-								if asp == nil {
-									asp = []*spiderobj{}
-								}
-								asp = append(asp, sp)
-								MAP_site_error[sp.Site] = asp
-								msite[sp.Site] = true
-							}
-						}
-						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
-						allcs += fmt.Sprintf(cs1, ss1)
-						ss1 = ""
-					}
-				}
-			case 1: //一天只报一次
-				as1 = fmt.Sprintf(as, "同一爬虫出错每天只报警一次")
-				for _, kc := range statuscode {
-					kv := MAP_STATUS[kc]
-					if !excludecode[kc] {
-						msite := map[string]bool{}
-						minMap := map[string][]*spiderobj{}
-						for _, sp := range kv {
-							if !MAP_ALARM[sp.Code] {
-								errcount++
-								sp1 := minMap[sp.Site]
-								if sp1 == nil {
-									sp1 = []*spiderobj{}
-								}
-								sp1 = append(sp1, sp)
-								minMap[sp.Site] = sp1
-							}
-						}
-						for _, sp1 := range minMap {
-							for n, sp := range sp1 {
-								MAP_ALARM[sp.Code] = true
-								if n == 0 {
-									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								} else {
-									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								}
-								asp := MAP_site_error[sp.Site]
-								if asp == nil {
-									asp = []*spiderobj{}
-								}
-								asp = append(asp, sp)
-								MAP_site_error[sp.Site] = asp
-								msite[sp.Site] = true
-							}
-						}
-						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
-						allcs += fmt.Sprintf(cs1, ss1)
-						ss1 = ""
-					}
-				}
-			case 2: //连续只报一次,间隔报多次
-				as1 = fmt.Sprintf(as, "同一爬虫连续出错只报一次")
-				for _, kc := range statuscode {
-					kv := MAP_STATUS[kc]
-					if !excludecode[kc] {
-						msite := map[string]bool{}
-						minMap := map[string][]*spiderobj{}
-						for _, sp := range kv {
-							if !MAP_ALARM[sp.Code] {
-								errcount++
-								MAP_ALARM[sp.Code] = true
-								sp1 := minMap[sp.Site]
-								if sp1 == nil {
-									sp1 = []*spiderobj{}
-								}
-								sp1 = append(sp1, sp)
-								minMap[sp.Site] = sp1
-							}
-						}
-						for _, sp1 := range minMap {
-							for n, sp := range sp1 {
-								MAP_ALARM[sp.Code] = true
-								if n == 0 {
-									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								} else {
-									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
-								}
-								asp := MAP_site_error[sp.Site]
-								if asp == nil {
-									asp = []*spiderobj{}
-								}
-								asp = append(asp, sp)
-								MAP_site_error[sp.Site] = asp
-								msite[sp.Site] = true
-							}
-						}
-						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
-						allcs += fmt.Sprintf(cs1, ss1)
-						ss1 = ""
-					} else {
-						for _, sp := range kv {
-							if MAP_ALARM[sp.Code] {
-								delete(MAP_ALARM, sp.Code)
-							}
-						}
-					}
-				}
-			}
+// //报警任务
+// func alarmtask() {
+// 	//看时间点
+// 	alarmLock.Lock()
+// 	defer alarmLock.Unlock()
+// 	statuscode := []int{}
+// 	for c, _ := range MAP_STATUS {
+// 		statuscode = append(statuscode, c)
+// 	}
+// 	sort.Ints(statuscode)
+// 	tn := time.Now().Hour()
+// 	if tn >= alarmtime[0] && tn <= alarmtime[1] {
+// 		if len(MAP_STATUS[200]) == len(Spiders) {
+// 			//本轮次没有出错脚本
+// 		} else {
+// 			as1 := ""
+// 			allcs := ""
+// 			cs1 := ""
+// 			ss1 := ""
+// 			errcount := 0
+// 			switch alarmmode {
+// 			case 0: //每次报
+// 				as1 = fmt.Sprintf(as, "每次出错报警")
+// 				for _, kc := range statuscode {
+// 					kv := MAP_STATUS[kc]
+// 					if !excludecode[kc] {
+// 						errcount += len(kv)
+// 						msite := map[string]bool{}
+// 						minMap := map[string][]*spiderobj{}
+// 						for _, sp := range kv {
+// 							sp1 := minMap[sp.Site]
+// 							if sp1 == nil {
+// 								sp1 = []*spiderobj{}
+// 							}
+// 							sp1 = append(sp1, sp)
+// 							minMap[sp.Site] = sp1
+// 						}
+// 						for _, sp1 := range minMap {
+// 							for n, sp := range sp1 {
+// 								MAP_ALARM[sp.Code] = true
+// 								if n == 0 {
+// 									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+// 								} else {
+// 									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+// 								}
+// 								asp := MAP_site_error[sp.Site]
+// 								if asp == nil {
+// 									asp = []*spiderobj{}
+// 								}
+// 								asp = append(asp, sp)
+// 								MAP_site_error[sp.Site] = asp
+// 								msite[sp.Site] = true
+// 							}
+// 						}
+// 						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
+// 						allcs += fmt.Sprintf(cs1, ss1)
+// 						ss1 = ""
+// 					}
+// 				}
+// 			case 1: //一天只报一次
+// 				as1 = fmt.Sprintf(as, "同一爬虫出错每天只报警一次")
+// 				for _, kc := range statuscode {
+// 					kv := MAP_STATUS[kc]
+// 					if !excludecode[kc] {
+// 						msite := map[string]bool{}
+// 						minMap := map[string][]*spiderobj{}
+// 						for _, sp := range kv {
+// 							if !MAP_ALARM[sp.Code] {
+// 								errcount++
+// 								sp1 := minMap[sp.Site]
+// 								if sp1 == nil {
+// 									sp1 = []*spiderobj{}
+// 								}
+// 								sp1 = append(sp1, sp)
+// 								minMap[sp.Site] = sp1
+// 							}
+// 						}
+// 						for _, sp1 := range minMap {
+// 							for n, sp := range sp1 {
+// 								MAP_ALARM[sp.Code] = true
+// 								if n == 0 {
+// 									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+// 								} else {
+// 									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+// 								}
+// 								asp := MAP_site_error[sp.Site]
+// 								if asp == nil {
+// 									asp = []*spiderobj{}
+// 								}
+// 								asp = append(asp, sp)
+// 								MAP_site_error[sp.Site] = asp
+// 								msite[sp.Site] = true
+// 							}
+// 						}
+// 						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
+// 						allcs += fmt.Sprintf(cs1, ss1)
+// 						ss1 = ""
+// 					}
+// 				}
+// 			case 2: //连续只报一次,间隔报多次
+// 				as1 = fmt.Sprintf(as, "同一爬虫连续出错只报一次")
+// 				for _, kc := range statuscode {
+// 					kv := MAP_STATUS[kc]
+// 					if !excludecode[kc] {
+// 						msite := map[string]bool{}
+// 						minMap := map[string][]*spiderobj{}
+// 						for _, sp := range kv {
+// 							if !MAP_ALARM[sp.Code] {
+// 								errcount++
+// 								MAP_ALARM[sp.Code] = true
+// 								sp1 := minMap[sp.Site]
+// 								if sp1 == nil {
+// 									sp1 = []*spiderobj{}
+// 								}
+// 								sp1 = append(sp1, sp)
+// 								minMap[sp.Site] = sp1
+// 							}
+// 						}
+// 						for _, sp1 := range minMap {
+// 							for n, sp := range sp1 {
+// 								MAP_ALARM[sp.Code] = true
+// 								if n == 0 {
+// 									ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+// 								} else {
+// 									ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+// 								}
+// 								asp := MAP_site_error[sp.Site]
+// 								if asp == nil {
+// 									asp = []*spiderobj{}
+// 								}
+// 								asp = append(asp, sp)
+// 								MAP_site_error[sp.Site] = asp
+// 								msite[sp.Site] = true
+// 							}
+// 						}
+// 						cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
+// 						allcs += fmt.Sprintf(cs1, ss1)
+// 						ss1 = ""
+// 					} else {
+// 						for _, sp := range kv {
+// 							if MAP_ALARM[sp.Code] {
+// 								delete(MAP_ALARM, sp.Code)
+// 							}
+// 						}
+// 					}
+// 				}
+// 			}
 
-			if allcs != "" {
-				//				errstr := ""
-				//				for k1, v1 := range MAP_site_error {
-				//					strsp1 := ""
-				//					for _, sp1 := range v1 {
-				//						strsp1 += sp1.Code + ":" + sp1.ListUrl + "<br/>"
-				//					}
-				//					errstr += fmt.Sprintf(errsitetr, k1, strsp1)
-				//				}
-				str := fmt.Sprintf(html, len(Spiders)+stopspidercount, len(Spiders), stopspidercount, errcount, len(MAP_site_all), len(MAP_site_run), len(MAP_site_stop), len(MAP_site_error), as1+allcs)
-				for _, email := range useremail {
-					SendEmail(email, "<div>"+str+"</div>", fmt.Sprintf("爬虫报警-%s", time.Now().Format(util.Date_Full_Layout)))
-				}
-			}
-		}
-	}
-}
+// 			if allcs != "" {
+// 				//				errstr := ""
+// 				//				for k1, v1 := range MAP_site_error {
+// 				//					strsp1 := ""
+// 				//					for _, sp1 := range v1 {
+// 				//						strsp1 += sp1.Code + ":" + sp1.ListUrl + "<br/>"
+// 				//					}
+// 				//					errstr += fmt.Sprintf(errsitetr, k1, strsp1)
+// 				//				}
+// 				str := fmt.Sprintf(html, len(Spiders)+stopspidercount, len(Spiders), stopspidercount, errcount, len(MAP_site_all), len(MAP_site_run), len(MAP_site_stop), len(MAP_site_error), as1+allcs)
+// 				for _, email := range useremail {
+// 					SendEmail(email, "<div>"+str+"</div>", fmt.Sprintf("爬虫报警-%s", time.Now().Format(util.Date_Full_Layout)))
+// 				}
+// 			}
+// 		}
+// 	}
+// }
 
-//发送邮件
-func SendEmail(email, str, title string) {
-	mail.SendMail(Mail, &mail.Message{title, "爬虫站点检测系统", []string{email}, str})
-}
+// //发送邮件
+// func SendEmail(email, str, title string) {
+// 	mail.SendMail(Mail, &mail.Message{title, "爬虫站点检测系统", []string{email}, str})
+// }

+ 378 - 0
src/spidersitecheck/main_new.go

@@ -0,0 +1,378 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"mongodb"
+	"net"
+	"net/http"
+	"net/url"
+	qu "qfw/util"
+	"qfw/util/mail"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/cron"
+	//"github.com/influxdata/influxdb-client"
+)
+
+const ProxyIp = "http://pzy.spdata.jianyu360.com/getproxy"
+
+var (
+	config          map[string]interface{} //全局配置文件
+	mgo             mongodb.MongodbSim     //mongodb连接
+	Spiders         []*spiderobj           //每次所有加载爬虫
+	Requestthread   chan bool              //请求线程数
+	Brequestbody    int                    //是否解析请求正文
+	wg                                     = &sync.WaitGroup{}
+	msave                                  = []map[string]interface{}{}
+	lock            *sync.Mutex            = new(sync.Mutex)
+	checkcoll       string
+	totalcoll       string
+	Client          *http.Client
+	Retry           = 2                  //请求重试次数
+	reqduration     = 2                  //请求延时
+	taskduration    = 20                 //每次检测间隔,分钟
+	dayreport       = -1                 //每天整点报告前一天的结果 -1不报告
+	alarmmode       = 0                  //报警模式,0 每次出错每次报警 1天一个爬虫只报一次 2 连续出错只报一次,间隔出错报多次
+	MAP_STATUS      map[int][]*spiderobj //返回代码-爬虫
+	MAP_ALARM       = map[string]bool{}  //报警MAP
+	stopspidercount int
+	excludecode     = map[int]bool{}
+	alarmtime       = []int{}
+	Mail            *mail.MailAuth
+	useremail       = []string{}
+	MAP_site_all    map[string]int
+	MAP_site_run    map[string]int
+	MAP_site_stop   map[string]int
+	MAP_site_error  map[string][]*spiderobj
+	requestime      []string //每天请求的时间
+	AllChannelOk    bool     //所有站点是否都请求正常
+	TotalLua        int      //爬虫总量
+	TotalRunLua     int      //运行爬虫量
+)
+
+type spiderobj struct {
+	Id           string //爬虫id
+	Site         string //站点
+	Channel      string //爬虫栏目
+	Code         string //爬虫代码
+	ListUrl      string //列表url
+	Cuser        string
+	Muser        string //修改人
+	Mtime        int64  //修改时间
+	Status       int    //爬虫状态
+	I_old        int    //是否是老爬虫{luacontent:{$exists:1}}
+	ResponseCode int    //响应码
+	ResponseStr  string //响应码串
+	ResponseBody string //响应内容
+	Requesttime  int64  //请求时间
+	I_err        int    //1,有错
+}
+
+func init() {
+	//加载配置
+	qu.ReadConfig(&config)
+	//influxdb
+	InitInfluxdb(fmt.Sprint(config["influxdb"]))
+	//mgo
+	mgo = mongodb.MongodbSim{
+		MongodbAddr: config["mongodbServers"].(string),
+		Size:        qu.IntAll(config["mongodbPoolSize"]),
+		DbName:      config["mongodbName"].(string),
+	}
+	mgo.InitPool()
+	//其他参数
+	for _, rt := range config["requestime"].([]interface{}) {
+		requestime = append(requestime, qu.ObjToString(rt))
+	}
+	useremailarr := config["useremail"].([]interface{})
+	for _, cv := range useremailarr {
+		useremail = append(useremail, cv.(string))
+	}
+
+	Requestthread = make(chan bool, qu.IntAllDef(config["requestthread"], 20)) //60
+	Brequestbody = qu.IntAll(config["brequestbody"])                           //0
+	reqduration = qu.IntAll(config["reqduration"])                             //18
+	Retry = qu.IntAll(config["requestretry"])                                  //2
+	dayreport = qu.IntAll(config["dayreport"])                                 //7
+	excludecodearr := config["excludecode"].([]interface{})                    //200
+	for _, cv := range excludecodearr {
+		excludecode[qu.IntAll(cv)] = true
+	}
+	//mail
+	M := config["smtp"].(map[string]interface{})
+	Mail = &mail.MailAuth{
+		M["addr"].(string),
+		qu.IntAll(M["port"]),
+		qu.ObjToString(M["user"]),
+		qu.ObjToString(M["pwd"]),
+	}
+	//初始化map
+	MAP_site_all = map[string]int{}
+	MAP_site_run = map[string]int{}
+	MAP_site_stop = map[string]int{}
+	MAP_site_error = map[string][]*spiderobj{}
+	MAP_STATUS = map[int][]*spiderobj{}
+
+}
+
+var (
+	html = `<style>td{border-width: 1px;padding: 1px;border-style: solid;border-color: #666666;background-color: #ffffff;} table{margin:5px;border-collapse: collapse;border-width: 1px;border-color: #666666;} div{font-size:16px;line-height:1.5em} .sp{font-size:14px}</style>` +
+		`<div class="row"><table><tr><td>爬虫总量:%d</td><td>运行爬虫量:%d</td><td>暂停爬虫量:%d</td><td>运行爬虫出错量:%d</td></tr><tr><td>站点总量:%d</td><td>运行站点量:%d</td><td>暂停站点量:%d</td><td>运行站点出错量:%d</td></tr></table></div>` +
+		`<div class="row">%s</div>`
+	as = `<div class='mode'>报警模式:%s</div>`
+	//errsite   = `<div class='cls'>出错站点明细:<table><tr style='font-weight:bold'><td style='width:230px'>站点</td><td style='width:480px'>出错爬虫</td></tr>%s</table></div>`
+	//errsitetr = `<tr><td>%s</td><td>%s</td></tr>`
+	cs = `<div class='cls'><div style='font-weight:bold;margin:5px;color:red'>出错状态码:%d | 爬虫出错总量:%d | 站点出错总量:%d</div><table><tr style='font-weight:bold'><td style='width:230px'>站点</td><td style='width:195px'>代码</td><td style='width:65px'>维护人</td><td style='width:180px'>栏目</td><td style='width:380px'>URL</td></tr>%s</table></div>`
+	ss = `<tr>%s<td>%s</td><td>%s</td><td>%s</td><td>`
+)
+
+func main() {
+	TimeTask()
+	ch := make(chan bool, 1)
+	<-ch
+}
+
+func TimeTask() {
+	c := cron.New()
+	c.AddFunc("0 0 "+fmt.Sprint(dayreport)+" * * ?", func() { alarmtask() }) //7点定时发送邮件报警
+	for j, rt := range requestime {
+		go func(rt string, j int) {
+			c.AddFunc("0 0 "+rt+" * * ?", func() { StartTask() })
+		}(rt, j)
+	}
+	c.Start()
+}
+
+func StartTask() {
+	defer qu.Catch()
+	qu.Debug("MAP_STATUS:", len(MAP_STATUS))
+	Spiders = []*spiderobj{} //清空上一轮Spiders的值
+	if AllChannelOk {        //所有channel正常,不再请求。发送邮件后初始化为false
+		return
+	}
+	if len(MAP_STATUS) > 0 { //前一次请求失败的url再次请求
+		for _, sps := range MAP_STATUS {
+			Spiders = append(Spiders, sps...)
+		}
+	}
+	qu.Debug("Spiders:", len(Spiders))
+	if len(Spiders) == 0 { //每天第一次发请求
+		//加载(待完成、待审核、已通过、未通过、已上架、已下架)爬虫代码,站点名称、代码、列表url、状态、作者、修改时间
+		query := map[string]interface{}{
+			"state": 5,
+			// "state": map[string]interface{}{
+			// 	"$in": []int{0, 1, 2, 3, 5, 6},
+			// },
+		}
+		res, b := mgo.Find("luaconfig", query, nil, `{"param_common":1,"modifytime":1,"createuser":1,"modifyuser":1,"code":1,"iupload":1,"luacontent":1}`, false, -1, -1)
+		TotalLua = len(*res)
+		qu.Debug("共查询已上架爬虫:", len(*res))
+		stopspidercount = 0
+		if b && res != nil && (*res) != nil && len(*res) > 0 {
+			for _, spider := range *res {
+				sp := &spiderobj{}
+				sp.Cuser = qu.ObjToString(spider["createuser"])
+				sp.Id = mongodb.BsonIdToSId(spider["_id"])
+				if spider["param_common"] != nil {
+					pc := spider["param_common"].([]interface{})
+					if len(pc) > 1 && len(pc) < 10 {
+						sp.Site = qu.ObjToString(pc[1])
+						if len(pc) > 2 {
+							sp.Channel = qu.ObjToString(pc[2])
+						}
+					} else if len(pc) > 12 {
+						sp.ListUrl = qu.ObjToString(pc[11])
+						sp.Channel = qu.ObjToString(pc[2])
+						sp.Site = qu.ObjToString(pc[1])
+					} else {
+						stopspidercount++
+						MAP_site_stop[sp.Site]++
+						MAP_site_all[sp.Site]++
+						continue
+					}
+				}
+				sp.Mtime = qu.Int64All(spider["modifytime"])
+				sp.Muser = qu.ObjToString(spider["modifyuser"])
+				sp.Code = qu.ObjToString(spider["code"])
+				if sp.ListUrl != "" {
+					if !strings.HasPrefix(sp.ListUrl, "http") {
+						sp.ListUrl = "http://" + sp.ListUrl
+					}
+					Spiders = append(Spiders, sp)
+				}
+				MAP_site_run[sp.Site]++
+				MAP_site_all[sp.Site]++
+			}
+		}
+		qu.Debug("load url size:", len(Spiders), "stopped spider count:", stopspidercount, "site all:", len(MAP_site_all), "site run:", len(MAP_site_run))
+	}
+	MAP_STATUS = map[int][]*spiderobj{}
+	TotalRunLua = len(Spiders)
+	for _, sp1 := range Spiders {
+		Requestthread <- true
+		wg.Add(1)
+		go func(sp *spiderobj) {
+			defer func() {
+				<-Requestthread
+				wg.Done()
+			}()
+			var res *http.Response
+			var err error
+			for i := 0; i < Retry; i++ {
+				client := InitClient()
+				res, err = client.Get(sp.ListUrl)
+				if err == nil { //请求成功
+					break
+				}
+				if err != nil && i != Retry-1 { //请求失败
+					time.Sleep(time.Duration(reqduration) * time.Second)
+					//res, err = Client.Get(sp.ListUrl)
+				}
+			}
+			restr := ""
+			if err != nil {
+				sp.I_err = 1
+				restr = err.Error()
+				if res != nil {
+					if res.Body != nil {
+						defer res.Body.Close()
+					}
+					sp.ResponseCode = res.StatusCode
+					sp.ResponseStr = res.Status
+				} else {
+					sp.ResponseCode = 600
+				}
+			} else {
+				defer res.Body.Close()
+				sp.ResponseCode = res.StatusCode
+				sp.ResponseStr = res.Status
+				if Brequestbody == 1 {
+					bs, e := ioutil.ReadAll(res.Body)
+					if e == nil {
+						restr = string(bs)
+					}
+				}
+			}
+			qu.Debug(sp.Site, " ", sp.Channel, " ", sp.Code, " ", sp.ResponseCode)
+			sp.Requesttime = time.Now().Unix()
+			sp.ResponseBody = restr
+			lock.Lock()
+			ss := MAP_STATUS[sp.ResponseCode]
+			if ss == nil {
+				ss = []*spiderobj{}
+			}
+			ss = append(ss, sp)
+			MAP_STATUS[sp.ResponseCode] = ss
+			lock.Unlock()
+		}(sp1)
+		time.Sleep(150 * time.Millisecond)
+	}
+	wg.Wait()
+	qu.Debug("request over...", len(MAP_STATUS))
+	for code, _ := range excludecode { //将MAP_STATUS中栏目请求正常的清除
+		delete(MAP_STATUS, code)
+	}
+	if len(MAP_STATUS) == 0 { //所有请求正常
+		AllChannelOk = true
+	}
+	qu.Debug("delete MAP_STATUS over...", len(MAP_STATUS))
+}
+
+func alarmtask() {
+	defer qu.Catch()
+	statuscode := []int{}
+	errcount := 0
+	as1 := ""
+	allcs := ""
+	cs1 := ""
+	ss1 := ""
+	for c, _ := range MAP_STATUS {
+		statuscode = append(statuscode, c)
+	}
+	sort.Ints(statuscode)
+	for _, kc := range statuscode {
+		kv := MAP_STATUS[kc]
+		errcount += len(kv)
+		msite := map[string]bool{}
+		minMap := map[string][]*spiderobj{} //记录某个站点的异常总量
+		for _, sp := range kv {
+			sp1 := minMap[sp.Site]
+			if sp1 == nil {
+				sp1 = []*spiderobj{}
+			}
+			sp1 = append(sp1, sp)
+			minMap[sp.Site] = sp1
+		}
+		for _, sp1 := range minMap {
+			for n, sp := range sp1 {
+				if n == 0 {
+					ss1 += fmt.Sprintf(ss, fmt.Sprintf("<td rowspan='%d'>%s</td>", len(sp1), sp.Site), sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+				} else {
+					ss1 += fmt.Sprintf(ss, "", sp.Code, sp.Cuser, sp.Channel) + sp.ListUrl + "</td></tr>"
+				}
+				asp := MAP_site_error[sp.Site]
+				if asp == nil {
+					asp = []*spiderobj{}
+				}
+				asp = append(asp, sp)
+				MAP_site_error[sp.Site] = asp
+				msite[sp.Site] = true
+			}
+		}
+		cs1 = fmt.Sprintf(cs, kc, len(kv), len(msite), "%s")
+		allcs += fmt.Sprintf(cs1, ss1)
+		ss1 = ""
+	}
+	if allcs != "" {
+		str := fmt.Sprintf(html, TotalLua, TotalRunLua, stopspidercount, errcount, len(MAP_site_all), len(MAP_site_run), len(MAP_site_stop), len(MAP_site_error), as1+allcs)
+		for _, email := range useremail {
+			SendEmail(email, "<div>"+str+"</div>", fmt.Sprintf("爬虫报警-%s", time.Now().Format(qu.Date_Full_Layout)))
+		}
+	}
+	//初始化
+	MAP_STATUS = map[int][]*spiderobj{}
+	AllChannelOk = false
+	qu.Debug("send mail over...")
+}
+
+//初始化client
+func InitClient() (client *http.Client) {
+	defer qu.Catch()
+	for {
+		res, _ := http.Get(ProxyIp)
+		bs, _ := ioutil.ReadAll(res.Body)
+		res.Body.Close()
+		if string(bs) == "" {
+			continue
+		}
+		//qu.Debug("ip:", string(bs))
+		proxyUrl, _ := url.Parse(string(bs))
+		client = &http.Client{Transport: &http.Transport{
+			Proxy: http.ProxyURL(proxyUrl),
+			Dial: func(netw, addr string) (net.Conn, error) {
+				deadline := time.Now().Add(time.Duration(reqduration) * time.Second)
+				c, err := net.DialTimeout(netw, addr, time.Duration(reqduration*2)*time.Second)
+				if err != nil {
+					return nil, err
+				}
+				tcp_conn := c.(*net.TCPConn)
+				tcp_conn.SetKeepAlive(false)
+				tcp_conn.SetDeadline(deadline)
+				return tcp_conn, nil
+			},
+			DisableKeepAlives: true,
+		}}
+		break
+	}
+	return
+}
+
+//发送邮件
+func SendEmail(email, str, title string) {
+	qu.Debug(str)
+	mail.SendMail(Mail, &mail.Message{title, "爬虫站点检测系统", []string{email}, str})
+}

+ 29 - 29
src/spidersitecheck/task.go

@@ -1,34 +1,34 @@
 package main
 
-import (
-	"time"
-)
+// import (
+// 	"time"
+// )
 
-func ClearMap() {
-	now := time.Now()
-	h, day := now.Hour(), now.Day()
-	if h >= 0 {
-		day++
-	}
-	newDate := time.Date(now.Year(), now.Month(), day, 0, 0, 10, 0, time.Local)
-	des := newDate.Unix() - now.Unix()
-	if des > 0 {
-		time.AfterFunc(time.Duration(des)*time.Second, func() {
-			go cm()
-			ticker := time.NewTicker(time.Hour * 24)
-			go func() {
-				for _ = range ticker.C {
-					cm()
-				}
-			}()
-		})
-	}
-}
+// func ClearMap() {
+// 	now := time.Now()
+// 	h, day := now.Hour(), now.Day()
+// 	if h >= 0 {
+// 		day++
+// 	}
+// 	newDate := time.Date(now.Year(), now.Month(), day, 0, 0, 10, 0, time.Local)
+// 	des := newDate.Unix() - now.Unix()
+// 	if des > 0 {
+// 		time.AfterFunc(time.Duration(des)*time.Second, func() {
+// 			go cm()
+// 			ticker := time.NewTicker(time.Hour * 24)
+// 			go func() {
+// 				for _ = range ticker.C {
+// 					cm()
+// 				}
+// 			}()
+// 		})
+// 	}
+// }
 
-func cm() {
-	alarmLock.Lock()
-	defer alarmLock.Unlock()
-	//清除alarm
-	MAP_ALARM = map[string]bool{}
+// func cm() {
+// 	alarmLock.Lock()
+// 	defer alarmLock.Unlock()
+// 	//清除alarm
+// 	MAP_ALARM = map[string]bool{}
 
-}
+// }