|
@@ -34,6 +34,8 @@ type Heart struct {
|
|
|
DetailExecuteHeart int64 //三级页采集到数据心跳
|
|
|
ListHeart int64 //爬虫列表页执行心跳
|
|
|
ModifyUser string //爬虫维护人
|
|
|
+ Site string //站点
|
|
|
+ Channel string //栏目
|
|
|
}
|
|
|
|
|
|
//爬虫()
|
|
@@ -41,6 +43,7 @@ type Spider struct {
|
|
|
Script
|
|
|
Code string //代码
|
|
|
Name string //名称
|
|
|
+ Channel string //栏目
|
|
|
DownDetail bool //是否下载详细页
|
|
|
Stop bool //停止标志
|
|
|
Pass bool //暂停标志
|
|
@@ -81,7 +84,7 @@ var TimeChan = make(chan bool, 1)
|
|
|
var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
|
|
|
|
|
|
//心跳
|
|
|
-func UpdateHeart(code, user, t string) {
|
|
|
+func UpdateHeart(site, channel, code, user, t string) {
|
|
|
if htmp, ok := SpiderHeart.Load(code); ok {
|
|
|
if heart, ok := htmp.(*Heart); ok {
|
|
|
if t == "list" {
|
|
@@ -93,22 +96,19 @@ func UpdateHeart(code, user, t string) {
|
|
|
}
|
|
|
}
|
|
|
} else {
|
|
|
+ heart := &Heart{
|
|
|
+ ModifyUser: user,
|
|
|
+ Site: site,
|
|
|
+ Channel: channel,
|
|
|
+ }
|
|
|
if t == "list" {
|
|
|
- SpiderHeart.Store(code, &Heart{
|
|
|
- ListHeart: time.Now().Unix(),
|
|
|
- ModifyUser: user,
|
|
|
- })
|
|
|
+ heart.ListHeart = time.Now().Unix()
|
|
|
} else if t == "detail" {
|
|
|
- SpiderHeart.Store(code, &Heart{
|
|
|
- DetailHeart: time.Now().Unix(),
|
|
|
- ModifyUser: user,
|
|
|
- })
|
|
|
+ heart.DetailHeart = time.Now().Unix()
|
|
|
} else if t == "detailexcute" {
|
|
|
- SpiderHeart.Store(code, &Heart{
|
|
|
- DetailExecuteHeart: time.Now().Unix(),
|
|
|
- ModifyUser: user,
|
|
|
- })
|
|
|
+ heart.DetailExecuteHeart = time.Now().Unix()
|
|
|
}
|
|
|
+ SpiderHeart.Store(code, heart)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -158,8 +158,8 @@ func (s *Spider) ExecJob(reload bool) {
|
|
|
if err != nil {
|
|
|
logger.Error(s.Code, err)
|
|
|
}
|
|
|
- UpdateHeart(s.Code, s.MUserName, "list") //记录所有节点列表页心跳
|
|
|
- err = s.DownListPageItem() //下载列表
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "list") //记录所有节点列表页心跳
|
|
|
+ err = s.DownListPageItem() //下载列表
|
|
|
if err != nil {
|
|
|
logger.Error(s.Code, err)
|
|
|
}
|
|
@@ -364,6 +364,8 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
|
|
|
nowTime := time.Now()
|
|
|
sDate := qu.FormatDate(&nowTime, qu.Date_Short_Layout)
|
|
|
set := map[string]interface{}{
|
|
|
+ "site": s.Name,
|
|
|
+ "channel": s.Channel,
|
|
|
"spidercode": s.Code,
|
|
|
"updatetime": nowTime.Unix(),
|
|
|
"event": util.Config.Uploadevent,
|
|
@@ -507,7 +509,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
|
|
|
SaveHighListPageData(paramdata, href, num)
|
|
|
return
|
|
|
} else {
|
|
|
- UpdateHeart(s.Code, s.MUserName, "detail") //记录modal=0老模式采集三级页心跳
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=0老模式采集三级页心跳
|
|
|
isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
|
|
|
if isExist { //更新redis生命周期
|
|
|
util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*30)
|
|
@@ -542,7 +544,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
|
|
|
if t1 > time.Now().Unix() { //防止发布时间超前
|
|
|
data["publishtime"] = time.Now().Unix()
|
|
|
}
|
|
|
- UpdateHeart(s.Code, s.MUserName, "detailexcute") //记录modal=0老模式采集到数据心跳
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=0老模式采集到数据心跳
|
|
|
delete(data, "exit")
|
|
|
delete(data, "checkpublishtime")
|
|
|
data["comeintime"] = time.Now().Unix()
|
|
@@ -697,7 +699,7 @@ func (s *Spider) DownloadHighDetail() {
|
|
|
"comeintime": 0,
|
|
|
"event": 0,
|
|
|
}
|
|
|
- UpdateHeart(s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
|
|
|
list, _ := Mgo.Find("spider_highlistdata", q, o, f, false, 0, 100)
|
|
|
if list != nil && len(*list) > 0 {
|
|
|
for _, tmp := range *list {
|
|
@@ -725,7 +727,7 @@ func (s *Spider) DownloadHighDetail() {
|
|
|
}
|
|
|
//下载、解析、入库
|
|
|
data, err = s.DownloadDetailPage(tmp, data)
|
|
|
- UpdateHeart(s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
|
|
|
if err != nil || data == nil {
|
|
|
success = false
|
|
|
times++
|
|
@@ -819,7 +821,7 @@ func (s *Spider) DownloadListDetail() {
|
|
|
"comeintime": 0,
|
|
|
"event": 0,
|
|
|
}
|
|
|
- UpdateHeart(s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detail") //记录modal=1采集三级页心跳
|
|
|
list, _ := Mgo.Find("spider_highlistdata", q, o, f, false, 0, 100)
|
|
|
if list != nil && len(*list) > 0 {
|
|
|
for _, tmp := range *list {
|
|
@@ -847,7 +849,7 @@ func (s *Spider) DownloadListDetail() {
|
|
|
}
|
|
|
//下载、解析、入库
|
|
|
data, err = s.DownloadDetailPage(tmp, data)
|
|
|
- UpdateHeart(s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
|
|
|
+ UpdateHeart(s.Name, s.Channel, s.Code, s.MUserName, "detailexcute") //记录modal=1下载数据心跳
|
|
|
if err != nil || data == nil {
|
|
|
success = false
|
|
|
times++
|