
ES adjustments; data storage adjustments

maxiaoshan 2 years ago
parent
commit
f6e3e6039e
6 files changed with 275 additions and 229 deletions
  1. src/main.go  +3 -1
  2. src/spider/handler.go  +7 -8
  3. src/spider/msgservice.go  +5 -4
  4. src/spider/script.go  +1 -1
  5. src/spider/spider.go  +68 -55
  6. src/spider/store.go  +191 -160

+ 3 - 1
src/main.go

@@ -11,7 +11,7 @@ import (
 
 	mgo "mongodb"
 	qu "qfw/util"
-	es "qfw/util/elastic"
+	es "qfw/util/elastic.v7"
 	"regexp"
 	"runtime"
 	. "spiderutil"
@@ -133,6 +133,8 @@ func main() {
 	//7000历史节点下载详情页
 	go spider.HistoryEventDownloadDetail()
 
+	//批量保存data_bak
+	//go spider.SaveDataBak()
 	//批量保存错误数据
 	//go spider.UpdateErrDataMgo()
 	//爬虫信息提交编辑器
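
The import swap from qfw/util/elastic to qfw/util/elastic.v7 tracks the Elasticsearch 7 migration that the query changes in spider.go below depend on. The new data_bak batch saver is wired in here but left commented out; enabling it later is a single goroutine, as in this minimal sketch (assuming SaveDataBak keeps the shape it has in store.go at the end of this commit):

	// batch-save data_bak: drains DataBakSaveCache in the background (see store.go)
	go spider.SaveDataBak()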

+ 7 - 8
src/spider/handler.go

@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"github.com/cjoudrey/gluahttp"
 	lujson "github.com/yuin/gopher-json"
-	mu "mfw/util"
 	"net/http"
 	"net/url"
 	"os"
@@ -873,7 +872,7 @@ func ReloadSpiderFile() {
 
 //生成爬虫
 func CreateSpider(code, luafile string, newstate, thread bool) (*Spider, string) {
-	defer mu.Catch()
+	defer qu.Catch()
 	spider := &Spider{}
 	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, thread)
 	if err != "" {
@@ -979,7 +978,7 @@ func UpdateSpider(spider *Spider, code, script string) {
 
 //排队模式生成爬虫
 func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
-	defer mu.Catch()
+	defer qu.Catch()
 	spider := &Spider{}
 	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, false)
 	if err != "" {
@@ -1042,7 +1041,7 @@ func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
 
 //高性能模式生成爬虫
 func NewSpider(code, luafile string) (*Spider, string) {
-	defer mu.Catch()
+	defer qu.Catch()
 	spider := &Spider{}
 	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, false)
 	if err != "" {
@@ -1106,7 +1105,7 @@ func NewSpider(code, luafile string) (*Spider, string) {
 
 //多线程生成爬虫
 func NewSpiderForThread(code, luafile string) (*Spider, string) {
-	defer mu.Catch()
+	defer qu.Catch()
 	spider := &Spider{}
 	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, true)
 	if err != "" {
@@ -1197,7 +1196,7 @@ func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum,
 
 //获取下载的上下限(没用)
 func GetLimitDownload(code string) (uplimit, lowlimit int) {
-	defer mu.Catch()
+	defer qu.Catch()
 	ret, _ := MgoS.FindOne("spider_ldtime", map[string]interface{}{"code": code})
 	if ret != nil && len(*ret) > 0 {
 		uplimit = qu.IntAll((*ret)["uplimit"])
@@ -1210,7 +1209,7 @@ func GetLimitDownload(code string) (uplimit, lowlimit int) {
 
 //拼装脚本
 func GetScriptByTmp(luaconfig map[string]interface{}) string {
-	defer mu.Catch()
+	defer qu.Catch()
 	script := ""
 	if luaconfig["listcheck"] == nil {
 		luaconfig["listcheck"] = ""
@@ -1332,7 +1331,7 @@ func GetTmpModel(param map[string][]interface{}) (script string, err interface{}
 
 //补充模型
 func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
-	defer mu.Catch()
+	defer qu.Catch()
 
 	//补充通用信息
 	commstr := `item["spidercode"]="` + comm[0].(string) + `";`
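
Every hunk in this file replaces mu.Catch (from mfw/util, whose import is dropped above) with qu.Catch from qfw/util. Neither implementation appears in this commit; a deferred recover helper of this kind is conventionally a thin wrapper, as in the following sketch (an assumption about its shape, not the actual qfw/util code):

	package qu // hypothetical stand-in; the real package is qfw/util

	import "log"

	// Catch recovers from a panic in the calling function so one bad spider
	// script cannot crash the whole process. Use as: defer qu.Catch()
	func Catch() {
		if r := recover(); r != nil {
			log.Println("recovered from panic:", r)
		}
	}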

+ 5 - 4
src/spider/msgservice.go

@@ -58,7 +58,7 @@ func InitMsgClientFile(serveraddr, name string) {
 
 //
 func processevent(p *mu.Packet) {
-	defer mu.Catch()
+	defer qu.Catch()
 	var data []byte
 	switch p.Event {
 	case mu.SERVICE_DOWNLOAD_APPEND_NODE:
@@ -96,7 +96,7 @@ func processevent(p *mu.Packet) {
 
 //
 func processeventFile(p *mu.Packet) {
-	defer mu.Catch()
+	defer qu.Catch()
 	var data []byte
 	switch p.Event {
 	case mu.SERVICE_DOWNLOAD_APPEND_NODE:
@@ -204,7 +204,7 @@ func SendMsgService(event int, data []map[string]interface{}) {
 
 //调用消息批量保存
 func SaveObjBlak(event int, checkAtrr string, c string, data []map[string]interface{}) {
-	defer mu.Catch()
+	defer qu.Catch()
 	tmp, _ := json.Marshal([]interface{}{checkAtrr, data})
 	switch event {
 	case mu.SERVICE_YCML_SAVE: //异常名录
@@ -257,7 +257,7 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
 		//log.Println(event, checkAtrr, data["href"], data["title"], len(bs))
 		return
 	}
-	defer mu.Catch()
+	defer qu.Catch()
 	tmp, _ := json.Marshal([]interface{}{checkAtrr, []interface{}{data}})
 	switch event {
 	case mu.SERVICE_SPIDER_ECPS: //著作权等服务
@@ -288,6 +288,7 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
 				data["biddingcoll"] = arr[1]
 			}
 			MgoS.Save("data_bak", data)
+			//DataBakSaveCache <- data
 		}
 	}
 }
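
The commented-out DataBakSaveCache send marks the intended follow-up to SaveDataBak in store.go: instead of one synchronous MgoS.Save("data_bak", data) per document, SaveObj would push onto the buffered channel and let the background worker flush in bulk. In this commit the synchronous save remains the live path.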

+ 1 - 1
src/spider/script.go

@@ -78,7 +78,7 @@ var TimeSleepChan = make(chan bool, 1)
 
 //加载文件
 func (s *Script) LoadScript(site, channel, user *string, code, script_file string, newstate bool, thread bool) string {
-	defer mu.Catch()
+	defer qu.Catch()
 	s.SCode = code
 	s.ScriptFile = script_file
 	if util.Config.Working == 0 { //高性能模式

+ 68 - 55
src/spider/spider.go

@@ -6,16 +6,16 @@ package spider
 import (
 	"crypto/sha1"
 	"fmt"
+	elc "gopkg.in/olivere/elastic/v7"
 	"io"
 	"log"
-	mu "mfw/util"
 	mgo "mongodb"
 	qu "qfw/util"
+	es "qfw/util/elastic.v7"
 	"strconv"
 	"strings"
 	"sync"
 
-	es "qfw/util/elastic"
 	"regexp"
 	util "spiderutil"
 	"sync/atomic"
@@ -25,7 +25,7 @@ import (
 	"github.com/yuin/gopher-lua"
 )
 
-//心跳
+// Heart 心跳
 type Heart struct {
 	DetailHeart        int64  //爬虫三级页执行心跳
 	DetailExecuteHeart int64  //三级页采集到数据心跳
@@ -36,7 +36,7 @@ type Heart struct {
 	Channel            string //栏目
 }
 
-//流量
+// SpiderFlow 流量
 type SpiderFlow struct {
 	Flow       int64  //流量
 	ModifyUser string //爬虫维护人
@@ -45,7 +45,7 @@ type SpiderFlow struct {
 	//Code       string
 }
 
-//爬虫()
+// Spider 爬虫
 type Spider struct {
 	Script
 	Code                            string //代码
@@ -84,28 +84,34 @@ type Spider struct {
 	IsMainThread     bool //是否为主线程(多线程采集时区分是否为主线程)
 }
 
-var Es *es.Elastic
-var EsIndex string
-var EsType string
-var MgoS *mgo.MongodbSim
+var (
+	Es      *es.Elastic
+	EsIndex string
+	EsType  string
+	MgoS    *mgo.MongodbSim
+	MgoEB   *mgo.MongodbSim
+
+	TimeChan          = make(chan bool, 1)
+	Reg               = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
+	RestrictAccessReg = regexp.MustCompile(`访问被拒绝`)
+	//DomainNameReg     = regexp.MustCompile(`(?://).+?(?:)[::/]`)
+	//RepDomainNameReg  = regexp.MustCompile(`[::/]+`)
+	//Today             string
+	//SpiderFlowMap     = sync.Map{} //code:{"2022-05-16":SpiderFlow}
+	AllThreadNum int64
+	DelaySiteMap map[string]*DelaySite //延迟采集站点集合
+
+	//UpdataMgoCache = make(chan []map[string]interface{}, 1000)   //更新要重下数据的状态
+	//SP = make(chan bool, 5)
+	//SaveMgoCache = make(chan map[string]interface{}, 1000)       //保存爬虫采集非本站点数据
+	//SPS = make(chan bool, 5)
+	UpdataHeartCache = make(chan []map[string]interface{}, 1000) //更新爬虫心跳信息
+	SPH              = make(chan bool, 5)
+
+	DataBakSaveCache = make(chan map[string]interface{}, 1000) //保存采集信息详情页记录
+	DB_CH            = make(chan bool, 5)
+)
 
-//var MgoE *mgo.MongodbSim
-var MgoEB *mgo.MongodbSim
-var UpdataMgoCache = make(chan []map[string]interface{}, 1000)   //更新要重下数据的状态
-var UpdataHeartCache = make(chan []map[string]interface{}, 1000) //更新爬虫心跳信息
-var SaveMgoCache = make(chan map[string]interface{}, 1000)       //保存爬虫采集非本站点数据
-var SP = make(chan bool, 5)
-var SPH = make(chan bool, 5)
-var SPS = make(chan bool, 5)
-var TimeChan = make(chan bool, 1)
-var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
-var DomainNameReg = regexp.MustCompile(`(?://).+?(?:)[::/]`)
-var RepDomainNameReg = regexp.MustCompile(`[::/]+`)
-var RestrictAccessReg = regexp.MustCompile(`访问被拒绝`)
-var Today string
-var SpiderFlowMap = sync.Map{} //code:{"2022-05-16":SpiderFlow}
-var AllThreadNum int64
-var DelaySiteMap map[string]*DelaySite //延迟采集站点集合
 type DelaySite struct {
 	DelayTime int
 	Compete   bool
@@ -236,7 +242,7 @@ func (s *Spider) ExecJob(reload bool) {
 
 //获取最新时间--作为最后更新时间
 func (s *Spider) GetLastPublishTime() (errs interface{}) {
-	defer mu.Catch()
+	defer qu.Catch()
 	var lastpublishtime string
 	//取得最后更新时间
 	if err := s.L.CallByParam(lua.P{
@@ -268,7 +274,7 @@ func (s *Spider) GetLastPublishTime() (errs interface{}) {
 
 //下载列表
 func (s *Spider) DownListPageItem() (errs interface{}) {
-	defer mu.Catch()
+	defer qu.Catch()
 	s.AlreadyGetPageHeart = map[int]bool{}                                     //重置记录
 	start, max := s.GetIntVar("spiderStartPage"), s.GetIntVar("spiderMaxPage") //起始页、最大页
 	s.MaxPage = max                                                            //
@@ -460,6 +466,9 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 			} else {
 				inc["other_percent"] = 1
 			}
+			if isRunRepeatList && start > max { //连续翻页超过了上限
+				inc["uplimit"] = 1
+			}
 		} else {
 			inc["zero"] = 1
 		}
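
The new uplimit counter records runs where continuous paging (isRunRepeatList) walked past the configured maximum page. How inc is consumed lies outside this hunk; presumably it feeds a MongoDB $inc update on the per-code download log, roughly as in this sketch (collection name and MgoS.Update argument order mirrored from other call sites in this diff, but unverified here):

	// accumulate today's counters for this spider code, upserting the row;
	// "today" is assumed to be a date string as used elsewhere in store.go
	update := map[string]interface{}{"$inc": inc} // e.g. {"$inc": {"uplimit": 1}}
	query := map[string]interface{}{"spidercode": s.Code, "date": today}
	MgoS.Update("spider_downlog", query, update, true, false)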
@@ -479,28 +488,29 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 	return errs
 }
 
-func (s *Spider) ThisSiteData(tmp map[string]interface{}) {
-	defer qu.Catch()
-	href := qu.ObjToString(tmp["href"])
-	url_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(s.TargetChannelUrl), "")
-	href_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(href), "")
-	if url_dn != href_dn {
-		SaveMgoCache <- map[string]interface{}{
-			"site":       s.Name,
-			"channel":    s.Channel,
-			"spidercode": s.Code,
-			"url":        s.TargetChannelUrl,
-			"href":       href,
-			"modifyuser": s.MUserName,
-			"comeintime": time.Now().Unix(),
-		}
-	}
-}
+//站点信息统计
+//func (s *Spider) ThisSiteData(tmp map[string]interface{}) {
+//	defer qu.Catch()
+//	href := qu.ObjToString(tmp["href"])
+//	url_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(s.TargetChannelUrl), "")
+//	href_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(href), "")
+//	if url_dn != href_dn {
+//		SaveMgoCache <- map[string]interface{}{
+//			"site":       s.Name,
+//			"channel":    s.Channel,
+//			"spidercode": s.Code,
+//			"url":        s.TargetChannelUrl,
+//			"href":       href,
+//			"modifyuser": s.MUserName,
+//			"comeintime": time.Now().Unix(),
+//		}
+//	}
+//}
 
 //遍历,开启三级页下载(历史补漏)
 func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 	//qu.Debug("--------------历史下载-----------------")
-	defer mu.Catch()
+	defer qu.Catch()
 	var err interface{}
 	data := map[string]interface{}{}
 	paramdata := p.(map[string]interface{})
@@ -532,7 +542,8 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 		title := qu.ObjToString(paramdata["title"])
 		eTime := time.Now().Unix()
 		sTime := eTime - int64(7*86400)
-		esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
+		//esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
+		esQuery := elc.NewBoolQuery().Must(elc.NewRangeQuery("comeintime").Gte(sTime).Lte(eTime)).Must(elc.NewTermQuery("title.mtitle", title))
 		if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
 			isEsRepeat = true
 		}
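
This hunk, and its twins in DownloadDetailItem and DownloadDetail below, replaces the hand-assembled ES 2.x query string (its filtered wrapper no longer exists in Elasticsearch 7) with a typed query built via gopkg.in/olivere/elastic/v7. A self-contained sketch of the same construction, printing the JSON body the client would send; Es.Count is the project's own wrapper and is assumed to accept an elastic query:

	package main

	import (
		"encoding/json"
		"fmt"
		"time"

		elc "gopkg.in/olivere/elastic/v7"
	)

	func main() {
		eTime := time.Now().Unix()
		sTime := eTime - int64(7*86400) // last seven days, as in the diff
		title := "example announcement title"

		// bool query: comeintime in [sTime, eTime] AND exact term on title.mtitle
		q := elc.NewBoolQuery().
			Must(elc.NewRangeQuery("comeintime").Gte(sTime).Lte(eTime)).
			Must(elc.NewTermQuery("title.mtitle", title))

		src, _ := q.Source()         // query as a generic map
		body, _ := json.Marshal(src) // the JSON that goes to Elasticsearch
		fmt.Println(string(body))
	}

Note one behavioral shift visible in the diff: the old string ran a multi_match phrase query on title, while the new code runs a term query on title.mtitle, i.e. an exact match against what is presumably a keyword sub-field.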
@@ -562,7 +573,7 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 	if publishtime < time.Now().AddDate(-1, 0, 0).Unix() { //一年前数据进行全量bloom redis href判重
 		isExist, _ = util.ExistsBloomRedis("href", tmphref)
 		if isExist {
-			MgoS.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": "bloom_href", "updatetime": time.Now().Unix()}})
+			MgoS.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": "bloom_href", "tmphref": tmphref, "updatetime": time.Now().Unix()}})
 			return
 		}
 	}
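
Both bloom-filter hunks now also record the possibly rewritten tmphref on the skipped row, making bloom-based drops auditable afterwards. util.ExistsBloomRedis itself is not part of this diff; backed by the RedisBloom module it could be as small as this sketch (using github.com/redis/go-redis/v9 — an assumption about the backing store, not the actual spiderutil code, which presumably holds its client globally rather than as a parameter):

	package spiderutil // hypothetical stand-in for the util alias in the diff

	import (
		"context"

		"github.com/redis/go-redis/v9"
	)

	// ExistsBloomRedis asks a RedisBloom filter whether item was seen before.
	// Bloom filters may return false positives (skipping a fresh href) but
	// never false negatives, an acceptable trade-off for dedup.
	func ExistsBloomRedis(rdb *redis.Client, key, item string) (bool, error) {
		return rdb.BFExists(context.Background(), key, item).Result()
	}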
@@ -577,7 +588,6 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 	}
 	//更新spider_listdata中数据下载成功标记(根据链接更新数据state;可能由后续下载成功时更新)
 	MgoS.Update("spider_listdata", map[string]interface{}{"href": href}, map[string]interface{}{"$set": set}, false, true)
-
 	//三级页href替换导致前后href不同,采集成功后将原始href加入全量redis
 	//if tmphref := qu.ObjToString(data["href"]); tmphref != href {
 	//	util.AddBloomRedis("href", href)
@@ -616,7 +626,7 @@ func (s *Spider) HistoricalMendDownloadDetailItem(p interface{}) {
 
 //遍历,开启三级页下载(增量)
 func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
-	defer mu.Catch()
+	defer qu.Catch()
 	var err interface{}
 	data := map[string]interface{}{}
 	paramdata := p.(map[string]interface{})
@@ -648,7 +658,8 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 			title := qu.ObjToString(paramdata["title"])
 			eTime := time.Now().Unix()
 			sTime := eTime - int64(7*86400)
-			esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
+			//esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
+			esQuery := elc.NewBoolQuery().Must(elc.NewRangeQuery("comeintime").Gte(sTime).Lte(eTime)).Must(elc.NewTermQuery("title.mtitle", title))
 			if Es.Count(EsIndex, EsType, esQuery) > 0 { //es中含本title数据,不再采集,更新list表数据状态
 				isEsRepeat = true
 			}
@@ -685,7 +696,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 	if util.Config.Uploadevent == 7410 || publishtime < time.Now().AddDate(-1, 0, 0).Unix() {
 		isExist, _ = util.ExistsBloomRedis("href", tmphref)
 		if isExist {
-			MgoS.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": "bloom_href", "updatetime": time.Now().Unix()}})
+			MgoS.UpdateById("spider_listdata", id, map[string]interface{}{"$set": map[string]interface{}{"state": 1, "exist": "bloom_href", "tmphref": tmphref, "updatetime": time.Now().Unix()}})
 			return
 		}
 	}
@@ -722,7 +733,7 @@ func (s *Spider) DownloadDetailItem(p interface{}, num *int) {
 
 //遍历下载名录
 func (s *Spider) DownloadDetailByNames(p interface{}) {
-	defer mu.Catch()
+	defer qu.Catch()
 	var err interface{}
 	/*
 		if s.Stop {
@@ -758,7 +769,7 @@ func (s *Spider) DownloadDetailByNames(p interface{}) {
 
 //下载解析详情页
 func (s *Spider) DownloadDetailPage(param map[string]interface{}, data map[string]interface{}) (map[string]interface{}, interface{}) {
-	defer mu.Catch()
+	defer qu.Catch()
 	s.LastHeartbeat = time.Now().Unix()
 	util.TimeSleepFunc((time.Duration(s.SleepBase+util.GetRandMath(s.SleepRand)))*time.Millisecond, TimeSleepChan)
 	tab := s.L.NewTable()
@@ -954,7 +965,8 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 						title := qu.ObjToString(tmp["title"])
 						eTime := time.Now().Unix()
 						sTime := eTime - int64(7*86400)
-						esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
+						//esQuery := `{"query": {"filtered": {"filter": {"bool": {"must": [{"range": {"comeintime": {"gte": "` + fmt.Sprint(sTime) + `","lte": "` + fmt.Sprint(eTime) + `"}}}]}},"query": {"bool": {"must": [{"multi_match": {"query": "` + title + `","type": "phrase","fields": ["title"]}}]}}}}}`
+						esQuery := elc.NewBoolQuery().Must(elc.NewRangeQuery("comeintime").Gte(sTime).Lte(eTime)).Must(elc.NewTermQuery("title.mtitle", title))
 						count := Es.Count(EsIndex, EsType, esQuery)
 						if count > 0 { //es中含本title数据,不再采集,更新list表数据状态
 							util.AddBloomRedis("href", href)
@@ -1046,6 +1058,7 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 								"state":      1,
 								"updatetime": time.Now().Unix(),
 								"exist":      "bloom_href",
+								"tmphref":    tmphref,
 							}}
 							update = append(update, query)
 							update = append(update, set)

+ 191 - 160
src/spider/store.go

@@ -1,15 +1,10 @@
 package spider
 
 import (
-	"encoding/json"
 	"fmt"
-	"github.com/cron"
 	"github.com/donnie4w/go-logger/logger"
-	"github.com/yuin/gopher-lua"
 	mu "mfw/util"
 	"qfw/util"
-
-	//"qfw/util/redis"
 	lu "spiderutil"
 	"strings"
 	"time"
@@ -35,7 +30,7 @@ type LogMap struct {
 
 //数据存储批量
 func StoreBlak(mode, event int, c, coverAttr string, data []map[string]interface{}) {
-	defer mu.Catch()
+	defer util.Catch()
 	for _, v := range data {
 		if t, err := time.ParseInLocation(util.Date_Full_Layout, util.ObjToString(v["publishtime"]), time.Local); err == nil {
 			v["publishtime"] = t.Unix()
@@ -56,7 +51,7 @@ func StoreBlak(mode, event int, c, coverAttr string, data []map[string]interface
 
 //数据存储
 func Store(mode, event int, c, coverAttr string, data map[string]interface{}, flag bool) {
-	defer mu.Catch()
+	defer util.Catch()
 	if t, err := time.ParseInLocation(util.Date_Full_Layout, util.ObjToString(data["publishtime"]), time.Local); err == nil {
 		data["publishtime"] = t.Unix()
 	}
@@ -74,11 +69,11 @@ func Store(mode, event int, c, coverAttr string, data map[string]interface{}, fl
 				logger.Warn(c, mode, "保存失败", data)
 			}
 		}
-		href := fmt.Sprint(data["href"])
-		if len(href) > 5 && flag { //有效数据
-			hashHref := lu.HexText(href)
-			lu.RedisClusterSet(hashHref, "", -1)
-		}
+		//href := fmt.Sprint(data["href"])
+		//if len(href) > 5 && flag { //有效数据
+		//	hashHref := lu.HexText(href)
+		//	lu.RedisClusterSet(hashHref, "", -1)
+		//}
 	} else if mode == 2 {
 		data["T"] = c
 		SaveObj(event, coverAttr, data, flag)
@@ -91,7 +86,7 @@ func Store(mode, event int, c, coverAttr string, data map[string]interface{}, fl
 
 //保存验证错误日志
 func saveVerificationLog(code, name, url, content string) {
-	defer mu.Catch()
+	defer util.Catch()
 	data := map[string]interface{}{}
 	data["code"] = code
 	data["name"] = name
@@ -104,7 +99,7 @@ func saveVerificationLog(code, name, url, content string) {
 
 //查找信息是否存在
 func findHasExit(c, q string) bool {
-	defer mu.Catch()
+	defer util.Catch()
 	ret, _ := MgoS.FindOne(c, q)
 	if ret != nil && len(*ret) > 0 {
 		return true
@@ -117,7 +112,7 @@ func findHasExit(c, q string) bool {
 var spider_ldtime = map[string]map[string]interface{}{}
 
 func GetLastPubtime(code string) int64 {
-	defer mu.Catch()
+	defer util.Catch()
 	if len(spider_ldtime) < 1 {
 		list, _ := MgoS.Find("spider_ldtime", nil, nil, nil, false, -1, -1)
 		for _, v := range *list {
@@ -134,7 +129,7 @@ func GetLastPubtime(code string) int64 {
 
 //获取最后执行时间
 func GetLastExectime(code string) int64 {
-	defer mu.Catch()
+	defer util.Catch()
 	if len(spider_ldtime) < 1 {
 		list, _ := MgoS.Find("spider_ldtime", nil, nil, nil, false, -1, -1)
 		for _, v := range *list {
@@ -153,7 +148,7 @@ func GetLastExectime(code string) int64 {
 var spider_downlog = map[string]map[string]interface{}{}
 
 func GetDownloadLast(code, date string) map[string]interface{} {
-	defer mu.Catch()
+	defer util.Catch()
 	if len(spider_downlog) < 1 {
 		list, _ := MgoS.Find("spider_downlog", map[string]interface{}{"date": date}, nil, nil, false, -1, -1)
 		for _, v := range *list {
@@ -193,51 +188,8 @@ func GcCount() {
 	lu.TimeAfterFunc(30*time.Minute, GcCount, TimeChan)
 }
 
-//保存错误数据信息,重新下载
-func SaveErrorData(modifyuser string, pd map[string]interface{}, err interface{}) {
-	defer util.Catch()
-	if href := util.ObjToString(pd["href"]); href != "" {
-		delete(pd, "_id")
-		pd["state"] = 0
-		pd["from"] = "lua"
-		pd["comeintime"] = time.Now().Unix()
-		pd["modifyuser"] = modifyuser
-		if luaErr, ok := err.(*lua.ApiError); ok && luaErr != nil {
-			pd["error"] = luaErr.Object.String()
-		}
-		if publishtime, ok := pd["publishtime"].(string); ok {
-			pd["publishtime"] = lu.ParseDate2Int64(publishtime)
-		}
-		if jsondata := util.ObjToString(pd["jsondata"]); jsondata != "" && jsondata != "null" {
-			tmp := map[string]interface{}{}
-			json.Unmarshal([]byte(jsondata), &tmp)
-			pd["jsondata"] = tmp
-		}
-		coll := "spider_highlistdata"
-		if lu.Config.Modal == 0 {
-			coll = "spider_listdata"
-		} else if lu.Config.IsHistoryEvent {
-			coll = "spider_historydata"
-		}
-		pd["coll"] = coll
-		//mgu.Save("regatherdata", "spider", "spider", pd)
-		query := map[string]interface{}{
-			"href": href,
-		}
-		set := map[string]interface{}{
-			"$set": pd,
-		}
-		update := []map[string]interface{}{}
-		update = append(update, query)
-		update = append(update, set)
-		UpdataMgoCache <- update
-		//Mgo.Update("regatherdata", "spider", "spider", query, set, true, false)
-	}
-}
-
 //保存modal=1模式采集的列表页信息
 func SaveHighListPageData(tmp map[string]interface{}, hashHref string, num *int) {
-	lu.RedisSet("list", "list_"+hashHref, "", 86400*365*2)
 	tmp["state"] = 0
 	tmp["event"] = lu.Config.Uploadevent
 	tmp["comeintime"] = time.Now().Unix()
@@ -246,6 +198,7 @@ func SaveHighListPageData(tmp map[string]interface{}, hashHref string, num *int)
 	} else {
 		MgoS.Save("spider_highlistdata", tmp)
 	}
+	lu.RedisSet("list", "list_"+hashHref, "", 86400*365*2)
 }
 
 //保存7410、7500、7510、7520、7700采集的列表页信息
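
Moving the RedisSet below the Mongo save in SaveHighListPageData narrows a failure window: previously a crash between the two calls left the href marked as seen in Redis with no stored row, silently dropping that listing forever; now the worst case is a duplicate save attempt, which the dedup layer absorbs.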
@@ -292,43 +245,6 @@ func UpdateHighListDataByCode(code string) {
 	MgoS.Update("spider_highlistdata", query, set, false, true)
 }
 
-//批量更新错误数据
-func UpdateErrDataMgo() {
-	fmt.Println("Update Error Data...")
-	arru := make([][]map[string]interface{}, 50)
-	indexu := 0
-	for {
-		select {
-		case v := <-UpdataMgoCache:
-			arru[indexu] = v
-			indexu++
-			if indexu == 50 {
-				SP <- true
-				go func(arru [][]map[string]interface{}) {
-					defer func() {
-						<-SP
-					}()
-					MgoS.UpSertBulk("regatherdata", arru...)
-				}(arru)
-				arru = make([][]map[string]interface{}, 50)
-				indexu = 0
-			}
-		case <-time.After(1 * time.Minute):
-			if indexu > 0 {
-				SP <- true
-				go func(arru [][]map[string]interface{}) {
-					defer func() {
-						<-SP
-					}()
-					MgoS.UpSertBulk("regatherdata", arru...)
-				}(arru[:indexu])
-				arru = make([][]map[string]interface{}, 50)
-				indexu = 0
-			}
-		}
-	}
-}
-
 //批量更新心跳信息
 func UpdateHeartInfo() {
 	fmt.Println("Update Heart Info...")
@@ -341,11 +257,11 @@ func UpdateHeartInfo() {
 			indexh++
 			if indexh == 200 {
 				SPH <- true
-				go func(heartarr [][]map[string]interface{}) {
+				go func(tmp [][]map[string]interface{}) {
 					defer func() {
 						<-SPH
 					}()
-					MgoS.UpSertBulk("spider_heart", heartarr...)
+					MgoS.UpSertBulk("spider_heart", tmp...)
 				}(heartarr)
 				heartarr = make([][]map[string]interface{}, 200)
 				indexh = 0
@@ -353,11 +269,11 @@ func UpdateHeartInfo() {
 		case <-time.After(1 * time.Minute):
 			if indexh > 0 {
 				SPH <- true
-				go func(heartarr [][]map[string]interface{}) {
+				go func(tmp [][]map[string]interface{}) {
 					defer func() {
 						<-SPH
 					}()
-					MgoS.UpSertBulk("spider_heart", heartarr...)
+					MgoS.UpSertBulk("spider_heart", tmp...)
 				}(heartarr[:indexh])
 				heartarr = make([][]map[string]interface{}, 200)
 				indexh = 0
@@ -366,81 +282,196 @@ func UpdateHeartInfo() {
 	}
 }
 
-//保存爬虫采集非本站点数据
-func SaveOtherSiteData() {
-	fmt.Println("Save Other Site Data...")
+//批量保存data_bak
+func SaveDataBak() {
+	fmt.Println("Save DataBak...")
 	savearr := make([]map[string]interface{}, 200)
-	indexh := 0
+	indexdb := 0
 	for {
 		select {
-		case v := <-SaveMgoCache:
-			savearr[indexh] = v
-			indexh++
-			if indexh == 200 {
-				SPS <- true
-				go func(savearr []map[string]interface{}) {
+		case v := <-DataBakSaveCache:
+			savearr[indexdb] = v
+			indexdb++
+			if indexdb == 200 {
+				DB_CH <- true
+				go func(tmp []map[string]interface{}) {
 					defer func() {
-						<-SPS
+						<-DB_CH
 					}()
-					MgoS.SaveBulk("spider_othersite", savearr...)
+					MgoS.SaveBulk("data_bak", tmp...)
 				}(savearr)
 				savearr = make([]map[string]interface{}, 200)
-				indexh = 0
+				indexdb = 0
 			}
-		case <-time.After(1 * time.Minute):
-			if indexh > 0 {
-				SPS <- true
-				go func(savearr []map[string]interface{}) {
+		case <-time.After(30 * time.Second):
+			if indexdb > 0 {
+				DB_CH <- true
+				go func(tmp []map[string]interface{}) {
 					defer func() {
-						<-SPS
+						<-DB_CH
 					}()
-					MgoS.SaveBulk("spider_othersite", savearr...)
-				}(savearr[:indexh])
+					MgoS.SaveBulk("data_bak", tmp...)
+				}(savearr[:indexdb])
 				savearr = make([]map[string]interface{}, 200)
-				indexh = 0
+				indexdb = 0
 			}
 		}
 	}
 }
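
SaveDataBak reuses this file's established batching pattern (compare UpdateHeartInfo above, whose closure parameter is renamed to tmp so it no longer shadows heartarr): a buffered channel fills a fixed-size slice that is flushed when full (200 documents) or after 30 seconds of inactivity via time.After, while DB_CH acts as a counting semaphore capping concurrent SaveBulk calls at five. The idle-flush path hands savearr[:indexdb] to the goroutine so uninitialized slots are never written. A minimal sketch of both ends of the pipeline, assuming the commented-out wiring in main.go and msgservice.go is eventually enabled:

	go spider.SaveDataBak()  // once, at startup (main.go)

	// producer side (SaveObj in msgservice.go): enqueue instead of MgoS.Save
	DataBakSaveCache <- data // blocks only when 1000 items are already queued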
 
+//批量更新错误数据
+//func UpdateErrDataMgo() {
+//	fmt.Println("Update Error Data...")
+//	arru := make([][]map[string]interface{}, 50)
+//	indexu := 0
+//	for {
+//		select {
+//		case v := <-UpdataMgoCache:
+//			arru[indexu] = v
+//			indexu++
+//			if indexu == 50 {
+//				SP <- true
+//				go func(arru [][]map[string]interface{}) {
+//					defer func() {
+//						<-SP
+//					}()
+//					MgoS.UpSertBulk("regatherdata", arru...)
+//				}(arru)
+//				arru = make([][]map[string]interface{}, 50)
+//				indexu = 0
+//			}
+//		case <-time.After(1 * time.Minute):
+//			if indexu > 0 {
+//				SP <- true
+//				go func(arru [][]map[string]interface{}) {
+//					defer func() {
+//						<-SP
+//					}()
+//					MgoS.UpSertBulk("regatherdata", arru...)
+//				}(arru[:indexu])
+//				arru = make([][]map[string]interface{}, 50)
+//				indexu = 0
+//			}
+//		}
+//	}
+//}
+//保存错误数据信息,重新下载
+//func SaveErrorData(modifyuser string, pd map[string]interface{}, err interface{}) {
+//	defer util.Catch()
+//	if href := util.ObjToString(pd["href"]); href != "" {
+//		delete(pd, "_id")
+//		pd["state"] = 0
+//		pd["from"] = "lua"
+//		pd["comeintime"] = time.Now().Unix()
+//		pd["modifyuser"] = modifyuser
+//		if luaErr, ok := err.(*lua.ApiError); ok && luaErr != nil {
+//			pd["error"] = luaErr.Object.String()
+//		}
+//		if publishtime, ok := pd["publishtime"].(string); ok {
+//			pd["publishtime"] = lu.ParseDate2Int64(publishtime)
+//		}
+//		if jsondata := util.ObjToString(pd["jsondata"]); jsondata != "" && jsondata != "null" {
+//			tmp := map[string]interface{}{}
+//			json.Unmarshal([]byte(jsondata), &tmp)
+//			pd["jsondata"] = tmp
+//		}
+//		coll := "spider_highlistdata"
+//		if lu.Config.Modal == 0 {
+//			coll = "spider_listdata"
+//		} else if lu.Config.IsHistoryEvent {
+//			coll = "spider_historydata"
+//		}
+//		pd["coll"] = coll
+//		//mgu.Save("regatherdata", "spider", "spider", pd)
+//		query := map[string]interface{}{
+//			"href": href,
+//		}
+//		set := map[string]interface{}{
+//			"$set": pd,
+//		}
+//		update := []map[string]interface{}{}
+//		update = append(update, query)
+//		update = append(update, set)
+//		UpdataMgoCache <- update
+//		//Mgo.Update("regatherdata", "spider", "spider", query, set, true, false)
+//	}
+//}
+
+//保存爬虫采集非本站点数据
+//func SaveOtherSiteData() {
+//	fmt.Println("Save Other Site Data...")
+//	savearr := make([]map[string]interface{}, 200)
+//	indexh := 0
+//	for {
+//		select {
+//		case v := <-SaveMgoCache:
+//			savearr[indexh] = v
+//			indexh++
+//			if indexh == 200 {
+//				SPS <- true
+//				go func(savearr []map[string]interface{}) {
+//					defer func() {
+//						<-SPS
+//					}()
+//					MgoS.SaveBulk("spider_othersite", savearr...)
+//				}(savearr)
+//				savearr = make([]map[string]interface{}, 200)
+//				indexh = 0
+//			}
+//		case <-time.After(1 * time.Minute):
+//			if indexh > 0 {
+//				SPS <- true
+//				go func(savearr []map[string]interface{}) {
+//					defer func() {
+//						<-SPS
+//					}()
+//					MgoS.SaveBulk("spider_othersite", savearr...)
+//				}(savearr[:indexh])
+//				savearr = make([]map[string]interface{}, 200)
+//				indexh = 0
+//			}
+//		}
+//	}
+//}
+
 //定时任务
-func TimeTask() {
-	now := time.Now()
-	Today = util.FormatDate(&now, util.Date_Short_Layout) //初始化日期
-	cr := cron.New()
-	cr.Start()
-	cr.AddFunc("0 30 0 * * ?", UpdateSpiderFlow) //每天零时提交统计
-}
+//func TimeTask() {
+//	now := time.Now()
+//	Today = util.FormatDate(&now, util.Date_Short_Layout) //初始化日期
+//	cr := cron.New()
+//	cr.Start()
+//	cr.AddFunc("0 30 0 * * ?", UpdateSpiderFlow) //每天零时提交统计
+//}
 
 //更新流量信息
-func UpdateSpiderFlow() {
-	defer util.Catch()
-	logger.Info("统计流量信息开始...", Today)
-	arr := []map[string]interface{}{}
-	SpiderFlowMap.Range(func(key, temp interface{}) bool {
-		date := strings.Split(key.(string), "+")
-		if len(date) == 2 && date[0] == Today { //统计非当天的
-			if sfMap, ok := temp.(*SpiderFlow); ok {
-				arr = append(arr, map[string]interface{}{
-					"spidercode": date[1],
-					"date":       date[0],
-					"flow":       sfMap.Flow,
-					"site":       sfMap.Site,
-					"channel":    sfMap.Channel,
-					"modifyuser": sfMap.ModifyUser,
-					"comeintime": time.Now().Unix(),
-					"event":      lu.Config.Uploadevent,
-				})
-				SpiderFlowMap.Delete(key) //统计完成后删除非当天数据
-			}
-		}
-		return true
-	})
-	if len(arr) > 0 {
-		MgoS.SaveBulk("spider_flow", arr...)
-		arr = []map[string]interface{}{}
-	}
-	now := time.Now()
-	Today = util.FormatDate(&now, util.Date_Short_Layout)
-	logger.Info("统计流量信息完成...", Today)
-}
+//func UpdateSpiderFlow() {
+//	defer util.Catch()
+//	logger.Info("统计流量信息开始...", Today)
+//	arr := []map[string]interface{}{}
+//	SpiderFlowMap.Range(func(key, temp interface{}) bool {
+//		date := strings.Split(key.(string), "+")
+//		if len(date) == 2 && date[0] == Today { //统计非当天的
+//			if sfMap, ok := temp.(*SpiderFlow); ok {
+//				arr = append(arr, map[string]interface{}{
+//					"spidercode": date[1],
+//					"date":       date[0],
+//					"flow":       sfMap.Flow,
+//					"site":       sfMap.Site,
+//					"channel":    sfMap.Channel,
+//					"modifyuser": sfMap.ModifyUser,
+//					"comeintime": time.Now().Unix(),
+//					"event":      lu.Config.Uploadevent,
+//				})
+//				SpiderFlowMap.Delete(key) //统计完成后删除非当天数据
+//			}
+//		}
+//		return true
+//	})
+//	if len(arr) > 0 {
+//		MgoS.SaveBulk("spider_flow", arr...)
+//		arr = []map[string]interface{}{}
+//	}
+//	now := time.Now()
+//	Today = util.FormatDate(&now, util.Date_Short_Layout)
+//	logger.Info("统计流量信息完成...", Today)
+//}