Browse Source

爬虫补采模块修改

mxs cách đây 1 năm
mục cha
commit
088e335f90
3 tập tin đã thay đổi với 11 bổ sung8 xóa
  1. 2 2
      src/main.go
  2. 7 6
      src/spider/download.go
  3. 2 0
      src/spider/supplement.go

+ 2 - 2
src/main.go

@@ -60,8 +60,6 @@ func init() {
 		qu.ObjToString(Config.OssInfo["ossAccessKeySecret"]),
 		qu.ObjToString(Config.OssInfo["ossBucketName"]),
 	)
-	//数据补采配置
-	spider.InitSupplement()
 	//xweb框架配置
 	logger.SetConsole(false)
 	if Config.LogLevel <= 1 {
@@ -112,6 +110,8 @@ func InitMgo() {
 }
 
 func main() {
+	//数据补采配置
+	spider.InitSupplement()
 	//临时统计总的线程数
 	go spider.AllThreadLog()
 	//定时清理日志

+ 7 - 6
src/spider/download.go

@@ -1,4 +1,5 @@
-/**
+/*
+*
 GO代码相对简单,
 重点处理下载工具,爬虫启动,监控等。
 逻辑处理交给LUA处理
@@ -26,7 +27,7 @@ func init() {
 	regImg, _ = regexp.Compile(regImgStr)
 }
 
-//下载页面,发送消息,等待下载
+// 下载页面,发送消息,等待下载
 func Download(retLen *int64, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
 	defer mu.Catch()
 	msgid := mu.UUID(8)
@@ -83,7 +84,7 @@ func Download(retLen *int64, downloaderid, url, method string, head map[string]i
 	}
 }
 
-//下载页面,发送消息,等待下载
+// 下载页面,发送消息,等待下载
 func DownloadAdv(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie, map[string]interface{}) {
 	defer mu.Catch()
 	msgid := mu.UUID(8)
@@ -200,7 +201,7 @@ func DownloadAdvNew(downloaderid, url, method string, reqparam, head map[string]
 	}
 }
 
-//下载附件
+// 下载附件
 func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
 	defer mu.Catch()
 	msgid := mu.UUID(8)
@@ -384,7 +385,7 @@ func DownloadByChrome(code, downloaderid string, chrometask lu.ChromeTask, timeo
 	return
 }
 
-//下载点是否可用
+// 下载点是否可用
 func isAvailable(code string) bool {
 	b := false
 	for k, _ := range Alldownloader {
@@ -395,7 +396,7 @@ func isAvailable(code string) bool {
 	return b
 }
 
-//下载点是否可用
+// 下载点是否可用
 func isAvailableFile(code string) bool {
 	b := false
 	for k, _ := range AlldownloaderFile {

+ 2 - 0
src/spider/supplement.go

@@ -3,6 +3,7 @@ package spider
 import (
 	"flag"
 	"github.com/cron"
+	"github.com/donnie4w/go-logger/logger"
 	"os"
 )
 
@@ -26,6 +27,7 @@ func InitSupplement() {
 	flag.IntVar(&Supplement_Day, "d", 1, "补采几天的数据")
 	flag.IntVar(&Supplement_MaxErrorTimes, "e", 5, "连续几页异常采集中断")
 	flag.Parse()
+	logger.Debug("Supplement:", "-s=", Supplement, "-c=", Supplement_Cycle, "-d=", Supplement_Day, "-e=", Supplement_MaxErrorTimes)
 	if Supplement {
 		Supplement_Publishtime = GetTime(-Supplement_Day)
 		if Supplement_Cycle == "day" {