Sfoglia il codice sorgente

爬虫补采模块保存字段修改

mxs 1 anno fa
parent
commit
0b645250e7
2 file modificati con 21 aggiunte e 8 eliminazioni
  1. 3 2
      src/spider/spider.go
  2. 18 6
      src/spider/supplement.go

+ 3 - 2
src/spider/spider.go

@@ -957,7 +957,8 @@ func (s *Spider) SupplementDownListPageItem() (errs interface{}) {
 		Spidercode: s.Code,
 		Modifyuser: s.MUserName,
 		Finish:     finish,
-		Comeintime: time.Now().Unix(), //提前赋值(无法运行完毕的爬虫不会有后期赋值)
+		StartTime:  time.Now().Unix(), //提前赋值(无法运行完毕的爬虫不会有后期赋值)
+		Stype:      Supplement_Cycle,
 	}
 	for {
 		if errtimes >= Supplement_MaxErrorTimes || publishtimeAllZeroTimes > Supplement_Publishtime_ZeroTimes { //连续异常次数超过10次,爬虫不再翻页
@@ -1075,7 +1076,7 @@ func (s *Spider) SupplementDownListPageItem() (errs interface{}) {
 	ss.EndPage = start
 	ss.DownNum = downloadAllNum
 	ss.RepeatNum = repeatAllNum
-	ss.Comeintime = time.Now().Unix()
+	ss.EndTime = time.Now().Unix()
 	return errs
 }
 

+ 18 - 6
src/spider/supplement.go

@@ -7,6 +7,7 @@ import (
 	"gopkg.in/mgo.v2/bson"
 	"os"
 	qu "qfw/util"
+	sp "spiderutil"
 	"sync"
 	"time"
 )
@@ -38,10 +39,13 @@ type SupplementSpider struct {
 	DownNum            int    `bson:"downnum"`
 	RepeatNum          int    `bson:"repeatnum"`
 	Comeintime         int64  `bson:"comeintime"`
+	StartTime          int64  `bson:"starttime"`
+	EndTime            int64  `bson:"endtime"`
 	Success            int    `bson:"success"`
 	Failed             int    `bson:"failed"`
 	PublishtimeZeroNum int    `bson:"ptimezeronum"`
 	EffectiveNum       int    `bson:"effectivenum"`
+	Stype              string `bson:"stype"`
 }
 
 func InitSupplement() {
@@ -59,8 +63,8 @@ func InitSupplement() {
 			Supplement_EndCron = "0 0 9 ? * *"
 			//InitSpider()
 		} else if Supplement_Cycle == "week" {
-			Supplement_StartCron = "0 0 0 ? * SAT"
-			Supplement_EndCron = "0 0 0 ? * MON"
+			Supplement_StartCron = "0 0 22 ? * SAT"
+			Supplement_EndCron = "0 0 9 ? * MON"
 		}
 		c := cron.New()
 		c.Start()
@@ -83,19 +87,24 @@ func SupplementEnd() {
 
 func SupplementDataCount() {
 	logger.Info("补采数据统计开始...")
-	timeEnd := GetStrTime(-1)
-	timeStart := GetStrTime(-3)
 	sess := MgoS.GetMgoConn()
 	defer MgoS.DestoryMongoConn(sess)
 	ch := make(chan bool, 5)
 	wg := &sync.WaitGroup{}
 	lock := &sync.Mutex{}
+	ptimeEnd := GetStrTime(-1)
+	ptimeStart := GetStrTime(-3)
 	startTime := time.Now().Unix() - 3600*12
+	if Supplement_Cycle == "week" {
+		ptimeStart = GetStrTime(-9)
+		startTime = time.Now().Unix() - 3600*12 - 86400*2
+	}
+	logger.Info("search ptime", ptimeStart, ptimeEnd)
 	query := map[string]interface{}{
 		"comeintime": map[string]interface{}{
 			"$gte": startTime,
 		},
-		"event": 7001,
+		"event": sp.Config.Uploadevent,
 	}
 	field := map[string]interface{}{
 		"state":       1,
@@ -127,7 +136,7 @@ func SupplementDataCount() {
 				}
 				if publishtime == "0" || publishtime == "" {
 					ss.PublishtimeZeroNum++
-				} else if publishtime >= timeStart && publishtime < timeEnd {
+				} else if publishtime >= ptimeStart && publishtime < ptimeEnd {
 					ss.EffectiveNum++
 				}
 			}
@@ -160,6 +169,8 @@ func SupplementDataCount() {
 				}
 				if publishtime == "0" || publishtime == "" {
 					ss.PublishtimeZeroNum++
+				} else if publishtime >= ptimeStart && publishtime < ptimeEnd {
+					ss.EffectiveNum++
 				}
 			}
 			lock.Unlock()
@@ -173,6 +184,7 @@ func SupplementDataCount() {
 func SupplementDataSave() {
 	var saveArr []map[string]interface{}
 	for code, ss := range Supplement_SaveData {
+		ss.Comeintime = time.Now().Unix()
 		bt, err := bson.Marshal(ss)
 		if err != nil {
 			logger.Info("supplement marshal err:", code)