mxs 8 месяцев назад
Родитель
Коммит
5e23e47584
3 изменённых файла: 18 добавлений и 12 удалений
  1. 11 10
      backend/vm/check.go
  2. 5 0
      backend/vm/vm.go
  3. 2 2
      server.go

+ 11 - 10
backend/vm/check.go

@@ -123,7 +123,7 @@ T:
 		r, _ := el.Value.(*be.ResultItem)
 		ret.Title = r.Title != ""
 		ret.PublishUnit = r.PublishUnit != ""
-		ret.PublishTime = r.PublishTime != ""
+		ret.PublishTime = r.PublishTime != "" && Reg_Date.MatchString(r.PublishTime)
 		ret.Content = r.Content != ""
 		ret.Attaches = len(r.AttachLinks) > 0
 	}
@@ -166,8 +166,9 @@ T:
 //		return ret, errors.New("初始化列表页失败")
 //	}
 //	no := 1
+//	ret.ListTrunPage = true
 //T:
-//	for j := 0; j < 2; j++ { //最多检查2页
+//	for j := 0; j < VERIVY_MAX_TRUN_PAGE && j < int(sc.MaxPages); j++ { //最多检查2页
 //		qu.Debug("开始检查第" + fmt.Sprint(j+1) + "页...")
 //		listResult := make(be.ResultItems, 0)
 //		err := chromedp.Run(ctx, chromedp.Tasks{
@@ -202,17 +203,16 @@ T:
 //		qu.Debug("列表采集条数结果:", verifyResult.Len())
 //		//6.翻页
 //		if verifyResult.Len() > 0 {
-//			if sc.MaxPages == 1 { //最大页为1,不校验翻页
-//				ret.ListTrunPage = true
-//				break
-//			} else if sc.MaxPages > 1 { // && !ret.ListTrunPage {
+//			if sc.MaxPages > 1 && j < VERIVY_MAX_TRUN_PAGE-1 && j < int(sc.MaxPages)-1 { //&& !ret.ListTrunPage {
 //				if err = trunPage(sc, sc.ListTurnDelayTime, ctx); err != nil { //翻页失败
-//					qu.Debug("翻页失败:", err)
+//					qu.Debug("第" + fmt.Sprint(j+1) + "页翻页失败")
+//					ret.ListTrunPage = false
 //					break T
-//				} else {
-//					ret.ListTrunPage = true
 //				}
 //			}
+//		} else {
+//			ret.ListTrunPage = false
+//			break T
 //		}
 //	}
 //	//检查
@@ -220,7 +220,8 @@ T:
 //		r, _ := el.Value.(*be.ResultItem)
 //		ret.Title = r.Title != ""
 //		qu.Debug("Check Title:", ret.Title, r.Title, r.ListTitle)
-//		ret.PublishTime = r.PublishTime != ""
+//
+//		ret.PublishTime = r.PublishTime != "" && Reg_Date.MatchString(r.PublishTime)
 //		qu.Debug("Check PublishTime:", ret.PublishTime, r.PublishTime, r.ListPubTime)
 //	}
 //	if ret.ListItems {

+ 5 - 0
backend/vm/vm.go

@@ -11,6 +11,7 @@ import (
 	"math/rand"
 	"net/http"
 	"os"
+	"regexp"
 	be "spider_creator/backend"
 	"strings"
 	"text/template"
@@ -26,6 +27,10 @@ const (
 	VERIVY_MAX_TRUN_PAGE = 3
 )
 
+var (
+	Reg_Date = regexp.MustCompile(`\d`)
+)
+
 type (
 	//单一任务
 	VM struct {

+ 2 - 2
server.go

@@ -12,9 +12,9 @@ import (
 	"time"
 )
 
-const HREF = "http://127.0.0.1:8091/%s" //线下测试环境
+//const HREF = "http://127.0.0.1:8091/%s" //线下测试环境
 
-//const HREF = "http://visualizeld.spdata.jianyu360.com/%s" //正式库
+const HREF = "http://visualizeld.spdata.jianyu360.com/%s" //正式库
 //const HREF = "http://visualize.spdata.jianyu360.com/%s" //临时库
 
 type Result struct {