ソースを参照

新增工具类;爬虫校验

mxs 1 年前
コミット
50431cad16
4 ファイル変更、51 行追加、27 行削除
  1. 1 0
      src/front/front.go
  2. 14 4
      src/front/spider.go
  3. 17 7
      src/spider/script.go
  4. 19 16
      src/util/util.go

+ 1 - 0
src/front/front.go

@@ -560,6 +560,7 @@ func (f *Front) ImportLua() {
 					cells := v.Cells
 					if cells[1].Value != "" {
 						code := cells[1].Value
+						code = u.SymbolReg.ReplaceAllString(code, "")
 						query := map[string]interface{}{"code": cells[1].Value}
 						rs, _ := u.MgoEB.FindOne("import", query)
 						if len(*rs) > 0 {

+ 14 - 4
src/front/spider.go

@@ -694,9 +694,15 @@ func LuaTextCheck(infoformat int, param map[string]interface{}, param_list_chrom
 		if param, ok := param["param_common"].([]interface{}); ok && len(param) >= 3 {
 			spidercode := qu.ObjToString(param[0])
 			site := qu.ObjToString(param[1])
-			checkText := fmt.Sprintf(`item["spidercode"]="%s";item["site"]="%s"`, spidercode, site)
-			if strings.Contains(list, `item["spidercode"]`) && !strings.Contains(list, checkText) {
-				errmsg += `item["spidercode"]、item["site"]的值与模板不一致;`
+			channel := qu.ObjToString(param[2])
+			if !strings.Contains(list, fmt.Sprintf(u.CheckText_Code, spidercode)) {
+				errmsg += `item["spidercode"]的值与模板不一致;`
+			}
+			if !strings.Contains(list, fmt.Sprintf(u.CheckText_Site, site)) {
+				errmsg += `item["site"]的值与模板不一致;`
+			}
+			if !strings.Contains(list, fmt.Sprintf(u.CheckText_Channel, channel)) {
+				warnmsg += `item["channel"]的值与模板不一致;`
 			}
 		}
 		//校验列表页area、city、distric
@@ -1417,7 +1423,11 @@ func (f *Front) Assort() {
 		//下架爬虫
 		//lua, _ := u.MgoE.FindOne("luaconfig", query)
 		lua, _ := u.MgoEB.FindOne("luaconfig", query)
-		upresult, err := spider.UpdateSpiderByCodeState(code, "6", qu.IntAll((*lua)["event"]))
+		event := qu.IntAll((*lua)["event"])
+		if (*lua)["downevent"] != nil { //爬虫开发修改爬虫节点,审核人员分类爬虫时,原来爬虫所在节点下架
+			event = qu.IntAll((*lua)["downevent"])
+		}
+		upresult, err := spider.UpdateSpiderByCodeState(code, "6", event)
 		qu.Debug("下架爬虫:", code, upresult, err)
 		if upresult && err == nil {
 			//更新爬虫状态

+ 17 - 7
src/spider/script.go

@@ -1,4 +1,5 @@
-/**
+/*
+*
 脚本加载+调用 封装,
 前期走文件系统加载
 后期走数据库配置,
@@ -38,7 +39,7 @@ import (
 	"github.com/yuin/gopher-lua"
 )
 
-//脚本
+// 脚本
 type Script struct {
 	SCode, ScriptFile string
 	Encoding          string
@@ -51,7 +52,7 @@ type Script struct {
 	Test_goreqcon     int //go发起次数(正文)
 }
 
-//加载文件
+// 加载文件
 func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ...string) {
 	s.ScriptFile = script
 	options := lua.Options{
@@ -526,6 +527,17 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		S.Push(lua.LString(result))
 		return 1
 	}))
+	//GB2312字符集解码
+	s.L.SetGlobal("decodeGB2312", s.L.NewFunction(func(S *lua.LState) int {
+		text := S.ToString(-1)
+		result := ""
+		decodedString, _, err := transform.String(simplifiedchinese.GB18030.NewDecoder(), text)
+		if err == nil {
+			result = decodedString
+		}
+		S.Push(lua.LString(result))
+		return 1
+	}))
 	//aes cbc模式加密
 	s.L.SetGlobal("aesEncryptCBC", s.L.NewFunction(func(S *lua.LState) int {
 		origData := S.ToString(-3)
@@ -930,7 +942,7 @@ func (s *Script) Reload() {
 	s.LoadScript(&site, "", s.ScriptFile)
 }
 
-//unicode转码
+// unicode转码
 func transUnic(str string) string {
 	buf := bytes.NewBuffer(nil)
 	i, j := 0, len(str)
@@ -957,12 +969,11 @@ func transUnic(str string) string {
 	return buf.String()
 }
 
-//取得变量
+// 取得变量
 func (s *Script) GetVar(key string) string {
 	return s.L.GetGlobal(key).String()
 }
 
-//
 func (s *Script) GetIntVar(key string) int {
 	lv := s.L.GetGlobal(key)
 	if v, ok := lv.(lua.LNumber); ok {
@@ -971,7 +982,6 @@ func (s *Script) GetIntVar(key string) int {
 	return -1
 }
 
-//
 func (s *Script) GetBoolVar(key string) bool {
 	lv := s.L.GetGlobal(key)
 	if v, ok := lv.(lua.LBool); ok {

+ 19 - 16
src/util/util.go

@@ -17,22 +17,25 @@ import (
 const Role_Admin, Role_Examine, Role_Dev = 3, 2, 1 //管理员,审核员,开发员
 var (
 	//MgoE            *mgo.MongodbSim //编辑器87
-	MgoEB            *mgo.MongodbSim //编辑器163
-	MgoS             *mgo.MongodbSim
-	Province         map[string][]string
-	City             map[string][]string
-	DomainNameReg    = regexp.MustCompile(`(http|https)[::]+`)
-	DownLoadReg      = regexp.MustCompile(`download\(.*?\)`)
-	CodeTypeReg      = regexp.MustCompile(`(utf8|utf-8|gbk)`)
-	TitleFilterReg1  = regexp.MustCompile(`[\p{Han}]`)
-	TitleFilterReg2  = regexp.MustCompile(`((上|下)一(页|篇)|阅读次数)`)
-	DetailFilterReg1 = regexp.MustCompile(`((上|下)一(页|篇)|阅读次数|浏览次数|扫一扫|分享|区块链存证)`)
-	Area             []string //省份
-	DomainReg        = regexp.MustCompile(`(?://).+?(?:[::/])`)
-	SymbolReg        = regexp.MustCompile("[,,\\s\u3000\u2003\u00a0]+")
-	ReplaceReg       = regexp.MustCompile(`[]::/]+`)
-	CheckText        = `item["spidercode"]="%s";item["site"]="%s";item["channel"]="%s"`
-	JsonDataMap      = map[string]bool{ //jsondata
+	MgoEB             *mgo.MongodbSim //编辑器163
+	MgoS              *mgo.MongodbSim
+	Province          map[string][]string
+	City              map[string][]string
+	DomainNameReg     = regexp.MustCompile(`(http|https)[::]+`)
+	DownLoadReg       = regexp.MustCompile(`download\(.*?\)`)
+	CodeTypeReg       = regexp.MustCompile(`(utf8|utf-8|gbk)`)
+	TitleFilterReg1   = regexp.MustCompile(`[\p{Han}]`)
+	TitleFilterReg2   = regexp.MustCompile(`((上|下)一(页|篇)|阅读次数)`)
+	DetailFilterReg1  = regexp.MustCompile(`((上|下)一(页|篇)|阅读次数|浏览次数|扫一扫|分享|区块链存证)`)
+	Area              []string //省份
+	DomainReg         = regexp.MustCompile(`(?://).+?(?:[::/])`)
+	SymbolReg         = regexp.MustCompile("[,,\\s\u3000\u2003\u00a0]+")
+	ReplaceReg        = regexp.MustCompile(`[]::/]+`)
+	CheckText         = `item["spidercode"]="%s";item["site"]="%s";item["channel"]="%s"`
+	CheckText_Code    = `item["spidercode"]="%s"`
+	CheckText_Site    = `item["site"]="%s"`
+	CheckText_Channel = `item["channel"]="%s"`
+	JsonDataMap       = map[string]bool{ //jsondata
 		"extweight":          true,
 		"projecthref":        true,
 		"sourcewebsite":      true,