
Add logic for the development table and the online spider run table

mxs · 4 months ago · commit cc16abdf34
3 changed files with 15 additions and 9 deletions
  1. src/spider/handler.go  +9 -5
  2. src/spider/script.go  +5 -4
  3. src/spider/spider.go  +1 -0

src/spider/handler.go  +9 -5

@@ -39,7 +39,7 @@ var InitCount int
 var InitAllLuaOver = make(chan bool, 1) //whether all scripts have finished loading
 
 func InitSpider() {
-	scriptMap := getSpiderScriptDB("all") //load spiders and initialize templates
+	scriptMap := getSpiderScriptDB("all", "online") //load spiders and initialize templates
 	scriptMapFile := getSpiderScriptFile(false)
 	for code, v := range scriptMap {
 		LoopListPath.Store(code, v)
@@ -294,7 +294,7 @@ func QueueUpScriptDetail() {
 }
 
 // Get all spider scripts -- from the database
-func getSpiderScriptDB(code string) map[string]map[string]string {
+func getSpiderScriptDB(code, stype string) map[string]map[string]string {
 	scriptSpider := map[string]map[string]string{}
 	query := map[string]interface{}{}
 	if Supplement { //data collection
@@ -312,7 +312,11 @@ func getSpiderScriptDB(code string) map[string]map[string]string {
 		query = map[string]interface{}{"code": code, "event": util.Config.Uploadevent}
 		//query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
 	}
-	listdb, _ := MgoEB.Find("luaconfig", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
+	coll := "luaconfig"
+	if stype == "online" {
+		coll = "luaconfig_online"
+	}
+	listdb, _ := MgoEB.Find(coll, query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
 	//temporary: historical attachments
 	//listdb, _ := MgoEB.Find("luaconfig_test", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
 
@@ -457,7 +461,7 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 		up = true
 		err = nil
 	} else if state == "-1" { //re-crawl: update the online spider
-		scriptMap := getSpiderScriptDB(code)
+		scriptMap := getSpiderScriptDB(code, "init")
 		logger.Info("Updating online script; does it already exist in the DB:", code, len(scriptMap) > 0, scriptMap[code] != nil)
 		if util.Config.Working == 1 { //queued mode
 			for _, v := range scriptMap {
@@ -526,7 +530,7 @@ func UpdateSpiderByCodeState(code, state string) (bool, error) {
 			}
 		}
 	} else { //publish the script
-		scriptMap := getSpiderScriptDB(code)
+		scriptMap := getSpiderScriptDB(code, "init")
 		logger.Info("Publishing new script; does it already exist in the DB:", code, len(scriptMap) > 0, scriptMap[code] != nil)
 		if util.Config.Modal == 1 && !util.Config.IsHistoryEvent { //separate collection
 			go UpdateHighListDataByCode(code)
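
A note on the getSpiderScriptDB change above: the new second argument only decides which MongoDB collection the scripts are loaded from. Below is a minimal sketch of that selection, assuming nothing beyond what the diff shows (the helper name collectionFor is invented for illustration; the real code inlines the branch):

    package main

    import "fmt"

    // collectionFor mirrors the branch this commit adds to getSpiderScriptDB:
    // "online" reads the online spider run table, anything else falls back to
    // the development table.
    func collectionFor(stype string) string {
        if stype == "online" {
            return "luaconfig_online"
        }
        return "luaconfig"
    }

    func main() {
        fmt.Println(collectionFor("online")) // luaconfig_online -- used by InitSpider
        fmt.Println(collectionFor("init"))   // luaconfig        -- used by UpdateSpiderByCodeState
    }

With this, InitSpider reads the online run table, while the re-crawl and publish paths in UpdateSpiderByCodeState pass "init" and keep reading the development table.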

src/spider/script.go  +5 -4

@@ -1,3 +1,5 @@
+package spider
+
 /*
 *
 Script loading + invocation wrapper;
@@ -5,8 +7,6 @@
 will later be driven by database configuration;
 common Lua functions need to be pulled out, and the main script file loads the shared Lua file
 */
-package spider
-
 import (
 	codegrpc "analysiscode/client"
 	"bytes"
@@ -834,7 +834,8 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		text := S.ToString(-3)
 		old := S.ToString(-2)
 		repl := S.ToString(-1)
-		text = strings.Replace(text, old, repl, n)
+		reg := regexp.MustCompile(old)
+		text = reg.ReplaceAllString(text, repl)
 		S.Push(lua.LString(text))
 		return 1
 	}))
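
The Lua replace helper above now compiles old as a regular expression instead of doing a literal strings.Replace, so regex metacharacters in old change meaning and the previous replacement count n is no longer applied. A small self-contained comparison, with made-up example values:

    package main

    import (
        "fmt"
        "regexp"
        "strings"
    )

    func main() {
        text, old, repl := "v1.5 build 105", "1.5", "X"

        // Previous behavior: old is a literal substring.
        fmt.Println(strings.Replace(text, old, repl, -1)) // "vX build 105"

        // New behavior: old is a regexp pattern, so "." matches any character
        // and "105" is rewritten as well.
        fmt.Println(regexp.MustCompile(old).ReplaceAllString(text, repl)) // "vX build X"

        // Scripts that need the old literal semantics would have to escape the pattern.
        fmt.Println(regexp.MustCompile(regexp.QuoteMeta(old)).ReplaceAllString(text, repl)) // "vX build 105"
    }
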
@@ -1145,7 +1146,7 @@ func (s *Script) LoadScript(site, channel, user *string, code, script_file strin
 		if err == nil {
 			headJsonStr = string(headByte)
 		}
-		code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie, proxy)
+		code, respHead, respCookie := codegrpc.GetCodeByPath(s.SCode, path, stype, headJsonStr, cookie, proxy)
 		//qu.Debug("code====", code)
 		//qu.Debug("respHead====", respHead)
 		//qu.Debug("respCookie====", respCookie)

src/spider/spider.go  +1 -0

@@ -1773,6 +1773,7 @@ func (s *Spider) DownloadDetail(reload bool, isHistory bool) {
 					}
 					//data download is finished at this point
 				}(tmp, spTmp)
+				tmp = map[string]interface{}{}
 			}
 			wg.Wait()
 			//update the data
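
The one line added to DownloadDetail above points tmp at a fresh map right after the current map is handed to the goroutine, so the next iteration cannot mutate a map that goroutine is still reading. A stripped-down sketch of the same pattern, with an invented loop body:

    package main

    import (
        "fmt"
        "sync"
    )

    func main() {
        var wg sync.WaitGroup
        tmp := map[string]interface{}{}

        for i := 0; i < 3; i++ {
            tmp["n"] = i // fill the map for this iteration
            wg.Add(1)
            go func(data map[string]interface{}) {
                defer wg.Done()
                fmt.Println(data["n"]) // each goroutine reads its own map
            }(tmp)
            tmp = map[string]interface{}{} // same reset the diff adds after launching the goroutine
        }
        wg.Wait()
    }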