Bläddra i källkod

header返回参数;lua转python处理

maxiaoshan 3 år sedan
förälder
incheckning
2d9f8d2fb2
5 ändrade filer med 66 tillägg och 19 borttagningar
  1. 36 8
      src/front/front.go
  2. 5 1
      src/front/spider.go
  3. 6 5
      src/spider/download.go
  4. 16 5
      src/spider/script.go
  5. 3 0
      src/util/util.go

+ 36 - 8
src/front/front.go

@@ -7,7 +7,6 @@ import (
 	"log"
 	qu "qfw/util"
 	mgdb "qfw/util/mongodb"
-	mgu "qfw/util/mongodbutil"
 	"qfw/util/redis"
 	"regexp"
 	"sort"
@@ -46,7 +45,7 @@ type Front struct {
 	getJson         xweb.Mapper `xweb:"/center/spider/json"`            //
 	delRedis        xweb.Mapper `xweb:"/center/spider/delRedis"`        //清理Redis
 	updateESP       xweb.Mapper `xweb:"/center/spider/updateesp"`       //修改爬虫的节点/状态/平台
-	updatePendState xweb.Mapper `xweb:"/center/spider/updatePendState"` //修改爬虫的节点/状态/平台
+	updatePendState xweb.Mapper `xweb:"/center/spider/updatePendState"` //更新爬虫挂起状态
 
 	spiderModel xweb.Mapper `xweb:"/center/model"`           //获取补充模型
 	runStep     xweb.Mapper `xweb:"/center/run"`             //方法测试
@@ -874,8 +873,12 @@ func (f *Front) UpdateESP() {
 	update := map[string]interface{}{
 		"$set": set,
 	}
+	one, _ := u.MgoE.FindById("luaconfig", id, nil)
+	if len(*one) == 0 {
+		f.Write("n")
+		return
+	}
 	if w == "state" { //无效爬虫改为待完成
-		one, _ := u.MgoE.FindById("luaconfig", id, map[string]interface{}{"historyevent": 1, "event": 1})
 		tmpEvent := qu.IntAll((*one)["event"])
 		if one != nil && len(*one) > 0 {
 			if (*one)["historyevent"] == nil && tmpEvent != 7000 { //除7000节点外没有historyevent的要重新设置
@@ -888,15 +891,38 @@ func (f *Front) UpdateESP() {
 	} else if w == "platform" {
 		set["platform"] = val
 		if val != "golua平台" { //由lua平台切换到其他平台,删除心跳;爬虫下架;修改爬虫状态
-			set["state"] = 0
-			b := mgu.Update("spider_heart", "spider", "spider", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"del": true}}, false, true)
+			b := u.MgoS.Update("spider_heart", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"del": true}}, false, true)
 			qu.Debug("Del Heart:", b)
-			b, err := UpStateAndUpSpider(code, "", "", "", Sp_state_6) //线上爬虫下架
-			qu.Debug("爬虫下架成功:", b)
-			if !b || err != nil {
+			b, err := spider.UpdateSpiderByCodeState(code, "6", qu.IntAll((*one)["event"])) //下架
+			if b && err == nil {
+				//历史节点下架为了避免线上运行爬虫待完成时改为7000采集历史,但是又转到其他平台,导致原线上运行节点爬虫并未下线,心跳异常
+				b, err = spider.UpdateSpiderByCodeState(code, "6", qu.IntAll((*one)["historyevent"]))
+				if b && err == nil {
+					set["state"] = 0 //更新状态
+					set["luauser"] = map[string]interface{}{
+						"createuser":      (*one)["createuser"],
+						"createuserid":    (*one)["createuserid"],
+						"createuseremail": (*one)["createuseremail"],
+						"modifyuser":      (*one)["modifyuser"],
+						"modifyuserid":    (*one)["modifyuserid"],
+					}
+				} else {
+					qu.Debug("历史节点下架失败")
+					f.Write("n")
+					return
+				}
+			} else {
+				qu.Debug("增量节点下架失败")
 				f.Write("n")
 				return
 			}
+			//qu.Debug("下架:", upresult, code)
+			//b, err := UpStateAndUpSpider(code, "", "", "", Sp_state_6) //线上爬虫下架
+			//qu.Debug("爬虫下架成功:", b)
+			//if !b || err != nil {
+			//	f.Write("n")
+			//	return
+			//}
 		}
 	} else { //修改节点
 		event, _ := strconv.Atoi(val)
@@ -917,9 +943,11 @@ func (f *Front) UpdateESP() {
 	if mgdb.Update("luaconfig", query, update, false, false) {
 		log.Println("Id:", id, "	Update", w, val, "Success")
 		f.Write("y")
+		return
 	} else {
 		log.Println("Id:", id, "	Update", w, val, "Failed")
 		f.Write("n")
+		return
 	}
 	f.Write("n")
 }

+ 5 - 1
src/front/spider.go

@@ -404,6 +404,10 @@ func LuaTextCheck(list, detail string, type_list int) (b bool, msg string) {
 		b = true
 		msg = "代码中含有lua原生方法;"
 	}
+	if ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true"
+		b = true
+		msg = `三级页缺少data["delete"]="true"`
+	}
 	if type_list != 0 && !strings.Contains(list, "sendListNum") { //列表页专家模式且不含sendListNum
 		b = true
 		msg = "代码中缺少sendListNum方法;" + msg
@@ -665,7 +669,7 @@ func (f *Front) SpiderPass() {
 		script = (*one)["luacontent"].(string)
 	}
 	if liststr != "" && contentstr != "" {
-		msg1 = u.SpiderPassCheckLua(liststr, contentstr, (*one))
+		msg1 = u.SpiderPassCheckLua(liststr, contentstr, (*one)) //校验
 	}
 	s := spider.CreateSpider(downloadnode, script)
 	s.SpiderMaxPage = 1

+ 6 - 5
src/spider/download.go

@@ -90,7 +90,7 @@ func Download(downloadnode, downloaderid, url, method string, head map[string]in
 }
 
 //下载页面,发送消息,等待别人下载
-func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie) {
+func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie, map[string]interface{}) {
 	defer mu.Catch()
 	ResultMsclient := MsclientTest
 	if downloadnode == "test" { //805
@@ -133,7 +133,7 @@ func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head
 				"ishttps":  ishttps,
 			}, timeout)
 		} else {
-			return "", nil
+			return "", nil, nil
 		}
 	}
 	if err != nil {
@@ -143,16 +143,17 @@ func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head
 	tmp := map[string]interface{}{}
 	json.Unmarshal(ret, &tmp)
 	cooks := util.ParseHttpCookie(tmp["cookie"])
+	headers, _ := tmp["header"].(map[string]interface{})
 	if v, ok := tmp["code"].(string); ok && v == "200" {
 		if isImg {
 			bs, _ := tmp["content"].(string)
-			return string(bs), cooks
+			return string(bs), cooks, headers
 		} else {
 			bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
-			return string(bs), cooks
+			return string(bs), cooks, headers
 		}
 	} else {
-		return "", nil
+		return "", nil, nil
 	}
 }
 func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {

+ 16 - 5
src/spider/script.go

@@ -108,17 +108,20 @@ func (s *Script) LoadScript(downloadnode, script string, isfile ...string) {
 		json.Unmarshal([]byte(cookie), &mycookie)
 		var ret string
 		var retcookie []*http.Cookie
+		var headers = map[string]interface{}{}
 		if param == nil {
 			ptext := map[string]interface{}{"text": S.ToString(-3)}
-			ret, retcookie = DownloadAdv(downloadnode, s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
+			ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, ptext, util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
 		} else {
-			ret, retcookie = DownloadAdv(downloadnode, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
+			ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
 		}
 		S.Push(lua.LString(ret))
 		scookie, _ := json.Marshal(retcookie)
 		S.Push(lua.LString(scookie))
+		hTable := util.MapToLuaTable(S, headers)
+		S.Push(hTable)
 		s.Test_luareqcount++
-		return 2
+		return 3
 	}))
 
 	s.L.SetGlobal("findOneText", s.L.NewFunction(func(S *lua.LState) int {
@@ -545,8 +548,16 @@ func (s *Script) LoadScript(downloadnode, script string, isfile ...string) {
 
 	//获取验证码
 	s.L.SetGlobal("getCodeByPath", s.L.NewFunction(func(S *lua.LState) int {
-		path := S.ToString(-1)
-		code := codegrpc.GetCodeByPath(path)
+		head := S.ToTable(-1)
+		path := S.ToString(-2)
+		headMap := util.GetTable(head)
+		qu.Debug(headMap)
+		headJsonStr := ""
+		headByte, err := json.Marshal(headMap)
+		if err == nil {
+			headJsonStr = string(headByte)
+		}
+		code := codegrpc.GetCodeByPath(path, headJsonStr)
 		S.Push(lua.LString(code))
 		return 1
 	}))

+ 3 - 0
src/util/util.go

@@ -115,6 +115,9 @@ func SpiderPassCheckLua(liststr, contentstr string, lua map[string]interface{})
 	if strings.Contains(liststr, "stringFind") && !strings.Contains(liststr, "--关键词过滤") {
 		msg = append(msg, "列表页代码有过滤方法stringFind但缺少注释:--关键词过滤")
 	}
+	if strings.Contains(contentstr, "--关键词过滤") && !strings.Contains(contentstr, "delete") {
+		msg = append(msg, `三级页代码有过滤方法但缺少data["delete"]="true"`)
+	}
 	//1.检测spidercode、site、channel
 	if param, ok := lua["param_common"].([]interface{}); ok && len(param) >= 3 {
 		spidercode := qu.ObjToString(param[0])