
Modify the redis val values for the full and incremental dedup keys

maxiaoshan 3 years ago
parent
commit
85c2848837
4 changed files with 119 additions and 9 deletions
  1. src/spider/download.go (+56 -1)
  2. src/spider/msgservice.go (+2 -2)
  3. src/spider/script.go (+59 -4)
  4. src/spider/spider.go (+2 -2)

src/spider/download.go (+56 -1)

@@ -117,7 +117,7 @@ func DownloadAdv(downloaderid, url, method string, reqparam, head map[string]int
 				"ishttps":  ishttps,
 			}, timeout)
 		} else {
-			return "", nil
+			return "", nil, nil
 		}
 	}
 	if err != nil {
@@ -140,6 +140,61 @@ func DownloadAdv(downloaderid, url, method string, reqparam, head map[string]int
 		return "", nil, nil
 	}
 }
+func NewDownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64, noredirect bool) []byte {
+	defer mu.Catch()
+	timeout = timeout * 10
+	msgid := mu.UUID(8)
+	if len(head) < 1 {
+		l := len(agent.UserAgents["common"])
+		r := rand.New(rand.NewSource(time.Now().UnixNano()))
+		head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
+	}
+	var ret []byte
+	var err error
+	if downloaderid == "" {
+		ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
+			"url":        url,
+			"method":     method,
+			"head":       head,
+			"reqparam":   reqparam,
+			"cookie":     mycookie,
+			"encoding":   encoding,
+			"useproxy":   useproxy,
+			"ishttps":    ishttps,
+			"new":        true,
+			"noredirect": noredirect,
+		}, timeout)
+	} else {
+		if isAvailableFile(downloaderid) {
+			ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
+				"url":        url,
+				"method":     method,
+				"head":       head,
+				"reqparam":   reqparam,
+				"cookie":     mycookie,
+				"encoding":   encoding,
+				"useproxy":   useproxy,
+				"ishttps":    ishttps,
+				"new":        true,
+				"noredirect": noredirect,
+			}, timeout)
+		} else {
+			return nil
+		}
+	}
+	if err != nil {
+		str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
+		logger.Error(str, timeout)
+	}
+	tmp := map[string]interface{}{}
+	json.Unmarshal(ret, &tmp)
+	if v, ok := tmp["code"].(string); ok && v == "200" {
+		bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
+		return bs
+	} else {
+		return nil
+	}
+}
 
 //Download attachment
 func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
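
For orientation, the snippet below sketches how the new NewDownloadFile entry point might be called directly from Go. It is a hypothetical example, not part of the commit: the URL, header and spider code are made up, and it assumes a file in the same spider package.

package spider

import "net/http"

// downloadAttachmentExample is a hypothetical caller (not from the commit)
// showing what each NewDownloadFile parameter means.
func downloadAttachmentExample() []byte {
	head := map[string]interface{}{
		"Referer": "https://example.com/list", // assumed header
	}
	return NewDownloadFile(
		"",                          // downloaderid: "" sends the job to a random download node
		"https://example.com/a.pdf", // url (made up)
		"GET",                       // method
		map[string]interface{}{},    // reqparam
		head,                        // head: non-empty, so no random User-Agent is injected
		[]*http.Cookie{},            // mycookie
		"utf-8",                     // encoding
		false,                       // useproxy
		true,                        // ishttps
		"example_code",              // code: only used in the error log message
		60,                          // timeout in seconds (multiplied by 10 inside)
		false,                       // noredirect
	)
}

On success the downloader replies with code "200" and a base64 content field, which NewDownloadFile decodes and returns; on any failure it returns nil.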

src/spider/msgservice.go (+2 -2)

@@ -289,12 +289,12 @@ func SaveObj(event int, checkAtrr string, data map[string]interface{}, saveredis
 			}
 			//Data the save service failed to receive is stored in data_bak; the resend program relies on it to guarantee no data is lost
 			if id != "" {
-				util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)
+				util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, "", 3600*24*365)
 				if !flag { //sent to the save service successfully
 					//Full dedup (check existence first to avoid overwriting the id)
 					isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, hashHref)
 					if !isExist {
-						util.PutRedis("title_repeat_fulljudgement", db, hashHref, "lua_"+id, -1)
+						util.PutRedis("title_repeat_fulljudgement", db, hashHref, "", -1)
 					}
 				}
 			}
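
These two lines (together with the matching ones in spider.go below) keep the redis keys but store empty values: in these code paths only key existence is checked, so the former href and "lua_"+id payloads are not needed. A condensed sketch of the resulting write path, reusing the util.PutRedis and util.ExistRedis calls exactly as they appear above (the wrapper function name is hypothetical):

// markHrefSeen is a hypothetical wrapper condensing the dedup writes as they
// look after this change: the keys carry all the information, values are "".
func markHrefSeen(href string) {
	// Incremental dedup key, one-year TTL.
	util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, "", 3600*24*365)
	// Full dedup key: sharded by href hash, no expiry, written only when
	// absent so an existing entry (and its id, if any) is never overwritten.
	db := HexToBigIntMod(href)
	hashHref := HexText(href)
	if isExist, _ := util.ExistRedis("title_repeat_fulljudgement", db, hashHref); !isExist {
		util.PutRedis("title_repeat_fulljudgement", db, hashHref, "", -1)
	}
}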

src/spider/script.go (+59 -4)

@@ -203,6 +203,12 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 		S.Push(ret)
 		return 1
 	}))
+	//Push the volume of data downloaded from list pages
+	s.L.SetGlobal("sendListNum", s.L.NewFunction(func(S *lua.LState) int {
+		//table := S.ToTable(-1)
+		//list := util.TableToMap(table)
+		return 1
+	}))
 	s.L.SetGlobal("findMap", s.L.NewFunction(func(S *lua.LState) int {
 		qmap := S.ToTable(-2)
 		content := S.ToString(-1)
@@ -265,6 +271,7 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 		str := S.CheckString(-1)
 		switch codeType {
 		case "unicode":
+			str = strings.Replace(str, "%u", "\\u", -1)
 			str = transUnic(str)
 		case "urlencode_gbk":
 			data, _ := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(str)), simplifiedchinese.GBK.NewEncoder()))
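
The added strings.Replace normalizes old-style "%uXXXX" escapes (as emitted by JavaScript's escape()) into "\uXXXX" before transUnic, which presumably decodes unicode escapes, runs. transUnic itself is not shown in this diff; the standalone sketch below illustrates the same normalize-then-decode idea using only the standard library.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// decodePercentU rewrites "%uXXXX" escapes as "\uXXXX" and lets
// strconv.Unquote decode them. This illustrates the idea behind the added
// line; it is not the repo's transUnic.
func decodePercentU(s string) string {
	s = strings.Replace(s, "%u", `\u`, -1)
	if out, err := strconv.Unquote(`"` + s + `"`); err == nil {
		return out
	}
	return s
}

func main() {
	fmt.Println(decodePercentU("%u4e2d%u6587")) // prints: 中文
}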
@@ -707,13 +714,61 @@ func (s *Script) LoadScript(code, script_file string, newstate bool) string {
 		S.Push(lua.LString(decimalNum.String()))
 		return 1
 	}))
-
 	//Get the captcha code
 	s.L.SetGlobal("getCodeByPath", s.L.NewFunction(func(S *lua.LState) int {
-		path := S.ToString(-1)
-		code := codegrpc.GetCodeByPath(path)
+		cookie := S.ToString(-1)
+		head := S.ToTable(-2)
+		stype := S.ToString(-3)
+		path := S.ToString(-4)
+		headMap := util.GetTable(head)
+		//qu.Debug("cookie----------", cookie)
+		//qu.Debug("headMap----------", headMap)
+		headJsonStr := ""
+		headByte, err := json.Marshal(headMap)
+		if err == nil {
+			headJsonStr = string(headByte)
+		}
+		code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie)
+		//qu.Debug("code====", code)
+		//qu.Debug("respHead====", respHead)
+		//qu.Debug("respCookie====", respCookie)
 		S.Push(lua.LString(code))
-		return 1
+		respHeadMap := map[string]interface{}{}
+		json.Unmarshal([]byte(respHead), &respHeadMap)
+		hTable := util.MapToLuaTable(S, respHeadMap)
+		S.Push(hTable)
+		S.Push(lua.LString(respCookie))
+		return 3
+	}))
+	s.L.SetGlobal("newDownloadFile", s.L.NewFunction(func(S *lua.LState) int {
+		cookie := S.ToString(-1)
+		head := S.ToTable(-2)
+		param := S.ToTable(-3)
+		method := S.ToString(-4)
+		url := S.ToString(-5)
+		fileName := S.ToString(-6)
+		ishttps := strings.Contains(url, "https")
+		var mycookie []*http.Cookie
+		if cookie != "{}" {
+			json.Unmarshal([]byte(cookie), &mycookie)
+		} else {
+			mycookie = make([]*http.Cookie, 0)
+		}
+		fileName = strings.TrimSpace(fileName)
+		url = strings.TrimSpace(url)
+		ret := NewDownloadFile(s.Downloader, url, method, util.GetTable(param), util.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout, false)
+		url, name, size, ftype, fid := util.UploadFile(s.SCode, fileName, url, ret)
+		if strings.TrimSpace(ftype) == "" {
+			if len(path.Ext(name)) > 0 {
+				ftype = path.Ext(name)[1:]
+			}
+		}
+		S.Push(lua.LString(url))
+		S.Push(lua.LString(name))
+		S.Push(lua.LString(size))
+		S.Push(lua.LString(ftype))
+		S.Push(lua.LString(fid))
+		return 5
 	}))
 	return ""
 }
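
getCodeByPath now takes path, stype, a header table and a cookie string, and returns three values to Lua: the code, the response headers as a table, and the response cookie. newDownloadFile goes the other way, converting the Lua tables to Go maps with util.GetTable before calling NewDownloadFile and uploading the result, and returns url, name, size, ftype and fid to the script. util.MapToLuaTable is not part of this diff; a minimal hypothetical version, sufficient for the flat header maps pushed back here, could look like the following (the real helper may differ).

package util

import (
	"fmt"

	lua "github.com/yuin/gopher-lua"
)

// MapToLuaTable (hypothetical stand-in): convert a Go map into a Lua table
// that can be pushed back to the calling script.
func MapToLuaTable(L *lua.LState, m map[string]interface{}) *lua.LTable {
	t := L.NewTable()
	for k, v := range m {
		switch val := v.(type) {
		case string:
			t.RawSetString(k, lua.LString(val))
		case float64: // json.Unmarshal decodes JSON numbers as float64
			t.RawSetString(k, lua.LNumber(val))
		case bool:
			t.RawSetString(k, lua.LBool(val))
		case map[string]interface{}:
			t.RawSetString(k, MapToLuaTable(L, val)) // nested objects
		default:
			t.RawSetString(k, lua.LString(fmt.Sprint(val)))
		}
	}
	return t
}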

src/spider/spider.go (+2 -2)

@@ -112,7 +112,7 @@ func (s *Spider) DownloadDetailItem(paramdata, tmp map[string]interface{}) {
 		isExist, _ := util.ExistRedis("title_repeat_judgement", 0, "url_repeat_"+href)
 		if isExist { //refresh the redis TTL
 			log.Println("href had--", isExist, href)
-			util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)
+			util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, "", 3600*24*365)
 			//update the data state
 			UpdateState(id, map[string]interface{}{"state": 3, "remark": "Href Redis Is Exist"})
 			return
@@ -139,7 +139,7 @@ func (s *Spider) DownloadDetailItem(paramdata, tmp map[string]interface{}) {
 	} else if from == "lua" && tmphref != href { //the detail-page href was rewritten, so it no longer matches the original
 		logger.Debug("beforeHref:", href, "	afterHref:", tmphref)
 		//incremental dedup
-		util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, href, 3600*24*365)
+		util.PutRedis("title_repeat_judgement", 0, "url_repeat_"+href, "", 3600*24*365)
 		//full dedup
 		db := HexToBigIntMod(href)
 		hashHref := HexText(href)
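
The full-dedup path relies on HexToBigIntMod and HexText, neither of which appears in this diff. Purely as a labeled assumption (the hash algorithm and shard count are guesses, and the real helpers may differ), they could be implemented along these lines:

package spider

import (
	"crypto/md5"
	"encoding/hex"
	"math/big"
)

// fullDedupShards is an assumed shard count; the real value lives elsewhere in the repo.
const fullDedupShards = 16

// HexText (assumed behavior): hash the href to a fixed-length hex digest
// that serves as the full-dedup redis key.
func HexText(href string) string {
	sum := md5.Sum([]byte(href))
	return hex.EncodeToString(sum[:])
}

// HexToBigIntMod (assumed behavior): map the digest to a redis DB index by
// reading it as a big integer modulo the shard count.
func HexToBigIntMod(href string) int {
	n, _ := new(big.Int).SetString(HexText(href), 16)
	return int(new(big.Int).Mod(n, big.NewInt(fullDedupShards)).Int64())
}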