/** GO代码相对简单, 重点处理下载工具,爬虫启动,监控等。 逻辑处理交给LUA处理 */ package spider import ( "encoding/base64" "encoding/json" "math/rand" mu "mfw/util" "net/http" "regexp" lu "spiderutil" "time" "github.com/donnie4w/go-logger/logger" "github.com/surfer/agent" ) var regImgStr = "\\.(JPG|jpg|GIF|gif|PNG|png|BMP|bmp|doc|docx|pdf|xls|xlsx)$" var regImg *regexp.Regexp func init() { regImg, _ = regexp.Compile(regImgStr) } //下载页面,发送消息,等待下载 func Download(retLen *int64, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string { defer mu.Catch() msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } isImg := regImg.MatchString(url) var ret []byte var err error if downloaderid == "" { ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailable(downloaderid) { ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return "" } } //retLenTmp := int64(len(ret)) //*retLen = retLenTmp if err != nil { str := code + "方法DownloadAdv,url:" + url + ",err:" + err.Error() logger.Error(str, timeout) //AddCheckLogs(url, code, "net", str) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) if v, ok := tmp["code"].(string); ok && v == "200" { if isImg { bs, _ := tmp["content"].(string) return string(bs) } else { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return string(bs) } } else { return "" } } //下载页面,发送消息,等待下载 func DownloadAdv(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie, map[string]interface{}) { defer mu.Catch() msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } isImg := regImg.MatchString(url) var ret []byte var err error if downloaderid == "" { ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailable(downloaderid) { ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return "", nil, nil } } //retLenTmp := int64(len(ret)) //*retLen = retLenTmp if err != nil { str := code + "方法DownloadAdv,url:" + url + ",err:" + err.Error() logger.Error(str, timeout) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) cooks := lu.ParseHttpCookie(tmp["cookie"]) headers, _ := tmp["header"].(map[string]interface{}) if v, ok := tmp["code"].(string); ok && v == "200" { if isImg { bs, _ := tmp["content"].(string) return string(bs), cooks, headers } else { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return string(bs), cooks, headers } } else { return "", nil, nil } } //下载附件 func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte { defer mu.Catch() msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } var ret []byte var err error if downloaderid == "" { ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailable(downloaderid) { ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return nil } } if err != nil { str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error() logger.Error(str, timeout) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) if v, ok := tmp["code"].(string); ok && v == "200" { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) //log.Println(string(bs)) return bs } else { return nil } } func DownloadFile(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte { defer mu.Catch() timeout = timeout * 2 msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } var ret []byte var err error if downloaderid == "" { ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailableFile(downloaderid) { ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return nil } } //retLenTmp := int64(len(ret)) //*retLen = retLenTmp if err != nil { str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error() logger.Error(str, timeout) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) if v, ok := tmp["code"].(string); ok && v == "200" { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return bs } else { return nil } } //下载点是否可用 func isAvailable(code string) bool { b := false for k, _ := range Alldownloader { if k == code { b = true } } return b } //下载点是否可用 func isAvailableFile(code string) bool { b := false for k, _ := range AlldownloaderFile { if k == code { b = true } } return b }