/** GO代码相对简单, 重点处理下载工具,爬虫启动,监控等。 逻辑处理交给LUA处理 */ package spider import ( "encoding/base64" "encoding/json" "log" "math/rand" mu "mfw/util" "net/http" "regexp" util "spiderutil" "time" "github.com/surfer/agent" ) var regImgStr = "\\.(JPG|jpg|GIF|gif|PNG|png|BMP|bmp)$" var regImg *regexp.Regexp var GarbledCodeReg = regexp.MustCompile("[纰锟绲庯卞鍤滐銇鐟閫嚜鎯壐璩鏉彲鍋撅绺閲嗭絣鐤鏅盫鎽亰寰钂鎳鍒鐏宀婾嗚亗鎬憰攬鍙嶁鑻疐璁鐞鏇顭庮渾寮鑶剸鐙鈪鍐実綍擄鐒鐛绫瀵珐鍡閬栬憟灞綅顡韪忚鍓笉犵鍎鐥慪璜钀氭畯焛鎲顏熺崿鍜鍩僜鍚褰囶鍘櫥闀撹棢檅閯嗏絖灦戝閹涜闇鐮捒鈥璺籏绶澶鎷樺鍌絒嗘鍊ク鐧榦璞嚟鍢鐡瓼屾煢宄鑽畵鎭鈹鑷稛磭鏋孊钄狅絆鐘塋尟鑺絍绂绗嘐幇璨閾戭嚦鐫婅檴碭妤鑴厷挰鐜縒闆憁鏃鐗猒鏁橈顤秨哵鍧紛濊閷顥閺惪鐓嶈亙濠掗帾媞鏀慿瓙鎺闁鎰鑸鎹皝鍔鍦骞閶鍞挾鎴竗閵繉闋戞籅閽欏閼縲鐣呮墔顐ら憼檾锝挻顚炶姂剾鐑鐭潛閰涳楂懘願澧亣倴鐦忕嫄刡灏棙宓媐铇甀鏂楁従態瀹揕闃姒炲矕鏌眱鍍熸腹儝绱獻鐬鑵矦鍝嗗墹崇琛勭仈濴顒剭閴鍏鐝曨锛よ顧勯槈夊潏鐖垚矑鍛瞋終缂鐪鍠鏆妫攏顪娌濆嘇璎厫鍗閮顝給榇婂唭姘燁鏍鑹笎爑嚔槌瀣糵炵櫤鐎闅ゅ類鐨夛绋搕缃娉犲搻鐠儧鋸闉攜楸ㄨ埧欒闊垱鈩厔弐顠拵鑾]+") func init() { regImg, _ = regexp.Compile(regImgStr) } //下载页面,发送消息,等待别人下载 func Download(downloadnode, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string { defer mu.Catch() ResultMsclient := MsclientTest if downloadnode == "test" { //805 ResultMsclient = MsclientTest } else if downloadnode == "comm" { //801 ResultMsclient = Msclient } else if downloadnode == "bid" { //803 ResultMsclient = MsclientBid } msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } isImg := regImg.MatchString(url) var ret []byte var err error if downloaderid == "" { ret, err = ResultMsclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailable(downloaderid) { ret, err = ResultMsclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return "" } } if err != nil { str := "方法DownloadAdv,url:" + url + ",err:" + err.Error() log.Println(str) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) if v, ok := tmp["code"].(string); ok && v == "200" { if isImg { bs, _ := tmp["content"].(string) return string(bs) } else { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return string(bs) } } else { return "" } } //下载页面,发送消息,等待别人下载 func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie) { defer mu.Catch() ResultMsclient := MsclientTest if downloadnode == "test" { //805 ResultMsclient = MsclientTest } else if downloadnode == "comm" { //801 ResultMsclient = Msclient } else if downloadnode == "bid" { //803 ResultMsclient = MsclientBid } msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } isImg := regImg.MatchString(url) var ret []byte var err error if downloaderid == "" { ret, err = ResultMsclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailable(downloaderid) { ret, err = ResultMsclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return "", nil } } if err != nil { str := "方法DownloadAdv,url:" + url + ",err:" + err.Error() log.Println(str) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) cooks := util.ParseHttpCookie(tmp["cookie"]) if v, ok := tmp["code"].(string); ok && v == "200" { if isImg { bs, _ := tmp["content"].(string) return string(bs), cooks } else { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return string(bs), cooks } } else { return "", nil } } func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte { defer mu.Catch() timeout = timeout * 10 msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } var ret []byte var err error if downloaderid == "" { ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailableFile(downloaderid) { ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return nil } } if err != nil { str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error() log.Println(str, timeout) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) if v, ok := tmp["code"].(string); ok && v == "200" { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return bs } else { return nil } } func DownloadFile_back(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte { defer mu.Catch() msgid := mu.UUID(8) if len(head) < 1 { l := len(agent.UserAgents["common"]) r := rand.New(rand.NewSource(time.Now().UnixNano())) head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)] } var ret []byte var err error if downloaderid == "" { ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { if isAvailable(downloaderid) { ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{ "url": url, "method": method, "head": head, "reqparam": reqparam, "cookie": mycookie, "encoding": encoding, "useproxy": useproxy, "ishttps": ishttps, }, timeout) } else { return nil } } if err != nil { str := "方法DownloadFile,url:" + url + ",err:" + err.Error() log.Println(map[string]interface{}{"code": code, "content": str, "comeintime": time.Now().Unix()}) } tmp := map[string]interface{}{} json.Unmarshal(ret, &tmp) if v, ok := tmp["code"].(string); ok && v == "200" { bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string)) return bs } else { return nil } } //下载点是否可用 func isAvailable(code string) bool { b := false for k, _ := range Alldownloader { if k == code { b = true } } return b } //下载点是否可用 func isAvailableFile(code string) bool { b := false for k, _ := range AlldownloaderFile { if k == code { b = true } } return b }