123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- /**
- GO代码相对简单,
- 重点处理下载工具,爬虫启动,监控等。
- 逻辑处理交给LUA处理
- */
- package spider
- import (
- "encoding/base64"
- "encoding/json"
- "log"
- "math/rand"
- mu "mfw/util"
- "net/http"
- "regexp"
- util "spiderutil"
- "time"
- "github.com/surfer/agent"
- )
// Image-URL detection state shared by the download helpers.
var (
	// regImgStr is the pattern source: a dot followed by one of the listed
	// image extensions (all-upper or all-lower case) at the very end of the
	// string.
	regImgStr = `\.(JPG|jpg|GIF|gif|PNG|png|BMP|bmp)$`

	// regImg is the compiled form of regImgStr. It is nil here and is
	// assigned by init.
	regImg *regexp.Regexp
)
// GarbledCodeReg matches a run of one or more consecutive characters drawn
// from the fixed character set listed inside the brackets of the pattern.
// NOTE(review): judging by the name, the set is presumably characters that
// appear when text has been decoded with the wrong charset — confirm against
// the callers before changing it.
- var GarbledCodeReg = regexp.MustCompile("[纰锟绲庯卞鍤滐銇鐟閫嚜鎯壐璩鏉彲鍋撅绺閲嗭絣鐤鏅盫鎽亰寰钂鎳鍒鐏宀婾嗚亗鎬憰攬鍙嶁鑻疐璁鐞鏇顭庮渾寮鑶剸鐙鈪鍐実綍擄鐒鐛绫瀵珐鍡閬栬憟灞綅顡韪忚鍓笉犵鍎鐥慪璜钀氭畯焛鎲顏熺崿鍜鍩僜鍚褰囶鍘櫥闀撹棢檅閯嗏絖灦戝閹涜闇鐮捒鈥璺籏绶澶鎷樺鍌絒嗘鍊ク鐧榦璞嚟鍢鐡瓼屾煢宄鑽畵鎭鈹鑷稛磭鏋孊钄狅絆鐘塋尟鑺絍绂绗嘐幇璨閾戭嚦鐫婅檴碭妤鑴厷挰鐜縒闆憁鏃鐗猒鏁橈顤秨哵鍧紛濊閷顥閺惪鐓嶈亙濠掗帾媞鏀慿瓙鎺闁鎰鑸鎹皝鍔鍦骞閶鍞挾鎴竗閵繉闋戞籅閽欏閼縲鐣呮墔顐ら憼檾锝挻顚炶姂剾鐑鐭潛閰涳楂懘願澧亣倴鐦忕嫄刡灏棙宓媐铇甀鏂楁従態瀹揕闃姒炲矕鏌眱鍍熸腹儝绱獻鐬鑵矦鍝嗗墹崇琛勭仈濴顒剭閴鍏鐝曨锛よ顧勯槈夊潏鐖垚矑鍛瞋終缂鐪鍠鏆妫攏顪娌濆嘇璎厫鍗閮顝給榇婂唭姘燁鏍鑹笎爑嚔槌瀣糵炵櫤鐎闅ゅ類鐨夛绋搕缃娉犲搻鐠儧鋸闉攜楸ㄨ埧欒闊垱鈩厔弐顠拵鑾]+")
- func init() {
- regImg, _ = regexp.Compile(regImgStr)
- }
- //下载页面,发送消息,等待别人下载
- func Download(downloadnode, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
- defer mu.Catch()
- ResultMsclient := MsclientTest
- if downloadnode == "test" { //805
- ResultMsclient = MsclientTest
- } else if downloadnode == "comm" { //801
- ResultMsclient = Msclient
- } else if downloadnode == "bid" { //803
- ResultMsclient = MsclientBid
- }
- msgid := mu.UUID(8)
- if len(head) < 1 {
- l := len(agent.UserAgents["common"])
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
- }
- isImg := regImg.MatchString(url)
- var ret []byte
- var err error
- if downloaderid == "" {
- ret, err = ResultMsclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- if isAvailable(downloaderid) {
- ret, err = ResultMsclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- return ""
- }
- }
- if err != nil {
- str := "方法DownloadAdv,url:" + url + ",err:" + err.Error()
- log.Println(str)
- }
- tmp := map[string]interface{}{}
- json.Unmarshal(ret, &tmp)
- if v, ok := tmp["code"].(string); ok && v == "200" {
- if isImg {
- bs, _ := tmp["content"].(string)
- return string(bs)
- } else {
- bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
- return string(bs)
- }
- } else {
- return ""
- }
- }
- //下载页面,发送消息,等待别人下载
- func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie) {
- defer mu.Catch()
- ResultMsclient := MsclientTest
- if downloadnode == "test" { //805
- ResultMsclient = MsclientTest
- } else if downloadnode == "comm" { //801
- ResultMsclient = Msclient
- } else if downloadnode == "bid" { //803
- ResultMsclient = MsclientBid
- }
- msgid := mu.UUID(8)
- if len(head) < 1 {
- l := len(agent.UserAgents["common"])
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
- }
- isImg := regImg.MatchString(url)
- var ret []byte
- var err error
- if downloaderid == "" {
- ret, err = ResultMsclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "reqparam": reqparam,
- "cookie": mycookie,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- if isAvailable(downloaderid) {
- ret, err = ResultMsclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "reqparam": reqparam,
- "cookie": mycookie,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- return "", nil
- }
- }
- if err != nil {
- str := "方法DownloadAdv,url:" + url + ",err:" + err.Error()
- log.Println(str)
- }
- tmp := map[string]interface{}{}
- json.Unmarshal(ret, &tmp)
- cooks := util.ParseHttpCookie(tmp["cookie"])
- if v, ok := tmp["code"].(string); ok && v == "200" {
- if isImg {
- bs, _ := tmp["content"].(string)
- return string(bs), cooks
- } else {
- bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
- return string(bs), cooks
- }
- } else {
- return "", nil
- }
- }
- func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
- defer mu.Catch()
- timeout = timeout * 10
- msgid := mu.UUID(8)
- if len(head) < 1 {
- l := len(agent.UserAgents["common"])
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
- }
- var ret []byte
- var err error
- if downloaderid == "" {
- ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "reqparam": reqparam,
- "cookie": mycookie,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- if isAvailableFile(downloaderid) {
- ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "reqparam": reqparam,
- "cookie": mycookie,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- return nil
- }
- }
- if err != nil {
- str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
- log.Println(str, timeout)
- }
- tmp := map[string]interface{}{}
- json.Unmarshal(ret, &tmp)
- if v, ok := tmp["code"].(string); ok && v == "200" {
- bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
- return bs
- } else {
- return nil
- }
- }
- func DownloadFile_back(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
- defer mu.Catch()
- msgid := mu.UUID(8)
- if len(head) < 1 {
- l := len(agent.UserAgents["common"])
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
- }
- var ret []byte
- var err error
- if downloaderid == "" {
- ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "reqparam": reqparam,
- "cookie": mycookie,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- if isAvailable(downloaderid) {
- ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
- "url": url,
- "method": method,
- "head": head,
- "reqparam": reqparam,
- "cookie": mycookie,
- "encoding": encoding,
- "useproxy": useproxy,
- "ishttps": ishttps,
- }, timeout)
- } else {
- return nil
- }
- }
- if err != nil {
- str := "方法DownloadFile,url:" + url + ",err:" + err.Error()
- log.Println(map[string]interface{}{"code": code, "content": str, "comeintime": time.Now().Unix()})
- }
- tmp := map[string]interface{}{}
- json.Unmarshal(ret, &tmp)
- if v, ok := tmp["code"].(string); ok && v == "200" {
- bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
- return bs
- } else {
- return nil
- }
- }
- //下载点是否可用
- func isAvailable(code string) bool {
- b := false
- for k, _ := range Alldownloader {
- if k == code {
- b = true
- }
- }
- return b
- }
- //下载点是否可用
- func isAvailableFile(code string) bool {
- b := false
- for k, _ := range AlldownloaderFile {
- if k == code {
- b = true
- }
- }
- return b
- }
|