download.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. /**
  2. GO代码相对简单,
  3. 重点处理下载工具,爬虫启动,监控等。
  4. 逻辑处理交给LUA处理
  5. */
  6. package spider
  7. import (
  8. "encoding/base64"
  9. "encoding/json"
  10. "math/rand"
  11. mu "mfw/util"
  12. "net/http"
  13. "regexp"
  14. lu "spiderutil"
  15. "time"
  16. "github.com/donnie4w/go-logger/logger"
  17. "github.com/surfer/agent"
  18. )
  // regImgStr is the pattern used to recognise URLs that end in one of the
  // listed file extensions. Despite the name it covers document types
  // (doc, docx, pdf, xls, xlsx) as well as images. The image extensions are
  // listed in all-upper and all-lower case only; mixed case does not match.
  var regImgStr = "\\.(JPG|jpg|GIF|gif|PNG|png|BMP|bmp|doc|docx|pdf|xls|xlsx)$"

  // regImg is the compiled form of regImgStr. MustCompile is used so that an
  // invalid pattern fails loudly at start-up instead of leaving regImg nil and
  // panicking later at the first MatchString call.
  var regImg = regexp.MustCompile(regImgStr)
  24. //下载页面,发送消息,等待下载
  25. func Download(retLen *int64, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
  26. defer mu.Catch()
  27. msgid := mu.UUID(8)
  28. if len(head) < 1 {
  29. l := len(agent.UserAgents["common"])
  30. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  31. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  32. }
  33. isImg := regImg.MatchString(url)
  34. var ret []byte
  35. var err error
  36. if downloaderid == "" {
  37. ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  38. "url": url,
  39. "method": method,
  40. "head": head,
  41. "encoding": encoding,
  42. "useproxy": useproxy,
  43. "ishttps": ishttps,
  44. }, timeout)
  45. } else {
  46. if isAvailable(downloaderid) {
  47. ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  48. "url": url,
  49. "method": method,
  50. "head": head,
  51. "encoding": encoding,
  52. "useproxy": useproxy,
  53. "ishttps": ishttps,
  54. }, timeout)
  55. } else {
  56. return ""
  57. }
  58. }
  59. //retLenTmp := int64(len(ret))
  60. //*retLen = retLenTmp
  61. if err != nil {
  62. str := code + "方法DownloadAdv,url:" + url + ",err:" + err.Error()
  63. logger.Error(str, timeout)
  64. //AddCheckLogs(url, code, "net", str)
  65. }
  66. tmp := map[string]interface{}{}
  67. json.Unmarshal(ret, &tmp)
  68. if v, ok := tmp["code"].(string); ok && v == "200" {
  69. if isImg {
  70. bs, _ := tmp["content"].(string)
  71. return string(bs)
  72. } else {
  73. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  74. return string(bs)
  75. }
  76. } else {
  77. return ""
  78. }
  79. }
  80. //下载页面,发送消息,等待下载
  81. func DownloadAdv(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie, map[string]interface{}) {
  82. defer mu.Catch()
  83. msgid := mu.UUID(8)
  84. if len(head) < 1 {
  85. l := len(agent.UserAgents["common"])
  86. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  87. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  88. }
  89. isImg := regImg.MatchString(url)
  90. var ret []byte
  91. var err error
  92. if downloaderid == "" {
  93. ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  94. "url": url,
  95. "method": method,
  96. "head": head,
  97. "reqparam": reqparam,
  98. "cookie": mycookie,
  99. "encoding": encoding,
  100. "useproxy": useproxy,
  101. "ishttps": ishttps,
  102. }, timeout)
  103. } else {
  104. if isAvailable(downloaderid) {
  105. ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  106. "url": url,
  107. "method": method,
  108. "head": head,
  109. "reqparam": reqparam,
  110. "cookie": mycookie,
  111. "encoding": encoding,
  112. "useproxy": useproxy,
  113. "ishttps": ishttps,
  114. }, timeout)
  115. } else {
  116. return "", nil, nil
  117. }
  118. }
  119. //retLenTmp := int64(len(ret))
  120. //*retLen = retLenTmp
  121. if err != nil {
  122. str := code + "方法DownloadAdv,url:" + url + ",err:" + err.Error()
  123. logger.Error(str, timeout)
  124. }
  125. tmp := map[string]interface{}{}
  126. json.Unmarshal(ret, &tmp)
  127. cooks := lu.ParseHttpCookie(tmp["cookie"])
  128. headers, _ := tmp["header"].(map[string]interface{})
  129. if v, ok := tmp["code"].(string); ok && v == "200" {
  130. if isImg {
  131. bs, _ := tmp["content"].(string)
  132. return string(bs), cooks, headers
  133. } else {
  134. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  135. return string(bs), cooks, headers
  136. }
  137. } else {
  138. return "", nil, nil
  139. }
  140. }
  141. //下载附件
  142. func DownloadFile_bak(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
  143. defer mu.Catch()
  144. msgid := mu.UUID(8)
  145. if len(head) < 1 {
  146. l := len(agent.UserAgents["common"])
  147. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  148. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  149. }
  150. var ret []byte
  151. var err error
  152. if downloaderid == "" {
  153. ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  154. "url": url,
  155. "method": method,
  156. "head": head,
  157. "reqparam": reqparam,
  158. "cookie": mycookie,
  159. "encoding": encoding,
  160. "useproxy": useproxy,
  161. "ishttps": ishttps,
  162. }, timeout)
  163. } else {
  164. if isAvailable(downloaderid) {
  165. ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  166. "url": url,
  167. "method": method,
  168. "head": head,
  169. "reqparam": reqparam,
  170. "cookie": mycookie,
  171. "encoding": encoding,
  172. "useproxy": useproxy,
  173. "ishttps": ishttps,
  174. }, timeout)
  175. } else {
  176. return nil
  177. }
  178. }
  179. if err != nil {
  180. str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
  181. logger.Error(str, timeout)
  182. }
  183. tmp := map[string]interface{}{}
  184. json.Unmarshal(ret, &tmp)
  185. if v, ok := tmp["code"].(string); ok && v == "200" {
  186. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  187. //log.Println(string(bs))
  188. return bs
  189. } else {
  190. return nil
  191. }
  192. }
  193. func DownloadFile(retLen *int64, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
  194. defer mu.Catch()
  195. timeout = timeout * 2
  196. msgid := mu.UUID(8)
  197. if len(head) < 1 {
  198. l := len(agent.UserAgents["common"])
  199. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  200. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  201. }
  202. var ret []byte
  203. var err error
  204. if downloaderid == "" {
  205. ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  206. "url": url,
  207. "method": method,
  208. "head": head,
  209. "reqparam": reqparam,
  210. "cookie": mycookie,
  211. "encoding": encoding,
  212. "useproxy": useproxy,
  213. "ishttps": ishttps,
  214. }, timeout)
  215. } else {
  216. if isAvailableFile(downloaderid) {
  217. ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  218. "url": url,
  219. "method": method,
  220. "head": head,
  221. "reqparam": reqparam,
  222. "cookie": mycookie,
  223. "encoding": encoding,
  224. "useproxy": useproxy,
  225. "ishttps": ishttps,
  226. }, timeout)
  227. } else {
  228. return nil
  229. }
  230. }
  231. //retLenTmp := int64(len(ret))
  232. //*retLen = retLenTmp
  233. if err != nil {
  234. str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
  235. logger.Error(str, timeout)
  236. }
  237. tmp := map[string]interface{}{}
  238. json.Unmarshal(ret, &tmp)
  239. if v, ok := tmp["code"].(string); ok && v == "200" {
  240. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  241. return bs
  242. } else {
  243. return nil
  244. }
  245. }
  246. //下载点是否可用
  247. func isAvailable(code string) bool {
  248. b := false
  249. for k, _ := range Alldownloader {
  250. if k == code {
  251. b = true
  252. }
  253. }
  254. return b
  255. }
  256. //下载点是否可用
  257. func isAvailableFile(code string) bool {
  258. b := false
  259. for k, _ := range AlldownloaderFile {
  260. if k == code {
  261. b = true
  262. }
  263. }
  264. return b
  265. }