download.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
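/*
The Go side is kept relatively simple: it mainly covers the download
helpers, spider start-up, monitoring and similar plumbing.
The actual business logic is delegated to Lua scripts.
*/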
  6. package spider
  7. import (
  8. "encoding/base64"
  9. "encoding/json"
  10. "log"
  11. "math/rand"
  12. mu "mfw/util"
  13. "net/http"
  14. "regexp"
  15. util "spiderutil"
  16. "time"
  17. "github.com/surfer/agent"
  18. )
  19. var regImgStr = "\\.(JPG|jpg|GIF|gif|PNG|png|BMP|bmp)$"
  20. var regImg *regexp.Regexp
  21. var GarbledCodeReg = regexp.MustCompile("[纰锟绲庯卞鍤滐銇鐟閫嚜鎯壐璩鏉彲鍋撅绺閲嗭絣鐤鏅盫鎽亰寰钂鎳鍒鐏宀婾嗚亗鎬憰攬鍙嶁鑻疐璁鐞鏇顭庮渾寮鑶剸鐙鈪鍐実綍擄鐒鐛绫瀵珐鍡閬栬憟灞綅顡韪忚鍓笉犵鍎鐥慪璜钀氭畯焛鎲顏熺崿鍜鍩僜鍚褰囶鍘櫥闀撹棢檅閯嗏絖灦戝閹涜闇鐮捒鈥璺籏绶澶鎷樺鍌絒嗘鍊ク鐧榦璞嚟鍢鐡瓼屾煢宄鑽畵鎭鈹鑷稛磭鏋孊钄狅絆鐘塋尟鑺絍绂绗嘐幇璨閾戭嚦鐫婅檴碭妤鑴厷挰鐜縒闆憁鏃鐗猒鏁橈顤秨哵鍧紛濊閷顥閺惪鐓嶈亙濠掗帾媞鏀慿瓙鎺闁鎰鑸鎹皝鍔鍦骞閶鍞挾鎴竗閵繉闋戞籅閽欏閼縲鐣呮墔顐ら憼檾锝挻顚炶姂剾鐑鐭潛閰涳楂懘願澧亣倴鐦忕嫄刡灏棙宓媐铇甀鏂楁従態瀹揕闃姒炲矕鏌眱鍍熸腹儝绱獻鐬鑵矦鍝嗗墹崇琛勭仈濴顒剭閴鍏鐝曨锛よ顧勯槈夊潏鐖垚矑鍛瞋終缂鐪鍠鏆妫攏顪娌濆嘇璎厫鍗閮顝給榇婂唭姘燁鏍鑹笎爑嚔槌瀣糵炵櫤鐎闅ゅ類鐨夛绋搕缃娉犲搻鐠儧鋸闉攜楸ㄨ埧欒闊垱鈩厔弐顠拵鑾]+")
  22. func init() {
  23. regImg, _ = regexp.Compile(regImgStr)
  24. }
  25. //下载页面,发送消息,等待别人下载
  26. func Download(downloadnode, downloaderid, url, method string, head map[string]interface{}, encoding string, useproxy, ishttps bool, code string, timeout int64) string {
  27. defer mu.Catch()
  28. ResultMsclient := MsclientTest
  29. if downloadnode == "test" { //805
  30. ResultMsclient = MsclientTest
  31. } else if downloadnode == "comm" { //801
  32. ResultMsclient = Msclient
  33. } else if downloadnode == "bid" { //803
  34. ResultMsclient = MsclientBid
  35. }
  36. msgid := mu.UUID(8)
  37. if len(head) < 1 {
  38. l := len(agent.UserAgents["common"])
  39. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  40. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  41. }
  42. isImg := regImg.MatchString(url)
  43. var ret []byte
  44. var err error
  45. if downloaderid == "" {
  46. ret, err = ResultMsclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  47. "url": url,
  48. "method": method,
  49. "head": head,
  50. "encoding": encoding,
  51. "useproxy": useproxy,
  52. "ishttps": ishttps,
  53. }, timeout)
  54. } else {
  55. if isAvailable(downloaderid) {
  56. ret, err = ResultMsclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  57. "url": url,
  58. "method": method,
  59. "head": head,
  60. "encoding": encoding,
  61. "useproxy": useproxy,
  62. "ishttps": ishttps,
  63. }, timeout)
  64. } else {
  65. return ""
  66. }
  67. }
  68. if err != nil {
  69. str := "方法DownloadAdv,url:" + url + ",err:" + err.Error()
  70. log.Println(str)
  71. }
  72. tmp := map[string]interface{}{}
  73. json.Unmarshal(ret, &tmp)
  74. if v, ok := tmp["code"].(string); ok && v == "200" {
  75. if isImg {
  76. bs, _ := tmp["content"].(string)
  77. return string(bs)
  78. } else {
  79. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  80. return string(bs)
  81. }
  82. } else {
  83. return ""
  84. }
  85. }
  86. //下载页面,发送消息,等待别人下载
  87. func DownloadAdv(downloadnode, downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) (string, []*http.Cookie) {
  88. defer mu.Catch()
  89. ResultMsclient := MsclientTest
  90. if downloadnode == "test" { //805
  91. ResultMsclient = MsclientTest
  92. } else if downloadnode == "comm" { //801
  93. ResultMsclient = Msclient
  94. } else if downloadnode == "bid" { //803
  95. ResultMsclient = MsclientBid
  96. }
  97. msgid := mu.UUID(8)
  98. if len(head) < 1 {
  99. l := len(agent.UserAgents["common"])
  100. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  101. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  102. }
  103. isImg := regImg.MatchString(url)
  104. var ret []byte
  105. var err error
  106. if downloaderid == "" {
  107. ret, err = ResultMsclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  108. "url": url,
  109. "method": method,
  110. "head": head,
  111. "reqparam": reqparam,
  112. "cookie": mycookie,
  113. "encoding": encoding,
  114. "useproxy": useproxy,
  115. "ishttps": ishttps,
  116. }, timeout)
  117. } else {
  118. if isAvailable(downloaderid) {
  119. ret, err = ResultMsclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  120. "url": url,
  121. "method": method,
  122. "head": head,
  123. "reqparam": reqparam,
  124. "cookie": mycookie,
  125. "encoding": encoding,
  126. "useproxy": useproxy,
  127. "ishttps": ishttps,
  128. }, timeout)
  129. } else {
  130. return "", nil
  131. }
  132. }
  133. if err != nil {
  134. str := "方法DownloadAdv,url:" + url + ",err:" + err.Error()
  135. log.Println(str)
  136. }
  137. tmp := map[string]interface{}{}
  138. json.Unmarshal(ret, &tmp)
  139. cooks := util.ParseHttpCookie(tmp["cookie"])
  140. if v, ok := tmp["code"].(string); ok && v == "200" {
  141. if isImg {
  142. bs, _ := tmp["content"].(string)
  143. return string(bs), cooks
  144. } else {
  145. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  146. return string(bs), cooks
  147. }
  148. } else {
  149. return "", nil
  150. }
  151. }
  152. func DownloadFile(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
  153. defer mu.Catch()
  154. timeout = timeout * 10
  155. msgid := mu.UUID(8)
  156. if len(head) < 1 {
  157. l := len(agent.UserAgents["common"])
  158. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  159. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  160. }
  161. var ret []byte
  162. var err error
  163. if downloaderid == "" {
  164. ret, err = MsclientFile.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  165. "url": url,
  166. "method": method,
  167. "head": head,
  168. "reqparam": reqparam,
  169. "cookie": mycookie,
  170. "encoding": encoding,
  171. "useproxy": useproxy,
  172. "ishttps": ishttps,
  173. }, timeout)
  174. } else {
  175. if isAvailableFile(downloaderid) {
  176. ret, err = MsclientFile.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  177. "url": url,
  178. "method": method,
  179. "head": head,
  180. "reqparam": reqparam,
  181. "cookie": mycookie,
  182. "encoding": encoding,
  183. "useproxy": useproxy,
  184. "ishttps": ishttps,
  185. }, timeout)
  186. } else {
  187. return nil
  188. }
  189. }
  190. if err != nil {
  191. str := code + "方法DownloadFile,url:" + url + ",err:" + err.Error()
  192. log.Println(str, timeout)
  193. }
  194. tmp := map[string]interface{}{}
  195. json.Unmarshal(ret, &tmp)
  196. if v, ok := tmp["code"].(string); ok && v == "200" {
  197. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  198. return bs
  199. } else {
  200. return nil
  201. }
  202. }
  203. func DownloadFile_back(downloaderid, url, method string, reqparam, head map[string]interface{}, mycookie []*http.Cookie, encoding string, useproxy, ishttps bool, code string, timeout int64) []byte {
  204. defer mu.Catch()
  205. msgid := mu.UUID(8)
  206. if len(head) < 1 {
  207. l := len(agent.UserAgents["common"])
  208. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  209. head["User-Agent"] = agent.UserAgents["common"][r.Intn(l)]
  210. }
  211. var ret []byte
  212. var err error
  213. if downloaderid == "" {
  214. ret, err = Msclient.Call("", msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_RAND_RECIVER, map[string]interface{}{
  215. "url": url,
  216. "method": method,
  217. "head": head,
  218. "reqparam": reqparam,
  219. "cookie": mycookie,
  220. "encoding": encoding,
  221. "useproxy": useproxy,
  222. "ishttps": ishttps,
  223. }, timeout)
  224. } else {
  225. if isAvailable(downloaderid) {
  226. ret, err = Msclient.Call(downloaderid, msgid, mu.SERVICE_DOWNLOAD, mu.SENDTO_TYPE_P2P, map[string]interface{}{
  227. "url": url,
  228. "method": method,
  229. "head": head,
  230. "reqparam": reqparam,
  231. "cookie": mycookie,
  232. "encoding": encoding,
  233. "useproxy": useproxy,
  234. "ishttps": ishttps,
  235. }, timeout)
  236. } else {
  237. return nil
  238. }
  239. }
  240. if err != nil {
  241. str := "方法DownloadFile,url:" + url + ",err:" + err.Error()
  242. log.Println(map[string]interface{}{"code": code, "content": str, "comeintime": time.Now().Unix()})
  243. }
  244. tmp := map[string]interface{}{}
  245. json.Unmarshal(ret, &tmp)
  246. if v, ok := tmp["code"].(string); ok && v == "200" {
  247. bs, _ := base64.StdEncoding.DecodeString(tmp["content"].(string))
  248. return bs
  249. } else {
  250. return nil
  251. }
  252. }
  253. //下载点是否可用
  254. func isAvailable(code string) bool {
  255. b := false
  256. for k, _ := range Alldownloader {
  257. if k == code {
  258. b = true
  259. }
  260. }
  261. return b
  262. }
  263. //下载点是否可用
  264. func isAvailableFile(code string) bool {
  265. b := false
  266. for k, _ := range AlldownloaderFile {
  267. if k == code {
  268. b = true
  269. }
  270. }
  271. return b
  272. }