main.go 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. package main
  import (
  	"context"
  	"crypto/tls"
  	"encoding/json"
  	"fmt"
  	"io"
  	"net/http"
  	"net/url"
  	"os"
  	"strings"
  	"time"

  	"github.com/PuerkitoBio/goquery"
  	"github.com/gogf/gf/v2/frame/g"
  	"github.com/gogf/gf/v2/util/gconv"
  	"golang.org/x/net/proxy"
  )
  // httpProxy carries the two proxy addresses of one proxy node, decoded from
  // the "http" and "https" keys of the service reply.
  type httpProxy struct {
  	Http  string `json:"http"`
  	Https string `json:"https"`
  }

  // respStruct is the JSON envelope returned by the Jianyu proxy-fetch
  // endpoint; the proxy addresses sit under its "data" key.
  type respStruct struct {
  	Data httpProxy `json:"data"`
  }
  26. // getHttpProxyAddress 获取代理节点
  27. func getHttpProxyAddress() (*httpProxy, error) {
  28. tr := &http.Transport{
  29. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  30. }
  31. client := &http.Client{Transport: tr}
  32. req, err := http.NewRequest("GET", "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch", nil)
  33. if err != nil {
  34. return nil, err
  35. }
  36. req.Header.Set("Authorization", "Basic amlhbnl1MDAxOjEyM3F3ZSFB")
  37. req.Header.Set("Cache-Control", "no-cache")
  38. req.Header.Set("Connection", "keep-alive")
  39. req.Header.Set("Pragma", "no-cache")
  40. req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
  41. resp, err := client.Do(req)
  42. if err != nil {
  43. return nil, err
  44. }
  45. defer resp.Body.Close()
  46. bodyText, err := io.ReadAll(resp.Body)
  47. if err != nil {
  48. return nil, err
  49. }
  50. respMap := respStruct{}
  51. if err := json.Unmarshal(bodyText, &respMap); err != nil {
  52. return nil, err
  53. }
  54. return &respMap.Data, nil
  55. }
  56. func GetProxyClient() (*http.Client, error) {
  57. proxyData, err := getHttpProxyAddress()
  58. if err != nil {
  59. return nil, err
  60. }
  61. proxyURL, err := url.Parse(proxyData.Http)
  62. if err != nil {
  63. return nil, err
  64. }
  65. // 使用 SOCKS5 代理创建 HTTP 客户端
  66. dialer, err := proxy.FromURL(proxyURL, proxy.Direct)
  67. if err != nil {
  68. return nil, err
  69. }
  70. return &http.Client{
  71. Transport: &http.Transport{
  72. Dial: dialer.Dial,
  73. },
  74. }, nil
  75. }
  76. func mustGetProxyClient() *http.Client {
  77. for i := 0; i < 10; i++ {
  78. client, err := GetProxyClient()
  79. if err != nil {
  80. continue
  81. }
  82. return client
  83. }
  84. panic("获取失败")
  85. return nil
  86. }
  87. // 测试获取ip
  88. func runDemo() {
  89. client, err := GetProxyClient()
  90. if err != nil {
  91. panic(err)
  92. }
  93. resp, err := client.Get("https://cip.cc")
  94. if err != nil {
  95. panic(err)
  96. }
  97. // 处理 HTTP 响应
  98. defer resp.Body.Close()
  99. doc, err := goquery.NewDocumentFromReader(resp.Body)
  100. val := doc.Find(".data.kq-well").Text()
  101. fmt.Println(val)
  102. }
  103. func main() {
  104. runDemo()
  105. runDemo_1()
  106. return
  107. }
  108. func runDemo_1() {
  109. var final string
  110. for i := 1; i <= 28; i++ {
  111. req, err := http.NewRequest("GET", fmt.Sprintf("https://www.360docs.net/doc/368802932-%d.html", i), nil)
  112. if err != nil {
  113. g.Log()
  114. }
  115. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
  116. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,sq;q=0.7")
  117. req.Header.Set("Cache-Control", "no-cache")
  118. req.Header.Set("Connection", "keep-alive")
  119. req.Header.Set("Pragma", "no-cache")
  120. req.Header.Set("Upgrade-Insecure-Requests", "1")
  121. req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
  122. res, err := mustGetProxyClient().Do(req)
  123. if err != nil {
  124. panic(err)
  125. }
  126. defer res.Body.Close()
  127. doc, err := goquery.NewDocumentFromReader(res.Body)
  128. val, err := doc.Find("div#contents").Html()
  129. if err != nil {
  130. fmt.Println("ccc", err)
  131. }
  132. fmt.Println(i, val)
  133. final += val
  134. time.Sleep(time.Second)
  135. }
  136. os.WriteFile("区县code表.txt", gconv.Bytes(final), os.ModePerm)
  137. }