123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- package main
- import (
- "crypto/tls"
- "encoding/json"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "github.com/gogf/gf/v2/frame/g"
- "github.com/gogf/gf/v2/util/gconv"
- "golang.org/x/net/proxy"
- "io"
- "net/http"
- "net/url"
- "os"
- "time"
- )
// httpProxy holds the proxy addresses carried in the "data" object of the
// proxy service response, keyed by the "http" and "https" JSON fields.
type httpProxy struct {
	Http  string `json:"http"`
	Https string `json:"https"`
}

// respStruct is the response envelope returned by the Jianyu proxy-fetch
// endpoint; only the "data" member is decoded.
type respStruct struct {
	Data httpProxy `json:"data"`
}
- // getHttpProxyAddress 获取代理节点
- func getHttpProxyAddress() (*httpProxy, error) {
- tr := &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
- }
- client := &http.Client{Transport: tr}
- req, err := http.NewRequest("GET", "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch", nil)
- if err != nil {
- return nil, err
- }
- req.Header.Set("Authorization", "Basic amlhbnl1MDAxOjEyM3F3ZSFB")
- req.Header.Set("Cache-Control", "no-cache")
- req.Header.Set("Connection", "keep-alive")
- req.Header.Set("Pragma", "no-cache")
- req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
- resp, err := client.Do(req)
- if err != nil {
- return nil, err
- }
- defer resp.Body.Close()
- bodyText, err := io.ReadAll(resp.Body)
- if err != nil {
- return nil, err
- }
- respMap := respStruct{}
- if err := json.Unmarshal(bodyText, &respMap); err != nil {
- return nil, err
- }
- return &respMap.Data, nil
- }
- func GetProxyClient() (*http.Client, error) {
- proxyData, err := getHttpProxyAddress()
- if err != nil {
- return nil, err
- }
- proxyURL, err := url.Parse(proxyData.Http)
- if err != nil {
- return nil, err
- }
- // 使用 SOCKS5 代理创建 HTTP 客户端
- dialer, err := proxy.FromURL(proxyURL, proxy.Direct)
- if err != nil {
- return nil, err
- }
- return &http.Client{
- Transport: &http.Transport{
- Dial: dialer.Dial,
- },
- }, nil
- }
- func mustGetProxyClient() *http.Client {
- for i := 0; i < 10; i++ {
- client, err := GetProxyClient()
- if err != nil {
- continue
- }
- return client
- }
- panic("获取失败")
- return nil
- }
- // 测试获取ip
- func runDemo() {
- client, err := GetProxyClient()
- if err != nil {
- panic(err)
- }
- resp, err := client.Get("https://cip.cc")
- if err != nil {
- panic(err)
- }
- // 处理 HTTP 响应
- defer resp.Body.Close()
- doc, err := goquery.NewDocumentFromReader(resp.Body)
- val := doc.Find(".data.kq-well").Text()
- fmt.Println(val)
- }
- func main() {
- runDemo()
- runDemo_1()
- return
- }
- func runDemo_1() {
- var final string
- for i := 1; i <= 28; i++ {
- req, err := http.NewRequest("GET", fmt.Sprintf("https://www.360docs.net/doc/368802932-%d.html", i), nil)
- if err != nil {
- g.Log()
- }
- req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
- req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,sq;q=0.7")
- req.Header.Set("Cache-Control", "no-cache")
- req.Header.Set("Connection", "keep-alive")
- req.Header.Set("Pragma", "no-cache")
- req.Header.Set("Upgrade-Insecure-Requests", "1")
- req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
- res, err := mustGetProxyClient().Do(req)
- if err != nil {
- panic(err)
- }
- defer res.Body.Close()
- doc, err := goquery.NewDocumentFromReader(res.Body)
- val, err := doc.Find("div#contents").Html()
- if err != nil {
- fmt.Println("ccc", err)
- }
- fmt.Println(i, val)
- final += val
- time.Sleep(time.Second)
- }
- os.WriteFile("区县code表.txt", gconv.Bytes(final), os.ModePerm)
- }
|