|
- package utils
- import (
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "github.com/xuri/excelize/v2"
- "log"
- "net/http"
- "net/url"
- "os"
- "path"
- "strings"
- "time"
- )
- // ProvinceData 省份数据
- type ProvinceData struct {
- Name string `json:"name"` //北京市,天津市,河北省
- Href string `json:"href"` //65.html;
- Code string `json:"code"` //65
- Cities []CityData `json:"cities"` // 城市数据
- }
- // CityData 城市信息
- type CityData struct {
- Name string `json:"name"`
- Code string `json:"code"` //320800000000
- Href string `json:"href"`
- Areas []AreaData `json:"areas"`
- }
- // AreaData 区县数据
- type AreaData struct {
- Name string `json:"name"`
- Code string `json:"code"` //
- Href string `json:"href"`
- Streets []StreetData
- }
// StreetData describes one street/town-level division read from a county page.
type StreetData struct {
	// Name is the text of the last link in the town row.
	Name string `json:"name"`
	// Code is the text of the first link in the town row.
	Code string `json:"code"`
	// Href is the target of the first link in the row. It is recorded but not
	// followed by the code visible in this file.
	Href string `json:"href"`
}
// ResData is one flattened row of the division hierarchy as written to the
// exported spreadsheet. Fields of levels deeper than Level are left empty.
type ResData struct {
	// Level is the depth of the row: 1 province, 2 city, 3 county, 4 street.
	Level int `json:"level"`

	// Province-level name and code.
	Province     string `json:"province"`
	ProvinceCode string `json:"province_code"`

	// City-level name and code.
	City     string `json:"city"`
	CityCode string `json:"city_code"`

	// County/district-level name and code.
	Area     string `json:"area"`
	AreaCode string `json:"area_code"`

	// Street/town-level name and code.
	Street     string `json:"street"`
	StreetCode string `json:"street_code"`
}
// baseUrl is the index page of the 2023 division-code tables on
// www.stats.gov.cn. Province and city hrefs are appended to it directly, so
// the trailing slash is required.
var baseUrl = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/"
- // GetStaticInfo 获取国家统计局数据
- func GetStaticInfo() {
- res, err := http.Get(baseUrl)
- if err != nil {
- log.Fatal(err)
- }
- defer res.Body.Close()
- if res.StatusCode != 200 {
- log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
- }
- doc, err := goquery.NewDocumentFromReader(res.Body)
- if err != nil {
- log.Fatal(err)
- }
- ProvinceDatas := make([]ProvinceData, 0)
- doc.Find("tr[class=provincetr] td").Each(func(i int, selection *goquery.Selection) {
- //fmt.Println(selection.Html())
- //fmt.Println("省份", selection.Text())
- //fmt.Println("a", selection.Find("a").Text())
- href, exists := selection.Find("a").Attr("href")
- //href, exists := selection.Attr("href")
- if exists {
- //fmt.Println("省份href", href)
- }
- p := ProvinceData{
- Name: selection.Text(),
- Code: strings.Replace(href, ".html", "", -1),
- Href: href,
- }
- ProvinceDatas = append(ProvinceDatas, p)
- })
- // 获取城市数据
- getCityData(ProvinceDatas)
- var resD = make([]ResData, 0)
- for _, v := range ProvinceDatas {
- data := ResData{Level: 1, Province: v.Name, ProvinceCode: v.Code}
- resD = append(resD, data)
- for _, city := range v.Cities {
- cityData := ResData{Level: 2, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code}
- resD = append(resD, cityData)
- for _, area := range city.Areas {
- areaData := ResData{Level: 3, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code}
- resD = append(resD, areaData)
- for _, street := range area.Streets {
- streetData := ResData{Level: 4, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code, Street: street.Name, StreetCode: street.Code}
- resD = append(resD, streetData)
- }
- }
- }
- }
- //导出数据文件
- file := time.Now().Format("20060102") + "_统计局统计数据.xlsx"
- //var xlsx *excelize.File
- currentPwd, _ := os.Getwd()
- exportFile := fmt.Sprintf("%s/%s", currentPwd, file)
- sheet := "2023"
- xlsx := excelize.NewFile(excelize.Options{ShortDatePattern: "yyyy/m/dd"})
- xlsx.NewSheet(sheet)
- xlsx.DeleteSheet("Sheet1")
- line := 0
- subtitles := []interface{}{"层级", "省份", "省份代码", "城市", "城市代码", "区县", "区县代码", "街道", "街道代码"}
- line++
- //设置第一行title
- _ = xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &subtitles)
- fmt.Println("导出数据总数:-------", len(resD))
- for k, _ := range resD {
- line++
- val := []interface{}{
- resD[k].Level, resD[k].Province, resD[k].ProvinceCode, resD[k].City, resD[k].CityCode, resD[k].Area, resD[k].AreaCode, resD[k].Street, resD[k].StreetCode,
- }
- xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &val)
- }
- xlsx.Path = exportFile
- xlsx.Save()
- fmt.Println("数据导出结束")
- }
- // getCityData 获取城市
- func getCityData(pds []ProvinceData) {
- for i := range pds {
- v := &pds[i]
- fmt.Println("省份", v.Name, "获取城市数据")
- //
- req, err := http.NewRequest("GET", baseUrl+v.Href, nil)
- if err != nil {
- log.Fatal(err)
- }
- req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/2.0")
- resp, err := http.DefaultClient.Do(req)
- if err != nil {
- log.Fatal(err)
- }
- defer resp.Body.Close()
- //获取城市信息
- //cityUrl := baseUrl + v.Href
- //res, err := http.Get(cityUrl)
- //if err != nil {
- // log.Fatal(err)
- //}
- //defer res.Body.Close()
- //if res.StatusCode != 200 {
- // log.Println(cityUrl, "请求失败")
- //}
- cityDoc, err := goquery.NewDocumentFromReader(resp.Body)
- if err != nil {
- log.Fatal(err)
- }
- //
- cities := make([]CityData, 0)
- cityDoc.Find("tr[class=citytr]").Each(func(i int, selection *goquery.Selection) {
- //fmt.Println(selection.Html())
- //fmt.Println("省份", selection.Text())
- //fmt.Println("first", selection.Find("a").First().Text())
- //fmt.Println("last", selection.Find("a").Last().Text())
- city := CityData{
- Name: selection.Find("a").Last().Text(),
- Code: selection.Find("a").First().Text(),
- }
- href, exists := selection.Find("a").First().Attr("href")
- if exists {
- //fmt.Println("省份href", href)
- city.Href = href
- }
- cities = append(cities, city)
- })
- // 获取市区数据
- getArea(cities)
- //fmt.Println(cities)
- v.Cities = cities
- time.Sleep(time.Second)
- }
- }
- // getArea 获取辖区信息
- func getArea(cities []CityData) {
- for i := range cities {
- v := &cities[i]
- fmt.Println("城市", v.Name, "获取区县信息")
- //获取城市信息
- cityUrl := baseUrl + v.Href
- res, err := http.Get(cityUrl)
- if err != nil {
- log.Fatal(err)
- }
- defer res.Body.Close()
- if res.StatusCode != 200 {
- log.Println(cityUrl, "请求失败")
- }
- areaDoc, err := goquery.NewDocumentFromReader(res.Body)
- if err != nil {
- log.Fatal(err)
- }
- //
- areas := make([]AreaData, 0)
- areaDoc.Find("tr[class=countytr]").Each(func(i int, selection *goquery.Selection) {
- //fmt.Println(selection.Html())
- area := AreaData{
- Name: selection.Find("a").Last().Text(),
- Code: selection.Find("a").First().Text(),
- }
- href, exists := selection.Find("a").First().Attr("href")
- if exists {
- //fmt.Println("省份href", href)
- area.Href = href
- }
- if area.Name != "" {
- areas = append(areas, area)
- }
- })
- getStreet(areas, cityUrl)
- v.Areas = areas
- time.Sleep(time.Microsecond * 100)
- }
- }
- // getStreet 获取街道数据
- func getStreet(areas []AreaData, preHref string) {
- u, _ := url.Parse(preHref)
- u.Path = path.Dir(u.Path)
- newStr := u.String()
- //log.Println(dir)
- for i := range areas {
- v := &areas[i]
- fmt.Println("区域", v.Name, "获取街道信息")
- req, err := http.NewRequest("GET", newStr+"/"+v.Href, nil)
- if err != nil {
- log.Fatal(err)
- }
- req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/2.0")
- resp, err := http.DefaultClient.Do(req)
- if err != nil {
- log.Fatal(err)
- }
- defer resp.Body.Close()
- streetDoc, err := goquery.NewDocumentFromReader(resp.Body)
- if err != nil {
- log.Fatal(err)
- }
- streets := make([]StreetData, 0)
- streetDoc.Find("tr[class=towntr]").Each(func(i int, selection *goquery.Selection) {
- //fmt.Println(selection.Html())
- street := StreetData{
- Name: selection.Find("a").Last().Text(),
- Code: selection.Find("a").First().Text(),
- }
- href, exists := selection.Find("a").First().Attr("href")
- if exists {
- //fmt.Println("省份href", href)
- street.Href = href
- }
- if street.Name != "" {
- streets = append(streets, street)
- }
- })
- v.Streets = streets
- time.Sleep(time.Microsecond * 100)
- }
- }
|