wcc 1 年之前
父節點
當前提交
81d77bb9b7
共有 2 個文件被更改,包括 68 次插入和 5 次删除
  1. 3 0
      china_area/main.go
  2. 65 5
      china_area/utils/stastic.go

+ 3 - 0
china_area/main.go

@@ -6,6 +6,9 @@ import (
 )
 )
 
 
 func main() {
 func main() {
+
+	utils.GetStaticInfo()
+	return
 	var wg sync.WaitGroup
 	var wg sync.WaitGroup
 	wg.Add(2)
 	wg.Add(2)
 	go func() {
 	go func() {

+ 65 - 5
china_area/utils/stastic.go

@@ -6,11 +6,14 @@ import (
 	"github.com/xuri/excelize/v2"
 	"github.com/xuri/excelize/v2"
 	"log"
 	"log"
 	"net/http"
 	"net/http"
+	"net/url"
 	"os"
 	"os"
+	"path"
 	"strings"
 	"strings"
 	"time"
 	"time"
 )
 )
 
 
+// ProvinceData 省份数据
 type ProvinceData struct {
 type ProvinceData struct {
 	Name   string     `json:"name"`   //北京市,天津市,河北省
 	Name   string     `json:"name"`   //北京市,天津市,河北省
 	Href   string     `json:"href"`   //65.html;
 	Href   string     `json:"href"`   //65.html;
@@ -26,7 +29,16 @@ type CityData struct {
 	Areas []AreaData `json:"areas"`
 	Areas []AreaData `json:"areas"`
 }
 }
 
 
+// AreaData is one district/county entry together with the streets found under it.
 type AreaData struct {
 type AreaData struct {
+	Name    string `json:"name"` // district/county name
+	Code    string `json:"code"` // district/county code
+	Href    string `json:"href"` // relative link; getStreet appends it to the parent page's directory URL
+	Streets []StreetData // assigned by getStreet; NOTE(review): has no json tag, unlike the other fields — confirm whether that is intended
+}
+
+// StreetData 街道数据
+type StreetData struct {
 	Name string `json:"name"`
 	Name string `json:"name"`
 	Code string `json:"code"` //
 	Code string `json:"code"` //
 	Href string `json:"href"`
 	Href string `json:"href"`
@@ -36,10 +48,12 @@ type ResData struct {
 	Level        int    `json:"level"`
 	Level        int    `json:"level"`
 	Province     string `json:"province"`
 	Province     string `json:"province"`
 	ProvinceCode string `json:"province_code"`
 	ProvinceCode string `json:"province_code"`
-	City         string `json:"city"`
+	City         string `json:"city"` // 城市
 	CityCode     string `json:"city_code"`
 	CityCode     string `json:"city_code"`
-	Area         string `json:"area"`
+	Area         string `json:"area"` //区县
 	AreaCode     string `json:"area_code"`
 	AreaCode     string `json:"area_code"`
+	Street       string `json:"street"` // 街道
+	StreetCode   string `json:"street_code"`
 }
 }
 
 
 var baseUrl = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/"
 var baseUrl = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/"
@@ -91,6 +105,10 @@ func GetStaticInfo() {
 			for _, area := range city.Areas {
 			for _, area := range city.Areas {
 				areaData := ResData{Level: 3, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code}
 				areaData := ResData{Level: 3, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code}
 				resD = append(resD, areaData)
 				resD = append(resD, areaData)
+				for _, street := range area.Streets {
+					streetData := ResData{Level: 4, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code, Street: street.Name, StreetCode: street.Code}
+					resD = append(resD, streetData)
+				}
 			}
 			}
 		}
 		}
 	}
 	}
@@ -105,7 +123,7 @@ func GetStaticInfo() {
 	xlsx.NewSheet(sheet)
 	xlsx.NewSheet(sheet)
 	xlsx.DeleteSheet("Sheet1")
 	xlsx.DeleteSheet("Sheet1")
 	line := 0
 	line := 0
-	subtitles := []interface{}{"层级", "省份", "省份代码", "城市", "城市代码", "区县", "区县代码"}
+	subtitles := []interface{}{"层级", "省份", "省份代码", "城市", "城市代码", "区县", "区县代码", "街道", "街道代码"}
 	line++
 	line++
 	//设置第一行title
 	//设置第一行title
 	_ = xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &subtitles)
 	_ = xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &subtitles)
@@ -114,7 +132,7 @@ func GetStaticInfo() {
 	for k, _ := range resD {
 	for k, _ := range resD {
 		line++
 		line++
 		val := []interface{}{
 		val := []interface{}{
-			resD[k].Level, resD[k].Province, resD[k].ProvinceCode, resD[k].City, resD[k].CityCode, resD[k].Area, resD[k].AreaCode,
+			resD[k].Level, resD[k].Province, resD[k].ProvinceCode, resD[k].City, resD[k].CityCode, resD[k].Area, resD[k].AreaCode, resD[k].Street, resD[k].StreetCode,
 		}
 		}
 		xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &val)
 		xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &val)
 	}
 	}
@@ -221,15 +239,57 @@ func getArea(cities []CityData) {
 				areas = append(areas, area)
 				areas = append(areas, area)
 			}
 			}
 		})
 		})
+
+		getStreet(areas, cityUrl)
+
 		v.Areas = areas
 		v.Areas = areas
 		time.Sleep(time.Microsecond * 100)
 		time.Sleep(time.Microsecond * 100)
+
 	}
 	}
 }
 }
 
 
 // getStreet 获取街道数据
 // getStreet 获取街道数据
-func getStreet(areas []AreaData) {
+func getStreet(areas []AreaData, preHref string) {
+	u, _ := url.Parse(preHref)
+	u.Path = path.Dir(u.Path)
+	newStr := u.String()
+	//log.Println(dir)
 	for i := range areas {
 	for i := range areas {
 		v := &areas[i]
 		v := &areas[i]
 		fmt.Println("区域", v.Name, "获取街道信息")
 		fmt.Println("区域", v.Name, "获取街道信息")
+		req, err := http.NewRequest("GET", newStr+"/"+v.Href, nil)
+		if err != nil {
+			log.Fatal(err)
+		}
+		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/2.0")
+
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			log.Fatal(err)
+		}
+		defer resp.Body.Close()
+		streetDoc, err := goquery.NewDocumentFromReader(resp.Body)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		streets := make([]StreetData, 0)
+		streetDoc.Find("tr[class=towntr]").Each(func(i int, selection *goquery.Selection) {
+			//fmt.Println(selection.Html())
+			street := StreetData{
+				Name: selection.Find("a").Last().Text(),
+				Code: selection.Find("a").First().Text(),
+			}
+			href, exists := selection.Find("a").First().Attr("href")
+			if exists {
+				//fmt.Println("省份href", href)
+				street.Href = href
+			}
+			if street.Name != "" {
+				streets = append(streets, street)
+			}
+		})
+		v.Streets = streets
+		time.Sleep(time.Microsecond * 100)
 	}
 	}
 }
 }