stastic.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. package utils
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "github.com/xuri/excelize/v2"
  6. "log"
  7. "net/http"
  8. "net/url"
  9. "os"
  10. "path"
  11. "strings"
  12. "time"
  13. )
  14. // ProvinceData 省份数据
  15. type ProvinceData struct {
  16. Name string `json:"name"` //北京市,天津市,河北省
  17. Href string `json:"href"` //65.html;
  18. Code string `json:"code"` //65
  19. Cities []CityData `json:"cities"` // 城市数据
  20. }
  21. // CityData 城市信息
  22. type CityData struct {
  23. Name string `json:"name"`
  24. Code string `json:"code"` //320800000000
  25. Href string `json:"href"`
  26. Areas []AreaData `json:"areas"`
  27. }
  28. // AreaData 区县数据
  29. type AreaData struct {
  30. Name string `json:"name"`
  31. Code string `json:"code"` //
  32. Href string `json:"href"`
  33. Streets []StreetData
  34. }
  35. // StreetData 街道数据
  36. type StreetData struct {
  37. Name string `json:"name"`
  38. Code string `json:"code"` //
  39. Href string `json:"href"`
  40. }
  41. type ResData struct {
  42. Level int `json:"level"`
  43. Province string `json:"province"`
  44. ProvinceCode string `json:"province_code"`
  45. City string `json:"city"` // 城市
  46. CityCode string `json:"city_code"`
  47. Area string `json:"area"` //区县
  48. AreaCode string `json:"area_code"`
  49. Street string `json:"street"` // 街道
  50. StreetCode string `json:"street_code"`
  51. }
  52. var baseUrl = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/"
  53. // GetStaticInfo 获取国家统计局数据
  54. func GetStaticInfo() {
  55. res, err := http.Get(baseUrl)
  56. if err != nil {
  57. log.Fatal(err)
  58. }
  59. defer res.Body.Close()
  60. if res.StatusCode != 200 {
  61. log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
  62. }
  63. doc, err := goquery.NewDocumentFromReader(res.Body)
  64. if err != nil {
  65. log.Fatal(err)
  66. }
  67. ProvinceDatas := make([]ProvinceData, 0)
  68. doc.Find("tr[class=provincetr] td").Each(func(i int, selection *goquery.Selection) {
  69. //fmt.Println(selection.Html())
  70. //fmt.Println("省份", selection.Text())
  71. //fmt.Println("a", selection.Find("a").Text())
  72. href, exists := selection.Find("a").Attr("href")
  73. //href, exists := selection.Attr("href")
  74. if exists {
  75. //fmt.Println("省份href", href)
  76. }
  77. p := ProvinceData{
  78. Name: selection.Text(),
  79. Code: strings.Replace(href, ".html", "", -1),
  80. Href: href,
  81. }
  82. ProvinceDatas = append(ProvinceDatas, p)
  83. })
  84. // 获取城市数据
  85. getCityData(ProvinceDatas)
  86. var resD = make([]ResData, 0)
  87. for _, v := range ProvinceDatas {
  88. data := ResData{Level: 1, Province: v.Name, ProvinceCode: v.Code}
  89. resD = append(resD, data)
  90. for _, city := range v.Cities {
  91. cityData := ResData{Level: 2, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code}
  92. resD = append(resD, cityData)
  93. for _, area := range city.Areas {
  94. areaData := ResData{Level: 3, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code}
  95. resD = append(resD, areaData)
  96. for _, street := range area.Streets {
  97. streetData := ResData{Level: 4, Province: v.Name, ProvinceCode: v.Code, City: city.Name, CityCode: city.Code, Area: area.Name, AreaCode: area.Code, Street: street.Name, StreetCode: street.Code}
  98. resD = append(resD, streetData)
  99. }
  100. }
  101. }
  102. }
  103. //导出数据文件
  104. file := time.Now().Format("20060102") + "_统计局统计数据.xlsx"
  105. //var xlsx *excelize.File
  106. currentPwd, _ := os.Getwd()
  107. exportFile := fmt.Sprintf("%s/%s", currentPwd, file)
  108. sheet := "2023"
  109. xlsx := excelize.NewFile(excelize.Options{ShortDatePattern: "yyyy/m/dd"})
  110. xlsx.NewSheet(sheet)
  111. xlsx.DeleteSheet("Sheet1")
  112. line := 0
  113. subtitles := []interface{}{"层级", "省份", "省份代码", "城市", "城市代码", "区县", "区县代码", "街道", "街道代码"}
  114. line++
  115. //设置第一行title
  116. _ = xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &subtitles)
  117. fmt.Println("导出数据总数:-------", len(resD))
  118. for k, _ := range resD {
  119. line++
  120. val := []interface{}{
  121. resD[k].Level, resD[k].Province, resD[k].ProvinceCode, resD[k].City, resD[k].CityCode, resD[k].Area, resD[k].AreaCode, resD[k].Street, resD[k].StreetCode,
  122. }
  123. xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &val)
  124. }
  125. xlsx.Path = exportFile
  126. xlsx.Save()
  127. fmt.Println("数据导出结束")
  128. }
  129. // getCityData 获取城市
  130. func getCityData(pds []ProvinceData) {
  131. for i := range pds {
  132. v := &pds[i]
  133. fmt.Println("省份", v.Name, "获取城市数据")
  134. //
  135. req, err := http.NewRequest("GET", baseUrl+v.Href, nil)
  136. if err != nil {
  137. log.Fatal(err)
  138. }
  139. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/2.0")
  140. resp, err := http.DefaultClient.Do(req)
  141. if err != nil {
  142. log.Fatal(err)
  143. }
  144. defer resp.Body.Close()
  145. //获取城市信息
  146. //cityUrl := baseUrl + v.Href
  147. //res, err := http.Get(cityUrl)
  148. //if err != nil {
  149. // log.Fatal(err)
  150. //}
  151. //defer res.Body.Close()
  152. //if res.StatusCode != 200 {
  153. // log.Println(cityUrl, "请求失败")
  154. //}
  155. cityDoc, err := goquery.NewDocumentFromReader(resp.Body)
  156. if err != nil {
  157. log.Fatal(err)
  158. }
  159. //
  160. cities := make([]CityData, 0)
  161. cityDoc.Find("tr[class=citytr]").Each(func(i int, selection *goquery.Selection) {
  162. //fmt.Println(selection.Html())
  163. //fmt.Println("省份", selection.Text())
  164. //fmt.Println("first", selection.Find("a").First().Text())
  165. //fmt.Println("last", selection.Find("a").Last().Text())
  166. city := CityData{
  167. Name: selection.Find("a").Last().Text(),
  168. Code: selection.Find("a").First().Text(),
  169. }
  170. href, exists := selection.Find("a").First().Attr("href")
  171. if exists {
  172. //fmt.Println("省份href", href)
  173. city.Href = href
  174. }
  175. cities = append(cities, city)
  176. })
  177. // 获取市区数据
  178. getArea(cities)
  179. //fmt.Println(cities)
  180. v.Cities = cities
  181. time.Sleep(time.Second)
  182. }
  183. }
  184. // getArea 获取辖区信息
  185. func getArea(cities []CityData) {
  186. for i := range cities {
  187. v := &cities[i]
  188. fmt.Println("城市", v.Name, "获取区县信息")
  189. //获取城市信息
  190. cityUrl := baseUrl + v.Href
  191. res, err := http.Get(cityUrl)
  192. if err != nil {
  193. log.Fatal(err)
  194. }
  195. defer res.Body.Close()
  196. if res.StatusCode != 200 {
  197. log.Println(cityUrl, "请求失败")
  198. }
  199. areaDoc, err := goquery.NewDocumentFromReader(res.Body)
  200. if err != nil {
  201. log.Fatal(err)
  202. }
  203. //
  204. areas := make([]AreaData, 0)
  205. areaDoc.Find("tr[class=countytr]").Each(func(i int, selection *goquery.Selection) {
  206. //fmt.Println(selection.Html())
  207. area := AreaData{
  208. Name: selection.Find("a").Last().Text(),
  209. Code: selection.Find("a").First().Text(),
  210. }
  211. href, exists := selection.Find("a").First().Attr("href")
  212. if exists {
  213. //fmt.Println("省份href", href)
  214. area.Href = href
  215. }
  216. if area.Name != "" {
  217. areas = append(areas, area)
  218. }
  219. })
  220. getStreet(areas, cityUrl)
  221. v.Areas = areas
  222. time.Sleep(time.Microsecond * 100)
  223. }
  224. }
  225. // getStreet 获取街道数据
  226. func getStreet(areas []AreaData, preHref string) {
  227. u, _ := url.Parse(preHref)
  228. u.Path = path.Dir(u.Path)
  229. newStr := u.String()
  230. //log.Println(dir)
  231. for i := range areas {
  232. v := &areas[i]
  233. fmt.Println("区域", v.Name, "获取街道信息")
  234. req, err := http.NewRequest("GET", newStr+"/"+v.Href, nil)
  235. if err != nil {
  236. log.Fatal(err)
  237. }
  238. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/2.0")
  239. resp, err := http.DefaultClient.Do(req)
  240. if err != nil {
  241. log.Fatal(err)
  242. }
  243. defer resp.Body.Close()
  244. streetDoc, err := goquery.NewDocumentFromReader(resp.Body)
  245. if err != nil {
  246. log.Fatal(err)
  247. }
  248. streets := make([]StreetData, 0)
  249. streetDoc.Find("tr[class=towntr]").Each(func(i int, selection *goquery.Selection) {
  250. //fmt.Println(selection.Html())
  251. street := StreetData{
  252. Name: selection.Find("a").Last().Text(),
  253. Code: selection.Find("a").First().Text(),
  254. }
  255. href, exists := selection.Find("a").First().Attr("href")
  256. if exists {
  257. //fmt.Println("省份href", href)
  258. street.Href = href
  259. }
  260. if street.Name != "" {
  261. streets = append(streets, street)
  262. }
  263. })
  264. v.Streets = streets
  265. time.Sleep(time.Microsecond * 100)
  266. }
  267. }