package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"strings"
	"testing"
	"time"

	"github.com/PuerkitoBio/goquery"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
)

// extractTitleAndCopyright parses html and returns the text of its <title>
// element together with the trimmed text of the last <p> element that
// contains "版权所有" ("all rights reserved"). copyright is empty when no
// paragraph matches; a non-nil error means the document could not be parsed.
func extractTitleAndCopyright(html string) (title, copyright string, err error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return "", "", fmt.Errorf("parsing html: %w", err)
	}
	title = doc.Find("title").Text()
	doc.Find("p").Each(func(_ int, s *goquery.Selection) {
		// Later matches overwrite earlier ones, so the last matching
		// paragraph wins.
		if text := strings.TrimSpace(s.Text()); strings.Contains(text, "版权所有") {
			copyright = text
		}
	})
	return title, copyright, nil
}

// TestGovWeb walks every document of the "excel" collection, looks up the
// matching record in "xlsx_html_data" by name/href/from, extracts the page
// title and copyright line from the stored HTML and saves the enriched record
// into "wcc_html_data".
//
// It needs a MongoDB instance reachable at the address configured below.
func TestGovWeb(T *testing.T) {
	// 87: competitor data
	MgoLua := &mongodb.MongodbSim{
		//MongodbAddr: "172.17.4.87:27080",
		MongodbAddr: "127.0.0.1:27081",
		Size:        10,
		DbName:      "zxl",
		UserName:    "",
		Password:    "",
		Direct:      true,
	}
	MgoLua.InitPool()
	sess := MgoLua.GetMgoConn()
	defer MgoLua.DestoryMongoConn(sess)

	it := sess.DB("zxl").C("excel").Find(nil).Select(nil).Iter()
	// Close the cursor on every exit path and surface iteration errors, which
	// would otherwise look like a normal end of data.
	defer func() {
		if err := it.Close(); err != nil {
			T.Errorf("iterating excel collection: %v", err)
		}
	}()

	count := 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
		if count%100 == 0 {
			log.Println("current:", count, tmp["name"], tmp["href"])
		}

		where := map[string]interface{}{
			"name": tmp["name"],
			"href": tmp["href"],
			"from": tmp["from"],
		}
		// The second return value is not needed: a nil or empty result is
		// checked explicitly below.
		xd, _ := MgoLua.FindOne("xlsx_html_data", where)
		if xd == nil || len(*xd) == 0 {
			// Nothing to enrich; dereferencing or saving an empty record
			// would either panic or write garbage.
			T.Logf("no xlsx_html_data record for %v, skipping", where)
			continue
		}
		ins := *xd

		title, copyright, err := extractTitleAndCopyright(util.ObjToString(ins["html"]))
		if err != nil {
			// T.Fatalf (unlike log.Fatal) still runs the deferred cleanup.
			T.Fatalf("record %v: %v", where, err)
		}

		// Only fill in the title when the record does not already have one.
		if util.ObjToString(ins["title"]) == "" {
			ins["title"] = title
		}
		ins["copyright"] = copyright
		//fmt.Println("标题内容:", title)
		//fmt.Println("版权信息:", copyright)

		MgoLua.SaveByOriID("wcc_html_data", ins)
	}
}

// TestA runs the title/copyright extraction against a static copy of an
// English gov.cn landing page and prints what was found.
func TestA(T *testing.T) {
	// NOTE(review): the head section is kept as it appeared in the original
	// fixture; the body markup is a minimal reconstruction that preserves the
	// visible text of the page — confirm against the original capture if the
	// exact element structure matters.
	const htmlContent = `<html xmlns="http://www.w3.org/1999/xhtml"><head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="keywords" content="China, government, Premier, Premier Li, Chinese Premier, People's Republic of China, Chinese government, China government, Chinese Central Government, State Council, China State Council, China leaders, PRC">
    <meta name="description" content="Get quick, easy access to all services, policies, news and information about the Chinese government and Chinese leaders">
    <meta name="format-detection" content="telephone=no">
    <title>The State Council of the People's Republic of China</title>
</head>
<body>
    <div class="desktop">
        <div class="header">
            <span>July 16 2025</span>
            <p>App</p>
            <a>中文</a>
        </div>
        <div class="statistics">
            <h2>STATISTICS</h2>
            <p>China's economy in first half 2025</p>
            <p>China's GDP expands 5.3 pct year on year in H1</p>
        </div>
        <div class="institutions">
            <h2>INSTITUTIONS</h2>
            <p>PREMIER:</p>
            <p>Li Qiang</p>
            <p>VICE-PREMIERS:</p>
            <p>Ding Xuexiang,</p>
            <p>He Lifeng,</p>
            <p>Zhang Guoqing,</p>
            <p>Liu Guozhong</p>
            <p>STATE COUNCILORS:</p>
            <p>Wang Xiaohong,</p>
            <p>Wu Zhenglong,</p>
            <p>Shen Yiqin</p>
            <p>SECRETARY GENERAL:</p>
            <p>Wu Zhenglong</p>
        </div>
        <div class="feedback">
            <h2>FEEDBACK</h2>
            <p>RESPONSES</p>
            <p>FAQ</p>
        </div>
        <div class="footer">
            <p>Copyright© www.gov.cn | About us | Contact us</p>
            <p>Website Identification Code bm01000001
                Registration Number: 05070218</p>
            <p>All rights reserved. The content (including but not limited to text, photo, multimedia information, etc) published in this site belongs to www.gov.cn.</p>
            <p>Without written authorization from www.gov.cn, such content shall not be republished or used in any form.</p>
            <a>Mobile</a>
        </div>
    </div>
    <div class="mobile">
        <div class="institutions">
            <h2>INSTITUTIONS</h2>
            <p>PREMIER:</p>
            <p>Li Qiang</p>
            <p>VICE-PREMIERS:</p>
            <p>Ding Xuexiang,</p>
            <p>He Lifeng,</p>
            <p>Zhang Guoqing,</p>
            <p>Liu Guozhong</p>
            <p>STATE COUNCILORS:</p>
            <p>Wang Xiaohong,</p>
            <p>Wu Zhenglong,</p>
            <p>Shen Yiqin</p>
            <p>SECRETARY GENERAL:</p>
            <p>Wu Zhenglong</p>
        </div>
        <div class="policies">
            <h2>POLICIES</h2>
            <p>China issues guidelines highlighting independent, impartial judicial work</p>
        </div>
        <div class="statistics">
            <h2>STATISTICS</h2>
            <p>China's economy in first half 2025</p>
        </div>
        <div class="feedback">
            <h2>FEEDBACK</h2>
            <p>RESPONSES</p>
            <p>FAQ</p>
        </div>
        <div class="footer">
            <p>Desktop</p>
            <p>Copyright© www.gov.cn | Contact us</p>
            <p>Website Identification Code bm01000001</p>
            <p>Registration Number: 05070218</p>
        </div>
    </div>
</body>
</html>`

	title, copyright, err := extractTitleAndCopyright(htmlContent)
	if err != nil {
		T.Fatal(err)
	}

	fmt.Println("标题内容:", title)
	fmt.Println("版权信息:", copyright)

	const wantTitle = "The State Council of the People's Republic of China"
	if title != wantTitle {
		T.Errorf("title = %q, want %q", title, wantTitle)
	}

	//a := []int{1, 2, 3}
	//b := make([]int, 0)
	//for _, v := range a {
	//	b = append(b, v)
	//}
	//log.Println(b)
}

// TestB prints which switch branch each collection name is routed to.
// Names without a matching case print nothing.
func TestB(T *testing.T) {
	CollArr := []string{
		"company_base", "company_employee", "company_history_name", "company_partner",
		"annual_report_base", "annual_report_website",
		"special_enterprise", "special_foundation", "special_gov_unit",
		"special_hongkong_company", "special_law_office", "special_social_organ",
		"special_trade_union",
	}

	for _, v := range CollArr {
		switch v {
		case "company_base":
			fmt.Println("company_base")
		case "company_employee":
			fmt.Println("company_employee")
		case "special_enterprise", "special_foundation", "special_gov_unit",
			"special_hongkong_company", "special_law_office", "special_social_organ",
			"special_trade_union":
			fmt.Println("special")
		}
	}
}

// TestGetArea fetches administrative-division data (city division codes)
// from xzqh.mca.gov.cn and checks that the response is a JSON object.
//
// The test is skipped when the remote host cannot be reached.
func TestGetArea(T *testing.T) {
	//url := "http://xzqh.mca.gov.cn/getInfo?code=100000&type=2" // province list
	//url := "http://xzqh.mca.gov.cn/data/120000_Point.geojson"
	url := "http://xzqh.mca.gov.cn/data/quanguo_Point.geojson"

	// The default client has no timeout and could hang the test run forever.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		T.Skipf("GET %s: %v", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		T.Fatalf("GET %s: unexpected status code %d", url, resp.StatusCode)
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		T.Fatalf("decoding response from %s: %v", url, err)
	}
	T.Logf("decoded %d top-level keys", len(result))

	//if len(result) > 0 {
	//	for k, _ := range result {
	//		log.Println(k)
	//		urlD := fmt.Sprintf("http://xzqh.mca.gov.cn/data/%s_Point.geojson", "810000")
	//		resp, err := http.Get(urlD)
	//		if err != nil {
	//			log.Printf("Error %v", err)
	//			return
	//		}
	//		defer resp.Body.Close()
	//
	//		var resultD map[string]interface{}
	//		err = json.NewDecoder(resp.Body).Decode(&resultD)
	//		if err != nil {
	//			log.Printf("Error decoding watcher execution result: %v", err)
	//		}
	//
	//	}
	//}
	//log.Println(result)
}