package main

import (
	"crypto/tls"
	"fmt"
	"log"
	"net"
	"net/http"
	"net/url"
	"strings"
	"sync"
	"time"

	"github.com/xuri/excelize/v2"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
)

// updatePing re-checks every wcc_code_test record whose "visit" field is
// "域名不可访问" by issuing real HTTP/HTTPS requests (see checkURL) and writes
// the fresh result back to the same record.
//
// Up to 30 checks run concurrently; the function returns once all of them
// have finished.
func updatePing() {
	// "87" competitor-data MongoDB. The direct address is kept as a
	// commented-out alternative to the local one in use.
	MgoLua := &mongodb.MongodbSim{
		//MongodbAddr: "172.17.4.87:27080",
		MongodbAddr: "127.0.0.1:27081",
		Size:        10,
		DbName:      "editor",
		UserName:    "",
		Password:    "",
		Direct:      true,
	}
	MgoLua.InitPool()
	sess := MgoLua.GetMgoConn()
	defer MgoLua.DestoryMongoConn(sess)

	// ch is a counting semaphore that caps the number of in-flight checks.
	ch := make(chan bool, 30)
	wg := &sync.WaitGroup{}

	where := map[string]interface{}{
		"visit": "域名不可访问",
	}
	it := sess.DB("editor").C("wcc_code_test").Find(&where).Select(nil).Iter()

	count := 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
		if count%100 == 0 {
			log.Println("current:", count, tmp["domain"], tmp["code"])
		}

		ch <- true
		wg.Add(1)
		go func(tmp map[string]interface{}) {
			defer func() {
				<-ch
				wg.Done()
			}()

			id := mongodb.BsonIdToSId(tmp["_id"])
			domain := util.ObjToString(tmp["domain"])

			update := make(map[string]interface{})
			if checkURL(domain) {
				update["visit"] = "域名可访问"
			} else {
				update["visit"] = "域名不可访问"
			}
			MgoLua.UpdateById("wcc_code_test", id, map[string]interface{}{"$set": update})
		}(tmp)

		// The goroutine now owns the previous map; hand the iterator a new one.
		tmp = make(map[string]interface{})
	}

	wg.Wait()
	log.Println("数据处理完毕")
}

// probeURL sends one GET request with common browser headers to rawURL using
// client and reports whether a response was received. Any HTTP status code
// counts as success; only request-building or transport errors count as
// failure. The response body is closed without being read. When logSuccess
// is true the status code and elapsed time are printed on success.
func probeURL(client *http.Client, rawURL string, logSuccess bool) bool {
	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		// A malformed domain must yield "not reachable", not a nil-request panic.
		fmt.Println("访问", rawURL, "失败:", err)
		return false
	}

	// Common browser request headers.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
	req.Header.Set("Accept-Encoding", "gzip, deflate, br")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
	req.Header.Set("Connection", "keep-alive")

	start := time.Now()
	resp, err := client.Do(req)
	cost := time.Since(start)
	if err != nil {
		fmt.Println("访问", rawURL, "失败:", err, "耗时:", cost)
		return false
	}
	defer resp.Body.Close()

	if logSuccess {
		fmt.Println("访问", rawURL, "成功,状态码:", resp.StatusCode, "耗时:", cost)
	}
	return true
}

// probeDomain tries http://domain first and https://domain second, returning
// true as soon as either one answers.
func probeDomain(client *http.Client, domain string, logSuccess bool) bool {
	for _, scheme := range []string{"http://", "https://"} {
		if probeURL(client, scheme+domain, logSuccess) {
			return true
		}
	}
	return false
}

// checkURL reports whether domain answers an HTTP or HTTPS GET request.
//
// No proxy is used, TLS certificate verification is skipped, connecting is
// limited to 8 seconds and each request to 60 seconds overall. Failures are
// printed to stdout; successes are silent.
func checkURL(domain string) bool {
	// Both IPv4 and IPv6 may be dialed: the deprecated net.Dialer.DualStack
	// field never restricted the address family, so it is not set here. Dial
	// "tcp4" explicitly if IPv4-only behaviour is ever required.
	transport := &http.Transport{
		DialContext:       (&net.Dialer{Timeout: 8 * time.Second}).DialContext,
		ForceAttemptHTTP2: true,
		TLSClientConfig:   &tls.Config{InsecureSkipVerify: true}, // skip certificate verification
	}
	// The transport is private to this call; release its idle connections so
	// repeated calls do not accumulate open sockets.
	defer transport.CloseIdleConnections()

	client := &http.Client{
		Timeout:   60 * time.Second,
		Transport: transport,
	}
	return probeDomain(client, domain, false)
}

// check is the verbose, proxied variant of checkURL: it prints the result of
// a system DNS lookup for domain, then probes http:// and https:// through
// the HTTP proxy at 127.0.0.1:7897 with a 10 second timeout per request,
// printing the outcome of every request. TLS certificates are verified.
func check(domain string) bool {
	// DNS resolution uses the system default resolver; the result is only printed.
	ips, err := net.LookupHost(domain)
	if err != nil {
		fmt.Println("DNS 解析失败:", err)
	} else {
		fmt.Println("系统 DNS 解析到的 IP:", ips)
	}

	// The proxy address is a constant, well-formed URL, so the parse error
	// is deliberately ignored.
	proxyURL, _ := url.Parse("http://127.0.0.1:7897")

	transport := &http.Transport{
		Proxy:             http.ProxyURL(proxyURL),
		ForceAttemptHTTP2: true,
		// TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, // enable if certificates cause failures
	}
	// The transport is private to this call; release its idle connections.
	defer transport.CloseIdleConnections()

	client := &http.Client{
		Timeout:   10 * time.Second,
		Transport: transport,
	}
	return probeDomain(client, domain, true)
}

// tcpTarget returns the host:port address to dial for u. When u carries no
// explicit port, 443 is used for https and 80 otherwise. ok is false when u
// has no host at all.
func tcpTarget(u *url.URL) (addr string, ok bool) {
	host := u.Hostname()
	if host == "" {
		return "", false
	}
	port := u.Port()
	if port == "" {
		port = "80"
		if u.Scheme == "https" {
			port = "443"
		}
	}
	return net.JoinHostPort(host, port), true
}

// pingDomain tests, for every task whose i_state is 0, 1 or 2, whether the
// host of the matching luaconfig href accepts a TCP connection within 5
// seconds, and saves one record per task (code, domain, i_state, visit) into
// wcc_code_test.
//
// Tasks without a luaconfig entry, or whose href cannot be parsed, are
// skipped. An href without a host is recorded as not reachable without
// dialing.
func pingDomain() {
	// "87" competitor-data MongoDB.
	MgoLua := &mongodb.MongodbSim{
		MongodbAddr: "172.17.4.87:27080",
		//MongodbAddr: "127.0.0.1:27081",
		Size:     10,
		DbName:   "editor",
		UserName: "",
		Password: "",
		//Direct: true,
	}
	MgoLua.InitPool()
	sess := MgoLua.GetMgoConn()
	defer MgoLua.DestoryMongoConn(sess)

	const timeout = 5 * time.Second

	where := map[string]interface{}{
		"i_state": map[string]interface{}{
			"$in": []int{0, 1, 2},
		},
	}
	it := sess.DB("editor").C("task").Find(where).Select(nil).Iter()

	count := 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
		if count%100 == 0 {
			log.Println("current:", count, tmp["s_site"], tmp["s_code"])
		}

		code := util.ObjToString(tmp["s_code"])
		re, _ := MgoLua.FindOne("luaconfig", map[string]interface{}{"code": code})
		if re == nil || len(*re) == 0 {
			continue
		}

		href := util.ObjToString((*re)["href"])
		parsedURL, err := url.Parse(href)
		if err != nil {
			// One bad href must not abort the whole run.
			log.Printf("解析URL失败: %v (code=%s)", err, code)
			continue
		}

		record := map[string]interface{}{
			"code":    code,
			"domain":  parsedURL.Host,
			"i_state": tmp["i_state"],
		}

		record["visit"] = "域名不可访问"
		if addr, ok := tcpTarget(parsedURL); ok {
			if conn, err := net.DialTimeout("tcp", addr, timeout); err == nil {
				record["visit"] = "域名可访问"
				conn.Close()
			}
		}
		MgoLua.Save("wcc_code_test", record)
	}
}

// updateLuaConfiig updates crawler configurations in luaconfig from the
// spreadsheet ./第二批刷任务.xlsx (sheet "未收录产生的任务").
//
// The first row is treated as a header. Each following row supplies, in
// order: crawler code, user name and target platform. Requirements this
// script implements:
//
//  1. Move to golua平台 crawlers: state=0, platform=golua平台.
//  2. Move to 通用平台 crawlers: state=0, platform=通用平台, claimtype=1.
//  3. Move to jschrome crawlers: state=0, platform=jschrome.
//  4. For all of them set createuser, modifyuser, modifyuserid, createuserid,
//     createuseremail and next. createuser/modifyuser are the user's s_name,
//     modifyuserid/createuserid the user's _id, createuseremail/next the
//     user's s_email.
//  5. Crawler collection: 87/editor/luaconfig; user collection: 87/editor/user.
//
// NOTE(review): the code sets claimtype=1 for all three platforms, although
// the requirement list mentions it only for 通用平台 — confirm which is
// intended.
//
// Rows with fewer than three cells, or whose code does not exist in
// luaconfig, are skipped. Processing stops at the first row whose user
// cannot be found.
func updateLuaConfiig() {
	// "87" competitor-data MongoDB.
	MgoLua := &mongodb.MongodbSim{
		MongodbAddr: "172.17.4.87:27080",
		//MongodbAddr: "127.0.0.1:27081",
		Size:     10,
		DbName:   "editor",
		UserName: "",
		Password: "",
		//Direct: true,
	}
	MgoLua.InitPool()
	sess := MgoLua.GetMgoConn()
	defer MgoLua.DestoryMongoConn(sess)

	//f, err := excelize.OpenFile("./luaconfig.xlsx")
	f, err := excelize.OpenFile("./第二批刷任务.xlsx")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer func() {
		if err := f.Close(); err != nil {
			fmt.Println(err)
		}
	}()

	//rows, err := f.GetRows("Sheet1")
	rows, err := f.GetRows("未收录产生的任务")
	if err != nil {
		fmt.Println(err)
		return
	}

	for i := 1; i < len(rows); i++ {
		row := rows[i]
		// Rows can come back shorter than the sheet's width; indexing them
		// blindly would panic.
		if len(row) < 3 {
			log.Println("行数据不足3列,跳过:", i+1, row)
			continue
		}
		code := strings.TrimSpace(row[0])
		modifyuser := strings.TrimSpace(row[1])
		platform := strings.TrimSpace(row[2])

		// Only touch crawlers that already exist.
		updateWhere := map[string]interface{}{
			"code": code,
		}
		exists, _ := MgoLua.FindOne("luaconfig", updateWhere)
		if exists == nil || len(*exists) == 0 {
			log.Println("code 没有找到数据", code)
			continue
		}
		log.Println(code, modifyuser, platform)

		update := make(map[string]interface{})
		switch platform {
		case "golua平台", "通用平台", "jschrome":
			update["state"] = 0
			update["platform"] = platform
			update["claimtype"] = 1
		}
		update["createuser"] = modifyuser
		update["modifyuser"] = modifyuser

		where := map[string]interface{}{
			"s_name": modifyuser,
		}
		user, _ := MgoLua.FindOne("user", where)
		// An empty result is treated like a failed lookup so that blank user
		// ids and emails are never written into the configuration.
		if user == nil || len(*user) == 0 {
			log.Println("user 查询失败", where)
			return
		}
		userID := mongodb.BsonIdToSId((*user)["_id"])
		update["modifyuserid"] = userID
		update["createuserid"] = userID
		update["createuseremail"] = (*user)["s_email"]
		update["next"] = (*user)["s_email"]

		MgoLua.Update("luaconfig", updateWhere, map[string]interface{}{"$set": update}, true, false)
	}
	log.Println("数据处理完毕")
}