123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355 |
- package main
- import (
- "crypto/tls"
- "fmt"
- "github.com/xuri/excelize/v2"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "log"
- "net"
- "net/http"
- "net/url"
- "strings"
- "sync"
- "time"
- )
- // updatePing 更新ping 状态
- func updatePing() {
- //87 竞品`
- MgoLua := &mongodb.MongodbSim{
- //MongodbAddr: "172.17.4.87:27080",
- MongodbAddr: "127.0.0.1:27081",
- Size: 10,
- DbName: "editor",
- UserName: "",
- Password: "",
- Direct: true,
- }
- MgoLua.InitPool()
- sess := MgoLua.GetMgoConn()
- defer MgoLua.DestoryMongoConn(sess)
- ch := make(chan bool, 30)
- wg := &sync.WaitGroup{}
- where := map[string]interface{}{
- "visit": "域名不可访问",
- }
- it := sess.DB("editor").C("wcc_code_test").Find(&where).Select(nil).Iter()
- count := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
- if count%100 == 0 {
- log.Println("current:", count, tmp["domain"], tmp["code"])
- }
- ch <- true
- wg.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- id := mongodb.BsonIdToSId(tmp["_id"])
- domain := util.ObjToString(tmp["domain"])
- update := make(map[string]interface{})
- if checkURL(domain) {
- update["visit"] = "域名可访问"
- } else {
- update["visit"] = "域名不可访问"
- }
- MgoLua.UpdateById("wcc_code_test", id, map[string]interface{}{"$set": update})
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg.Wait()
- log.Println("数据处理完毕")
- }
// checkURL reports whether domain answers an HTTP request, trying plain HTTP
// first and HTTPS second.
//
// Any response counts as reachable, whatever its status code. The result is
// false when both attempts fail at the transport level (the error returned by
// client.Do) or when domain cannot be turned into a valid request URL.
// TLS certificate verification is skipped, so hosts with invalid certificates
// still count as reachable.
//
// Each connection attempt is limited to 8 seconds and each request, including
// redirects, to 60 seconds.
func checkURL(domain string) bool {
	// DNS debugging aid, kept for reference:
	//ips, err := net.LookupHost(domain)
	//if err != nil {
	//	fmt.Println("DNS 解析失败:", err)
	//} else {
	//	fmt.Println("Go 程序解析到的 IP:", ips)
	//}

	// Optional HTTP proxy (e.g. a local Clash instance; adjust the port):
	//proxyURL, _ := url.Parse("http://127.0.0.1:7897")

	// Custom transport with HTTP/2 enabled. Dialer.DualStack is left at its
	// zero value (the original set it to false explicitly); that setting does
	// not restrict dialing to IPv4.
	transport := &http.Transport{
		//Proxy: http.ProxyURL(proxyURL),
		DialContext: (&net.Dialer{
			Timeout: 8 * time.Second,
		}).DialContext,
		ForceAttemptHTTP2: true,
		TLSClientConfig: &tls.Config{
			InsecureSkipVerify: true, // skip certificate verification
		},
	}
	// The transport lives only for this call; release its keep-alive
	// connections on return so repeated calls do not accumulate open sockets.
	defer transport.CloseIdleConnections()

	client := &http.Client{
		Timeout:   60 * time.Second,
		Transport: transport,
	}

	// probe issues one GET and reports whether any response came back.
	probe := func(rawURL string) bool {
		req, err := http.NewRequest("GET", rawURL, nil)
		if err != nil {
			// A malformed domain (control characters, spaces, ...) yields a
			// nil request; treat it as unreachable instead of dereferencing it.
			fmt.Println("访问", rawURL, "失败:", err)
			return false
		}
		// Headers commonly sent by browsers.
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
		req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
		req.Header.Set("Accept-Encoding", "gzip, deflate, br")
		req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
		req.Header.Set("Connection", "keep-alive")

		start := time.Now()
		resp, err := client.Do(req)
		cost := time.Since(start)
		if err != nil {
			fmt.Println("访问", rawURL, "失败:", err, "耗时:", cost)
			return false
		}
		defer resp.Body.Close()
		//fmt.Println("访问", rawURL, "成功,状态码:", resp.StatusCode, "耗时:", cost)
		return true
	}

	// HTTP first, then HTTPS.
	return probe("http://"+domain) || probe("https://"+domain)
}
// check reports whether domain answers an HTTP request sent through the local
// proxy at http://127.0.0.1:7897, trying plain HTTP first and HTTPS second.
//
// Any response counts as reachable, whatever its status code. The result is
// false when both attempts fail (the error returned by client.Do) or when
// domain cannot be turned into a valid request URL. Each request is limited to
// 10 seconds. The outcome of every attempt is printed to standard output.
func check(domain string) bool {
	// Diagnostic only: the lookup result from the system resolver is printed,
	// and the requests below are attempted whether or not it succeeded.
	ips, err := net.LookupHost(domain)
	if err != nil {
		fmt.Println("DNS 解析失败:", err)
	} else {
		fmt.Println("系统 DNS 解析到的 IP:", ips)
	}

	// Proxy configuration. The literal is a well-formed URL, so the parse
	// error is always nil and is deliberately ignored.
	proxyURL, _ := url.Parse("http://127.0.0.1:7897")
	transport := &http.Transport{
		Proxy:             http.ProxyURL(proxyURL),
		ForceAttemptHTTP2: true,
		// TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, // enable if certificates are a problem
	}
	// The transport lives only for this call; release its keep-alive
	// connections on return so repeated calls do not accumulate open sockets.
	defer transport.CloseIdleConnections()

	client := &http.Client{
		Timeout:   10 * time.Second,
		Transport: transport,
	}

	// probe issues one GET and reports whether any response came back.
	probe := func(rawURL string) bool {
		req, err := http.NewRequest("GET", rawURL, nil)
		if err != nil {
			// A malformed domain yields a nil request; treat it as
			// unreachable instead of dereferencing it.
			fmt.Println("访问", rawURL, "失败:", err)
			return false
		}
		// Headers commonly sent by browsers.
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
		req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
		req.Header.Set("Accept-Encoding", "gzip, deflate, br")
		req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
		req.Header.Set("Connection", "keep-alive")

		start := time.Now()
		resp, err := client.Do(req)
		cost := time.Since(start)
		if err != nil {
			fmt.Println("访问", rawURL, "失败:", err, "耗时:", cost)
			return false
		}
		defer resp.Body.Close()
		fmt.Println("访问", rawURL, "成功,状态码:", resp.StatusCode, "耗时:", cost)
		return true
	}

	// HTTP first, then HTTPS.
	return probe("http://"+domain) || probe("https://"+domain)
}
- // pingDomain 测试域名可访问
- func pingDomain() {
- //87 竞品`
- MgoLua := &mongodb.MongodbSim{
- MongodbAddr: "172.17.4.87:27080",
- //MongodbAddr: "127.0.0.1:27081",
- Size: 10,
- DbName: "editor",
- UserName: "",
- Password: "",
- //Direct: true,
- }
- MgoLua.InitPool()
- sess := MgoLua.GetMgoConn()
- defer MgoLua.DestoryMongoConn(sess)
- where := map[string]interface{}{
- "i_state": map[string]interface{}{
- "$in": []int{0, 1, 2},
- },
- }
- it := sess.DB("editor").C("task").Find(where).Select(nil).Iter()
- count := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
- if count%100 == 0 {
- log.Println("current:", count, tmp["s_site"], tmp["s_code"])
- }
- code := util.ObjToString(tmp["s_code"])
- where2 := map[string]interface{}{
- "code": code,
- }
- re, _ := MgoLua.FindOne("luaconfig", where2)
- if len(*(re)) > 0 {
- href := util.ObjToString((*re)["href"])
- parsedUrl, err := url.Parse(href)
- if err != nil {
- log.Fatalf("解析URL失败: %v", err)
- }
- domain := parsedUrl.Host
- inser := map[string]interface{}{
- "code": code,
- "domain": domain,
- "i_state": tmp["i_state"],
- }
- timeout := 5 * time.Second
- // 判断 domain 是否包含端口
- host, port, err := net.SplitHostPort(domain)
- if err != nil {
- // domain 本身没有带端口
- host = domain
- port = "80"
- }
- conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), timeout)
- if err != nil {
- inser["visit"] = "域名不可访问"
- } else {
- inser["visit"] = "域名可访问"
- conn.Close()
- }
- MgoLua.Save("wcc_code_test", inser)
- }
- }
- }
- // updateLuaConfiig 更新爬虫采集平台配置
- func updateLuaConfiig() {
- //87 竞品
- MgoLua := &mongodb.MongodbSim{
- MongodbAddr: "172.17.4.87:27080",
- //MongodbAddr: "127.0.0.1:27081",
- Size: 10,
- DbName: "editor",
- UserName: "",
- Password: "",
- //Direct: true,
- }
- MgoLua.InitPool()
- sess := MgoLua.GetMgoConn()
- defer MgoLua.DestoryMongoConn(sess)
- //f, err := excelize.OpenFile("./luaconfig.xlsx")
- f, err := excelize.OpenFile("./第二批刷任务.xlsx")
- if err != nil {
- fmt.Println(err)
- return
- }
- defer func() {
- if err := f.Close(); err != nil {
- fmt.Println(err)
- }
- }()
- //rows, err := f.GetRows("Sheet1")
- rows, err := f.GetRows("未收录产生的任务")
- if err != nil {
- fmt.Println(err)
- return
- }
- /**
- 1、刷至 golua平台 爬虫,state=0,platform=golua平台
- 2、刷至通用平台爬虫,state=0,platform=通用平台,claimtype=1
- 3、刷至jschrome平台爬虫,state=0,platform=jschrome
- 4、以上所有爬虫均修改createuser、modifyuser、modifyuserid、createuserid、createuseremail、next字段。createuser、modifyuser 为user表s_name;modifyuserid、createuserid为user表_id;createuseremail、next为user表s_email
- 5、爬虫表87/editor/luaconfig 用户表87/editor/user
- */
- for i := 1; i < len(rows); i++ {
- row := rows[i]
- code := strings.TrimSpace(row[0])
- modifyuser := strings.TrimSpace(row[1])
- platform := strings.TrimSpace(row[2])
- //更新MongoDB
- updateWhere := map[string]interface{}{
- "code": code,
- }
- exists, _ := MgoLua.FindOne("luaconfig", updateWhere)
- if len(*exists) == 0 {
- log.Println("code 没有找到数据", code)
- continue
- }
- log.Println(code, modifyuser, platform)
- update := make(map[string]interface{})
- if platform == "golua平台" {
- update["state"] = 0
- update["platform"] = "golua平台"
- update["claimtype"] = 1
- } else if platform == "通用平台" {
- update["state"] = 0
- update["platform"] = "通用平台"
- update["claimtype"] = 1
- } else if platform == "jschrome" {
- update["state"] = 0
- update["platform"] = "jschrome"
- update["claimtype"] = 1
- }
- update["createuser"] = modifyuser
- update["modifyuser"] = modifyuser
- where := map[string]interface{}{
- "s_name": modifyuser,
- }
- user, _ := MgoLua.FindOne("user", where)
- if user == nil {
- log.Println("user 查询失败", where)
- return
- }
- update["modifyuserid"] = mongodb.BsonIdToSId((*user)["_id"])
- update["createuserid"] = mongodb.BsonIdToSId((*user)["_id"])
- update["createuseremail"] = (*user)["s_email"]
- update["next"] = (*user)["s_email"]
- MgoLua.Update("luaconfig", updateWhere, map[string]interface{}{"$set": update}, true, false)
- }
- log.Println("数据处理完毕")
- }
|