luaconfig.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. package main
  2. import (
  3. "crypto/tls"
  4. "fmt"
  5. "github.com/xuri/excelize/v2"
  6. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  7. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  8. "log"
  9. "net"
  10. "net/http"
  11. "net/url"
  12. "strings"
  13. "sync"
  14. "time"
  15. )
  16. // updatePing 更新ping 状态
  17. func updatePing() {
  18. //87 竞品`
  19. MgoLua := &mongodb.MongodbSim{
  20. //MongodbAddr: "172.17.4.87:27080",
  21. MongodbAddr: "127.0.0.1:27081",
  22. Size: 10,
  23. DbName: "editor",
  24. UserName: "",
  25. Password: "",
  26. Direct: true,
  27. }
  28. MgoLua.InitPool()
  29. sess := MgoLua.GetMgoConn()
  30. defer MgoLua.DestoryMongoConn(sess)
  31. ch := make(chan bool, 30)
  32. wg := &sync.WaitGroup{}
  33. where := map[string]interface{}{
  34. "visit": "域名不可访问",
  35. }
  36. it := sess.DB("editor").C("wcc_code_test").Find(&where).Select(nil).Iter()
  37. count := 0
  38. for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
  39. if count%100 == 0 {
  40. log.Println("current:", count, tmp["domain"], tmp["code"])
  41. }
  42. ch <- true
  43. wg.Add(1)
  44. go func(tmp map[string]interface{}) {
  45. defer func() {
  46. <-ch
  47. wg.Done()
  48. }()
  49. id := mongodb.BsonIdToSId(tmp["_id"])
  50. domain := util.ObjToString(tmp["domain"])
  51. update := make(map[string]interface{})
  52. if checkURL(domain) {
  53. update["visit"] = "域名可访问"
  54. } else {
  55. update["visit"] = "域名不可访问"
  56. }
  57. MgoLua.UpdateById("wcc_code_test", id, map[string]interface{}{"$set": update})
  58. }(tmp)
  59. tmp = make(map[string]interface{})
  60. }
  61. wg.Wait()
  62. log.Println("数据处理完毕")
  63. }
  64. // checkURL 检查域名是否可访问
  65. func checkURL(domain string) bool {
  66. // 打印 DNS 解析
  67. //ips, err := net.LookupHost(domain)
  68. //if err != nil {
  69. // fmt.Println("DNS 解析失败:", err)
  70. //} else {
  71. // fmt.Println("Go 程序解析到的 IP:", ips)
  72. //}
  73. // 设置 HTTP 代理(走 Clash,本地端口根据实际情况改,比如 7890)
  74. //proxyURL, _ := url.Parse("http://127.0.0.1:7897")
  75. // 自定义 Transport:只用 IPv4,启用 HTTP/2
  76. transport := &http.Transport{
  77. //Proxy: http.ProxyURL(proxyURL),
  78. DialContext: (&net.Dialer{
  79. Timeout: 8 * time.Second,
  80. DualStack: false, // 只用 IPv4
  81. }).DialContext,
  82. ForceAttemptHTTP2: true,
  83. TLSClientConfig: &tls.Config{
  84. InsecureSkipVerify: true, // 跳过证书验证
  85. },
  86. }
  87. client := &http.Client{
  88. Timeout: 60 * time.Second,
  89. Transport: transport,
  90. }
  91. makeRequest := func(url string) bool {
  92. req, _ := http.NewRequest("GET", url, nil)
  93. // 浏览器常用头
  94. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
  95. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  96. req.Header.Set("Accept-Encoding", "gzip, deflate, br")
  97. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
  98. req.Header.Set("Connection", "keep-alive")
  99. start := time.Now()
  100. resp, err := client.Do(req)
  101. cost := time.Since(start)
  102. if err != nil {
  103. fmt.Println("访问", url, "失败:", err, "耗时:", cost)
  104. return false
  105. }
  106. defer resp.Body.Close()
  107. //fmt.Println("访问", url, "成功,状态码:", resp.StatusCode, "耗时:", cost)
  108. return true
  109. }
  110. // 先 http,再 https
  111. if makeRequest("http://" + domain) {
  112. return true
  113. }
  114. if makeRequest("https://" + domain) {
  115. return true
  116. }
  117. return false
  118. }
  119. func check(domain string) bool {
  120. // DNS 用系统默认
  121. ips, err := net.LookupHost(domain)
  122. if err != nil {
  123. fmt.Println("DNS 解析失败:", err)
  124. } else {
  125. fmt.Println("系统 DNS 解析到的 IP:", ips)
  126. }
  127. // 配置代理
  128. proxyURL, _ := url.Parse("http://127.0.0.1:7897")
  129. transport := &http.Transport{
  130. Proxy: http.ProxyURL(proxyURL),
  131. ForceAttemptHTTP2: true,
  132. // TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, // 如果证书问题,可打开
  133. }
  134. client := &http.Client{
  135. Timeout: 10 * time.Second,
  136. Transport: transport,
  137. }
  138. makeRequest := func(url string) bool {
  139. req, _ := http.NewRequest("GET", url, nil)
  140. // 浏览器 header
  141. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
  142. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  143. req.Header.Set("Accept-Encoding", "gzip, deflate, br")
  144. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
  145. req.Header.Set("Connection", "keep-alive")
  146. start := time.Now()
  147. resp, err := client.Do(req)
  148. cost := time.Since(start)
  149. if err != nil {
  150. fmt.Println("访问", url, "失败:", err, "耗时:", cost)
  151. return false
  152. }
  153. defer resp.Body.Close()
  154. fmt.Println("访问", url, "成功,状态码:", resp.StatusCode, "耗时:", cost)
  155. return true
  156. }
  157. // 先 http,再 https
  158. if makeRequest("http://" + domain) {
  159. return true
  160. }
  161. if makeRequest("https://" + domain) {
  162. return true
  163. }
  164. return false
  165. }
  166. // pingDomain 测试域名可访问
  167. func pingDomain() {
  168. //87 竞品`
  169. MgoLua := &mongodb.MongodbSim{
  170. MongodbAddr: "172.17.4.87:27080",
  171. //MongodbAddr: "127.0.0.1:27081",
  172. Size: 10,
  173. DbName: "editor",
  174. UserName: "",
  175. Password: "",
  176. //Direct: true,
  177. }
  178. MgoLua.InitPool()
  179. sess := MgoLua.GetMgoConn()
  180. defer MgoLua.DestoryMongoConn(sess)
  181. where := map[string]interface{}{
  182. "i_state": map[string]interface{}{
  183. "$in": []int{0, 1, 2},
  184. },
  185. }
  186. it := sess.DB("editor").C("task").Find(where).Select(nil).Iter()
  187. count := 0
  188. for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
  189. if count%100 == 0 {
  190. log.Println("current:", count, tmp["s_site"], tmp["s_code"])
  191. }
  192. code := util.ObjToString(tmp["s_code"])
  193. where2 := map[string]interface{}{
  194. "code": code,
  195. }
  196. re, _ := MgoLua.FindOne("luaconfig", where2)
  197. if len(*(re)) > 0 {
  198. href := util.ObjToString((*re)["href"])
  199. parsedUrl, err := url.Parse(href)
  200. if err != nil {
  201. log.Fatalf("解析URL失败: %v", err)
  202. }
  203. domain := parsedUrl.Host
  204. inser := map[string]interface{}{
  205. "code": code,
  206. "domain": domain,
  207. "i_state": tmp["i_state"],
  208. }
  209. timeout := 5 * time.Second
  210. // 判断 domain 是否包含端口
  211. host, port, err := net.SplitHostPort(domain)
  212. if err != nil {
  213. // domain 本身没有带端口
  214. host = domain
  215. port = "80"
  216. }
  217. conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), timeout)
  218. if err != nil {
  219. inser["visit"] = "域名不可访问"
  220. } else {
  221. inser["visit"] = "域名可访问"
  222. conn.Close()
  223. }
  224. MgoLua.Save("wcc_code_test", inser)
  225. }
  226. }
  227. }
  228. // updateLuaConfiig 更新爬虫采集平台配置
  229. func updateLuaConfiig() {
  230. //87 竞品
  231. MgoLua := &mongodb.MongodbSim{
  232. MongodbAddr: "172.17.4.87:27080",
  233. //MongodbAddr: "127.0.0.1:27081",
  234. Size: 10,
  235. DbName: "editor",
  236. UserName: "",
  237. Password: "",
  238. //Direct: true,
  239. }
  240. MgoLua.InitPool()
  241. sess := MgoLua.GetMgoConn()
  242. defer MgoLua.DestoryMongoConn(sess)
  243. //f, err := excelize.OpenFile("./luaconfig.xlsx")
  244. f, err := excelize.OpenFile("./第二批刷任务.xlsx")
  245. if err != nil {
  246. fmt.Println(err)
  247. return
  248. }
  249. defer func() {
  250. if err := f.Close(); err != nil {
  251. fmt.Println(err)
  252. }
  253. }()
  254. //rows, err := f.GetRows("Sheet1")
  255. rows, err := f.GetRows("未收录产生的任务")
  256. if err != nil {
  257. fmt.Println(err)
  258. return
  259. }
  260. /**
  261. 1、刷至 golua平台 爬虫,state=0,platform=golua平台
  262. 2、刷至通用平台爬虫,state=0,platform=通用平台,claimtype=1
  263. 3、刷至jschrome平台爬虫,state=0,platform=jschrome
  264. 4、以上所有爬虫均修改createuser、modifyuser、modifyuserid、createuserid、createuseremail、next字段。createuser、modifyuser 为user表s_name;modifyuserid、createuserid为user表_id;createuseremail、next为user表s_email
  265. 5、爬虫表87/editor/luaconfig 用户表87/editor/user
  266. */
  267. for i := 1; i < len(rows); i++ {
  268. row := rows[i]
  269. code := strings.TrimSpace(row[0])
  270. modifyuser := strings.TrimSpace(row[1])
  271. platform := strings.TrimSpace(row[2])
  272. //更新MongoDB
  273. updateWhere := map[string]interface{}{
  274. "code": code,
  275. }
  276. exists, _ := MgoLua.FindOne("luaconfig", updateWhere)
  277. if len(*exists) == 0 {
  278. log.Println("code 没有找到数据", code)
  279. continue
  280. }
  281. log.Println(code, modifyuser, platform)
  282. update := make(map[string]interface{})
  283. if platform == "golua平台" {
  284. update["state"] = 0
  285. update["platform"] = "golua平台"
  286. update["claimtype"] = 1
  287. } else if platform == "通用平台" {
  288. update["state"] = 0
  289. update["platform"] = "通用平台"
  290. update["claimtype"] = 1
  291. } else if platform == "jschrome" {
  292. update["state"] = 0
  293. update["platform"] = "jschrome"
  294. update["claimtype"] = 1
  295. }
  296. update["createuser"] = modifyuser
  297. update["modifyuser"] = modifyuser
  298. where := map[string]interface{}{
  299. "s_name": modifyuser,
  300. }
  301. user, _ := MgoLua.FindOne("user", where)
  302. if user == nil {
  303. log.Println("user 查询失败", where)
  304. return
  305. }
  306. update["modifyuserid"] = mongodb.BsonIdToSId((*user)["_id"])
  307. update["createuserid"] = mongodb.BsonIdToSId((*user)["_id"])
  308. update["createuseremail"] = (*user)["s_email"]
  309. update["next"] = (*user)["s_email"]
  310. MgoLua.Update("luaconfig", updateWhere, map[string]interface{}{"$set": update}, true, false)
  311. }
  312. log.Println("数据处理完毕")
  313. }