|
@@ -3,27 +3,94 @@ package main
|
|
|
import (
|
|
|
"fmt"
|
|
|
"github.com/xuri/excelize/v2"
|
|
|
+ util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
|
|
|
"log"
|
|
|
+ "net"
|
|
|
+ "net/url"
|
|
|
+ "strings"
|
|
|
+ "time"
|
|
|
)
|
|
|
|
|
|
+// pingDomain 测试域名可访问
|
|
|
+func pingDomain() {
|
|
|
+ //87 竞品`
|
|
|
+ MgoLua := &mongodb.MongodbSim{
|
|
|
+ MongodbAddr: "172.17.4.87:27080",
|
|
|
+ //MongodbAddr: "127.0.0.1:27081",
|
|
|
+ Size: 10,
|
|
|
+ DbName: "editor",
|
|
|
+ UserName: "",
|
|
|
+ Password: "",
|
|
|
+ //Direct: true,
|
|
|
+ }
|
|
|
+ MgoLua.InitPool()
|
|
|
+ sess := MgoLua.GetMgoConn()
|
|
|
+ defer MgoLua.DestoryMongoConn(sess)
|
|
|
+
|
|
|
+ where := map[string]interface{}{
|
|
|
+ "i_state": map[string]interface{}{
|
|
|
+ "$in": []int{0, 1, 2},
|
|
|
+ },
|
|
|
+ }
|
|
|
+
|
|
|
+ it := sess.DB("editor").C("task").Find(where).Select(nil).Iter()
|
|
|
+ count := 0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
|
|
|
+ if count%100 == 0 {
|
|
|
+ log.Println("current:", count, tmp["s_site"], tmp["s_code"])
|
|
|
+ }
|
|
|
+ code := util.ObjToString(tmp["s_code"])
|
|
|
+ where2 := map[string]interface{}{
|
|
|
+ "code": code,
|
|
|
+ }
|
|
|
+
|
|
|
+ re, _ := MgoLua.FindOne("luaconfig", where2)
|
|
|
+ if len(*(re)) > 0 {
|
|
|
+ href := util.ObjToString((*re)["href"])
|
|
|
+ parsedUrl, err := url.Parse(href)
|
|
|
+ if err != nil {
|
|
|
+ log.Fatalf("解析URL失败: %v", err)
|
|
|
+ }
|
|
|
+ domain := parsedUrl.Host
|
|
|
+
|
|
|
+ inser := map[string]interface{}{
|
|
|
+ "code": code,
|
|
|
+ "domain": domain,
|
|
|
+ "i_state": tmp["i_state"],
|
|
|
+ }
|
|
|
+ timeout := 5 * time.Second
|
|
|
+ conn, err := net.DialTimeout("tcp", domain+":80", timeout)
|
|
|
+ if err != nil {
|
|
|
+ inser["visit"] = "域名不可访问"
|
|
|
+ } else {
|
|
|
+ inser["visit"] = "域名可访问"
|
|
|
+ conn.Close()
|
|
|
+ }
|
|
|
+ MgoLua.Save("wcc_code_test", inser)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
// updateLuaConfiig 更新爬虫采集平台配置
|
|
|
func updateLuaConfiig() {
|
|
|
//87 竞品
|
|
|
MgoLua := &mongodb.MongodbSim{
|
|
|
- //MongodbAddr: "172.17.189.140:27080",
|
|
|
- MongodbAddr: "127.0.0.1:27081",
|
|
|
- Size: 10,
|
|
|
- DbName: "editor",
|
|
|
- UserName: "",
|
|
|
- Password: "",
|
|
|
- Direct: true,
|
|
|
+ MongodbAddr: "172.17.4.87:27080",
|
|
|
+ //MongodbAddr: "127.0.0.1:27081",
|
|
|
+ Size: 10,
|
|
|
+ DbName: "editor",
|
|
|
+ UserName: "",
|
|
|
+ Password: "",
|
|
|
+ //Direct: true,
|
|
|
}
|
|
|
MgoLua.InitPool()
|
|
|
sess := MgoLua.GetMgoConn()
|
|
|
defer MgoLua.DestoryMongoConn(sess)
|
|
|
|
|
|
- f, err := excelize.OpenFile("./luaconfig.xlsx")
|
|
|
+ //f, err := excelize.OpenFile("./luaconfig.xlsx")
|
|
|
+ f, err := excelize.OpenFile("./第二批刷任务.xlsx")
|
|
|
|
|
|
if err != nil {
|
|
|
fmt.Println(err)
|
|
@@ -35,14 +102,72 @@ func updateLuaConfiig() {
|
|
|
}
|
|
|
}()
|
|
|
|
|
|
- rows, err := f.GetRows("Sheetq")
|
|
|
+ //rows, err := f.GetRows("Sheet1")
|
|
|
+ rows, err := f.GetRows("未收录产生的任务")
|
|
|
if err != nil {
|
|
|
fmt.Println(err)
|
|
|
return
|
|
|
}
|
|
|
+ /**
|
|
|
+ 1、刷至 golua平台 爬虫,state=0,platform=golua平台
|
|
|
+ 2、刷至通用平台爬虫,state=0,platform=通用平台,claimtype=1
|
|
|
+ 3、刷至jschrome平台爬虫,state=0,platform=jschrome
|
|
|
+ 4、以上所有爬虫均修改createuser、modifyuser、modifyuserid、createuserid、createuseremail、next字段。createuser、modifyuser 为user表s_name;modifyuserid、createuserid为user表_id;createuseremail、next为user表s_email
|
|
|
+ 5、爬虫表87/editor/luaconfig 用户表87/editor/user
|
|
|
+ */
|
|
|
|
|
|
for i := 1; i < len(rows); i++ {
|
|
|
row := rows[i]
|
|
|
- log.Println(row)
|
|
|
+ code := strings.TrimSpace(row[0])
|
|
|
+ modifyuser := strings.TrimSpace(row[1])
|
|
|
+ platform := strings.TrimSpace(row[2])
|
|
|
+
|
|
|
+ //更新MongoDB
|
|
|
+ updateWhere := map[string]interface{}{
|
|
|
+ "code": code,
|
|
|
+ }
|
|
|
+
|
|
|
+ exists, _ := MgoLua.FindOne("luaconfig", updateWhere)
|
|
|
+ if len(*exists) == 0 {
|
|
|
+ log.Println("code 没有找到数据", code)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ log.Println(code, modifyuser, platform)
|
|
|
+ update := make(map[string]interface{})
|
|
|
+ if platform == "golua平台" {
|
|
|
+ update["state"] = 0
|
|
|
+ update["platform"] = "golua平台"
|
|
|
+ update["claimtype"] = 1
|
|
|
+ } else if platform == "通用平台" {
|
|
|
+ update["state"] = 0
|
|
|
+ update["platform"] = "通用平台"
|
|
|
+ update["claimtype"] = 1
|
|
|
+ } else if platform == "jschrome" {
|
|
|
+ update["state"] = 0
|
|
|
+ update["platform"] = "jschrome"
|
|
|
+ update["claimtype"] = 1
|
|
|
+ }
|
|
|
+
|
|
|
+ update["createuser"] = modifyuser
|
|
|
+ update["modifyuser"] = modifyuser
|
|
|
+
|
|
|
+ where := map[string]interface{}{
|
|
|
+ "s_name": modifyuser,
|
|
|
+ }
|
|
|
+
|
|
|
+ user, _ := MgoLua.FindOne("user", where)
|
|
|
+ if user == nil {
|
|
|
+ log.Println("user 查询失败", where)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ update["modifyuserid"] = mongodb.BsonIdToSId((*user)["_id"])
|
|
|
+ update["createuserid"] = mongodb.BsonIdToSId((*user)["_id"])
|
|
|
+ update["createuseremail"] = (*user)["s_email"]
|
|
|
+ update["next"] = (*user)["s_email"]
|
|
|
+
|
|
|
+ MgoLua.Update("luaconfig", updateWhere, map[string]interface{}{"$set": update}, true, false)
|
|
|
}
|
|
|
+
|
|
|
+ log.Println("数据处理完毕")
|
|
|
}
|