wcc 4 hete
szülő
commit
1c4e940326

+ 1 - 1
mysql/init.go

@@ -45,7 +45,7 @@ func Init() {
 func InitMgo() {
 	//87 竞品
 	Mgo = &mongodb.MongodbSim{
-		//MongodbAddr: "172.17.189.140:27080",
+		//MongodbAddr: "172.17.4.87:27080",
 		MongodbAddr: GF.Mongob.Host,
 		Size:        10,
 		DbName:      GF.Mongob.DB,

BIN
project_chuan/6月份未匹配到剑鱼的项目清单.xlsx


+ 13 - 3
project_chuan/project.go

@@ -562,6 +562,9 @@ func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThresho
 	seenIDs := make(map[string]bool)
 
 	for _, field := range fieldsToTry {
+		if field == "detail" && len(allResults) > 0 {
+			break
+		}
 		for _, filter := range filtersToTry {
 			// 构建查询:使用 MultiMatchQuery + phrase
 			query := elastic.NewBoolQuery().
@@ -577,7 +580,7 @@ func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThresho
 				Index("bidding").
 				Query(query).
 				Size(70).
-				FetchSourceContext(fetchFields). // 添加这一行
+				FetchSourceContext(fetchFields). // 添加这一行,查询部分字段
 				Do(context.Background())
 			if err != nil {
 				return nil, err
@@ -646,8 +649,14 @@ func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThresho
 		bidd, _ := MgoB.FindById("bidding", id, nil)
 		detail := util.ObjToString((*bidd)["detail"])
 		// 字段中必须包含 projectName
-		if buyer2 != "" && detail != "" {
-			if !strings.Contains(detail, projectName) && !strings.Contains(detail, buyer2) {
+		if detail != "" {
+			if !strings.Contains(detail, projectName) {
+				continue
+			}
+		}
+
+		if buyer2 != "" {
+			if !strings.Contains(detail, buyer2) {
 				continue
 			}
 		}
@@ -657,6 +666,7 @@ func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThresho
 		}
 		seenProjectNames[projectNameValue] = true
 
+		doc["detail"] = detail
 		results = append(results, doc)
 
 		if len(results) >= maxResults {

+ 56 - 2
project_chuan/project_test.go

@@ -1,12 +1,66 @@
 package main
 
 import (
+	"fmt"
 	"github.com/olivere/elastic/v7"
+	"github.com/xuri/excelize/v2"
 	"go.uber.org/zap"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
 	"testing"
 )
 
+// TestBiddng reads the project name from column B of every data row of Sheet1
+// in the workbook below, searches it with searchES23, and writes 1 into column F
+// of the same row when the search returns at least one result. The workbook is
+// saved when the test finishes.
+//
+// NOTE(review): the Elasticsearch credentials are hard-coded in this test;
+// consider loading them from configuration instead of committing them.
+func TestBiddng(t *testing.T) {
+	f, err := excelize.OpenFile("./6月份未匹配到剑鱼的项目清单.xlsx")
+	if err != nil {
+		// Fail loudly: silently returning would report the test as passed.
+		t.Fatalf("open workbook: %v", err)
+	}
+	defer func() {
+		// Persist the column-F marks; a failed save must not go unnoticed.
+		if err := f.Save(); err != nil {
+			t.Errorf("save workbook: %v", err)
+		}
+		if err := f.Close(); err != nil {
+			t.Errorf("close workbook: %v", err)
+		}
+	}()
+
+	rows, err := f.GetRows("Sheet1")
+	if err != nil {
+		t.Fatalf("read Sheet1: %v", err)
+	}
+
+	//url := "http://172.17.4.184:19908"
+	url := "http://127.0.0.1:19908"
+	username := "jybid"
+	password := "Top2023_JEB01i@31"
+	//index := "bidding" // index name
+	// Create the Elasticsearch client.
+	client, err := elastic.NewClient(
+		elastic.SetURL(url),
+		elastic.SetBasicAuth(username, password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		// Without a client every search below would run against nil.
+		t.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+
+	// Row 0 is the header; data starts at index 1 (sheet row 2).
+	for i := 1; i < len(rows); i++ {
+		// GetRows may return rows shorter than expected, so column B can be absent.
+		if len(rows[i]) < 2 {
+			continue
+		}
+		projectName := rows[i][1]
+
+		res, err := searchES23(client, projectName, "", 60, 20)
+		if err != nil {
+			fmt.Println(projectName)
+			log.Info("TestBiddng", zap.Error(err))
+			// The result is meaningless on error; move on to the next row.
+			continue
+		}
+		if len(res) > 0 {
+			if err := f.SetCellValue("Sheet1", fmt.Sprintf("F%v", i+1), 1); err != nil {
+				t.Errorf("mark row %d: %v", i+1, err)
+			}
+		}
+		fmt.Printf("%s 总数是:%d\n", projectName, len(res))
+	}
+}
+
 func TestSearchES23(t *testing.T) {
 	client, err := elastic.NewClient(
 		elastic.SetURL(GF.Es.URL),
@@ -17,7 +71,7 @@ func TestSearchES23(t *testing.T) {
 		log.Fatal("创建 Elasticsearch 客户端失败", zap.Error(err))
 	}
 
-	projectName := "G655荔波至茂兰段公路改扩建工程"
-	results, err := searchES23(client, projectName, "", 20, 50)
+	projectName := "奉贤区南竹港南水闸改建工程"
+	results, err := searchES23(client, projectName, "", 50, 50)
 	log.Info("aaa", zap.Any("results", results))
 }

+ 10 - 3
updateBidding/lua_test.go

@@ -7,12 +7,13 @@ import (
 	"log"
 	"strings"
 	"testing"
+	"time"
 )
 
 func TestUpdateLuaConfig(t *testing.T) {
 	//87 竞品
 	MgoLua := &mongodb.MongodbSim{
-		//MongodbAddr: "172.17.189.140:27080",
+		//MongodbAddr: "172.17.4.87:27080",
 		MongodbAddr: "127.0.0.1:27081",
 		Size:        10,
 		DbName:      "editor",
@@ -24,7 +25,8 @@ func TestUpdateLuaConfig(t *testing.T) {
 	sess := MgoLua.GetMgoConn()
 	defer MgoLua.DestoryMongoConn(sess)
 
-	f, err := excelize.OpenFile("./luaconfig.xlsx")
+	//f, err := excelize.OpenFile("./luaconfig.xlsx")
+	f, err := excelize.OpenFile("./第二批刷任务.xlsx")
 
 	if err != nil {
 		fmt.Println(err)
@@ -36,7 +38,8 @@ func TestUpdateLuaConfig(t *testing.T) {
 		}
 	}()
 
-	rows, err := f.GetRows("Sheet1")
+	//rows, err := f.GetRows("Sheet1")
+	rows, err := f.GetRows("已收录站点及爬虫信息")
 	if err != nil {
 		fmt.Println(err)
 		return
@@ -70,6 +73,7 @@ func TestUpdateLuaConfig(t *testing.T) {
 		if platform == "golua平台" {
 			update["state"] = 0
 			update["platform"] = "golua平台"
+			update["claimtype"] = 1
 		} else if platform == "通用平台" {
 			update["state"] = 0
 			update["platform"] = "通用平台"
@@ -77,10 +81,13 @@ func TestUpdateLuaConfig(t *testing.T) {
 		} else if platform == "jschrome" {
 			update["state"] = 0
 			update["platform"] = "jschrome"
+			update["claimtype"] = 1
 		}
 
 		update["createuser"] = modifyuser
 		update["modifyuser"] = modifyuser
+		update["claimtime"] = time.Now().Unix()
+		update["recovertime"] = time.Now().AddDate(0, 1, 0).Unix()
 
 		where := map[string]interface{}{
 			"s_name": modifyuser,

+ 135 - 10
updateBidding/luaconfig.go

@@ -3,27 +3,94 @@ package main
 import (
 	"fmt"
 	"github.com/xuri/excelize/v2"
+	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
 	"log"
+	"net"
+	"net/url"
+	"strings"
+	"time"
 )
 
+// pingDomain probes whether the site behind each crawler task accepts TCP
+// connections.
+//
+// For every document in editor.task whose i_state is 0, 1 or 2 it loads the
+// luaconfig record with the same code, takes the host from that record's href,
+// dials it with a 5-second timeout, and saves code, domain, i_state and the
+// outcome ("visit") into the wcc_code_test collection.
+func pingDomain() {
+	// Instance "87" (competitor-product data, per the original note).
+	MgoLua := &mongodb.MongodbSim{
+		MongodbAddr: "172.17.4.87:27080",
+		//MongodbAddr: "127.0.0.1:27081",
+		Size:     10,
+		DbName:   "editor",
+		UserName: "",
+		Password: "",
+		//Direct:      true,
+	}
+	MgoLua.InitPool()
+	sess := MgoLua.GetMgoConn()
+	defer MgoLua.DestoryMongoConn(sess)
+
+	where := map[string]interface{}{
+		"i_state": map[string]interface{}{
+			"$in": []int{0, 1, 2},
+		},
+	}
+
+	const dialTimeout = 5 * time.Second
+
+	it := sess.DB("editor").C("task").Find(where).Select(nil).Iter()
+	count := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
+		if count%100 == 0 {
+			log.Println("current:", count, tmp["s_site"], tmp["s_code"])
+		}
+		code := util.ObjToString(tmp["s_code"])
+		where2 := map[string]interface{}{
+			"code": code,
+		}
+
+		re, _ := MgoLua.FindOne("luaconfig", where2)
+		// FindOne hands back a pointer; guard it before dereferencing.
+		if re == nil || len(*re) == 0 {
+			continue
+		}
+
+		href := util.ObjToString((*re)["href"])
+		parsedUrl, err := url.Parse(href)
+		if err != nil {
+			// One malformed href must not abort the whole scan, and exiting here
+			// would also skip the deferred connection release.
+			log.Printf("解析URL失败: code=%s href=%q: %v", code, href, err)
+			continue
+		}
+		domain := parsedUrl.Host
+
+		// Host may already carry a port ("example.com:8080"); appending ":80" to
+		// it would yield an undialable address, so split host and port first.
+		// Port 80 stays the default when the href names none.
+		port := parsedUrl.Port()
+		if port == "" {
+			port = "80"
+		}
+		addr := net.JoinHostPort(parsedUrl.Hostname(), port)
+
+		inser := map[string]interface{}{
+			"code":    code,
+			"domain":  domain,
+			"i_state": tmp["i_state"],
+		}
+		conn, err := net.DialTimeout("tcp", addr, dialTimeout)
+		if err != nil {
+			inser["visit"] = "域名不可访问"
+		} else {
+			inser["visit"] = "域名可访问"
+			conn.Close()
+		}
+		MgoLua.Save("wcc_code_test", inser)
+	}
+	// Release the cursor and surface any error that ended the iteration early.
+	if err := it.Close(); err != nil {
+		log.Println("iterate task:", err)
+	}
+
+}
+
 // updateLuaConfiig 更新爬虫采集平台配置
 func updateLuaConfiig() {
 	//87 竞品
 	MgoLua := &mongodb.MongodbSim{
-		//MongodbAddr: "172.17.189.140:27080",
-		MongodbAddr: "127.0.0.1:27081",
-		Size:        10,
-		DbName:      "editor",
-		UserName:    "",
-		Password:    "",
-		Direct:      true,
+		MongodbAddr: "172.17.4.87:27080",
+		//MongodbAddr: "127.0.0.1:27081",
+		Size:     10,
+		DbName:   "editor",
+		UserName: "",
+		Password: "",
+		//Direct:      true,
 	}
 	MgoLua.InitPool()
 	sess := MgoLua.GetMgoConn()
 	defer MgoLua.DestoryMongoConn(sess)
 
-	f, err := excelize.OpenFile("./luaconfig.xlsx")
+	//f, err := excelize.OpenFile("./luaconfig.xlsx")
+	f, err := excelize.OpenFile("./第二批刷任务.xlsx")
 
 	if err != nil {
 		fmt.Println(err)
@@ -35,14 +102,72 @@ func updateLuaConfiig() {
 		}
 	}()
 
-	rows, err := f.GetRows("Sheetq")
+	//rows, err := f.GetRows("Sheet1")
+	rows, err := f.GetRows("未收录产生的任务")
 	if err != nil {
 		fmt.Println(err)
 		return
 	}
+	/**
+	1、刷至 golua平台 爬虫,state=0,platform=golua平台
+	2、刷至通用平台爬虫,state=0,platform=通用平台,claimtype=1
+	3、刷至jschrome平台爬虫,state=0,platform=jschrome
+	4、以上所有爬虫均修改createuser、modifyuser、modifyuserid、createuserid、createuseremail、next字段。createuser、modifyuser 为user表s_name;modifyuserid、createuserid为user表_id;createuseremail、next为user表s_email
+	5、爬虫表87/editor/luaconfig 用户表87/editor/user
+	*/
 
 	for i := 1; i < len(rows); i++ {
 		row := rows[i]
-		log.Println(row)
+		code := strings.TrimSpace(row[0])
+		modifyuser := strings.TrimSpace(row[1])
+		platform := strings.TrimSpace(row[2])
+
+		//更新MongoDB
+		updateWhere := map[string]interface{}{
+			"code": code,
+		}
+
+		exists, _ := MgoLua.FindOne("luaconfig", updateWhere)
+		if len(*exists) == 0 {
+			log.Println("code 没有找到数据", code)
+			continue
+		}
+		log.Println(code, modifyuser, platform)
+		update := make(map[string]interface{})
+		if platform == "golua平台" {
+			update["state"] = 0
+			update["platform"] = "golua平台"
+			update["claimtype"] = 1
+		} else if platform == "通用平台" {
+			update["state"] = 0
+			update["platform"] = "通用平台"
+			update["claimtype"] = 1
+		} else if platform == "jschrome" {
+			update["state"] = 0
+			update["platform"] = "jschrome"
+			update["claimtype"] = 1
+		}
+
+		update["createuser"] = modifyuser
+		update["modifyuser"] = modifyuser
+
+		where := map[string]interface{}{
+			"s_name": modifyuser,
+		}
+
+		user, _ := MgoLua.FindOne("user", where)
+		if user == nil {
+			log.Println("user 查询失败", where)
+			return
+		}
+
+		update["modifyuserid"] = mongodb.BsonIdToSId((*user)["_id"])
+		update["createuserid"] = mongodb.BsonIdToSId((*user)["_id"])
+		update["createuseremail"] = (*user)["s_email"]
+		update["next"] = (*user)["s_email"]
+
+		MgoLua.Update("luaconfig", updateWhere, map[string]interface{}{"$set": update}, true, false)
 	}
+
+	log.Println("数据处理完毕")
 }

+ 10 - 6
updateBidding/main.go

@@ -126,10 +126,13 @@ func Init() {
 }
 
 func main() {
-	Init()
+	pingDomain()
+
+	return
+	//Init()
 	//InitEsBiddingField()
-	go updateMethod()   //更新mongodb
-	go updateEsMethod() //更新es
+	//go updateMethod()   //更新mongodb
+	//go updateEsMethod() //更新es
 	//go updateEsMethodTest() // 更新测试环境ES
 	//go updateEsHrefMethod() //更新es href 字段
 	//go updateProjectEsMethod()
@@ -149,12 +152,12 @@ func main() {
 	//updateBiddingType() //更新标讯分类
 
 	//updateBiddingisValidFile()      //更新bidding isValidFile字段
-	updateBiddingTypeBySpidecode() //更新bidding ;根据spidecode 字段
+	//updateBiddingTypeBySpidecode() //更新bidding ;根据spidecode 字段
 	//updateBiddingBasicClass() 	//更新 存量数据 basicClass 字段
 	//updateBiddingBasicClassTest() //更新测试环境 	basicClass 字段
 	log.Info("over")
-	c := make(chan bool, 1)
-	<-c
+	//c := make(chan bool, 1)
+	//<-c
 }
 
 // fixBiddingEs 修复bidding 索引数据,
@@ -1043,6 +1046,7 @@ func updateEsMethod() {
 					defer func() {
 						<-updateEsSp
 					}()
+
 					Es.UpdateBulk("bidding", arru...)
 					EsNew.UpdateBulk("bidding", arru...)
 				}(arru[:indexu])

BIN
updateBidding/pingDomain


BIN
updateBidding/第二批刷任务.xlsx


BIN
xlsx/6月份未匹配到剑鱼的项目清单.xlsx


+ 64 - 0
xlsx/bidding.go

@@ -1,7 +1,9 @@
 package main
 
 import (
+	"context"
 	"fmt"
+	"github.com/olivere/elastic/v7"
 	"github.com/xuri/excelize/v2"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
@@ -9,6 +11,68 @@ import (
 	"strings"
 )
 
+// dealProjectName checks, per project name, whether matching bidding documents
+// exist.
+//
+// It reads the project name from column B of every data row of Sheet1, counts
+// the documents in the "bidding" index whose projectname.pname term equals that
+// name, and writes 1 into column F of the row when the count is positive. The
+// workbook is saved when the function returns.
+//
+// NOTE(review): the Elasticsearch credentials are hard-coded here; consider
+// loading them from configuration instead of committing them.
+func dealProjectName() {
+	f, err := excelize.OpenFile("./6月份未匹配到剑鱼的项目清单.xlsx")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer func() {
+		// Persist the column-F marks; report a failed save instead of dropping it.
+		if err := f.Save(); err != nil {
+			fmt.Println(err)
+		}
+		if err := f.Close(); err != nil {
+			fmt.Println(err)
+		}
+	}()
+
+	rows, err := f.GetRows("Sheet1")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	//url := "http://172.17.4.184:19908"
+	url := "http://127.0.0.1:19908"
+	username := "jybid"
+	password := "Top2023_JEB01i@31"
+	index := "bidding" // index name
+	// Create the Elasticsearch client.
+	client, err := elastic.NewClient(
+		elastic.SetURL(url),
+		elastic.SetBasicAuth(username, password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+
+	ctx := context.Background()
+	// Row 0 is the header; data starts at index 1 (sheet row 2).
+	for i := 1; i < len(rows); i++ {
+		// GetRows may return rows shorter than expected, so column B can be absent.
+		if len(rows[i]) < 2 {
+			continue
+		}
+		projectName := rows[i][1]
+
+		query := elastic.NewBoolQuery().
+			Must(
+				elastic.NewTermQuery("projectname.pname", projectName),
+			)
+
+		countRes, err := client.Count(index).
+			Query(query).
+			Do(ctx)
+
+		if err != nil {
+			// Stop, but via return rather than log.Fatalf: exiting the process
+			// would skip the deferred Save and lose every mark written so far.
+			log.Printf("统计失败:%s", err)
+			return
+		}
+
+		if countRes > 0 {
+			if err := f.SetCellValue("Sheet1", fmt.Sprintf("F%v", i+1), 1); err != nil {
+				fmt.Println(err)
+			}
+		}
+		fmt.Printf("%s 总数是:%d\n", projectName, countRes)
+
+	}
+
+	fmt.Println("结束~~~~~~~~~~~~~~~")
+}
+
 // getCountBidding 根据企业名称,获取企业的招标信息总数
 func getCountBidding() {
 	f, err := excelize.OpenFile("./银行业金融机构法人名单.xlsx")

+ 3 - 1
xlsx/main.go

@@ -59,7 +59,9 @@ func main() {
 	//getCountBidding() //统计采购单位标讯数据量
 
 	//processExcel()
-	matchSpecialEnterprise()
+	//matchSpecialEnterprise()
+
+	dealProjectName() //匹配项目名称
 	log.Println("over")
 }
 

+ 3 - 3
xlsx/xlsx_test.go

@@ -249,7 +249,7 @@ func TestB(T *testing.T) {
 func TestAAA(t *testing.T) {
 	//87 竞品
 	Mgo := &mongodb.MongodbSim{
-		//MongodbAddr: "172.17.189.140:27080",
+		//MongodbAddr: "172.17.4.87:27080",
 		MongodbAddr: "127.0.0.1:27081",
 		Size:        10,
 		DbName:      "py_theme",
@@ -358,7 +358,7 @@ func TestAAA(t *testing.T) {
 func TestZZ(t *testing.T) {
 	//87 竞品
 	Mgo := &mongodb.MongodbSim{
-		//MongodbAddr: "172.17.189.140:27080",
+		//MongodbAddr: "172.17.4.87:27080",
 		MongodbAddr: "127.0.0.1:27081",
 		Size:        10,
 		DbName:      "py_theme",
@@ -747,7 +747,7 @@ func TestExportBidding(t *testing.T) {
 //func TestZXZ(t *testing.T) {
 //	//87 竞品
 //	Mgo := &mongodb.MongodbSim{
-//		//MongodbAddr: "172.17.189.140:27080",
+//		//MongodbAddr: "172.17.4.87:27080",
 //		MongodbAddr: "127.0.0.1:27081",
 //		Size:        10,
 //		DbName:      "py_theme",