wcc 1 月之前
父節點
當前提交
2b5b7c14f0

二進制
bidding_test/countReportYear


二進制
updateBidding/updateBidding → bidding_test/countReportYear22


+ 9 - 7
bidding_test/main.go

@@ -39,7 +39,7 @@ func InitMgo() {
 		DbName:   "mixdata",
 		UserName: "SJZY_RWbid_ES",
 		Password: "SJZY@B4i4D5e6S",
-		//Direct:      true,
+		Direct:   true,
 	}
 	MgoQy.InitPool()
 
@@ -90,12 +90,14 @@ func InitEs() {
 }
 
 func main() {
-	InitMgo()
-	InitEs()
-
-	go updateEsMethod()
-	fixQyxy()
-	select {}
+	countReportYear22()
+	log.Println("数据处理完毕")
+	//InitMgo()
+	//InitEs()
+	//
+	//go updateEsMethod()
+	//fixQyxy()
+	//select {}
 	//getBidding0311()
 	//log.Println("数据删除完成")
 

+ 429 - 0
bidding_test/qyxy.go

@@ -5,12 +5,441 @@ import (
 	"encoding/json"
 	"fmt"
 	"github.com/olivere/elastic/v7"
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
 	"io"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
 	"log"
+	"net/url"
+	"strconv"
+	"strings"
+	"sync"
+	"unicode/utf8"
 )
 
+// Tuning knobs shared by the annual-report scans in this file.
+const (
+	reportYearWorkers   = 8     // goroutines classifying documents per cursor
+	reportYearQueueSize = 1000  // documents buffered between the cursor reader and the workers
+	reportYearBatchSize = 500   // documents fetched per MongoDB round trip
+	reportYearLogEvery  = 10000 // a progress line is logged every this many documents
+)
+
+// reportYearSegmentResult is the outcome of scanning one updatetime segment.
+type reportYearSegmentResult struct {
+	Num      int   // documents read from the cursor
+	Year2023 int   // countable companies with a 2023 annual report
+	Year2024 int   // countable companies with a 2024 annual report
+	Err      error // non-nil when the segment could not be scanned completely
+}
+
+// reportYearTally accumulates the per-year company counts under a mutex so
+// that several workers can update it while the reader logs progress.
+type reportYearTally struct {
+	mu       sync.Mutex
+	year2023 int
+	year2024 int
+}
+
+// add records one company; each year is counted at most once per company.
+func (t *reportYearTally) add(has2023, has2024 bool) {
+	if !has2023 && !has2024 {
+		return
+	}
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	if has2023 {
+		t.year2023++
+	}
+	if has2024 {
+		t.year2024++
+	}
+}
+
+// snapshot returns the current totals.
+func (t *reportYearTally) snapshot() (year2023, year2024 int) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	return t.year2023, t.year2024
+}
+
+// connectQyxyStd connects to MongoDB and returns the client together with the
+// mixdata.qyxy_std collection. The caller owns the client and must Disconnect it.
+//
+// NOTE(review): the credentials and hosts are hard-coded, as they were before
+// this refactor; they should come from configuration instead.
+func connectQyxyStd(ctx context.Context) (*mongo.Client, *mongo.Collection, error) {
+	username := "SJZY_RWbid_ES"
+	password := "SJZY@B4i4D5e6S"
+	hosts := []string{"172.31.31.202:27081", "172.20.45.128:27080"}
+
+	uri, err := BuildMongoURI(username, password, hosts, nil)
+	if err != nil {
+		return nil, nil, fmt.Errorf("building MongoDB URI: %w", err)
+	}
+	client, err := mongo.Connect(ctx, options.Client().ApplyURI(uri))
+	if err != nil {
+		return nil, nil, fmt.Errorf("connecting to MongoDB: %w", err)
+	}
+	return client, client.Database("mixdata").Collection("qyxy_std"), nil
+}
+
+// reportYearFindOptions returns the Find options used by every scan: a fixed
+// batch size and a projection limited to the fields the classifiers read.
+func reportYearFindOptions() *options.FindOptions {
+	return options.Find().
+		SetBatchSize(reportYearBatchSize).
+		SetProjection(bson.M{
+			"company_status":             1,
+			"company_name":               1,
+			"use_flag":                   1,
+			"annual_reports.report_year": 1,
+		})
+}
+
+// reportYearCountable reports whether a company document takes part in the
+// statistics. A document is skipped when use_flag > 0, when its status
+// contains "注销" or "吊销", or when its name is empty, contains "已除名" or is
+// shorter than five runes.
+func reportYearCountable(doc map[string]interface{}) bool {
+	status := util.ObjToString(doc["company_status"])
+	name := util.ObjToString(doc["company_name"])
+	switch {
+	case util.IntAll(doc["use_flag"]) > 0,
+		strings.Contains(status, "注销"),
+		strings.Contains(status, "吊销"),
+		name == "",
+		strings.Contains(name, "已除名"),
+		utf8.RuneCountInString(name) < 5:
+		return false
+	}
+	return true
+}
+
+// reportYearAsDocument converts a decoded BSON sub-document to a plain map.
+// The driver may hand back map[string]interface{}, primitive.M or primitive.D
+// depending on how the parent was decoded, so all three are accepted.
+func reportYearAsDocument(v interface{}) (map[string]interface{}, bool) {
+	switch d := v.(type) {
+	case map[string]interface{}:
+		return d, true
+	case primitive.M:
+		return d, true
+	case primitive.D:
+		m := make(map[string]interface{}, len(d))
+		for _, e := range d {
+			m[e.Key] = e.Value
+		}
+		return m, true
+	}
+	return nil, false
+}
+
+// reportYearAsInt converts a report_year value, stored as a string or as any
+// numeric BSON type, to an int. The second result is false when the value is
+// missing or cannot be interpreted as a year.
+func reportYearAsInt(v interface{}) (int, bool) {
+	switch y := v.(type) {
+	case string:
+		n, err := strconv.Atoi(strings.TrimSpace(y))
+		if err != nil {
+			return 0, false
+		}
+		return n, true
+	case float64:
+		return int(y), true
+	case int:
+		return y, true
+	case int32:
+		return int(y), true
+	case int64:
+		return int(y), true
+	}
+	return 0, false
+}
+
+// annualReportYears reports whether the company document lists an annual
+// report for 2023 and for 2024.
+func annualReportYears(doc map[string]interface{}) (has2023, has2024 bool) {
+	raw, ok := doc["annual_reports"]
+	if !ok || raw == nil {
+		return false, false
+	}
+
+	var reports []interface{}
+	switch v := raw.(type) {
+	case []interface{}:
+		reports = v
+	case primitive.A:
+		reports = v
+	default:
+		log.Printf("annual_reports unexpected type: %T\n", v)
+		return false, false
+	}
+
+	for _, r := range reports {
+		report, ok := reportYearAsDocument(r)
+		if !ok {
+			continue
+		}
+		year, ok := reportYearAsInt(report["report_year"])
+		if !ok {
+			continue
+		}
+		switch year {
+		case 2023:
+			has2023 = true
+		case 2024:
+			has2024 = true
+		}
+	}
+	return has2023, has2024
+}
+
+// scanReportYears drains cursor, classifies every document with a pool of
+// reportYearWorkers goroutines and returns the number of documents read plus
+// the 2023 / 2024 company counts. progress, when non-nil, is called from the
+// reading goroutine every reportYearLogEvery documents with the totals so far.
+// The returned error is cursor.Err(): a non-nil value means the scan stopped
+// early and the counts are incomplete.
+func scanReportYears(ctx context.Context, cursor *mongo.Cursor, progress func(num, year2023, year2024 int)) (int, int, int, error) {
+	tally := &reportYearTally{}
+	docChan := make(chan map[string]interface{}, reportYearQueueSize)
+
+	var wg sync.WaitGroup
+	for w := 0; w < reportYearWorkers; w++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for doc := range docChan {
+				if !reportYearCountable(doc) {
+					continue
+				}
+				tally.add(annualReportYears(doc))
+			}
+		}()
+	}
+
+	num := 0
+	for cursor.Next(ctx) {
+		var doc map[string]interface{}
+		if err := cursor.Decode(&doc); err != nil {
+			log.Println("decode error:", err)
+			continue
+		}
+		num++
+		docChan <- doc
+		if progress != nil && num%reportYearLogEvery == 0 {
+			y2023, y2024 := tally.snapshot()
+			progress(num, y2023, y2024)
+		}
+	}
+	close(docChan)
+	wg.Wait()
+
+	y2023, y2024 := tally.snapshot()
+	return num, y2023, y2024, cursor.Err()
+}
+
+// countReportYearSegment scans the companies whose updatetime lies in
+// [segStart, segEnd), or [segStart, segEnd] when inclusiveEnd is set.
+// No sort is requested: the counts do not depend on order, and a sort on _id
+// would compete with the updatetime range filter for the index choice.
+func countReportYearSegment(ctx context.Context, collection *mongo.Collection, segIdx int, segStart, segEnd int64, inclusiveEnd bool) reportYearSegmentResult {
+	upperOp := "$lt"
+	if inclusiveEnd {
+		upperOp = "$lte"
+	}
+	filter := bson.M{
+		"company_type": bson.M{"$ne": "个体工商户"},
+		"updatetime":   bson.M{"$gte": segStart, upperOp: segEnd},
+	}
+
+	cursor, err := collection.Find(ctx, filter, reportYearFindOptions())
+	if err != nil {
+		log.Printf("[segment %d] Find error: %v\n", segIdx, err)
+		return reportYearSegmentResult{Err: err}
+	}
+	defer cursor.Close(ctx)
+
+	num, y2023, y2024, err := scanReportYears(ctx, cursor, func(num, y2023, y2024 int) {
+		log.Printf("[segment %d] processed: %d, 2023: %d, 2024: %d", segIdx, num, y2023, y2024)
+	})
+	if err != nil {
+		log.Printf("[segment %d] cursor error: %v\n", segIdx, err)
+	}
+	return reportYearSegmentResult{Num: num, Year2023: y2023, Year2024: y2024, Err: err}
+}
+
+// countReportYear22 counts how many companies in mixdata.qyxy_std have a 2023
+// and a 2024 annual report. The updatetime range is cut into ten equal
+// segments that are scanned concurrently, and the totals are printed at the end.
+//
+// NOTE(review): documents whose updatetime is missing or lies outside
+// [start, end] are not visited at all, unlike countReportYear which scans the
+// whole collection. Confirm the bounds still cover the data before relying on
+// the totals.
+func countReportYear22() {
+	ctx := context.Background()
+
+	client, collection, err := connectQyxyStd(ctx)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer client.Disconnect(ctx)
+
+	// Segment configuration.
+	const segments = 10
+	start := int64(1630914780)
+	end := int64(1751238647)
+	segment := (end - start) / segments
+
+	// The channel holds one result per segment, so no sender ever blocks.
+	resultChan := make(chan reportYearSegmentResult, segments)
+
+	var outerWg sync.WaitGroup
+	for i := 0; i < segments; i++ {
+		segStart := start + int64(i)*segment
+		segEnd := segStart + segment
+		last := i == segments-1
+		if last {
+			// The last segment absorbs the division remainder and includes
+			// the upper bound itself.
+			segEnd = end
+		}
+
+		outerWg.Add(1)
+		go func(segIdx int, segStart, segEnd int64, inclusiveEnd bool) {
+			defer outerWg.Done()
+			resultChan <- countReportYearSegment(ctx, collection, segIdx, segStart, segEnd, inclusiveEnd)
+		}(i, segStart, segEnd, last)
+	}
+	outerWg.Wait()
+	close(resultChan)
+
+	// Aggregate the per-segment results.
+	totalDocs := 0
+	failed := 0
+	finalCount := map[int]int{
+		2023: 0,
+		2024: 0,
+	}
+	for segRes := range resultChan {
+		if segRes.Err != nil {
+			failed++
+		}
+		totalDocs += segRes.Num
+		finalCount[2023] += segRes.Year2023
+		finalCount[2024] += segRes.Year2024
+	}
+	if failed > 0 {
+		log.Printf("%d of %d segments failed; the totals below are incomplete", failed, segments)
+	}
+
+	fmt.Printf("总处理文档数: %d\n", totalDocs)
+	fmt.Printf("2023 年有年报的企业数: %d\n", finalCount[2023])
+	fmt.Printf("2024 年有年报的企业数: %d\n", finalCount[2024])
+}
+
+// countReportYear counts how many companies in mixdata.qyxy_std have a 2023
+// and a 2024 annual report, using a single cursor over the whole collection
+// (newest _id first) and a pool of workers to classify the documents.
+func countReportYear() {
+	ctx := context.Background()
+
+	client, collection, err := connectQyxyStd(ctx)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer client.Disconnect(ctx)
+
+	// Query condition: company_type != "个体工商户".
+	filter := bson.M{"company_type": bson.M{"$ne": "个体工商户"}}
+
+	cursor, err := collection.Find(
+		ctx,
+		filter,
+		reportYearFindOptions().SetSort(bson.D{{Key: "_id", Value: -1}}),
+	)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer cursor.Close(ctx)
+
+	num, y2023, y2024, err := scanReportYears(ctx, cursor, func(num, y2023, y2024 int) {
+		log.Printf("current: %d docs processed, 2023年企业数: %d, 2024年企业数: %d\n", num, y2023, y2024)
+	})
+	if err != nil {
+		log.Printf("cursor error, the totals below are incomplete: %v", err)
+	}
+
+	// Print the final statistics.
+	fmt.Printf("总处理文档数: %d\n", num)
+	fmt.Printf("2023 年有年报的企业数: %d\n", y2023)
+	fmt.Printf("2024 年有年报的企业数: %d\n", y2024)
+}
+
+// BuildMongoURI assembles a "mongodb://" connection string.
+//
+// username and password are percent-encoded; when username is empty no
+// credentials are emitted, and an empty password yields "user:@host".
+// hosts are joined with commas and must not be empty. options, when non-empty,
+// is appended as "/?key=value&..." with keys in sorted order. The "/" is
+// required: a connection string must have a slash between the host list and
+// the option query.
+//
+// It returns an error only when hosts is empty.
+func BuildMongoURI(username, password string, hosts []string, options map[string]string) (string, error) {
+	if len(hosts) == 0 {
+		return "", fmt.Errorf("hosts cannot be empty")
+	}
+
+	// url.QueryEscape encodes a space as "+", which is form encoding rather
+	// than percent-encoding. A literal "+" is already emitted as "%2B", so
+	// every remaining "+" stands for a space and can be rewritten safely.
+	escape := func(s string) string {
+		return strings.ReplaceAll(url.QueryEscape(s), "+", "%20")
+	}
+
+	var uri strings.Builder
+	uri.WriteString("mongodb://")
+	if username != "" {
+		uri.WriteString(escape(username))
+		uri.WriteByte(':')
+		uri.WriteString(escape(password))
+		uri.WriteByte('@')
+	}
+	uri.WriteString(strings.Join(hosts, ","))
+
+	if len(options) > 0 {
+		query := url.Values{}
+		for k, v := range options {
+			query.Set(k, v)
+		}
+		uri.WriteString("/?")
+		uri.WriteString(query.Encode())
+	}
+
+	return uri.String(), nil
+}
+
 func fixQyxy() {
 	// 连接 ES
 	url := "http://172.17.4.184:19908"

+ 1 - 1
environment/readme.txt

@@ -11,7 +11,7 @@
           3、企业所属企业集团系--图谱
           4、中标牵头方注册地-省(平安库省份)
           5、上市类型(归类:A股、新三板、非上市)
-      行业个性化标签需求详见附件;
+      行业个性化标签需求详见附件;
     根据目前项目情况,优先级如下:行业通用需求里的第一项:1、项目核心内容(同项目范围);2、牵头人企业类型;3、上市类型;4、企业所属企业集团系;5、中标牵头方注册地-省;
                                                   行业个性化标签可以先用大模型出一版,我们可以加入人工修正;
 

+ 57 - 1
getEs/bidding.go

@@ -13,13 +13,69 @@ import (
 	"strings"
 )
 
+// getBiddingLimitData fetches up to 100 documents from the "bidding" index
+// whose detail field matches "开标记录" and stores each of them in the
+// wcc_bidding_test collection through the package-level MgoB handle, which
+// this function (re)initialises against the local MongoDB instance.
+func getBiddingLimitData() {
+	const (
+		esURL      = "http://127.0.0.1:19908" // alternative: http://172.17.4.184:19908
+		esUser     = "jybid"
+		esPassword = "Top2023_JEB01i@31"
+		esIndex    = "bidding"
+	)
+
+	// Build the Elasticsearch client.
+	client, err := elastic.NewClient(
+		elastic.SetURL(esURL),
+		elastic.SetBasicAuth(esUser, esPassword),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+
+	// Run the match query.
+	matchDetail := elastic.NewMatchQuery("detail", "开标记录")
+	search := client.Search().
+		Size(100).
+		Index(esIndex).
+		Query(elastic.NewBoolQuery().Must(matchDetail))
+	searchResult, err := search.Do(context.Background())
+	if err != nil {
+		log.Fatalf("Error executing search: %s", err)
+	}
+
+	// Local database (alternative address: 172.31.31.202:27081,172.20.45.128:27080).
+	MgoB = &mongodb.MongodbSim{
+		MongodbAddr: "127.0.0.1:27083",
+		Size:        10,
+		DbName:      "qfw",
+		UserName:    "SJZY_RWbid_ES",
+		Password:    "SJZY@B4i4D5e6S",
+	}
+	MgoB.InitPool()
+
+	for _, hit := range searchResult.Hits.Hits {
+		var source map[string]interface{}
+		if decodeErr := json.Unmarshal(hit.Source, &source); decodeErr != nil {
+			log.Printf("解析文档失败:%s", decodeErr)
+			continue
+		}
+		MgoB.SaveByOriID("wcc_bidding_test", source)
+	}
+
+	log.Println("数据处理完毕")
+}
+
 // getBidding2 获取bidding数据
 func getBidding2() {
 	url := "http://172.17.4.184:19908"
 	//url := "http://127.0.0.1:19908"
 	username := "jybid"
 	password := "Top2023_JEB01i@31"
-	index := "bidding" //索引名称
+	index := "bidding" //索引名
 	// 创建 Elasticsearch 客户端
 	client, err := elastic.NewClient(
 		elastic.SetURL(url),

二進制
getEs/getCountProjectWinner


+ 6 - 3
getEs/main.go

@@ -34,8 +34,11 @@ func InitMgo() {
 }
 
 func main() {
-
-	getQyxyNationToFiles()
+	InitMgo()
+	getCountProjectWinner3()
+	//CountProjectWinner()
+	//getBiddingLimitData()
+	//getQyxyNationToFiles()
 	//exportQyxy() //导出企业数据
 	//dealXlsx()
 	//getQyxyNation() //导出 国标行业分类,注册资金靠前的企业
@@ -82,7 +85,7 @@ func main() {
 	//
 	//getBidding2()
 	//--------------//
-	dealYJG() // 处理姚静歌需求,处理项目数据到Clickhouse
+	//dealYJG() // 处理姚静歌需求,处理项目数据到Clickhouse
 	log.Println("over ------------------ over")
 }
 

+ 336 - 0
getEs/project.go

@@ -1,18 +1,354 @@
 package main
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
+	"github.com/elastic/go-elasticsearch/v7"
 	"github.com/olivere/elastic/v7"
+	"go.mongodb.org/mongo-driver/bson"
 	"io"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
 	"log"
 	"regexp"
 	"strings"
+	"sync"
+	"time"
 )
 
+// workerCount is the number of goroutines getCountProjectWinner3 uses to
+// query Elasticsearch concurrently.
+const workerCount = 5
+
+// Task is one unit of work for getCountProjectWinner3: the string form of a
+// document's _id in wcc_label_static_0625 and the company name to count
+// bidding records for.
+type Task struct {
+	ID          string
+	CompanyName string
+}
+
+// countBiddingByRoleYear counts, for companyName, the documents of the
+// "bidding" index in which it appears as buyer and as winner for each year
+// from 2020 to 2024. publishtime is compared against Unix-second year
+// boundaries computed in UTC. The result maps "<role>-<year>" to the count.
+//
+// A failed count query is reported through onErr (when non-nil) and its key
+// is left out of the result, so the caller can tell a failure from a zero.
+func countBiddingByRoleYear(ctx context.Context, client *elastic.Client, companyName string, onErr func(role string, year int, err error)) map[string]interface{} {
+	roles := []string{"buyer", "winner"}
+	years := []int{2020, 2021, 2022, 2023, 2024}
+
+	update := make(map[string]interface{}, len(roles)*len(years))
+	for _, role := range roles {
+		for _, year := range years {
+			// Year range in seconds, both ends inclusive.
+			start := time.Date(year, 1, 1, 0, 0, 0, 0, time.UTC).Unix()
+			end := time.Date(year+1, 1, 1, 0, 0, 0, 0, time.UTC).Unix() - 1
+
+			query := elastic.NewBoolQuery().
+				Must(elastic.NewTermQuery(role, companyName)).
+				Filter(elastic.NewRangeQuery("publishtime").Gte(start).Lte(end))
+
+			// Only the total is needed, so use the count API.
+			count, err := client.Count().
+				Index("bidding").
+				Query(query).
+				Do(ctx)
+			if err != nil {
+				if onErr != nil {
+					onErr(role, year, err)
+				}
+				continue
+			}
+			update[fmt.Sprintf("%s-%d", role, year)] = count
+		}
+	}
+	return update
+}
+
+// getCountProjectWinner3 reads every document of qfw.wcc_label_static_0625
+// and, using workerCount concurrent workers, writes the per-role, per-year
+// bidding counts of its company_name back onto the document.
+func getCountProjectWinner3() {
+	url := "http://172.17.4.184:19908"
+	username := "jybid"
+	password := "Top2023_JEB01i@31"
+
+	// Initialise the Elasticsearch client.
+	client, err := elastic.NewClient(
+		elastic.SetURL(url),
+		elastic.SetBasicAuth(username, password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+
+	sess := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(sess)
+
+	coll := sess.DB("qfw").C("wcc_label_static_0625")
+	it := coll.Find(nil).Select(nil).Iter()
+
+	log.Println("taskRun 开始")
+	taskCh := make(chan Task, 100)
+	var wg sync.WaitGroup
+
+	// Start the worker goroutines.
+	for i := 0; i < workerCount; i++ {
+		wg.Add(1)
+		go func(workerID int) {
+			defer wg.Done()
+			ctx := context.Background()
+			for task := range taskCh {
+				func() {
+					// Recover per task so that one failure does not take
+					// the whole worker down.
+					defer util.Catch()
+
+					update := countBiddingByRoleYear(ctx, client, task.CompanyName, func(role string, year int, err error) {
+						log.Printf("【Worker %d】 查询失败 [%s - %d]: %v", workerID, role, year, err)
+					})
+					// An empty $set is rejected by MongoDB; nothing to write
+					// when every query failed.
+					if len(update) == 0 {
+						return
+					}
+					MgoB.UpdateById("wcc_label_static_0625", task.ID, bson.M{"$set": update})
+				}()
+			}
+		}(i)
+	}
+
+	// The main goroutine reads MongoDB and feeds the task channel.
+	count := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); {
+		count++
+		if count%1000 == 0 {
+			log.Println("current:", count, tmp["company_name"])
+		}
+		taskCh <- Task{
+			ID:          mongodb.BsonIdToSId(tmp["_id"]),
+			CompanyName: util.ObjToString(tmp["company_name"]),
+		}
+	}
+	close(taskCh) // all tasks have been sent
+
+	wg.Wait()
+	log.Println("所有任务处理完成")
+}
+
+// getCountProjectWinner is the sequential variant of getCountProjectWinner3:
+// it walks qfw.wcc_label_static_0625 and writes the per-role, per-year bidding
+// counts of each company_name back onto its document.
+//
+// A failed count query is logged and skipped instead of terminating the
+// process, so the deferred session release still runs and the remaining
+// companies are still processed.
+func getCountProjectWinner() {
+	url := "http://172.17.4.184:19908"
+	username := "jybid"
+	password := "Top2023_JEB01i@31"
+
+	// Create the Elasticsearch client.
+	client, err := elastic.NewClient(
+		elastic.SetURL(url),
+		elastic.SetBasicAuth(username, password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+	defer util.Catch()
+	sess := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(sess)
+	it := sess.DB("qfw").C("wcc_label_static_0625").Find(nil).Select(nil).Iter()
+	log.Println("taskRun 开始")
+
+	count := 0
+	ctx := context.Background()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
+		if count%1000 == 0 {
+			log.Println("current:", count, tmp["company_name"])
+		}
+		companyName := util.ObjToString(tmp["company_name"])
+		id := mongodb.BsonIdToSId(tmp["_id"])
+
+		update := countBiddingByRoleYear(ctx, client, companyName, func(role string, year int, err error) {
+			log.Printf("查询 [%s-%d] 失败: %v", role, year, err)
+		})
+		// An empty $set is rejected by MongoDB; nothing to write when every
+		// query failed.
+		if len(update) == 0 {
+			continue
+		}
+		MgoB.UpdateById("wcc_label_static_0625", id, map[string]interface{}{"$set": update})
+	}
+}
+
+// CountProjectWinner2 runs a nested term query on the "qyxy" index for
+// companies whose zhima_labels.zhima_name equals "高新技术企业", prints the
+// total number of hits and then the _id and _source of the first 100 hits.
+//
+// Every failure terminates the process through log.Fatalf; a response that
+// does not have the expected shape is reported instead of panicking on a
+// failed type assertion.
+func CountProjectWinner2() {
+	// Connect to Elasticsearch.
+	cfg := elasticsearch.Config{
+		Addresses: []string{"http://127.0.0.1:19908"}, // alternative: "http://172.17.4.184:19908"
+		Username:  "jybid",
+		Password:  "Top2023_JEB01i@31",
+	}
+	es, err := elasticsearch.NewClient(cfg)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败: %s", err)
+	}
+
+	// Build the query body.
+	query := map[string]interface{}{
+		"track_total_hits": true, // report the real total instead of capping it
+		"size":             100,  // page size
+		"query": map[string]interface{}{
+			"nested": map[string]interface{}{
+				"path": "zhima_labels",
+				"query": map[string]interface{}{
+					"term": map[string]interface{}{
+						"zhima_labels.zhima_name": "高新技术企业",
+					},
+				},
+			},
+		},
+	}
+
+	// Serialise it to JSON.
+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(query); err != nil {
+		log.Fatalf("Error encoding query: %s", err)
+	}
+
+	// Execute the search.
+	res, err := es.Search(
+		es.Search.WithContext(context.Background()),
+		es.Search.WithIndex("qyxy"),
+		es.Search.WithBody(&buf),
+		es.Search.WithTrackTotalHits(true),
+	)
+	if err != nil {
+		log.Fatalf("Error getting response: %s", err)
+	}
+	defer res.Body.Close()
+
+	// Check the HTTP status before decoding.
+	if res.IsError() {
+		bodyBytes, readErr := io.ReadAll(res.Body)
+		if readErr != nil {
+			log.Fatalf("ES 返回错误: %s\n(reading the error body failed: %v)", res.Status(), readErr)
+		}
+		log.Fatalf("ES 返回错误: %s\n%s", res.Status(), string(bodyBytes))
+	}
+
+	// Decode the body.
+	var r map[string]interface{}
+	if err := json.NewDecoder(res.Body).Decode(&r); err != nil {
+		log.Fatalf("解析响应出错: %s", err)
+	}
+
+	hits, ok := r["hits"].(map[string]interface{})
+	if !ok {
+		log.Fatalf("unexpected search response: \"hits\" is missing or not an object")
+	}
+
+	// Print the total. "total" is accepted both as an object carrying
+	// "value" and as a bare number.
+	total := hits["total"]
+	if obj, isObj := total.(map[string]interface{}); isObj {
+		total = obj["value"]
+	}
+	fmt.Printf("命中总数: %v 条\n", total)
+
+	// Print the _id and _source of every returned hit.
+	docs, _ := hits["hits"].([]interface{}) // a missing list simply prints nothing
+	for _, hit := range docs {
+		doc, ok := hit.(map[string]interface{})
+		if !ok {
+			continue
+		}
+		sourceJSON, err := json.MarshalIndent(doc["_source"], "", "  ")
+		if err != nil {
+			log.Printf("encoding _source of %v: %v", doc["_id"], err)
+			continue
+		}
+		fmt.Printf("ID: %s\nSource: %s\n", doc["_id"], sourceJSON)
+	}
+}
+
+// CountProjectWinner scrolls, for each label in a fixed list, over every
+// document of the "qyxy" index whose nested zhima_labels.zhima_name equals
+// that label, and stores company_name, id and the label in
+// qfw.wcc_label_static_0625 through MgoB.InsertOrUpdate.
+//
+// A label without any hit is reported and skipped. Any other failure of the
+// initial search panics; a failure while scrolling ends that label only.
+func CountProjectWinner() {
+	url := "http://172.17.4.184:19908"
+	username := "jybid"
+	password := "Top2023_JEB01i@31"
+	index := "qyxy" // index name
+
+	// Create the Elasticsearch client.
+	client, err := elastic.NewClient(
+		elastic.SetURL(url),
+		elastic.SetBasicAuth(username, password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+
+	labels := `高新技术企业,小巨人企业,国家级技术创新示范企业,众创空间,国家级科技企业孵化器,瞪羚企业,科技型中小企业,制造业单项冠军示范企业,制造业单项冠军产品生产企业,制造业单项冠军培育企业,国家企业技术中心,专精特新企业,省级技术创新示范企业,技术先进型服务企业,省级企业技术中心`
+	for _, name := range strings.Split(labels, ",") {
+		// Nested query: zhima_labels.zhima_name == name. The path is the
+		// nested field itself.
+		nestedQuery := elastic.NewNestedQuery(
+			"zhima_labels",
+			elastic.NewBoolQuery().Must(
+				elastic.NewTermQuery("zhima_labels.zhima_name", name),
+			),
+		)
+
+		ctx := context.Background()
+		scroll := "10m"
+		searchSource := elastic.NewSearchSource().
+			Query(nestedQuery).
+			Size(10000).
+			Sort("_doc", true) // ascending
+
+		// Start the scroll.
+		res, err := client.Scroll(index).
+			Size(10000).
+			Scroll(scroll).
+			SearchSource(searchSource).
+			Do(ctx)
+		if err == io.EOF {
+			// No hit for this label: there is no result to read, so move on
+			// instead of dereferencing it.
+			fmt.Println("没有数据")
+			continue
+		}
+		if err != nil {
+			panic(err)
+		}
+
+		// Remember the scroll id right away so that it can be released even
+		// if the loop below never runs.
+		scrollID := res.ScrollId
+		fmt.Println(name, "总数是:", res.TotalHits())
+
+		total := 0
+		for len(res.Hits.Hits) > 0 {
+			for _, hit := range res.Hits.Hits {
+				var doc map[string]interface{}
+				if err := json.Unmarshal(hit.Source, &doc); err != nil {
+					log.Printf("解析文档失败:%s", err)
+					continue
+				}
+
+				// Store the row in the target collection.
+				insert := map[string]interface{}{
+					"company_name": doc["company_name"],
+					"id":           doc["id"],
+					"label":        name,
+				}
+				if err := MgoB.InsertOrUpdate("qfw", "wcc_label_static_0625", insert); err != nil {
+					log.Println("error", doc["id"])
+				}
+			}
+
+			total += len(res.Hits.Hits)
+			scrollID = res.ScrollId
+			res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
+			log.Println("current count:", total, name)
+			if err != nil {
+				// io.EOF marks the regular end of the scroll; anything else
+				// is logged. Either way this label is finished.
+				if err != io.EOF {
+					log.Println("滚动搜索失败:", err, res)
+				}
+				break
+			}
+		}
+
+		// Release the server-side scroll context.
+		if scrollID != "" {
+			if _, err := client.ClearScroll().ScrollId(scrollID).Do(ctx); err != nil {
+				log.Printf("清理滚动搜索失败:%s", err)
+			}
+		}
+	}
+}
+
 // getProject 获取项目数据
 func getProject() {
 	MgoB := &mongodb.MongodbSim{

二進制
hp/78list.xlsx


+ 7 - 7
hp/config.toml

@@ -6,11 +6,11 @@
     password = ""
 
 [mongoQ]  ## qyxy_std,用来查询公司省市区信息
-    host = "127.0.0.1:27017"
-    #    host = "172.17.4.181:27001"
-    db = "wcc"
-    username = ""
-    password = ""
+    host = "127.0.0.1:27083"
+    #    host = "172.31.31.202:27081,172.20.45.128:27080"
+    db = "mixdata"
+username = "SJZY_RWbid_ES"
+password = "SJZY@B4i4D5e6S"
 
 [readfile] ## 读取的企业文件
     path = "./78list.xlsx"
@@ -18,8 +18,8 @@
 
 [env]
     city =1                ## 是否导出省市区信息,默认0 不导出
-    proportion=0.34         ## 投资占股比例,大于这个的数据才会导出
-    subl = 5               ## 向下查询分支机构层级;最多支持到3级
+    proportion=0.5         ## 投资占股比例,大于这个的数据才会导出
+    subl = 1               ## 向下查询分支机构层级;最多支持到3级
     invesl = 5             ## 向下查询投资公司层级;最多支持到3级
 #    psubl = 10              ## 向上查询分支机构层级;最多支持
 #    pinvesl =10              ## 向上查询投资股东层级;

+ 5 - 1
hp/main.go

@@ -808,6 +808,8 @@ func setInvest(xlsx *excelize.File, subs []map[string]interface{}, name, sheet s
 		if GF.Env.City > 0 {
 			subtitles = append(subtitles, "省份", "城市", "区域/街道", "来源公司")
 			subkeys = append(subkeys, "company_area", "company_city", "company_district")
+			subtitles = append(subtitles, "法人代表", "注册资金")
+			subkeys = append(subkeys, "legal_person", "capital")
 		} else {
 			subtitles = append(subtitles, "来源公司")
 		}
@@ -1113,6 +1115,8 @@ func dealInves(ss []map[string]interface{}) []map[string]interface{} {
 					v["company_district"] = std["company_district"]
 					v["company_status"] = std["company_status"]
 					v["credit_no"] = std["credit_no"]
+					v["legal_person"] = std["legal_person"] //法人代表
+					v["capital"] = std["capital"]           // 注册资金
 				}
 				if !isInMapArray(v, invest1) {
 					invest1 = append(invest1, v)
@@ -1170,7 +1174,7 @@ func dealInves(ss []map[string]interface{}) []map[string]interface{} {
 				company_district = v["company_district"].(string)
 			}
 
-			tmp := []interface{}{L0, company_name, v["stock_proportion"], company_status, credit_no, company_area, company_city, company_district}
+			tmp := []interface{}{L0, company_name, v["stock_proportion"], company_status, credit_no, company_area, company_city, company_district, v["legal_person"], v["capital"]}
 			if !IsContained(tmp, lastRes[L0]) {
 				lastRes[L0] = append(lastRes[L0], tmp)
 			}

+ 16 - 0
mongodb-test/mgo_test.go

@@ -127,3 +127,19 @@ func Test2(T *testing.T) {
 		fmt.Println(222)
 	}
 }
+
+func Test3(T *testing.T) {
+	// 解析时间
+	t, err := time.Parse("2006-01-02 15:04:05", "2024-07-01 00:00:00")
+	if err != nil {
+		panic(err)
+	}
+
+	// 获取时间戳(秒)
+	seconds := t.Unix()
+
+	// 构造 ObjectID
+	objectID := primitive.NewObjectIDFromTimestamp(time.Unix(seconds, 0))
+
+	fmt.Println("ObjectID:", objectID.Hex())
+}

二進制
project_chuan/dealProposed22ConcurrentHistory


+ 13 - 0
project_chuan/init.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"github.com/spf13/viper"
 	"go.uber.org/zap"
@@ -78,6 +79,12 @@ func InitMgo() {
 
 	MgoP.InitPool()
 
+	err := MgoP.C.Ping(context.Background(), nil)
+
+	if err != nil {
+		log.Info("InitMgo", zap.Any(GF.MongoP.Host, "链接失败"))
+	}
+
 	//bidding 查询
 	MgoB = &mongodb.MongodbSim{
 		MongodbAddr: GF.MongoB.Host,
@@ -90,6 +97,12 @@ func InitMgo() {
 
 	MgoB.InitPool()
 
+	err = MgoB.C.Ping(context.Background(), nil)
+
+	if err != nil {
+		log.Info("InitMgo", zap.Any(GF.MongoB.Host, "链接失败"))
+	}
+
 	//qyxy_std 查询
 	MgoQY = &mongodb.MongodbSim{
 		MongodbAddr: GF.MongoB.Host,

+ 15 - 5
project_chuan/project.go

@@ -199,8 +199,11 @@ func dealProposed22Concurrent() {
 		//	"$lte": 1748102400,
 		//},
 
-		"firsttime": map[string]interface{}{
-			"$lte": 1735660800,
+		//"firsttime": map[string]interface{}{
+		//	"$lte": 1735660800,
+		//},
+		"_id": map[string]interface{}{
+			"$lte": mongodb.StringTOBsonId("62b6fbc9fa39106bd5e599fc"),
 		},
 	}
 	iter := coll.Find(query).Select(nil).Sort("-_id").Iter()
@@ -564,12 +567,17 @@ func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThresho
 			query := elastic.NewBoolQuery().
 				Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
 				Filter(filter...)
+			fetchFields := elastic.NewFetchSourceContext(true).Include("id",
+				"title", "projectname", "projectcode", "bidamount", "score", "area",
+				"city", "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
+				"s_winner", "winnertel", "agency", "publishtime")
 
 			// 执行查询
 			searchResult, err := client.Search().
 				Index("bidding").
 				Query(query).
 				Size(70).
+				FetchSourceContext(fetchFields). // 添加这一行
 				Do(context.Background())
 			if err != nil {
 				return nil, err
@@ -633,10 +641,12 @@ func searchES23(client *elastic.Client, projectName, buyer2 string, scoreThresho
 		}
 
 		doc["score"] = score
-		detail := util.ObjToString(doc["detail"])
-
+		//detail := util.ObjToString(doc["detail"])
+		id := util.ObjToString(doc["id"])
+		bidd, _ := MgoB.FindById("bidding", id, nil)
+		detail := util.ObjToString((*bidd)["detail"])
 		// 字段中必须包含 projectName
-		if buyer2 != "" {
+		if buyer2 != "" && detail != "" {
 			if !strings.Contains(detail, projectName) && !strings.Contains(detail, buyer2) {
 				continue
 			}

+ 23 - 0
project_chuan/project_test.go

@@ -0,0 +1,23 @@
+package main
+
+import (
+	"github.com/olivere/elastic/v7"
+	"go.uber.org/zap"
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
+	"testing"
+)
+
+// TestSearchES23 runs searchES23 against the Elasticsearch instance configured
+// in GF.Es for one known project name and logs whatever it returns. It fails
+// when the client cannot be created or when the search itself returns an
+// error; the content of the results is not asserted.
+func TestSearchES23(t *testing.T) {
+	client, err := elastic.NewClient(
+		elastic.SetURL(GF.Es.URL),
+		elastic.SetBasicAuth(GF.Es.Username, GF.Es.Password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		// Fail through the testing framework rather than exiting the process.
+		t.Fatalf("创建 Elasticsearch 客户端失败: %v", err)
+	}
+
+	projectName := "G655荔波至茂兰段公路改扩建工程"
+	results, err := searchES23(client, projectName, "", 20, 50)
+	if err != nil {
+		t.Fatalf("searchES23(%q): %v", projectName, err)
+	}
+	log.Info("aaa", zap.Any("results", results))
+}

+ 243 - 0
updateBidding/bidding.go

@@ -3,10 +3,253 @@ package main
 import (
 	"fmt"
 	"github.com/wcc4869/common_utils/log"
+	"go.uber.org/zap"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
+	"strings"
 )
 
+// updateBiddingBasicClassTest walks every document of qfw_data.bidding on the
+// MgoT connection (newest _id first) and, for each one carrying a non-empty
+// basicClass, queues that field on updateEsPool for an Elasticsearch update.
+// MongoDB itself is not written to.
+func updateBiddingBasicClassTest() {
+	defer util.Catch()
+	conn := MgoT.GetMgoConn()
+	defer MgoT.DestoryMongoConn(conn)
+
+	iter := conn.DB("qfw_data").C("bidding").Find(nil).Select(nil).Sort("-_id").Iter()
+
+	doc := make(map[string]interface{})
+	for n := 0; iter.Next(&doc); n++ {
+		// Progress heartbeat every 5000 documents.
+		if n%5000 == 0 {
+			log.Info("current", log.Int("count", n), log.Any("_id", doc["_id"]))
+		}
+
+		biddingID := mongodb.BsonIdToSId(doc["_id"])
+
+		basicClass, found := doc["basicClass"]
+		if !found || util.ObjToString(basicClass) == "" {
+			continue
+		}
+
+		// Elasticsearch update payload: [selector, fields].
+		updateEsPool <- []map[string]interface{}{
+			{"_id": biddingID},
+			{"basicClass": basicClass},
+		}
+	}
+}
+
+// updateBiddingBasicClass back-fills basicClass for existing data: it scans
+// qfw.bidding on the MgoB connection for documents whose comeintime is at most
+// 1751273717 (newest _id first) and queues every non-empty basicClass on
+// updateEsPool. MongoDB itself is not written to.
+func updateBiddingBasicClass() {
+	defer util.Catch()
+	conn := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(conn)
+
+	filter := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$lte": 1751273717,
+		},
+	}
+	iter := conn.DB("qfw").C("bidding").Find(filter).Select(nil).Sort("-_id").Iter()
+
+	doc := make(map[string]interface{})
+	for n := 0; iter.Next(&doc); n++ {
+		// Progress heartbeat every 5000 documents.
+		if n%5000 == 0 {
+			log.Info("current", log.Int("count", n), log.Any("_id", doc["_id"]))
+		}
+
+		biddingID := mongodb.BsonIdToSId(doc["_id"])
+
+		basicClass, found := doc["basicClass"]
+		if !found || util.ObjToString(basicClass) == "" {
+			continue
+		}
+
+		// Elasticsearch update payload: [selector, fields].
+		updateEsPool <- []map[string]interface{}{
+			{"_id": biddingID},
+			{"basicClass": basicClass},
+		}
+	}
+}
+
+// updateBiddingTypeBySpidecode reclassifies announcements by crawler code.
+// It scans qfw.bidding for documents with comeintime in
+// [1735660800, 1751438445] and, for those whose spidercode is
+// "tj_tjstzxmzxspbsdt_sybj" and whose site is "天津市投资项目在线审批办事大厅",
+// sets toptype/subtype to "拟建" and infoformat to 1, queuing the change for
+// both MongoDB (updatePool) and Elasticsearch (updateEsPool).
+func updateBiddingTypeBySpidecode() {
+	defer util.Catch()
+	conn := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(conn)
+
+	filter := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": 1735660800,
+			//"$lte": 1750908768,
+			"$lte": 1751438445,
+		},
+	}
+	iter := conn.DB("qfw").C("bidding").Find(filter).Select(nil).Iter()
+
+	doc := make(map[string]interface{})
+	for n := 0; iter.Next(&doc); n++ {
+		// Progress heartbeat every 1000 documents.
+		if n%1000 == 0 {
+			log.Info("current", log.Int("count", n), log.Any("_id", doc["_id"]))
+		}
+
+		biddingID := mongodb.BsonIdToSId(doc["_id"])
+		code := util.ObjToString(doc["spidercode"])
+		site := util.ObjToString(doc["site"])
+		if code != "tj_tjstzxmzxspbsdt_sybj" || site != "天津市投资项目在线审批办事大厅" {
+			continue
+		}
+
+		fields := map[string]interface{}{
+			"toptype":    "拟建",
+			"subtype":    "拟建",
+			"infoformat": 1,
+		}
+		// MongoDB update: [selector, modifier].
+		updatePool <- []map[string]interface{}{
+			{"_id": doc["_id"]},
+			{"$set": fields},
+		}
+		// Elasticsearch update: [selector, fields].
+		updateEsPool <- []map[string]interface{}{
+			{"_id": biddingID},
+			fields,
+		}
+	}
+}
+
+// updateBiddingisValidFile copies the isValidFile field to Elasticsearch for
+// qfw.bidding documents whose comeintime lies in [1750755600, 1750822200].
+// Documents without the field are skipped; MongoDB itself is not written to.
+func updateBiddingisValidFile() {
+	defer util.Catch()
+	conn := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(conn)
+
+	filter := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte": 1750755600,
+			"$lte": 1750822200,
+		},
+	}
+	iter := conn.DB("qfw").C("bidding").Find(filter).Select(nil).Sort("-_id").Iter()
+
+	doc := make(map[string]interface{})
+	for n := 0; iter.Next(&doc); n++ {
+		// Progress heartbeat every 1000 documents.
+		if n%1000 == 0 {
+			log.Info("current", log.Int("count", n), log.Any("_id", doc["_id"]))
+		}
+
+		biddingID := mongodb.BsonIdToSId(doc["_id"])
+
+		valid, found := doc["isValidFile"]
+		if !found {
+			continue
+		}
+
+		// Elasticsearch update payload: [selector, fields].
+		updateEsPool <- []map[string]interface{}{
+			{"_id": biddingID},
+			{"isValidFile": valid},
+		}
+	}
+}
+
+// updateBiddingType reclassifies bid-opening records. It scans qfw.bidding
+// over a fixed _id range (newest first); a document is marked
+// toptype="结果" / subtype="开标记录" when its title contains "开标记录" or,
+// failing that, when its detail text satisfies the keyword rules evaluated by
+// MatchAllRules. Each change is queued for both MongoDB and Elasticsearch.
+func updateBiddingType() {
+	defer util.Catch()
+	conn := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(conn)
+
+	filter := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gte": mongodb.StringTOBsonId("5a862f0640d2d9bbe88e3cec"),
+			//"$lte": mongodb.StringTOBsonId("68552e15c936757aa1774910"),
+			"$lte": mongodb.StringTOBsonId("68551ebfc936757aa176c9ae"),
+		},
+	}
+	iter := conn.DB("qfw").C("bidding").Find(filter).Select(nil).Sort("-_id").Iter()
+	ruleStr := `(开标记录表|开标一览表),(开标参与人|开标地点|开标时间|开标记录|开标一览表)3^(公开招标公告|中标人信息|供应商资格要求|投标人资格要求|招标条件|潜在投标人)`
+
+	updated := 0
+	doc := make(map[string]interface{})
+	for n := 0; iter.Next(&doc); n++ {
+		// Progress heartbeat every 1000 scanned documents.
+		if n%1000 == 0 {
+			log.Info("current", log.Int("count", n), log.Any("_id", doc["_id"]))
+		}
+
+		biddingID := mongodb.BsonIdToSId(doc["_id"])
+		title := util.ObjToString(doc["title"])
+		detail := util.ObjToString(doc["detail"])
+
+		// Title match wins; the body rules are only consulted otherwise.
+		if !strings.Contains(title, "开标记录") && !MatchAllRules(detail, ruleStr).Matched {
+			continue
+		}
+		fields := map[string]interface{}{
+			"toptype": "结果",
+			"subtype": "开标记录",
+		}
+
+		updated++
+		if updated%1000 == 0 {
+			log.Info("updateBiddingType", zap.Int("count2", updated), zap.Any("id", biddingID))
+		}
+		// MongoDB update: [selector, modifier].
+		updatePool <- []map[string]interface{}{
+			{"_id": doc["_id"]},
+			{"$set": fields},
+		}
+		// Elasticsearch update: [selector, fields].
+		updateEsPool <- []map[string]interface{}{
+			{"_id": biddingID},
+			fields,
+		}
+	}
+}
+
 // dealBiddingNiJian 更新bidding ,owner 不为空的赋值给buyer
 //if  toptype == "拟建"
 //        if tmp["owner"] != nil

文件差異過大導致無法顯示
+ 8 - 0
updateBidding/bidding_test.go


+ 102 - 0
updateBidding/lua_test.go

@@ -0,0 +1,102 @@
+package main
+
+import (
+	"fmt"
+	"github.com/xuri/excelize/v2"
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
+	"log"
+	"strings"
+	"testing"
+)
+
+// TestUpdateLuaConfig is a one-off data fix driven by ./luaconfig.xlsx
+// (Sheet1, header in the first row). Each data row holds
+// [crawler code, user name, platform]; the matching editor.luaconfig document
+// is updated as follows:
+//
+//  1. platform "golua平台": state=0, platform=golua平台
+//  2. platform "通用平台": state=0, platform=通用平台, claimtype=1
+//  3. platform "jschrome": state=0, platform=jschrome
+//  4. in every case createuser/modifyuser take the user's s_name,
+//     createuserid/modifyuserid the user's _id, and createuseremail/next the
+//     user's s_email (looked up in editor.user by s_name).
+//
+// It needs the local MongoDB at 127.0.0.1:27081 and the workbook on disk.
+func TestUpdateLuaConfig(t *testing.T) {
+	// "87 竞品" instance.
+	MgoLua := &mongodb.MongodbSim{
+		//MongodbAddr: "172.17.189.140:27080",
+		MongodbAddr: "127.0.0.1:27081",
+		Size:        10,
+		DbName:      "editor",
+		UserName:    "",
+		Password:    "",
+		Direct:      true,
+	}
+	MgoLua.InitPool()
+	sess := MgoLua.GetMgoConn()
+	defer MgoLua.DestoryMongoConn(sess)
+
+	f, err := excelize.OpenFile("./luaconfig.xlsx")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer func() {
+		if err := f.Close(); err != nil {
+			fmt.Println(err)
+		}
+	}()
+
+	rows, err := f.GetRows("Sheet1")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	for i := 1; i < len(rows); i++ {
+		row := rows[i]
+		// Rows can come back shorter than three cells (e.g. when trailing
+		// cells are blank); indexing them blindly would panic.
+		if len(row) < 3 {
+			log.Println("row has fewer than 3 columns, skipped", i+1, row)
+			continue
+		}
+		code := strings.TrimSpace(row[0])
+		modifyuser := strings.TrimSpace(row[1])
+		platform := strings.TrimSpace(row[2])
+
+		// Selector for the luaconfig document to update.
+		updateWhere := map[string]interface{}{
+			"code": code,
+		}
+
+		exists, _ := MgoLua.FindOne("luaconfig", updateWhere)
+		// Guard the pointer before dereferencing it, like the user lookup below.
+		if exists == nil || len(*exists) == 0 {
+			log.Println("code 没有找到数据", code)
+			continue
+		}
+		log.Println(code, modifyuser, platform)
+
+		update := make(map[string]interface{})
+		switch platform {
+		case "golua平台":
+			update["state"] = 0
+			update["platform"] = "golua平台"
+		case "通用平台":
+			update["state"] = 0
+			update["platform"] = "通用平台"
+			update["claimtype"] = 1
+		case "jschrome":
+			update["state"] = 0
+			update["platform"] = "jschrome"
+		}
+
+		update["createuser"] = modifyuser
+		update["modifyuser"] = modifyuser
+
+		where := map[string]interface{}{
+			"s_name": modifyuser,
+		}
+
+		user, _ := MgoLua.FindOne("user", where)
+		// An empty document is treated like a failed lookup so that blank
+		// ids/emails are never written into luaconfig.
+		if user == nil || len(*user) == 0 {
+			log.Println("user 查询失败", where)
+			return
+		}
+
+		update["modifyuserid"] = mongodb.BsonIdToSId((*user)["_id"])
+		update["createuserid"] = mongodb.BsonIdToSId((*user)["_id"])
+		update["createuseremail"] = (*user)["s_email"]
+		update["next"] = (*user)["s_email"]
+
+		MgoLua.Update("luaconfig", updateWhere, map[string]interface{}{"$set": update}, true, false)
+	}
+}

+ 48 - 0
updateBidding/luaconfig.go

@@ -0,0 +1,48 @@
+package main
+
+import (
+	"fmt"
+	"github.com/xuri/excelize/v2"
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
+	"log"
+)
+
+// updateLuaConfiig is the entry point for updating the crawler collection
+// platform configuration. At present it only opens ./luaconfig.xlsx and logs
+// every data row of Sheet1 (the header row is skipped); the MongoDB pool for
+// the editor database is initialised but not yet queried.
+func updateLuaConfiig() {
+	// "87 竞品" instance.
+	MgoLua := &mongodb.MongodbSim{
+		//MongodbAddr: "172.17.189.140:27080",
+		MongodbAddr: "127.0.0.1:27081",
+		Size:        10,
+		DbName:      "editor",
+		UserName:    "",
+		Password:    "",
+		Direct:      true,
+	}
+	MgoLua.InitPool()
+	sess := MgoLua.GetMgoConn()
+	defer MgoLua.DestoryMongoConn(sess)
+
+	f, err := excelize.OpenFile("./luaconfig.xlsx")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer func() {
+		if err := f.Close(); err != nil {
+			fmt.Println(err)
+		}
+	}()
+
+	// The sheet name was "Sheetq"; TestUpdateLuaConfig reads the same
+	// workbook through "Sheet1", so the old name looks like a typo.
+	// NOTE(review): confirm the sheet name against luaconfig.xlsx.
+	rows, err := f.GetRows("Sheet1")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	for i := 1; i < len(rows); i++ {
+		row := rows[i]
+		log.Println(row)
+	}
+}

二進制
updateBidding/luaconfig.xlsx


+ 75 - 33
updateBidding/main.go

@@ -25,7 +25,7 @@ var (
 	saveSize = 50
 	Es       *elastic.Elastic // 19908
 	EsNew    *elastic.Elastic //19905
-	//EsT      *elastic.Elastic
+	EsT      *elastic.Elastic
 
 	// 更新mongo
 	updatePool = make(chan []map[string]interface{}, 5000)
@@ -40,16 +40,16 @@ var (
 )
 
 func Init() {
-	//MgoB = &mongodb.MongodbSim{
-	//	MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080",
-	//	//MongodbAddr: "127.0.0.1:27083",
-	//	DbName:   "qfw",
-	//	Size:     10,
-	//	UserName: "SJZY_RWbid_ES",
-	//	Password: "SJZY@B4i4D5e6S",
-	//	//Direct:      true,
-	//}
-	//MgoB.InitPool()
+	MgoB = &mongodb.MongodbSim{
+		MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080",
+		//MongodbAddr: "127.0.0.1:27083",
+		DbName:   "qfw",
+		Size:     10,
+		UserName: "SJZY_RWbid_ES",
+		Password: "SJZY@B4i4D5e6S",
+		//Direct:      true,
+	}
+	MgoB.InitPool()
 
 	//MgoBAi = &mongodb.MongodbSim{
 	//	//MongodbAddr: "172.17.189.140:27080",
@@ -75,36 +75,34 @@ func Init() {
 	//Mgo.InitPool()
 
 	//85
-	MgoR = &mongodb.MongodbSim{
-		//MongodbAddr: "127.0.0.1:27080",
-		MongodbAddr: "172.17.4.85:27080",
-		DbName:      "qfw",
-		Size:        10,
-		//Direct: true,
-	}
-	MgoR.InitPool()
+	//MgoR = &mongodb.MongodbSim{
+	//	//MongodbAddr: "127.0.0.1:27080",
+	//	MongodbAddr: "172.17.4.85:27080",
+	//	DbName:      "qfw",
+	//	Size:        10,
+	//	//Direct: true,
+	//}
+	//MgoR.InitPool()
 
-	//测试环境MongoDB
+	////测试环境MongoDB
 	//MgoT = &mongodb.MongodbSim{
-	//	//MongodbAddr: "172.17.189.140:27080",
-	//	MongodbAddr: "192.168.3.206:27002",
+	//	MongodbAddr: "172.20.45.129:27002",
 	//	DbName:      "qfw_data",
 	//	Size:        10,
-	//	UserName:    "root",
-	//	Password:    "root",
+	//	UserName:    "",
+	//	Password:    "",
 	//	//Direct:      true,
 	//}
 	//MgoT.InitPool()
-
-	////测试环境es
-	//Es = &elastic.Elastic{
-	//	S_esurl: "http://192.168.3.149:9201",
-	//	//S_esurl:  "http://172.17.4.184:19805",
+	//
+	//////测试环境es
+	//EsT = &elastic.Elastic{
+	//	S_esurl:  "http://172.20.45.129:9206",
 	//	I_size:   5,
 	//	Username: "",
 	//	Password: "",
 	//}
-	//Es.InitElasticSize()
+	//EsT.InitElasticSize()
 
 	//es
 	Es = &elastic.Elastic{
@@ -130,8 +128,9 @@ func Init() {
 func main() {
 	Init()
 	//InitEsBiddingField()
-	//go updateMethod()   //更新mongodb
-	//go updateEsMethod() //更新es
+	go updateMethod()   //更新mongodb
+	go updateEsMethod() //更新es
+	//go updateEsMethodTest() // 更新测试环境ES
 	//go updateEsHrefMethod() //更新es href 字段
 	//go updateProjectEsMethod()
 	//taskRunProject()
@@ -145,7 +144,14 @@ func main() {
 	//updateBiddingBidamount()
 	//updateProject()
 	//-------------------------------//
-	fixBiddingEs()
+	//fixBiddingEs()
+
+	//updateBiddingType() //更新标讯分类
+
+	//updateBiddingisValidFile()      //更新bidding isValidFile字段
+	updateBiddingTypeBySpidecode() //更新bidding ;根据spidecode 字段
+	//updateBiddingBasicClass() 	//更新 存量数据 basicClass 字段
+	//updateBiddingBasicClassTest() //更新测试环境 	basicClass 字段
 	log.Info("over")
 	c := make(chan bool, 1)
 	<-c
@@ -1047,6 +1053,42 @@ func updateEsMethod() {
 	}
 }
 
+// updateEsMethodTest drains updateEsPool into the test-environment
+// Elasticsearch client EsT. Items are buffered and sent to the "bidding"
+// index with UpdateBulk either when 200 have accumulated or when no new item
+// arrives for one second. Every bulk call runs in its own goroutine, and
+// updateEsSp acts as the semaphore bounding how many run at once.
+func updateEsMethodTest() {
+	const batchSize = 200
+
+	batch := make([][]map[string]interface{}, batchSize)
+	filled := 0
+
+	// flush hands the filled part of the buffer to a worker goroutine and
+	// starts a fresh buffer so the worker owns its slice exclusively.
+	flush := func() {
+		updateEsSp <- true
+		go func(docs [][]map[string]interface{}) {
+			defer func() {
+				<-updateEsSp
+			}()
+			EsT.UpdateBulk("bidding", docs...)
+		}(batch[:filled])
+		batch = make([][]map[string]interface{}, batchSize)
+		filled = 0
+	}
+
+	for {
+		select {
+		case item := <-updateEsPool:
+			batch[filled] = item
+			filled++
+			if filled == batchSize {
+				flush()
+			}
+		case <-time.After(1000 * time.Millisecond):
+			if filled > 0 {
+				flush()
+			}
+		}
+	}
+}
+
 // updateEsMethod 更新es href 字段
 func updateEsHrefMethod() {
 	arru := make([][]map[string]interface{}, 200)

+ 113 - 0
updateBidding/tools.go

@@ -0,0 +1,113 @@
+package main
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+// RuleDetail records how a single rule fared against a text.
+type RuleDetail struct {
+	RuleRaw         string   // the rule exactly as it was passed in
+	RequiredMatched []string // required keywords found in the text
+	ExcludedMatched []string // excluded keywords found in the text
+	Matched         bool     // enough required keywords hit and no excluded keyword hit
+}
+
+// MatchResult aggregates the outcome of evaluating a list of rules.
+type MatchResult struct {
+	Matched      bool         // true when at least one rule matched
+	MatchedRules []string     // raw text of the rules that matched
+	FailedRules  []string     // raw text of the rules that did not match
+	Details      []RuleDetail // per-rule details, in evaluation order
+}
+
+// Rule grammars, compiled once at package scope: parseRule runs for every
+// rule of every document scanned, so compiling per call is wasted work.
+var (
+	// compoundRuleRe matches "(A|B|C)N^(X|Y|Z)".
+	compoundRuleRe = regexp.MustCompile(`^\(([^)]+)\)(\d+)\^\(([^)]+)\)$`)
+	// simpleRuleRe matches "(A|B|C)".
+	simpleRuleRe = regexp.MustCompile(`^\(([^)]+)\)$`)
+)
+
+// parseRule splits one rule into its parts.
+//
+// A compound rule "(A|B|C)N^(X|Y|Z)" yields the required keywords A, B, C,
+// the minimum hit count N and the excluded keywords X, Y, Z. A simple rule
+// "(A|B|C)" yields the required keywords, a minimum count of 1 and no
+// exclusions. Surrounding whitespace is ignored. Any rule containing "^" is
+// parsed as compound.
+//
+// parseRule panics when the rule does not follow either grammar.
+func parseRule(rule string) (required []string, minCount int, excluded []string) {
+	rule = strings.TrimSpace(rule)
+
+	if strings.Contains(rule, "^") {
+		matches := compoundRuleRe.FindStringSubmatch(rule)
+		if len(matches) != 4 {
+			panic("复合规则格式错误:" + rule)
+		}
+		return strings.Split(matches[1], "|"), atoi(matches[2]), strings.Split(matches[3], "|")
+	}
+
+	matches := simpleRuleRe.FindStringSubmatch(rule)
+	if len(matches) != 2 {
+		panic("简单规则格式错误:" + rule)
+	}
+	return strings.Split(matches[1], "|"), 1, nil
+}
+
+// atoi parses a leading decimal integer from s and returns 0 when s cannot be
+// scanned as a number.
+func atoi(s string) int {
+	var i int
+	// The scan error is deliberately ignored: i stays 0 on failure, which is
+	// the documented fallback.
+	fmt.Sscanf(s, "%d", &i)
+	return i
+}
+
+// matchSingleRule evaluates one rule against text. The rule matches when at
+// least minCount of its required keywords occur in text and none of its
+// excluded keywords do; the keywords that were found are returned alongside
+// the verdict.
+func matchSingleRule(text string, rule string) RuleDetail {
+	required, minCount, excluded := parseRule(rule)
+
+	// present returns the keywords that occur in text, in rule order
+	// (nil when there are none).
+	present := func(keywords []string) []string {
+		var found []string
+		for _, kw := range keywords {
+			if strings.Contains(text, kw) {
+				found = append(found, kw)
+			}
+		}
+		return found
+	}
+
+	hits := present(required)
+	blocked := present(excluded)
+
+	return RuleDetail{
+		RuleRaw:         rule,
+		RequiredMatched: hits,
+		ExcludedMatched: blocked,
+		Matched:         len(hits) >= minCount && len(blocked) == 0,
+	}
+}
+
+// MatchAllRules evaluates every rule in ruleStr (split by splitRules) against
+// text. The overall result is a match as soon as any single rule matches;
+// per-rule details and the matched/failed rule lists are recorded as well.
+func MatchAllRules(text string, ruleStr string) MatchResult {
+	var out MatchResult
+	for _, rule := range splitRules(ruleStr) {
+		d := matchSingleRule(text, rule)
+		out.Details = append(out.Details, d)
+		if !d.Matched {
+			out.FailedRules = append(out.FailedRules, rule)
+			continue
+		}
+		out.MatchedRules = append(out.MatchedRules, rule)
+	}
+
+	out.Matched = len(out.MatchedRules) > 0
+	return out
+}
+
+// splitRules splits a comma-separated rule list into trimmed rules. Commas
+// inside parentheses belong to the rule and do not split it.
+//
+// Empty segments (empty input, leading/trailing or doubled commas) are
+// dropped, so callers never receive a blank rule that would fail to parse.
+// A stray ")" is ignored for nesting purposes instead of driving the depth
+// negative, which would otherwise stop every later comma from splitting.
+func splitRules(s string) []string {
+	var res []string
+	start, depth := 0, 0
+
+	// emit appends s[start:end] to the result unless it is blank.
+	emit := func(end int) {
+		if part := strings.TrimSpace(s[start:end]); part != "" {
+			res = append(res, part)
+		}
+	}
+
+	// i is a byte offset; the delimiters are single-byte ASCII, so slicing
+	// at i and resuming at i+1 is safe for multi-byte text.
+	for i, c := range s {
+		switch c {
+		case '(':
+			depth++
+		case ')':
+			if depth > 0 {
+				depth--
+			}
+		case ',':
+			if depth == 0 {
+				emit(i)
+				start = i + 1
+			}
+		}
+	}
+	emit(len(s))
+	return res
+}

二進制
updateBidding/updateBiddingTypeBySpidecode


+ 197 - 0
xlsx/181.go

@@ -0,0 +1,197 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"github.com/xuri/excelize/v2"
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+	"log"
+	"net/url"
+	"strings"
+)
+
+// Enterprise is one special_enterprise record together with the Excel rule
+// that matched it; it is the document shape written to the temporary
+// collection.
+type Enterprise struct {
+	CompanyName string  `bson:"company_name"`
+	CompanyType string  `bson:"company_type"`
+	CreditNo    string  `bson:"credit_no"`
+	Organizer   string  `bson:"organizer"`
+	UseFlag     float64 `bson:"use_flag"`
+	RuleName    string  `bson:"rule_name"` // matched rule text, taken from column B of the Excel sheet
+}
+
+// matchSpecialEnterprise matches special enterprises against a keyword list.
+//
+// It reads rule names from column B of Sheet1 in
+// ./政府、学校、医院、银行等.xlsx, loads every mixdata.special_enterprise
+// document with use_flag=0 into memory, and keeps each enterprise whose
+// organizer contains a rule name or is contained in one (first hit wins).
+// The kept records, tagged with the rule, are inserted into
+// mixdata.special_enterprise_temp_03 in batches of 1000.
+//
+// Setup and read failures terminate the process through log.Fatal (deferred
+// cleanups do not run in that case); insert failures are logged and the
+// remaining batches are still attempted.
+func matchSpecialEnterprise() {
+	ctx := context.Background()
+	/**
+	  MgoQY := &mongodb.MongodbSim{
+	  	MongodbAddr: "172.17.4.181:27001",
+	  	//MongodbAddr: "127.0.0.1:27001",
+	  	DbName:   "mixdata",
+	  	Size:     10,
+	  	UserName: "",
+	  	Password: "",
+	  	//Direct:      true,
+	  }
+	  MgoQY.InitPool()
+	*/
+	username := ""
+	password := ""
+	hosts := []string{"172.17.4.181:27001"}
+
+	uri, err := BuildMongoURI(username, password, hosts, nil)
+	if err != nil {
+		panic(err)
+	}
+	// Connect to MongoDB.
+	client, err := mongo.Connect(ctx, options.Client().ApplyURI(uri))
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer client.Disconnect(ctx)
+
+	db := client.Database("mixdata")
+	sourceCol := db.Collection("special_enterprise")
+	targetCol := db.Collection("special_enterprise_temp_03")
+
+	// Read the rule names from the workbook.
+	f, err := excelize.OpenFile("./政府、学校、医院、银行等.xlsx")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+
+	rows, err := f.GetRows("Sheet1")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	var ruleNames []string
+	for idx, row := range rows {
+		if idx == 0 {
+			continue // skip the header row
+		}
+		if len(row) > 1 {
+			ruleNames = append(ruleNames, strings.TrimSpace(row[1]))
+		}
+	}
+	log.Printf("读取到 %d 条规则", len(ruleNames))
+
+	// Load every use_flag=0 document in one pass.
+	cursor, err := sourceCol.Find(ctx, bson.M{"use_flag": 0.0})
+	if err != nil {
+		log.Fatal("查询Mongo失败:", err)
+	}
+	defer cursor.Close(ctx)
+
+	var allEnterprises []Enterprise
+	for cursor.Next(ctx) {
+		var doc bson.M
+		if err := cursor.Decode(&doc); err != nil {
+			log.Println("解码失败:", err)
+			continue
+		}
+
+		ent := Enterprise{
+			CompanyName: getString(doc, "company_name"),
+			CompanyType: getString(doc, "company_type"),
+			CreditNo:    getString(doc, "credit_no"),
+			Organizer:   getString(doc, "organizer"),
+			UseFlag:     getFloat64(doc, "use_flag"),
+		}
+		allEnterprises = append(allEnterprises, ent)
+	}
+	// Next returns false both at the end of the data and on failure; without
+	// this check a broken cursor would silently truncate the input.
+	if err := cursor.Err(); err != nil {
+		log.Fatal("遍历Mongo游标失败:", err)
+	}
+	log.Printf("从MongoDB读取到 %d 条数据", len(allEnterprises))
+
+	var matched []interface{}
+	for idx, ent := range allEnterprises {
+		if idx%10000 == 0 {
+			log.Println("当前匹配企业", idx, ent.CompanyName, "还剩余", len(allEnterprises)-idx)
+		}
+
+		if ent.Organizer == "" {
+			continue
+		}
+		for _, rule := range ruleNames {
+			if rule == "" {
+				continue
+			}
+			if strings.Contains(ent.Organizer, rule) || strings.Contains(rule, ent.Organizer) {
+				newEnt := ent
+				newEnt.RuleName = rule
+				matched = append(matched, newEnt)
+				break // first matching rule wins; move on to the next enterprise
+			}
+		}
+	}
+
+	log.Printf("匹配到 %d 条记录,准备写入临时表", len(matched))
+
+	// Write to MongoDB in batches.
+	const batchSize = 1000
+	for i := 0; i < len(matched); i += batchSize {
+		end := i + batchSize
+		if end > len(matched) {
+			end = len(matched)
+		}
+		_, err := targetCol.InsertMany(ctx, matched[i:end])
+		if err != nil {
+			log.Println("插入失败:", err)
+		}
+	}
+
+	log.Println("全部完成!")
+}
+
+// getString returns m[key] when it holds a string and "" when the key is
+// missing or holds any other type.
+func getString(m bson.M, key string) string {
+	// A failed assertion (including on a missing key) leaves s empty.
+	s, _ := m[key].(string)
+	return s
+}
+
+// getFloat64 returns m[key] as a float64 when it holds a float64, int32 or
+// int64, and 0 when the key is missing or holds any other type.
+func getFloat64(m bson.M, key string) float64 {
+	switch n := m[key].(type) {
+	case float64:
+		return n
+	case int32:
+		return float64(n)
+	case int64:
+		return float64(n)
+	default:
+		return 0
+	}
+}
+
+// BuildMongoURI assembles a MongoDB connection string of the form
+//
+//	mongodb://[username:password@]host1[,host2...][/?key=value&...]
+//
+// Credentials are added only when username is non-empty and are
+// percent-encoded; an empty password yields "username:@". Options are
+// URL-encoded with keys in sorted order and, when present, preceded by the
+// "/" that separates the host list from the query. An error is returned when
+// hosts is empty.
+func BuildMongoURI(username, password string, hosts []string, options map[string]string) (string, error) {
+	if len(hosts) == 0 {
+		return "", fmt.Errorf("hosts cannot be empty")
+	}
+
+	// escape percent-encodes a userinfo component. url.QueryEscape turns a
+	// space into "+", which is only decoded back to a space in query
+	// strings, so it is rewritten to "%20" here.
+	escape := func(s string) string {
+		return strings.ReplaceAll(url.QueryEscape(s), "+", "%20")
+	}
+
+	var b strings.Builder
+	b.WriteString("mongodb://")
+
+	if username != "" {
+		b.WriteString(escape(username))
+		b.WriteByte(':')
+		b.WriteString(escape(password))
+		b.WriteByte('@')
+	}
+
+	b.WriteString(strings.Join(hosts, ","))
+
+	if len(options) > 0 {
+		query := url.Values{}
+		for k, v := range options {
+			query.Set(k, v)
+		}
+		// The query must follow a "/", otherwise it runs into the host list.
+		b.WriteString("/?")
+		b.WriteString(query.Encode())
+	}
+
+	return b.String(), nil
+}

+ 2 - 1
xlsx/main.go

@@ -58,7 +58,8 @@ func main() {
 	//select {}
 	//getCountBidding() //统计采购单位标讯数据量
 
-	processExcel()
+	//processExcel()
+	matchSpecialEnterprise()
 	log.Println("over")
 }
 

二進制
xlsx/matchSpecialEnterprise


二進制
xlsx/政府、学校、医院、银行等.xlsx


部分文件因文件數量過多而無法顯示