package main

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"testing"

	"github.com/olivere/elastic/v7"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
)

// TestEs is a scratch test for the regular expressions that pull package
// ("包") and section ("标段") markers out of tender titles. It only prints the
// matches; nothing is asserted.
func TestEs(T *testing.T) {
	s := "aa"
	fmt.Println(strings.Contains(s, ","))

	// Earlier pattern experiments, kept for reference.
	//re := regexp.MustCompile(`包\d+`) // title contains a single package, e.g. 包2
	//re := regexp.MustCompile(`\d+批包(\d+(?:、\d+)*)`)
	// define the regular expression
	//re3 := regexp.MustCompile(`(?:包)?(\d+(?:、\d+)*)包?`) // title lists several packages: 包10、12、14、17、18、19
	//re3 := regexp.MustCompile(`包(\d+(?:、\d+)*)`) // title lists several packages: 包10、12、14、17、18、19
	////re3 := regexp.MustCompile(`\d+包`) // sample: 冀中股份2023年12月标准件计划框架协议采购2包12.7
	////re2 := regexp.MustCompile(`标包\d+`) // sample: 2024一季度水火电一般工序类中小金工件(标包1)-招标公告
	//text := "中国绿发投资集团有限公司直属项目公司2023年第20批集中采购非招标项目(包10、12、14、17、18、19"
	//matches := re.FindAllString(util.ObjToString(text), -1)
	//log.Println(matches)
	//title := "中电科青岛科技产业园一期设计施工总承包1标段"
	//rea := regexp.MustCompile(`总承包\d标段`) // sample match: 总承包1标段
	//maches := rea.FindAllString(title, -1)
	//log.Println("rea", maches)
	//reb := regexp.MustCompile(`包\d[-~]\d`) // sample match: 包1-6
	//title = "2023年度分布式光伏发电项目主要设备(包1~6)设备采购合格供应商"
	//matches := reb.FindAllString(title, -1)
	//log.Println("reb", matches)
	//re := regexp.MustCompile(`\d+批包(\d+(?:、\d+)*)`)

	// Pairs of package numbers joined by "-", "~", "、" or "和",
	// e.g. 1-6包, 01-06包, 01、02包, 包1、包2.
	re := regexp.MustCompile(`包?\d{1,2}[-~、和](包)?\d{1,2}包?`)
	title := "中国绿发投资集团有限公司直属项目公司2023年第20批集中标段1采购非招标项目(包10、12、14、17、18、19"
	matches := re.FindAllString(title, -1)
	log.Println("rec", matches)

	//re2 := regexp.MustCompile(`包?\d{1,2}`) // title contains a single package, e.g. 包2
	//re3 := regexp.MustCompile(`\d{1,2}包`) // title contains a single package, e.g. 包2

	// Single markers: "标段N", "包NN", "NN包" or a lettered package "A包".."K包".
	re3 := regexp.MustCompile(`(标段[1-9一二三四五六七八九]|包[1-9一二三四五六七八九]?[0-9]|[1-9一二三四五六七八九]?[0-9]包|[a-kA-K]包)`)
	title = "济南市历下标段3区东关中心幼儿园超市包21、22、23、24"
	matches = re3.FindAllString(title, -1)
	log.Println("aaa", matches)

	//ree := regexp.MustCompile(`第\d{1,2}[批]|\d{1,2}标段|标段\d{1,2}`)
	//ree2 := regexp.MustCompile(`(第)?\d{1,2}[标段\d{1,2}]`)
	//log.Println(ree2.FindAllString(title, -1))

	// Raw sample data.
	data := []string{"包1", "包1-2", "包4", "包4-5", "包3", "包5", "包9", "包11", "包12", "11包", "标段1", "包13"}
	// Merge duplicates and consecutive package numbers.
	uniquePackages := removeDuplicates(data)
	// Print the merged result.
	fmt.Println("去重后的数据:", uniquePackages)
}

// removeDuplicates merges package labels of the form "包N" or "包N-M".
//
// Every package number covered by an entry is collected once. Runs of
// consecutive numbers are emitted as "包<first>-<last>"; an isolated number is
// emitted using the text that was matched for it. Entries that do not contain
// a usable "包N" / "包N-M" marker (no match, a number too large for int, or a
// reversed range such as "包5-3") are appended verbatim, in input order, after
// the merged labels.
//
// The returned slice is never nil and is empty when data is empty.
func removeDuplicates(data []string) []string {
	// Group 2 is the first package number, group 3 the optional range end.
	re := regexp.MustCompile(`(包)(\d+)(?:-(\d+))?`)

	seen := make(map[int]bool)     // package numbers covered by any entry
	labels := make(map[int]string) // matched text, keyed by its first number
	unmatched := make([]string, 0) // entries passed through untouched

	for _, item := range data {
		m := re.FindStringSubmatch(item)
		if m == nil {
			unmatched = append(unmatched, item)
			continue
		}

		// Atoi fails only on overflow here; expanding a clamped value would
		// make the loop below wrap around and never finish.
		start, err := strconv.Atoi(m[2])
		if err != nil {
			unmatched = append(unmatched, item)
			continue
		}
		end := start
		if m[3] != "" {
			end, err = strconv.Atoi(m[3])
			if err != nil {
				unmatched = append(unmatched, item)
				continue
			}
		}
		// A reversed range covers no numbers; keep the entry as-is instead of
		// silently dropping it.
		if end < start {
			unmatched = append(unmatched, item)
			continue
		}

		for n := start; n <= end; n++ {
			seen[n] = true
		}
		labels[start] = m[0]
	}

	nums := make([]int, 0, len(seen))
	for n := range seen {
		nums = append(nums, n)
	}
	sort.Ints(nums)

	// Walk the sorted numbers one consecutive run at a time. With no numbers
	// the loop body never runs, so no empty label is emitted.
	result := make([]string, 0, len(nums)+len(unmatched))
	for i := 0; i < len(nums); {
		j := i
		for j+1 < len(nums) && nums[j+1] == nums[j]+1 {
			j++
		}
		if i == j {
			result = append(result, labels[nums[i]])
		} else {
			result = append(result, fmt.Sprintf("包%d-%d", nums[i], nums[j]))
		}
		i = j + 1
	}

	return append(result, unmatched...)
}

// TestSyncEs copies documents of the "bidding" index from one Elasticsearch
// endpoint to a second one: it scrolls over an id range on the source and
// indexes every hit on the target under the document's own "id" field.
//
// NOTE(review): endpoints and credentials are hard-coded below; they should
// come from configuration or the environment rather than source control.
func TestSyncEs(T *testing.T) {
	//url := "http://172.17.4.184:19805"
	url := "http://127.0.0.1:19805"
	username := "es_all"
	password := "TopJkO2E_d1x"
	index := "bidding" // index name

	// Source Elasticsearch client.
	client, err := elastic.NewClient(
		elastic.SetURL(url),
		elastic.SetBasicAuth(username, password),
		elastic.SetSniff(false),
	)
	if err != nil {
		// Fail this test instead of exiting the whole test binary.
		T.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
	}

	url2 := "http://127.0.0.1:19905"
	username2 := "jybid"
	password2 := "Top2023_JEB01i@31"

	// Target Elasticsearch client.
	client2, err := elastic.NewClient(
		elastic.SetURL(url2),
		elastic.SetBasicAuth(username2, password2),
		elastic.SetSniff(false),
	)
	if err != nil {
		T.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
	}

	rangeQuery := elastic.NewRangeQuery("id").Gte("65869b436977356f55a01b0b").Lt("6586a4196977356f55a02c79")
	query := elastic.NewBoolQuery().Must(rangeQuery)

	ctx := context.Background()

	// Start the scroll search.
	scrollID := ""
	scroll := "1m"
	searchSource := elastic.NewSearchSource().
		Query(query).
		Size(10000).
		Sort("_doc", true) // ascending
	//Sort("_doc", false) // descending
	searchService := client.Scroll(index).
		Size(10000).
		Scroll(scroll).
		SearchSource(searchSource)

	// Release the scroll context on exit. The closure reads scrollID when it
	// runs; a plain `defer x.ScrollId(scrollID).Do(ctx)` would capture the
	// still-empty id at the defer statement.
	defer func() {
		if scrollID == "" {
			return
		}
		if _, err := client.ClearScroll().ScrollId(scrollID).Do(ctx); err != nil {
			log.Printf("清理滚动上下文失败:%s", err)
		}
	}()

	res, err := searchService.Do(ctx)
	if res != nil {
		scrollID = res.ScrollId
	}
	if err != nil {
		if err == io.EOF {
			// Nothing matched; res must not be dereferenced.
			fmt.Println("没有数据")
			return
		}
		T.Fatalf("滚动搜索失败:%s", err)
	}

	fmt.Println("总数是:", res.TotalHits())

	total := 0
	for res != nil && res.Hits != nil && len(res.Hits.Hits) > 0 {
		for _, hit := range res.Hits.Hits {
			var doc map[string]interface{}
			if err := json.Unmarshal(hit.Source, &doc); err != nil {
				log.Printf("解析文档失败:%s", err)
				continue
			}

			id := util.ObjToString(doc["id"])
			if _, err := client2.Index().Index(index).Id(id).BodyJson(doc).Do(ctx); err != nil {
				// Best effort: report the failed document and keep going.
				log.Printf("写入文档失败 id=%s:%s", id, err)
			}
		}

		total += len(res.Hits.Hits)
		scrollID = res.ScrollId
		res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
		log.Println("current count:", total)
		if err != nil {
			if err == io.EOF {
				// Last batch has been consumed.
				break
			}
			log.Printf("滚动搜索失败:%s", err)
			break // stop on any other error
		}
	}

	fmt.Println("结束~~~~~~~~~~~~~~~")
}

// TestGetP scrolls over the "projectset" index for documents whose "pici"
// lies in (1672502400, 1704038400], collects package/section markers from the
// titles of their "招标" list entries via GetMatches, and saves one record per
// project into the MongoDB collection "wcc_project_20240304".
//
// NOTE(review): the pici bounds look like Unix timestamps — confirm the
// intended window. Endpoints and credentials are hard-coded below; they
// should come from configuration or the environment.
func TestGetP(T *testing.T) {
	MgoB := &mongodb.MongodbSim{
		//MongodbAddr: "172.17.189.140:27080",
		MongodbAddr: "127.0.0.1:27083",
		Size:        10,
		DbName:      "qfw",
		UserName:    "SJZY_RWbid_ES",
		Password:    "SJZY@B4i4D5e6S",
		Direct:      true,
	}
	MgoB.InitPool()

	//url := "http://172.17.4.184:19805"
	url := "http://127.0.0.1:19805"
	username := "es_all"
	password := "TopJkO2E_d1x"
	index := "projectset" // index name

	// Elasticsearch client.
	client, err := elastic.NewClient(
		elastic.SetURL(url),
		elastic.SetBasicAuth(username, password),
		elastic.SetSniff(false),
	)
	if err != nil {
		// Fail this test instead of exiting the whole test binary.
		T.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
	}

	rangeQuery := elastic.NewRangeQuery("pici").Gt("1672502400").Lte("1704038400")
	//termQ := elastic.NewTermQuery("multipackage", 0)
	//rangeQuery := elastic.NewRangeQuery("id").Gt("657b08556977356f5578cb25").Lte("657b08556977356f5578cb26")
	query := elastic.NewBoolQuery().Must(rangeQuery)

	ctx := context.Background()

	// Start the scroll search.
	scrollID := ""
	scroll := "1m"
	searchSource := elastic.NewSearchSource().
		Query(query).
		Size(1000).
		Sort("_doc", true) // ascending
	//Sort("_doc", false) // descending
	searchService := client.Scroll(index).
		Size(1000).
		Scroll(scroll).
		SearchSource(searchSource)

	// Release the scroll context on exit. The closure reads scrollID when it
	// runs; a plain `defer x.ScrollId(scrollID).Do(ctx)` would capture the
	// still-empty id at the defer statement.
	defer func() {
		if scrollID == "" {
			return
		}
		if _, err := client.ClearScroll().ScrollId(scrollID).Do(ctx); err != nil {
			log.Printf("清理滚动上下文失败:%s", err)
		}
	}()

	res, err := searchService.Do(ctx)
	if res != nil {
		scrollID = res.ScrollId
	}
	if err != nil {
		if err == io.EOF {
			// Nothing matched; res must not be dereferenced.
			fmt.Println("没有数据")
			return
		}
		T.Fatalf("滚动搜索失败:%s", err)
	}

	fmt.Println("总数是:", res.TotalHits())

	total := 0
	for res != nil && res.Hits != nil && len(res.Hits.Hits) > 0 {
		for _, hit := range res.Hits.Hits {
			var doc map[string]interface{}
			if err := json.Unmarshal(hit.Source, &doc); err != nil {
				log.Printf("解析文档失败:%s", err)
				continue
			}
			//id := util.ObjToString(doc["id"])
			//log.Println(id)

			var matchWords = make([]string, 0)
			var matchList = make([]interface{}, 0)
			if list, ok := doc["list"].([]interface{}); ok {
				for _, v := range list {
					da, ok := v.(map[string]interface{})
					if !ok {
						continue
					}
					// Only "招标" entries are inspected.
					if util.ObjToString(da["toptype"]) != "招标" {
						continue
					}
					title := util.ObjToString(da["title"])
					// Extract the markers from the title.
					matches := GetMatches(title)
					if len(matches) > 0 {
						matchList = append(matchList, da)
					}
					matchWords = append(matchWords, matches...)
				}
			}

			insert := make(map[string]interface{})
			insert["project_id"] = doc["id"]
			insert["_id"] = doc["id"]
			insert["multipackage"] = doc["multipackage"]
			insert["list"] = doc["list"]
			insert["projectname"] = doc["projectname"]
			insert["sourceinfourl"] = doc["sourceinfourl"]
			if len(matchWords) > 0 {
				insert["matchList"] = matchList
				insert["package_name"] = util.ObjToString(doc["projectname"]) + "-" + strings.Join(matchWords, "、")
				insert["type"] = 1
			}

			MgoB.SaveByOriID("wcc_project_20240304", insert)
		}

		total += len(res.Hits.Hits)
		scrollID = res.ScrollId
		res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
		log.Println("current count:", total)
		if err != nil {
			if err == io.EOF {
				// Last batch has been consumed.
				break
			}
			log.Printf("滚动搜索失败:%s", err)
			break // stop on any other error
		}
	}

	fmt.Println("结束~~~~~~~~~~~~~~~")
}