package main import ( "context" "encoding/json" "fmt" "github.com/olivere/elastic/v7" "gorm.io/driver/mysql" "gorm.io/gorm" "io" util "jygit.jydev.jianyu360.cn/data_processing/common_utils" "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb" "log" "strings" "sync" "unicode/utf8" ) var ( MgoB *mongodb.MongodbSim ) func InitMgo() { MgoB = &mongodb.MongodbSim{ //MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080", MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", Direct: true, } MgoB.InitPool() } func main() { InitMgo() getBidding2() //InitMgo() //getCountProjectWinner3() //CountProjectWinner() //getBiddingLimitData() //getQyxyNationToFiles() //exportQyxy() //导出企业数据 //dealXlsx() //getQyxyNation() //导出 国标行业分类,注册资金靠前的企业 //getQyxyNation() //InitMgo() //getDataFromFile() //updateXlsxDa() return /** getProjectData click 是一起使用的,统计获取中标企业信息 */ //getProjectDataFromEs() //1.拉取项目中标成交数据 //click() //2.处理项目数据,写入clickhouse //click2() //dealData() //getProject() //getQyLimitData() //getBiddingData() //getQyxytData() //getTidb() //getEntInfo() //法人库数据 //getBuyerData() //mgoBidding() //log.Println("开启第二轮") //mgoBidding() //updateMgoEntInfoBuyer() //getZhiMa() //log.Println("over ------------------ over") //fixProjectPortrait() // //ClickhouseData() //gorm 操作Clickhouse;gorm 对Clickhouse的bitmap兼容性不行,放弃 //dealClickhouse() //clickhouse-go 操作 //testUpdateBitmap() //测试环境测试更新Clickhouse bitmap字段 ///-------// //updateHrefByEs() //getGD() // 获取广东企业数据 // //getBidding2() //--------------// //dealYJG() // 处理姚静歌需求,处理项目数据到Clickhouse log.Println("over ------------------ over") } // dealYJG 处理姚静歌 、韩鸿飞 之前的需要,处理企业数据到Clickhouse func dealYJG() { getProjectDataFromEs() } // getBiddingData 获取标讯数据 func getBiddingData() { url := "http://172.17.4.184:19908" //url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "bidding" //索引名称 //index := "projectset" //索引名称 // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } //85 抽取库 //Mgo := &mongodb.MongodbSim{ // //MongodbAddr: "127.0.0.1:27080", // MongodbAddr: "172.17.4.85:27080", // DbName: "top", // Size: 10, // //Direct: true, //} //Mgo.InitPool() MgoB := &mongodb.MongodbSim{ //MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080", MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", //Direct: true, } MgoB.InitPool() //2023年01-01 2023-10-01,,1-3季度 //2024-1 - 2024-4;1704038400-1711900800 //2023-10-1 2024-1-1;1696089600-1704038400 //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市") //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1696089600).Lt(1704038400) //rangeQuery := elastic.NewRangeQuery("publishtime").Gte("1640966400") //query := elastic.NewBoolQuery(). // Must(rangeQuery). // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")) //Must(elastic.NewTermQuery("site", "中国招标与采购网")).Must(rangeQuery) //query := elastic.NewBoolQuery(). // //北京,天津,河北,上海,江苏,浙江,安徽 // //Must(elastic.NewTermQuery("area", "北京市")).sassss // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")). // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")). // Must(rangeQuery) // rangeQuery := elastic.NewRangeQuery("publishtime").Gte("1640966400") termsQuery := elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同") // 综合所有条件 query := elastic.NewBoolQuery(). Must(rangeQuery). Must(termsQuery) ctx := context.Background() //开始滚动搜索 scrollID := "" scroll := "10m" searchSource := elastic.NewSearchSource(). Query(query). Size(10000). Sort("_doc", true) //升序排序 //Sort("_doc", false) //降序排序 searchService := client.Scroll(index). Size(10000). Scroll(scroll). SearchSource(searchSource) res, err := searchService.Do(ctx) if err != nil { if err == io.EOF { fmt.Println("没有数据") } else { panic(err) } } //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源 fmt.Println("总数是:", res.TotalHits()) total := 0 for len(res.Hits.Hits) > 0 { for _, hit := range res.Hits.Hits { var doc map[string]interface{} err := json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } delete(doc, "filetext") delete(doc, "detail") purchasing := util.ObjToString(doc["purchasing"]) if strings.Contains(purchasing, "新华三") || strings.Contains(purchasing, "华三") || strings.Contains(purchasing, "H3C") || strings.Contains(purchasing, "h3c") { //存入新表 err = MgoB.InsertOrUpdate("qfw", "wcc_bidding_test_250219", doc) if err != nil { log.Println("error", doc["id"]) } } // 处理查询结果 //area := util.ObjToString(doc["area"]) //areas := []string{"北京", "上海", "广东", "江苏", "浙江"} //if !IsInStringArray(area, areas) { // continue //} //projectName := util.ObjToString(doc["projectname"]) //if strings.Contains(projectName, "非政府") { // continue //} //buyerclass := util.ObjToString(doc["buyerclass"]) //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" { // continue //} ////存入新表 //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc) //if err != nil { // log.Println("error", doc["id"]) //} } total = total + len(res.Hits.Hits) scrollID = res.ScrollId res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx) log.Println("current count:", total) if err != nil { if err == io.EOF { // 滚动到最后一批数据,退出循环 break } log.Println("滚动搜索失败:", err, res) break // 处理错误时退出循环 } } // 在循环外调用 ClearScroll _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx) if err != nil { log.Printf("清理滚动搜索失败:%s", err) } fmt.Println("结束~~~~~~~~~~~~~~~") } // getProjectDataFromEs 获取项目 中标成交数据 func getProjectDataFromEs() { //url := "http://172.17.4.184:19908" url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "projectset" //索引名称 // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } //85 抽取库 Mgo := &mongodb.MongodbSim{ MongodbAddr: "127.0.0.1:27080", //MongodbAddr: "172.17.4.85:27080", DbName: "top", Size: 10, Direct: true, } Mgo.InitPool() //MgoB := &mongodb.MongodbSim{ // MongodbAddr: "172.17.189.140:27080", // //MongodbAddr: "127.0.0.1:27083", // Size: 10, // DbName: "qfw", // UserName: "SJZY_RWbid_ES", // Password: "SJZY@B4i4D5e6S", // //Direct: true, //} //MgoB.InitPool() //2023年01-01 2023-10-01,,1-3季度 //2024-1 - 2024-4;1704038400-1711900800 //2023-10-1 2024-1-1;1696089600-1704038400 //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市") //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1711900800).Lt(1719763200) //2024年4-7月 //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1735660800).Lt(1743436800) //2025年1-3月;25年第一季度数据 rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1743436800).Lt(1751299200) //2025年4.1-7.1 ;25年第二季度数据 query := elastic.NewBoolQuery(). //Must(areaTermsQuery). Must(elastic.NewTermsQuery("bidstatus", "中标", "单一", "成交", "合同")). Must(rangeQuery) //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000") //query := elastic.NewBoolQuery(). // //北京,天津,河北,上海,江苏,浙江,安徽 // //Must(elastic.NewTermQuery("area", "北京市")). // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")). // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")). // Must(rangeQuery) ctx := context.Background() //开始滚动搜索 scrollID := "" scroll := "10m" searchSource := elastic.NewSearchSource(). Query(query). Size(10000). Sort("_doc", true) //升序排序 //Sort("_doc", false) //降序排序 searchService := client.Scroll(index). Size(10000). Scroll(scroll). SearchSource(searchSource) res, err := searchService.Do(ctx) if err != nil { if err == io.EOF { fmt.Println("没有数据") } else { panic(err) } } //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源 fmt.Println("总数是:", res.TotalHits()) total := 0 for len(res.Hits.Hits) > 0 { for _, hit := range res.Hits.Hits { var doc map[string]interface{} err := json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } delete(doc, "filetext") delete(doc, "detail") sWinner := util.ObjToString(doc["s_winner"]) winners := strings.Split(sWinner, ",") for _, v := range winners { insert := doc insert["s_winner"] = v //存入新表 err = Mgo.InsertOrUpdate("top", "wcc_allcity_2025Q2", insert) if err != nil { log.Println("error", doc["id"]) } } // 处理查询结果 //area := util.ObjToString(doc["area"]) //areas := []string{"北京", "上海", "广东", "江苏", "浙江"} //if !IsInStringArray(area, areas) { // continue //} //projectName := util.ObjToString(doc["projectname"]) //if strings.Contains(projectName, "非政府") { // continue //} //buyerclass := util.ObjToString(doc["buyerclass"]) //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" { // continue //} ////存入新表 //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc) //if err != nil { // log.Println("error", doc["id"]) //} } total = total + len(res.Hits.Hits) scrollID = res.ScrollId res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx) log.Println("current count:", total) if err != nil { if err == io.EOF { // 滚动到最后一批数据,退出循环 break } log.Println("滚动搜索失败:", err, res) break // 处理错误时退出循环 } } // 在循环外调用 ClearScroll _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx) if err != nil { log.Printf("清理滚动搜索失败:%s", err) } fmt.Println("结束~~~~~~~~~~~~~~~") } // getQyxytData 获取企业数据 func getQyxytData() { //url := "http://172.17.4.184:19908" url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "qyxy" //索引名称 // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } //85 抽取库 //Mgo := &mongodb.MongodbSim{ // //MongodbAddr: "127.0.0.1:27080", // MongodbAddr: "172.17.4.85:27080", // DbName: "top", // Size: 10, // //Direct: true, //} //Mgo.InitPool() //MgoB := &mongodb.MongodbSim{ // MongodbAddr: "172.17.189.140:27080", // //MongodbAddr: "127.0.0.1:27083", // Size: 10, // DbName: "qfw", // UserName: "SJZY_RWbid_ES", // Password: "SJZY@B4i4D5e6S", // //Direct: true, //} //MgoB.InitPool() //2023年01-01 2023-10-01,,1-3季度 //2024-1 - 2024-4;1704038400-1711900800 //2023-10-1 2024-1-1;1696089600-1704038400 //城市范围 //areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市") //rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400) //query := elastic.NewBoolQuery(). // Must(areaTermsQuery). // Must(rangeQuery) //---------------------------// query := elastic.NewBoolQuery() query.Must(elastic.NewMatchQuery("business_scope", "招投标代理")) query.Must(elastic.NewTermQuery("company_city", "北京市")) //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000") //query := elastic.NewBoolQuery(). // //北京,天津,河北,上海,江苏,浙江,安徽 // //Must(elastic.NewTermQuery("area", "北京市")). // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")). // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")). // Must(rangeQuery) ctx := context.Background() //开始滚动搜索 scrollID := "" scroll := "10m" searchSource := elastic.NewSearchSource(). Query(query). Size(10000). Sort("_doc", true) //升序排序 //Sort("_doc", false) //降序排序 searchService := client.Scroll(index). Size(10000). Scroll(scroll). SearchSource(searchSource) res, err := searchService.Do(ctx) if err != nil { if err == io.EOF { fmt.Println("没有数据") } else { panic(err) } } //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源 fmt.Println("总数是:", res.TotalHits()) total := 0 for len(res.Hits.Hits) > 0 { for _, hit := range res.Hits.Hits { var doc map[string]interface{} err := json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } if strings.Contains(util.ObjToString(doc["business_scope"]), "招投标代理") { //存入新表 insert := map[string]interface{}{ "company_name": doc["company_name"], "business_scope": doc["business_scope"], "employee_name": doc["employee_name"], "company_phone": doc["company_phone"], } err = MgoB.InsertOrUpdate("qfw", "wcc_2024_beijing_dailijigou", insert) if err != nil { log.Println("error", doc["id"]) } } //sWinner := util.ObjToString(doc["s_winner"]) //winners := strings.Split(sWinner, ",") //for _, v := range winners { // insert := doc // insert["s_winner"] = v // //存入新表 // err = MgoB.InsertOrUpdate("qfw", "wcc_2024_pingdingshan", insert) // if err != nil { // log.Println("error", doc["id"]) // } //} // 处理查询结果 //area := util.ObjToString(doc["area"]) //areas := []string{"北京", "上海", "广东", "江苏", "浙江"} //if !IsInStringArray(area, areas) { // continue //} //projectName := util.ObjToString(doc["projectname"]) //if strings.Contains(projectName, "非政府") { // continue //} //buyerclass := util.ObjToString(doc["buyerclass"]) //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" { // continue //} ////存入新表 //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc) //if err != nil { // log.Println("error", doc["id"]) //} } total = total + len(res.Hits.Hits) scrollID = res.ScrollId res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx) log.Println("current count:", total) if err != nil { if err == io.EOF { // 滚动到最后一批数据,退出循环 break } log.Println("滚动搜索失败:", err, res) break // 处理错误时退出循环 } } // 在循环外调用 ClearScroll _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx) if err != nil { log.Printf("清理滚动搜索失败:%s", err) } fmt.Println("结束~~~~~~~~~~~~~~~") } // getQyLimitData 获取qyxy 条件数据 func getQyLimitData() { //url := "http://172.17.4.184:19908" url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "qyxy" //索引名称 // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } // 构建查询 query := elastic.NewBoolQuery(). Must(elastic.NewMatchQuery("company_area", "河南")). Must(elastic.NewMatchQuery("company_status", "存续")). MustNot(elastic.NewMatchQuery("company_type", "个体工商户")) // 执行查询 searchResult, err := client.Search().Size(50). Index(index). Query(query). Do(context.Background()) if err != nil { log.Fatalf("Error executing search: %s", err) } // 本地数据库 MgoB := &mongodb.MongodbSim{ MongodbAddr: "127.0.0.1:27017", Size: 10, DbName: "wcc", } MgoB.InitPool() for _, hit := range searchResult.Hits.Hits { var doc map[string]interface{} err := json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } MgoB.SaveByOriID("wcc_henan_0428", doc) } } // getTidb 获取tidb 数据 func getTidb() { MgoB := &mongodb.MongodbSim{ MongodbAddr: "172.17.189.140:27080", //MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", //Direct: true, } MgoB.InitPool() //tidb username := "datascbi" password := "Da#Bi20221111SC" //host := "127.0.0.1:4001" host := "172.17.162.25:4000" database := "global_common_data" dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database) // 连接到数据库 db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{}) if err != nil { log.Println("Failed to connect to database:", err) return } fmt.Println("Connected to the database!") defer util.Catch() sess := MgoB.GetMgoConn() defer MgoB.DestoryMongoConn(sess) it := sess.DB("qfw").C("wcc_2024_beijing_dailijigou").Find(nil).Select(nil).Iter() fmt.Println("taskRun 开始") count := 0 for tmp := make(map[string]interface{}); it.Next(&tmp); count++ { if count%10000 == 0 { log.Println("current:", count) } companyName := util.ObjToString(tmp["company_name"]) var baseInfo EnterpriseBaseInfo db.Where(&EnterpriseBaseInfo{Name: companyName}).First(&baseInfo) if baseInfo.ID > 0 { insert := map[string]interface{}{ "company_name": companyName, "name_id": baseInfo.NameID, "business_scope": tmp["business_scope"], } MgoB.InsertOrUpdate("qfw", "wcc_beijing_daili_bidding", insert) } } log.Println("over") } // getEntInfo 获取法人库数据 func getEntInfo() { url := "http://172.17.4.184:19908" //url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "ent_info" //索引名称 // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } query := elastic.NewBoolQuery(). //北京,天津,河北,上海,江苏,浙江,安徽 //Must(elastic.NewMatchQuery("company_name", "医院")). //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")). Must(elastic.NewExistsQuery("tag_labels")) //Must(rangeQuery) ctx := context.Background() //开始滚动搜索 scrollID := "" scroll := "10m" searchSource := elastic.NewSearchSource(). Query(query). Size(10000). Sort("_doc", true) //升序排序 //Sort("_doc", false) //降序排序 searchService := client.Scroll(index). Size(10000). Scroll(scroll). SearchSource(searchSource) res, err := searchService.Do(ctx) if err != nil { if err == io.EOF { fmt.Println("没有数据") } else { panic(err) } } //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源 fmt.Println("总数是:", res.TotalHits()) total := 0 for len(res.Hits.Hits) > 0 { for _, hit := range res.Hits.Hits { var doc map[string]interface{} err = json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } name := util.ObjToString(doc["company_name"]) updateData := make(map[string]interface{}) if tag_labels, ok := doc["tag_labels"].([]interface{}); ok { updateData["main_label"] = tag_labels[0] _, err = client.Update(). Index(index). Id(util.ObjToString(doc["id"])). Doc(updateData). Do(context.Background()) if err != nil { log.Println("更新失败", name, tag_labels, err) } } } total = total + len(res.Hits.Hits) scrollID = res.ScrollId res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx) log.Println("current count:", total) if err != nil { if err == io.EOF { // 滚动到最后一批数据,退出循环 break } log.Println("滚动搜索失败:", err, res) break // 处理错误时退出循环 } } // 在循环外调用 ClearScroll _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx) if err != nil { log.Printf("清理滚动搜索失败:%s", err) } fmt.Println("结束~~~~~~~~~~~~~~~") } // getBuyerData 获取采购单位数据 func getBuyerData() { //key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i" //model := "glm-4-air" url := "http://172.17.4.184:19908" //url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "buyer" //索引名称 // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } MgoB := &mongodb.MongodbSim{ MongodbAddr: "172.17.189.140:27080", //MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", //Direct: true, } MgoB.InitPool() //query := elastic.NewBoolQuery(). // //北京,天津,河北,上海,江苏,浙江,安徽 // Must(elastic.NewMatchQuery("company_name", "医院")). // //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")). // Must(elastic.NewTermsQuery("tag_labels", "学校", "教育")) //Must(rangeQuery) ctx := context.Background() //开始滚动搜索 scrollID := "" scroll := "10m" searchSource := elastic.NewSearchSource(). //Query(query). Size(10000). Sort("_doc", true) //升序排序 //Sort("_doc", false) //降序排序 searchService := client.Scroll(index). Size(10000). Scroll(scroll). SearchSource(searchSource) res, err := searchService.Do(ctx) if err != nil { if err == io.EOF { fmt.Println("没有数据") } else { panic(err) } } //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源 fmt.Println("总数是:", res.TotalHits()) total := 0 for len(res.Hits.Hits) > 0 { for _, hit := range res.Hits.Hits { var doc map[string]interface{} err = json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } //name := util.ObjToString(doc["buyer_name"]) //ra := ZpAI(key, model, name) //if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) { // doc["national_top"] = ra["label1"] // doc["main_label"] = ra["label1"] //} //if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) { // doc["national_sub"] = ra["label2"] //} //if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) { // doc["national_subsub"] = ra["label3"] //} MgoB.Save("ent_info_buyer", doc) //time.Sleep(time.Microsecond) } total = total + len(res.Hits.Hits) scrollID = res.ScrollId res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx) log.Println("current count:", total) if err != nil { if err == io.EOF { // 滚动到最后一批数据,退出循环 break } log.Println("滚动搜索失败:", err, res) break // 处理错误时退出循环 } } // 在循环外调用 ClearScroll _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx) if err != nil { log.Printf("清理滚动搜索失败:%s", err) } fmt.Println("结束~~~~~~~~~~~~~~~") } // mgoBidding mgoBidding 数据 func mgoBidding() { MgoB := &mongodb.MongodbSim{ MongodbAddr: "172.17.189.140:27080", //MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", //Direct: true, } MgoB.InitPool() sess := MgoB.GetMgoConn() defer MgoB.DestoryMongoConn(sess) //181 凭安库 MgoQY := &mongodb.MongodbSim{ MongodbAddr: "172.17.4.181:27001", //MongodbAddr: "127.0.0.1:27001", DbName: "mixdata", Size: 10, UserName: "", Password: "", //Direct: true, } MgoQY.InitPool() where := map[string]interface{}{ "qy_flag": 1, } query := sess.DB("qfw").C("ent_info_buyer").Find(where).Select(map[string]interface{}{ "contenthtml": 0}).Iter() count := 0 key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i" model := "glm-4-air" ch := make(chan bool, 10) wg := &sync.WaitGroup{} for tmp := make(map[string]interface{}); query.Next(tmp); count++ { if count%100 == 0 { log.Println("current:", count, tmp["name"]) } //存在就不再调用大模型 //if _, ok := tmp["national_top"]; ok { // continue //} if utf8.RuneCountInString(util.ObjToString(tmp["name"])) < 4 { continue } ch <- true wg.Add(1) go func(tmp map[string]interface{}) { defer func() { <-ch wg.Done() }() // biddingID := mongodb.BsonIdToSId(tmp["_id"]) update := make(map[string]interface{}) name := util.ObjToString(tmp["name"]) where2 := map[string]interface{}{ "company_name": name, } data, _ := MgoQY.FindOne("company_base", where2) businessScope := util.ObjToString((*data)["business_scope"]) ra := ZpAI1(key, model, name, businessScope) if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) { //update["national_top"] = ra["label1"] //update["main_label"] = ra["label1"] update["label1"] = ra["label1"] } if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) { //update["national_sub"] = ra["label2"] update["label2"] = ra["label2"] } if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) { //update["national_subsub"] = ra["label3"] update["label3"] = ra["label3"] } if len(update) > 0 { MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update}) } }(tmp) tmp = map[string]interface{}{} } wg.Wait() log.Println("over 22222222222") //log.Println("开始第二轮迭代") //for tmp := make(map[string]interface{}); query.Next(tmp); count++ { // if _, ok := tmp["national_top"]; ok { // continue // } // biddingID := mongodb.BsonIdToSId(tmp["_id"]) // name := util.ObjToString(tmp["name"]) // update := make(map[string]interface{}) // ra := ZpAI(key, model, name) // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) { // update["national_top"] = ra["label1"] // update["main_label"] = ra["label1"] // } // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) { // update["national_sub"] = ra["label2"] // } // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) { // update["national_subsub"] = ra["label3"] // } // if count%1000 == 0 { // log.Println("current", count, name, ra["label1"], ra["label2"]) // } // // if len(update) > 0 { // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update}) // } // //time.Sleep(time.Microsecond) //} // //log.Println("开始第3轮迭代") //for tmp := make(map[string]interface{}); query.Next(tmp); count++ { // if _, ok := tmp["national_top"]; ok { // continue // } // biddingID := mongodb.BsonIdToSId(tmp["_id"]) // name := util.ObjToString(tmp["name"]) // update := make(map[string]interface{}) // ra := ZpAI(key, model, name) // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) { // update["national_top"] = ra["label1"] // update["main_label"] = ra["label1"] // } // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) { // update["national_sub"] = ra["label2"] // } // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) { // update["national_subsub"] = ra["label3"] // } // if count%1000 == 0 { // log.Println("current", count, name, ra["label1"], ra["label2"]) // } // // if len(update) > 0 { // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update}) // } // //time.Sleep(time.Microsecond) //} } // fixProjectPortrait 修复画像数据重复 func fixProjectPortrait() { url := "http://172.17.4.184:19908" //url := "http://127.0.0.1:19908" username := "jybid" password := "Top2023_JEB01i@31" index := "project_portrait" //索引名称 buyerMap := make(map[string]int) buyerDatas := make(map[string][]map[string]interface{}) // 创建 Elasticsearch 客户端 client, err := elastic.NewClient( elastic.SetURL(url), elastic.SetBasicAuth(username, password), elastic.SetSniff(false), ) if err != nil { log.Fatalf("创建 Elasticsearch 客户端失败:%s", err) } query := elastic.NewBoolQuery() //query.Must(elastic.NewMatchQuery("business_scope", "招投标代理")) query.Must(elastic.NewTermQuery("class", "情报_安防")) ctx := context.Background() //开始滚动搜索 scrollID := "" scroll := "10m" searchSource := elastic.NewSearchSource(). Query(query). Size(10000). //Sort("_doc", true) //升序排序 Sort("_doc", false) //降序排序 searchService := client.Scroll(index). Size(10000). Scroll(scroll). SearchSource(searchSource) res, err := searchService.Do(ctx) if err != nil { if err == io.EOF { fmt.Println("没有数据") } else { panic(err) } } //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源 fmt.Println("project_portrait 总数是:", res.TotalHits()) MgoB := &mongodb.MongodbSim{ MongodbAddr: "172.17.189.140:27080", //MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", //Direct: true, } MgoB.InitPool() //wher := map[string]interface{}{ // "_id": mongodb.StringTOBsonId("66faf189bf905908d4a252d6"), //} //MgoB.Delete("project_portrait", wher) // //return total := 0 for len(res.Hits.Hits) > 0 { for _, hit := range res.Hits.Hits { var doc map[string]interface{} err := json.Unmarshal(hit.Source, &doc) if err != nil { log.Printf("解析文档失败:%s", err) continue } buyerName := util.ObjToString(doc["buyer"]) buyerMap[buyerName]++ buyerArr := buyerDatas[buyerName] buyerArr = append(buyerArr, doc) buyerDatas[buyerName] = buyerArr } total = total + len(res.Hits.Hits) scrollID = res.ScrollId res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx) log.Println("current count:", total) if err != nil { if err == io.EOF { // 滚动到最后一批数据,退出循环 break } log.Println("滚动搜索失败:", err, res) break // 处理错误时退出循环 } } // 在循环外调用 ClearScroll _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx) if err != nil { log.Printf("清理滚动搜索失败:%s", err) } fmt.Println("迭代结束~~~~~~~~~~~~~~~") su := 0 for k, v := range buyerMap { su++ if su%1000 == 0 { log.Println("su", su) } if v > 1 { buyerName := k buyerArr := buyerDatas[buyerName] doc := buyerArr[0] doc["_id"] = mongodb.StringTOBsonId(util.ObjToString(doc["id"])) MgoB.SaveByOriID("project_portrait_1030_test", doc) for kk, vv := range buyerArr { id := util.ObjToString(vv["id"]) where := map[string]interface{}{ "_id": mongodb.StringTOBsonId(util.ObjToString(doc["id"])), } MgoB.Delete("project_portrait", where) if kk > 0 { client.Delete().Index(index).Id(id).Do(context.Background()) } } } } } // updateMgoEntInfoBuyer updateMgoEntInfoBuyer func updateMgoEntInfoBuyer() { MgoB := &mongodb.MongodbSim{ MongodbAddr: "172.17.189.140:27080", //MongodbAddr: "127.0.0.1:27083", Size: 10, DbName: "qfw", UserName: "SJZY_RWbid_ES", Password: "SJZY@B4i4D5e6S", //Direct: true, } MgoB.InitPool() //181 凭安库 MgoQY := &mongodb.MongodbSim{ MongodbAddr: "172.17.4.181:27001", //MongodbAddr: "127.0.0.1:27001", DbName: "mixdata", Size: 10, UserName: "", Password: "", //Direct: true, } MgoQY.InitPool() sess := MgoB.GetMgoConn() defer MgoB.DestoryMongoConn(sess) query := sess.DB("qfw").C("ent_info_buyer").Find(nil).Select(map[string]interface{}{ "contenthtml": 0}).Iter() count := 0 for tmp := make(map[string]interface{}); query.Next(tmp); count++ { if count%1000 == 0 { log.Println("current:", count, tmp["name"]) } name := util.ObjToString(tmp["name"]) where := map[string]interface{}{ "company_name": name, } id := mongodb.BsonIdToSId(tmp["_id"]) data, _ := MgoQY.FindOne("company_base", where) if data != nil && len(*data) > 0 { update := map[string]interface{}{ "qy_flag": 1, "use_flag": (*data)["use_flag"], "company_type": (*data)["company_type"], "company_status": (*data)["company_status"], "credit_no": (*data)["credit_no"], "business_scope": (*data)["business_scope"], } MgoB.UpdateById("ent_info_buyer", id, map[string]interface{}{"$set": update}) } } }