1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186 |
- package main
- import (
- "context"
- "encoding/json"
- "fmt"
- "github.com/olivere/elastic/v7"
- "gorm.io/driver/mysql"
- "gorm.io/gorm"
- "io"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "log"
- "strings"
- "sync"
- "unicode/utf8"
- )
- func main() {
- /**
- getProjectData click 是一起使用的,统计获取中标企业信息
- */
- //getProjectDataFromEs() //1.拉取项目中标成交数据
- //click() //2.处理项目数据,写入clickhouse
- //click2()
- //dealData()
- //getProject()
- //getQyLimitData()
- getBiddingData()
- //getQyxytData()
- //getTidb()
- //getEntInfo() //法人库数据
- //getBuyerData()
- //mgoBidding()
- //log.Println("开启第二轮")
- //mgoBidding()
- //updateMgoEntInfoBuyer()
- //getZhiMa()
- //log.Println("over ------------------ over")
- //fixProjectPortrait()
- //
- //ClickhouseData() //gorm 操作Clickhouse;gorm 对Clickhouse的bitmap兼容性不行,放弃
- //dealClickhouse() //clickhouse-go 操作
- //testUpdateBitmap() //测试环境测试更新Clickhouse bitmap字段
- ///-------//
- //updateHrefByEs()
- log.Println("over ------------------ over")
- }
- // getBiddingData 获取标讯数据
- func getBiddingData() {
- //url := "http://172.17.4.184:19908"
- url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "bidding" //索引名称
- //index := "projectset" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- //85 抽取库
- //Mgo := &mongodb.MongodbSim{
- // //MongodbAddr: "127.0.0.1:27080",
- // MongodbAddr: "172.17.4.85:27080",
- // DbName: "top",
- // Size: 10,
- // //Direct: true,
- //}
- //Mgo.InitPool()
- MgoB := &mongodb.MongodbSim{
- //MongodbAddr: "172.17.189.140:27080",
- MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- Direct: true,
- }
- MgoB.InitPool()
- //2023年01-01 2023-10-01,,1-3季度
- //2024-1 - 2024-4;1704038400-1711900800
- //2023-10-1 2024-1-1;1696089600-1704038400
- //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市")
- //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1696089600).Lt(1704038400)
- rangeQuery := elastic.NewRangeQuery("publishtime").Gte("1733760000").Lt("1734451200")
- query := elastic.NewBoolQuery().
- //Must(rangeQuery).
- Must(elastic.NewTermQuery("site", "中国招标与采购网")).Must(rangeQuery)
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // //Must(elastic.NewTermQuery("area", "北京市")).
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
- // Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- delete(doc, "filetext")
- delete(doc, "detail")
- //存入新表
- err = MgoB.InsertOrUpdate("qfw", "wcc_bidding_test_1218", doc)
- if err != nil {
- log.Println("error", doc["id"])
- }
- // 处理查询结果
- //area := util.ObjToString(doc["area"])
- //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
- //if !IsInStringArray(area, areas) {
- // continue
- //}
- //projectName := util.ObjToString(doc["projectname"])
- //if strings.Contains(projectName, "非政府") {
- // continue
- //}
- //buyerclass := util.ObjToString(doc["buyerclass"])
- //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
- // continue
- //}
- ////存入新表
- //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
- //if err != nil {
- // log.Println("error", doc["id"])
- //}
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getProjectDataFromEs 获取项目 中标成交数据
- func getProjectDataFromEs() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "projectset" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- //85 抽取库
- Mgo := &mongodb.MongodbSim{
- //MongodbAddr: "127.0.0.1:27080",
- MongodbAddr: "172.17.4.85:27080",
- DbName: "top",
- Size: 10,
- //Direct: true,
- }
- Mgo.InitPool()
- //MgoB := &mongodb.MongodbSim{
- // MongodbAddr: "172.17.189.140:27080",
- // //MongodbAddr: "127.0.0.1:27083",
- // Size: 10,
- // DbName: "qfw",
- // UserName: "SJZY_RWbid_ES",
- // Password: "SJZY@B4i4D5e6S",
- // //Direct: true,
- //}
- //MgoB.InitPool()
- //2023年01-01 2023-10-01,,1-3季度
- //2024-1 - 2024-4;1704038400-1711900800
- //2023-10-1 2024-1-1;1696089600-1704038400
- //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市")
- rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1711900800).Lt(1719763200) //2024年4-7月
- query := elastic.NewBoolQuery().
- //Must(areaTermsQuery).
- Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- Must(rangeQuery)
- //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // //Must(elastic.NewTermQuery("area", "北京市")).
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
- // Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- delete(doc, "filetext")
- delete(doc, "detail")
- sWinner := util.ObjToString(doc["s_winner"])
- winners := strings.Split(sWinner, ",")
- for _, v := range winners {
- insert := doc
- insert["s_winner"] = v
- //存入新表
- err = Mgo.InsertOrUpdate("top", "wcc_allcity_2024Q2", insert)
- if err != nil {
- log.Println("error", doc["id"])
- }
- }
- // 处理查询结果
- //area := util.ObjToString(doc["area"])
- //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
- //if !IsInStringArray(area, areas) {
- // continue
- //}
- //projectName := util.ObjToString(doc["projectname"])
- //if strings.Contains(projectName, "非政府") {
- // continue
- //}
- //buyerclass := util.ObjToString(doc["buyerclass"])
- //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
- // continue
- //}
- ////存入新表
- //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
- //if err != nil {
- // log.Println("error", doc["id"])
- //}
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getQyxytData 获取企业数据
- func getQyxytData() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "qyxy" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- //85 抽取库
- //Mgo := &mongodb.MongodbSim{
- // //MongodbAddr: "127.0.0.1:27080",
- // MongodbAddr: "172.17.4.85:27080",
- // DbName: "top",
- // Size: 10,
- // //Direct: true,
- //}
- //Mgo.InitPool()
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //2023年01-01 2023-10-01,,1-3季度
- //2024-1 - 2024-4;1704038400-1711900800
- //2023-10-1 2024-1-1;1696089600-1704038400
- //城市范围
- //areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市")
- //rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400)
- //query := elastic.NewBoolQuery().
- // Must(areaTermsQuery).
- // Must(rangeQuery)
- //---------------------------//
- query := elastic.NewBoolQuery()
- query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
- query.Must(elastic.NewTermQuery("company_city", "北京市"))
- //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // //Must(elastic.NewTermQuery("area", "北京市")).
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
- // Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- if strings.Contains(util.ObjToString(doc["business_scope"]), "招投标代理") {
- //存入新表
- insert := map[string]interface{}{
- "company_name": doc["company_name"],
- "business_scope": doc["business_scope"],
- "employee_name": doc["employee_name"],
- "company_phone": doc["company_phone"],
- }
- err = MgoB.InsertOrUpdate("qfw", "wcc_2024_beijing_dailijigou", insert)
- if err != nil {
- log.Println("error", doc["id"])
- }
- }
- //sWinner := util.ObjToString(doc["s_winner"])
- //winners := strings.Split(sWinner, ",")
- //for _, v := range winners {
- // insert := doc
- // insert["s_winner"] = v
- // //存入新表
- // err = MgoB.InsertOrUpdate("qfw", "wcc_2024_pingdingshan", insert)
- // if err != nil {
- // log.Println("error", doc["id"])
- // }
- //}
- // 处理查询结果
- //area := util.ObjToString(doc["area"])
- //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
- //if !IsInStringArray(area, areas) {
- // continue
- //}
- //projectName := util.ObjToString(doc["projectname"])
- //if strings.Contains(projectName, "非政府") {
- // continue
- //}
- //buyerclass := util.ObjToString(doc["buyerclass"])
- //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
- // continue
- //}
- ////存入新表
- //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
- //if err != nil {
- // log.Println("error", doc["id"])
- //}
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getQyLimitData 获取qyxy 条件数据
- func getQyLimitData() {
- //url := "http://172.17.4.184:19908"
- url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "qyxy" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- // 构建查询
- query := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery("company_area", "河南")).
- Must(elastic.NewMatchQuery("company_status", "存续")).
- MustNot(elastic.NewMatchQuery("company_type", "个体工商户"))
- // 执行查询
- searchResult, err := client.Search().Size(50).
- Index(index).
- Query(query).
- Do(context.Background())
- if err != nil {
- log.Fatalf("Error executing search: %s", err)
- }
- // 本地数据库
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "127.0.0.1:27017",
- Size: 10,
- DbName: "wcc",
- }
- MgoB.InitPool()
- for _, hit := range searchResult.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- MgoB.SaveByOriID("wcc_henan_0428", doc)
- }
- }
- // getTidb 获取tidb 数据
- func getTidb() {
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //tidb
- username := "datascbi"
- password := "Da#Bi20221111SC"
- //host := "127.0.0.1:4001"
- host := "172.17.162.25:4000"
- database := "global_common_data"
- dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
- // 连接到数据库
- db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
- if err != nil {
- log.Println("Failed to connect to database:", err)
- return
- }
- fmt.Println("Connected to the database!")
- defer util.Catch()
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- it := sess.DB("qfw").C("wcc_2024_beijing_dailijigou").Find(nil).Select(nil).Iter()
- fmt.Println("taskRun 开始")
- count := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
- if count%10000 == 0 {
- log.Println("current:", count)
- }
- companyName := util.ObjToString(tmp["company_name"])
- var baseInfo EnterpriseBaseInfo
- db.Where(&EnterpriseBaseInfo{Name: companyName}).First(&baseInfo)
- if baseInfo.ID > 0 {
- insert := map[string]interface{}{
- "company_name": companyName,
- "name_id": baseInfo.NameID,
- "business_scope": tmp["business_scope"],
- }
- MgoB.InsertOrUpdate("qfw", "wcc_beijing_daili_bidding", insert)
- }
- }
- log.Println("over")
- }
- // getEntInfo 获取法人库数据
- func getEntInfo() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "ent_info" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- query := elastic.NewBoolQuery().
- //北京,天津,河北,上海,江苏,浙江,安徽
- //Must(elastic.NewMatchQuery("company_name", "医院")).
- //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- Must(elastic.NewExistsQuery("tag_labels"))
- //Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err = json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- name := util.ObjToString(doc["company_name"])
- updateData := make(map[string]interface{})
- if tag_labels, ok := doc["tag_labels"].([]interface{}); ok {
- updateData["main_label"] = tag_labels[0]
- _, err = client.Update().
- Index(index).
- Id(util.ObjToString(doc["id"])).
- Doc(updateData).
- Do(context.Background())
- if err != nil {
- log.Println("更新失败", name, tag_labels, err)
- }
- }
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getBuyerData 获取采购单位数据
- func getBuyerData() {
- //key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
- //model := "glm-4-air"
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "buyer" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // Must(elastic.NewMatchQuery("company_name", "医院")).
- // //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("tag_labels", "学校", "教育"))
- //Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- //Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err = json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- //name := util.ObjToString(doc["buyer_name"])
- //ra := ZpAI(key, model, name)
- //if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- // doc["national_top"] = ra["label1"]
- // doc["main_label"] = ra["label1"]
- //}
- //if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- // doc["national_sub"] = ra["label2"]
- //}
- //if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- // doc["national_subsub"] = ra["label3"]
- //}
- MgoB.Save("ent_info_buyer", doc)
- //time.Sleep(time.Microsecond)
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // mgoBidding mgoBidding 数据
- func mgoBidding() {
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- //181 凭安库
- MgoQY := &mongodb.MongodbSim{
- MongodbAddr: "172.17.4.181:27001",
- //MongodbAddr: "127.0.0.1:27001",
- DbName: "mixdata",
- Size: 10,
- UserName: "",
- Password: "",
- //Direct: true,
- }
- MgoQY.InitPool()
- where := map[string]interface{}{
- "qy_flag": 1,
- }
- query := sess.DB("qfw").C("ent_info_buyer").Find(where).Select(map[string]interface{}{
- "contenthtml": 0}).Iter()
- count := 0
- key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
- model := "glm-4-air"
- ch := make(chan bool, 10)
- wg := &sync.WaitGroup{}
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%100 == 0 {
- log.Println("current:", count, tmp["name"])
- }
- //存在就不再调用大模型
- //if _, ok := tmp["national_top"]; ok {
- // continue
- //}
- if utf8.RuneCountInString(util.ObjToString(tmp["name"])) < 4 {
- continue
- }
- ch <- true
- wg.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- //
- biddingID := mongodb.BsonIdToSId(tmp["_id"])
- update := make(map[string]interface{})
- name := util.ObjToString(tmp["name"])
- where2 := map[string]interface{}{
- "company_name": name,
- }
- data, _ := MgoQY.FindOne("company_base", where2)
- businessScope := util.ObjToString((*data)["business_scope"])
- ra := ZpAI1(key, model, name, businessScope)
- if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- //update["national_top"] = ra["label1"]
- //update["main_label"] = ra["label1"]
- update["label1"] = ra["label1"]
- }
- if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- //update["national_sub"] = ra["label2"]
- update["label2"] = ra["label2"]
- }
- if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- //update["national_subsub"] = ra["label3"]
- update["label3"] = ra["label3"]
- }
- if len(update) > 0 {
- MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
- }
- }(tmp)
- tmp = map[string]interface{}{}
- }
- wg.Wait()
- log.Println("over 22222222222")
- //log.Println("开始第二轮迭代")
- //for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- // if _, ok := tmp["national_top"]; ok {
- // continue
- // }
- // biddingID := mongodb.BsonIdToSId(tmp["_id"])
- // name := util.ObjToString(tmp["name"])
- // update := make(map[string]interface{})
- // ra := ZpAI(key, model, name)
- // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- // update["national_top"] = ra["label1"]
- // update["main_label"] = ra["label1"]
- // }
- // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- // update["national_sub"] = ra["label2"]
- // }
- // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- // update["national_subsub"] = ra["label3"]
- // }
- // if count%1000 == 0 {
- // log.Println("current", count, name, ra["label1"], ra["label2"])
- // }
- //
- // if len(update) > 0 {
- // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
- // }
- // //time.Sleep(time.Microsecond)
- //}
- //
- //log.Println("开始第3轮迭代")
- //for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- // if _, ok := tmp["national_top"]; ok {
- // continue
- // }
- // biddingID := mongodb.BsonIdToSId(tmp["_id"])
- // name := util.ObjToString(tmp["name"])
- // update := make(map[string]interface{})
- // ra := ZpAI(key, model, name)
- // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- // update["national_top"] = ra["label1"]
- // update["main_label"] = ra["label1"]
- // }
- // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- // update["national_sub"] = ra["label2"]
- // }
- // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- // update["national_subsub"] = ra["label3"]
- // }
- // if count%1000 == 0 {
- // log.Println("current", count, name, ra["label1"], ra["label2"])
- // }
- //
- // if len(update) > 0 {
- // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
- // }
- // //time.Sleep(time.Microsecond)
- //}
- }
- // fixProjectPortrait 修复画像数据重复
- func fixProjectPortrait() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "project_portrait" //索引名称
- buyerMap := make(map[string]int)
- buyerDatas := make(map[string][]map[string]interface{})
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- query := elastic.NewBoolQuery()
- //query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
- query.Must(elastic.NewTermQuery("class", "情报_安防"))
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- //Sort("_doc", true) //升序排序
- Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("project_portrait 总数是:", res.TotalHits())
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //wher := map[string]interface{}{
- // "_id": mongodb.StringTOBsonId("66faf189bf905908d4a252d6"),
- //}
- //MgoB.Delete("project_portrait", wher)
- //
- //return
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- buyerName := util.ObjToString(doc["buyer"])
- buyerMap[buyerName]++
- buyerArr := buyerDatas[buyerName]
- buyerArr = append(buyerArr, doc)
- buyerDatas[buyerName] = buyerArr
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("迭代结束~~~~~~~~~~~~~~~")
- su := 0
- for k, v := range buyerMap {
- su++
- if su%1000 == 0 {
- log.Println("su", su)
- }
- if v > 1 {
- buyerName := k
- buyerArr := buyerDatas[buyerName]
- doc := buyerArr[0]
- doc["_id"] = mongodb.StringTOBsonId(util.ObjToString(doc["id"]))
- MgoB.SaveByOriID("project_portrait_1030_test", doc)
- for kk, vv := range buyerArr {
- id := util.ObjToString(vv["id"])
- where := map[string]interface{}{
- "_id": mongodb.StringTOBsonId(util.ObjToString(doc["id"])),
- }
- MgoB.Delete("project_portrait", where)
- if kk > 0 {
- client.Delete().Index(index).Id(id).Do(context.Background())
- }
- }
- }
- }
- }
- // updateMgoEntInfoBuyer updateMgoEntInfoBuyer
- func updateMgoEntInfoBuyer() {
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //181 凭安库
- MgoQY := &mongodb.MongodbSim{
- MongodbAddr: "172.17.4.181:27001",
- //MongodbAddr: "127.0.0.1:27001",
- DbName: "mixdata",
- Size: 10,
- UserName: "",
- Password: "",
- //Direct: true,
- }
- MgoQY.InitPool()
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- query := sess.DB("qfw").C("ent_info_buyer").Find(nil).Select(map[string]interface{}{
- "contenthtml": 0}).Iter()
- count := 0
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Println("current:", count, tmp["name"])
- }
- name := util.ObjToString(tmp["name"])
- where := map[string]interface{}{
- "company_name": name,
- }
- id := mongodb.BsonIdToSId(tmp["_id"])
- data, _ := MgoQY.FindOne("company_base", where)
- if data != nil && len(*data) > 0 {
- update := map[string]interface{}{
- "qy_flag": 1,
- "use_flag": (*data)["use_flag"],
- "company_type": (*data)["company_type"],
- "company_status": (*data)["company_status"],
- "credit_no": (*data)["credit_no"],
- "business_scope": (*data)["business_scope"],
- }
- MgoB.UpdateById("ent_info_buyer", id, map[string]interface{}{"$set": update})
- }
- }
- }
|