12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247 |
- package main
- import (
- "context"
- "encoding/json"
- "fmt"
- "github.com/olivere/elastic/v7"
- "gorm.io/driver/mysql"
- "gorm.io/gorm"
- "io"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "log"
- "strings"
- "sync"
- "unicode/utf8"
- )
- var (
- MgoB *mongodb.MongodbSim
- )
- func InitMgo() {
- MgoB = &mongodb.MongodbSim{
- //MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080",
- MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- Direct: true,
- }
- MgoB.InitPool()
- }
- func main() {
- InitMgo()
- getBidding2()
- //InitMgo()
- //getCountProjectWinner3()
- //CountProjectWinner()
- //getBiddingLimitData()
- //getQyxyNationToFiles()
- //exportQyxy() //导出企业数据
- //dealXlsx()
- //getQyxyNation() //导出 国标行业分类,注册资金靠前的企业
- //getQyxyNation()
- //InitMgo()
- //getDataFromFile()
- //updateXlsxDa()
- return
- /**
- getProjectData click 是一起使用的,统计获取中标企业信息
- */
- //getProjectDataFromEs() //1.拉取项目中标成交数据
- //click() //2.处理项目数据,写入clickhouse
- //click2()
- //dealData()
- //getProject()
- //getQyLimitData()
- //getBiddingData()
- //getQyxytData()
- //getTidb()
- //getEntInfo() //法人库数据
- //getBuyerData()
- //mgoBidding()
- //log.Println("开启第二轮")
- //mgoBidding()
- //updateMgoEntInfoBuyer()
- //getZhiMa()
- //log.Println("over ------------------ over")
- //fixProjectPortrait()
- //
- //ClickhouseData() //gorm 操作Clickhouse;gorm 对Clickhouse的bitmap兼容性不行,放弃
- //dealClickhouse() //clickhouse-go 操作
- //testUpdateBitmap() //测试环境测试更新Clickhouse bitmap字段
- ///-------//
- //updateHrefByEs()
- //getGD() // 获取广东企业数据
- //
- //getBidding2()
- //--------------//
- //dealYJG() // 处理姚静歌需求,处理项目数据到Clickhouse
- log.Println("over ------------------ over")
- }
- // dealYJG 处理姚静歌 、韩鸿飞 之前的需要,处理企业数据到Clickhouse
- func dealYJG() {
- getProjectDataFromEs()
- }
- // getBiddingData 获取标讯数据
- func getBiddingData() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "bidding" //索引名称
- //index := "projectset" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- //85 抽取库
- //Mgo := &mongodb.MongodbSim{
- // //MongodbAddr: "127.0.0.1:27080",
- // MongodbAddr: "172.17.4.85:27080",
- // DbName: "top",
- // Size: 10,
- // //Direct: true,
- //}
- //Mgo.InitPool()
- MgoB := &mongodb.MongodbSim{
- //MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080",
- MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //2023年01-01 2023-10-01,,1-3季度
- //2024-1 - 2024-4;1704038400-1711900800
- //2023-10-1 2024-1-1;1696089600-1704038400
- //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市")
- //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1696089600).Lt(1704038400)
- //rangeQuery := elastic.NewRangeQuery("publishtime").Gte("1640966400")
- //query := elastic.NewBoolQuery().
- // Must(rangeQuery).
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同"))
- //Must(elastic.NewTermQuery("site", "中国招标与采购网")).Must(rangeQuery)
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // //Must(elastic.NewTermQuery("area", "北京市")).sassss
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
- // Must(rangeQuery)
- //
- rangeQuery := elastic.NewRangeQuery("publishtime").Gte("1640966400")
- termsQuery := elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")
- // 综合所有条件
- query := elastic.NewBoolQuery().
- Must(rangeQuery).
- Must(termsQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- delete(doc, "filetext")
- delete(doc, "detail")
- purchasing := util.ObjToString(doc["purchasing"])
- if strings.Contains(purchasing, "新华三") || strings.Contains(purchasing, "华三") || strings.Contains(purchasing, "H3C") || strings.Contains(purchasing, "h3c") {
- //存入新表
- err = MgoB.InsertOrUpdate("qfw", "wcc_bidding_test_250219", doc)
- if err != nil {
- log.Println("error", doc["id"])
- }
- }
- // 处理查询结果
- //area := util.ObjToString(doc["area"])
- //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
- //if !IsInStringArray(area, areas) {
- // continue
- //}
- //projectName := util.ObjToString(doc["projectname"])
- //if strings.Contains(projectName, "非政府") {
- // continue
- //}
- //buyerclass := util.ObjToString(doc["buyerclass"])
- //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
- // continue
- //}
- ////存入新表
- //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
- //if err != nil {
- // log.Println("error", doc["id"])
- //}
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getProjectDataFromEs 获取项目 中标成交数据
- func getProjectDataFromEs() {
- //url := "http://172.17.4.184:19908"
- url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "projectset" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- //85 抽取库
- Mgo := &mongodb.MongodbSim{
- MongodbAddr: "127.0.0.1:27080",
- //MongodbAddr: "172.17.4.85:27080",
- DbName: "top",
- Size: 10,
- Direct: true,
- }
- Mgo.InitPool()
- //MgoB := &mongodb.MongodbSim{
- // MongodbAddr: "172.17.189.140:27080",
- // //MongodbAddr: "127.0.0.1:27083",
- // Size: 10,
- // DbName: "qfw",
- // UserName: "SJZY_RWbid_ES",
- // Password: "SJZY@B4i4D5e6S",
- // //Direct: true,
- //}
- //MgoB.InitPool()
- //2023年01-01 2023-10-01,,1-3季度
- //2024-1 - 2024-4;1704038400-1711900800
- //2023-10-1 2024-1-1;1696089600-1704038400
- //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市")
- //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1711900800).Lt(1719763200) //2024年4-7月
- //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1735660800).Lt(1743436800) //2025年1-3月;25年第一季度数据
- rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1743436800).Lt(1751299200) //2025年4.1-7.1 ;25年第二季度数据
- query := elastic.NewBoolQuery().
- //Must(areaTermsQuery).
- Must(elastic.NewTermsQuery("bidstatus", "中标", "单一", "成交", "合同")).
- Must(rangeQuery)
- //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // //Must(elastic.NewTermQuery("area", "北京市")).
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
- // Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- delete(doc, "filetext")
- delete(doc, "detail")
- sWinner := util.ObjToString(doc["s_winner"])
- winners := strings.Split(sWinner, ",")
- for _, v := range winners {
- insert := doc
- insert["s_winner"] = v
- //存入新表
- err = Mgo.InsertOrUpdate("top", "wcc_allcity_2025Q2", insert)
- if err != nil {
- log.Println("error", doc["id"])
- }
- }
- // 处理查询结果
- //area := util.ObjToString(doc["area"])
- //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
- //if !IsInStringArray(area, areas) {
- // continue
- //}
- //projectName := util.ObjToString(doc["projectname"])
- //if strings.Contains(projectName, "非政府") {
- // continue
- //}
- //buyerclass := util.ObjToString(doc["buyerclass"])
- //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
- // continue
- //}
- ////存入新表
- //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
- //if err != nil {
- // log.Println("error", doc["id"])
- //}
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getQyxytData 获取企业数据
- func getQyxytData() {
- //url := "http://172.17.4.184:19908"
- url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "qyxy" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- //85 抽取库
- //Mgo := &mongodb.MongodbSim{
- // //MongodbAddr: "127.0.0.1:27080",
- // MongodbAddr: "172.17.4.85:27080",
- // DbName: "top",
- // Size: 10,
- // //Direct: true,
- //}
- //Mgo.InitPool()
- //MgoB := &mongodb.MongodbSim{
- // MongodbAddr: "172.17.189.140:27080",
- // //MongodbAddr: "127.0.0.1:27083",
- // Size: 10,
- // DbName: "qfw",
- // UserName: "SJZY_RWbid_ES",
- // Password: "SJZY@B4i4D5e6S",
- // //Direct: true,
- //}
- //MgoB.InitPool()
- //2023年01-01 2023-10-01,,1-3季度
- //2024-1 - 2024-4;1704038400-1711900800
- //2023-10-1 2024-1-1;1696089600-1704038400
- //城市范围
- //areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市")
- //rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400)
- //query := elastic.NewBoolQuery().
- // Must(areaTermsQuery).
- // Must(rangeQuery)
- //---------------------------//
- query := elastic.NewBoolQuery()
- query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
- query.Must(elastic.NewTermQuery("company_city", "北京市"))
- //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // //Must(elastic.NewTermQuery("area", "北京市")).
- // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
- // Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- if strings.Contains(util.ObjToString(doc["business_scope"]), "招投标代理") {
- //存入新表
- insert := map[string]interface{}{
- "company_name": doc["company_name"],
- "business_scope": doc["business_scope"],
- "employee_name": doc["employee_name"],
- "company_phone": doc["company_phone"],
- }
- err = MgoB.InsertOrUpdate("qfw", "wcc_2024_beijing_dailijigou", insert)
- if err != nil {
- log.Println("error", doc["id"])
- }
- }
- //sWinner := util.ObjToString(doc["s_winner"])
- //winners := strings.Split(sWinner, ",")
- //for _, v := range winners {
- // insert := doc
- // insert["s_winner"] = v
- // //存入新表
- // err = MgoB.InsertOrUpdate("qfw", "wcc_2024_pingdingshan", insert)
- // if err != nil {
- // log.Println("error", doc["id"])
- // }
- //}
- // 处理查询结果
- //area := util.ObjToString(doc["area"])
- //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
- //if !IsInStringArray(area, areas) {
- // continue
- //}
- //projectName := util.ObjToString(doc["projectname"])
- //if strings.Contains(projectName, "非政府") {
- // continue
- //}
- //buyerclass := util.ObjToString(doc["buyerclass"])
- //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
- // continue
- //}
- ////存入新表
- //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
- //if err != nil {
- // log.Println("error", doc["id"])
- //}
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getQyLimitData 获取qyxy 条件数据
- func getQyLimitData() {
- //url := "http://172.17.4.184:19908"
- url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "qyxy" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- // 构建查询
- query := elastic.NewBoolQuery().
- Must(elastic.NewMatchQuery("company_area", "河南")).
- Must(elastic.NewMatchQuery("company_status", "存续")).
- MustNot(elastic.NewMatchQuery("company_type", "个体工商户"))
- // 执行查询
- searchResult, err := client.Search().Size(50).
- Index(index).
- Query(query).
- Do(context.Background())
- if err != nil {
- log.Fatalf("Error executing search: %s", err)
- }
- // 本地数据库
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "127.0.0.1:27017",
- Size: 10,
- DbName: "wcc",
- }
- MgoB.InitPool()
- for _, hit := range searchResult.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- MgoB.SaveByOriID("wcc_henan_0428", doc)
- }
- }
- // getTidb 获取tidb 数据
- func getTidb() {
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //tidb
- username := "datascbi"
- password := "Da#Bi20221111SC"
- //host := "127.0.0.1:4001"
- host := "172.17.162.25:4000"
- database := "global_common_data"
- dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
- // 连接到数据库
- db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
- if err != nil {
- log.Println("Failed to connect to database:", err)
- return
- }
- fmt.Println("Connected to the database!")
- defer util.Catch()
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- it := sess.DB("qfw").C("wcc_2024_beijing_dailijigou").Find(nil).Select(nil).Iter()
- fmt.Println("taskRun 开始")
- count := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
- if count%10000 == 0 {
- log.Println("current:", count)
- }
- companyName := util.ObjToString(tmp["company_name"])
- var baseInfo EnterpriseBaseInfo
- db.Where(&EnterpriseBaseInfo{Name: companyName}).First(&baseInfo)
- if baseInfo.ID > 0 {
- insert := map[string]interface{}{
- "company_name": companyName,
- "name_id": baseInfo.NameID,
- "business_scope": tmp["business_scope"],
- }
- MgoB.InsertOrUpdate("qfw", "wcc_beijing_daili_bidding", insert)
- }
- }
- log.Println("over")
- }
- // getEntInfo 获取法人库数据
- func getEntInfo() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "ent_info" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- query := elastic.NewBoolQuery().
- //北京,天津,河北,上海,江苏,浙江,安徽
- //Must(elastic.NewMatchQuery("company_name", "医院")).
- //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- Must(elastic.NewExistsQuery("tag_labels"))
- //Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err = json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- name := util.ObjToString(doc["company_name"])
- updateData := make(map[string]interface{})
- if tag_labels, ok := doc["tag_labels"].([]interface{}); ok {
- updateData["main_label"] = tag_labels[0]
- _, err = client.Update().
- Index(index).
- Id(util.ObjToString(doc["id"])).
- Doc(updateData).
- Do(context.Background())
- if err != nil {
- log.Println("更新失败", name, tag_labels, err)
- }
- }
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // getBuyerData 获取采购单位数据
- func getBuyerData() {
- //key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
- //model := "glm-4-air"
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "buyer" //索引名称
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //query := elastic.NewBoolQuery().
- // //北京,天津,河北,上海,江苏,浙江,安徽
- // Must(elastic.NewMatchQuery("company_name", "医院")).
- // //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
- // Must(elastic.NewTermsQuery("tag_labels", "学校", "教育"))
- //Must(rangeQuery)
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- //Query(query).
- Size(10000).
- Sort("_doc", true) //升序排序
- //Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("总数是:", res.TotalHits())
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err = json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- //name := util.ObjToString(doc["buyer_name"])
- //ra := ZpAI(key, model, name)
- //if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- // doc["national_top"] = ra["label1"]
- // doc["main_label"] = ra["label1"]
- //}
- //if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- // doc["national_sub"] = ra["label2"]
- //}
- //if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- // doc["national_subsub"] = ra["label3"]
- //}
- MgoB.Save("ent_info_buyer", doc)
- //time.Sleep(time.Microsecond)
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("结束~~~~~~~~~~~~~~~")
- }
- // mgoBidding mgoBidding 数据
- func mgoBidding() {
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- //181 凭安库
- MgoQY := &mongodb.MongodbSim{
- MongodbAddr: "172.17.4.181:27001",
- //MongodbAddr: "127.0.0.1:27001",
- DbName: "mixdata",
- Size: 10,
- UserName: "",
- Password: "",
- //Direct: true,
- }
- MgoQY.InitPool()
- where := map[string]interface{}{
- "qy_flag": 1,
- }
- query := sess.DB("qfw").C("ent_info_buyer").Find(where).Select(map[string]interface{}{
- "contenthtml": 0}).Iter()
- count := 0
- key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
- model := "glm-4-air"
- ch := make(chan bool, 10)
- wg := &sync.WaitGroup{}
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%100 == 0 {
- log.Println("current:", count, tmp["name"])
- }
- //存在就不再调用大模型
- //if _, ok := tmp["national_top"]; ok {
- // continue
- //}
- if utf8.RuneCountInString(util.ObjToString(tmp["name"])) < 4 {
- continue
- }
- ch <- true
- wg.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- //
- biddingID := mongodb.BsonIdToSId(tmp["_id"])
- update := make(map[string]interface{})
- name := util.ObjToString(tmp["name"])
- where2 := map[string]interface{}{
- "company_name": name,
- }
- data, _ := MgoQY.FindOne("company_base", where2)
- businessScope := util.ObjToString((*data)["business_scope"])
- ra := ZpAI1(key, model, name, businessScope)
- if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- //update["national_top"] = ra["label1"]
- //update["main_label"] = ra["label1"]
- update["label1"] = ra["label1"]
- }
- if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- //update["national_sub"] = ra["label2"]
- update["label2"] = ra["label2"]
- }
- if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- //update["national_subsub"] = ra["label3"]
- update["label3"] = ra["label3"]
- }
- if len(update) > 0 {
- MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
- }
- }(tmp)
- tmp = map[string]interface{}{}
- }
- wg.Wait()
- log.Println("over 22222222222")
- //log.Println("开始第二轮迭代")
- //for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- // if _, ok := tmp["national_top"]; ok {
- // continue
- // }
- // biddingID := mongodb.BsonIdToSId(tmp["_id"])
- // name := util.ObjToString(tmp["name"])
- // update := make(map[string]interface{})
- // ra := ZpAI(key, model, name)
- // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- // update["national_top"] = ra["label1"]
- // update["main_label"] = ra["label1"]
- // }
- // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- // update["national_sub"] = ra["label2"]
- // }
- // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- // update["national_subsub"] = ra["label3"]
- // }
- // if count%1000 == 0 {
- // log.Println("current", count, name, ra["label1"], ra["label2"])
- // }
- //
- // if len(update) > 0 {
- // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
- // }
- // //time.Sleep(time.Microsecond)
- //}
- //
- //log.Println("开始第3轮迭代")
- //for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- // if _, ok := tmp["national_top"]; ok {
- // continue
- // }
- // biddingID := mongodb.BsonIdToSId(tmp["_id"])
- // name := util.ObjToString(tmp["name"])
- // update := make(map[string]interface{})
- // ra := ZpAI(key, model, name)
- // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
- // update["national_top"] = ra["label1"]
- // update["main_label"] = ra["label1"]
- // }
- // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
- // update["national_sub"] = ra["label2"]
- // }
- // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
- // update["national_subsub"] = ra["label3"]
- // }
- // if count%1000 == 0 {
- // log.Println("current", count, name, ra["label1"], ra["label2"])
- // }
- //
- // if len(update) > 0 {
- // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
- // }
- // //time.Sleep(time.Microsecond)
- //}
- }
- // fixProjectPortrait 修复画像数据重复
- func fixProjectPortrait() {
- url := "http://172.17.4.184:19908"
- //url := "http://127.0.0.1:19908"
- username := "jybid"
- password := "Top2023_JEB01i@31"
- index := "project_portrait" //索引名称
- buyerMap := make(map[string]int)
- buyerDatas := make(map[string][]map[string]interface{})
- // 创建 Elasticsearch 客户端
- client, err := elastic.NewClient(
- elastic.SetURL(url),
- elastic.SetBasicAuth(username, password),
- elastic.SetSniff(false),
- )
- if err != nil {
- log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
- }
- query := elastic.NewBoolQuery()
- //query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
- query.Must(elastic.NewTermQuery("class", "情报_安防"))
- ctx := context.Background()
- //开始滚动搜索
- scrollID := ""
- scroll := "10m"
- searchSource := elastic.NewSearchSource().
- Query(query).
- Size(10000).
- //Sort("_doc", true) //升序排序
- Sort("_doc", false) //降序排序
- searchService := client.Scroll(index).
- Size(10000).
- Scroll(scroll).
- SearchSource(searchSource)
- res, err := searchService.Do(ctx)
- if err != nil {
- if err == io.EOF {
- fmt.Println("没有数据")
- } else {
- panic(err)
- }
- }
- //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
- fmt.Println("project_portrait 总数是:", res.TotalHits())
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //wher := map[string]interface{}{
- // "_id": mongodb.StringTOBsonId("66faf189bf905908d4a252d6"),
- //}
- //MgoB.Delete("project_portrait", wher)
- //
- //return
- total := 0
- for len(res.Hits.Hits) > 0 {
- for _, hit := range res.Hits.Hits {
- var doc map[string]interface{}
- err := json.Unmarshal(hit.Source, &doc)
- if err != nil {
- log.Printf("解析文档失败:%s", err)
- continue
- }
- buyerName := util.ObjToString(doc["buyer"])
- buyerMap[buyerName]++
- buyerArr := buyerDatas[buyerName]
- buyerArr = append(buyerArr, doc)
- buyerDatas[buyerName] = buyerArr
- }
- total = total + len(res.Hits.Hits)
- scrollID = res.ScrollId
- res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
- log.Println("current count:", total)
- if err != nil {
- if err == io.EOF {
- // 滚动到最后一批数据,退出循环
- break
- }
- log.Println("滚动搜索失败:", err, res)
- break // 处理错误时退出循环
- }
- }
- // 在循环外调用 ClearScroll
- _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
- if err != nil {
- log.Printf("清理滚动搜索失败:%s", err)
- }
- fmt.Println("迭代结束~~~~~~~~~~~~~~~")
- su := 0
- for k, v := range buyerMap {
- su++
- if su%1000 == 0 {
- log.Println("su", su)
- }
- if v > 1 {
- buyerName := k
- buyerArr := buyerDatas[buyerName]
- doc := buyerArr[0]
- doc["_id"] = mongodb.StringTOBsonId(util.ObjToString(doc["id"]))
- MgoB.SaveByOriID("project_portrait_1030_test", doc)
- for kk, vv := range buyerArr {
- id := util.ObjToString(vv["id"])
- where := map[string]interface{}{
- "_id": mongodb.StringTOBsonId(util.ObjToString(doc["id"])),
- }
- MgoB.Delete("project_portrait", where)
- if kk > 0 {
- client.Delete().Index(index).Id(id).Do(context.Background())
- }
- }
- }
- }
- }
- // updateMgoEntInfoBuyer updateMgoEntInfoBuyer
- func updateMgoEntInfoBuyer() {
- MgoB := &mongodb.MongodbSim{
- MongodbAddr: "172.17.189.140:27080",
- //MongodbAddr: "127.0.0.1:27083",
- Size: 10,
- DbName: "qfw",
- UserName: "SJZY_RWbid_ES",
- Password: "SJZY@B4i4D5e6S",
- //Direct: true,
- }
- MgoB.InitPool()
- //181 凭安库
- MgoQY := &mongodb.MongodbSim{
- MongodbAddr: "172.17.4.181:27001",
- //MongodbAddr: "127.0.0.1:27001",
- DbName: "mixdata",
- Size: 10,
- UserName: "",
- Password: "",
- //Direct: true,
- }
- MgoQY.InitPool()
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- query := sess.DB("qfw").C("ent_info_buyer").Find(nil).Select(map[string]interface{}{
- "contenthtml": 0}).Iter()
- count := 0
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%1000 == 0 {
- log.Println("current:", count, tmp["name"])
- }
- name := util.ObjToString(tmp["name"])
- where := map[string]interface{}{
- "company_name": name,
- }
- id := mongodb.BsonIdToSId(tmp["_id"])
- data, _ := MgoQY.FindOne("company_base", where)
- if data != nil && len(*data) > 0 {
- update := map[string]interface{}{
- "qy_flag": 1,
- "use_flag": (*data)["use_flag"],
- "company_type": (*data)["company_type"],
- "company_status": (*data)["company_status"],
- "credit_no": (*data)["credit_no"],
- "business_scope": (*data)["business_scope"],
- }
- MgoB.UpdateById("ent_info_buyer", id, map[string]interface{}{"$set": update})
- }
- }
- }
|