package main import ( "fmt" util "jygit.jydev.jianyu360.cn/data_processing/common_utils" "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic" "jygit.jydev.jianyu360.cn/data_processing/common_utils/log" "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb" "jygit.jydev.jianyu360.cn/data_processing/common_utils/mysqldb" "os" "sync" "tieta_data/config" ) var ( MongoTool *mongodb.MongodbSim MysqlTool *mysqldb.Mysql Es *elastic.Elastic findLock sync.Mutex mapPb, mapPn, mapPc map[string]*Key AllIdsMapLock sync.Mutex AllIdsMap map[string]*ID IsProject bool //仅初始化使用 compareNoPass = map[string]bool{} compareAB = map[string]bool{} compareAB2D = map[string]bool{} compareABD = map[string]bool{} compareAB2CD = map[string]bool{} compareABCD = map[string]bool{} FieldArr = []string{"项目id", "事件id", "业务场景", "事件标题", "招标单位", "中标单位", "省份", "地市", "县(区)", "项目编号", "招标infoId", "招标单位运营商标签", "招标单位一级行业", "招标单位二级行业", "招标单位联系人", "招标单位联系电话", "招标数据更新时间", "招标信息信息类型", "预算金额(元)", "招标代理机构", "招标代理机构联系人", "招标代理机构联系电话", "公告地址", "招标剑鱼标讯链接", "中标infoId", "中标单位运营商标签", "中标单位联系人", "中标单位联系电话", "中标数据更新时间", "中标信息信息类型", "中标金额(元)", "公告地址(URL)", "剑鱼标讯链接"} FieldMap = map[string]string{ "项目id": "_id", "事件id": "sourceinfoid", "业务场景": "scenetag", "事件标题": "projectname", "招标单位": "buyer", "中标单位": "s_winner", "省份": "area", "地市": "city", "县(区)": "district", "项目编号": "projectcode", "招标infoId": "zb_infoid", "招标单位运营商标签": "buyertag", "招标单位一级行业": "tagname", "招标单位二级行业": "buyerclass", "招标单位联系人": "buyerperson", "招标单位联系电话": "buyertel", "招标数据更新时间": "zb_updatetime", "招标信息信息类型": "bidtype", "预算金额(元)": "budget", "招标代理机构": "agency", "招标代理机构联系人": "agencyperson", "招标代理机构联系电话": "agencytel", "公告地址": "zb_href", "招标剑鱼标讯链接": "zb_jybxhref", "中标infoId": "infoid", "中标单位运营商标签": "tagname_3", "中标单位联系人": "winnerperson", "中标单位联系电话": "winnertel", "中标数据更新时间": "updatetime", "中标信息信息类型": "bidstatus", "中标金额(元)": "bidamount", "公告地址(URL)": "href", "剑鱼标讯链接": "jybxhref", } FieldArr1 = []string{"信息id", "词包", "项目id", "事件id", "标文关键词", "业务场景", "招标方式", "信息类型", "二级信息类型", "信息标题", "省", "市", "县", "发布时间", "招标/项目编号", "招标单位", "招标单位行业归类", "招标单位一级标签", "招标单位二级行业", "招标预算", "招标单位联系人", "招标单位联系方式", "中标单位", "中标单位行业归类", "中标金额", "中标单位联系人", "中标单位联系方式", "代理机构", "代理联系人", "代理联系电话", "URL", "剑鱼标讯链接", "标书获取时间", "标书截止时间", "投标开始时间", "投标截止时间", "开标时间", "是否电子招标"} FieldMap1 = map[string]string{ "信息id": "_id", "词包": "rulename", "项目id": "pid", "事件id": "id", "标文关键词": "matchkey", "业务场景": "scenetag", "招标方式": "subtype", "信息类型": "toptype", "二级信息类型": "subtype", "信息标题": "title", "省": "area", "市": "city", "县": "district", "发布时间": "publishtime", "招标/项目编号": "projectcode", "招标单位": "buyer", "招标单位行业归类": "buyertag", "招标单位一级标签": "tagname", "招标单位二级行业": "buyerclass", "招标预算": "budget", "招标单位联系人": "buyerperson", "招标单位联系方式": "buyertel", "中标单位": "s_winner", "中标单位行业归类": "tagname_3", "中标金额": "bidamount", "中标单位联系人": "winnerperson", "中标单位联系方式": "winnertel", "代理机构": "agency", "代理联系人": "agencyperson", "代理联系电话": "agencytel", "URL": "href", "剑鱼标讯链接": "jybxhref", "标书获取时间": "docstarttime", "标书截止时间": "bidendtime", "投标开始时间": "bidstarttime", "投标截止时间": "bidendtime", "开标时间": "bidopentime", "是否电子招标": "bidway", } ) type ID struct { Id string Lock sync.Mutex P *Project } type Key struct { Arr []string Lock sync.Mutex } func init() { wg = sync.WaitGroup{} mapPn = make(map[string]*Key, 5000000) AllIdsMap = make(map[string]*ID, 5000000) mapPb = make(map[string]*Key, 1500000) mapPn = make(map[string]*Key, 5000000) mapPc = make(map[string]*Key, 5000000) //加载项目数据 //---不能通过 vm := []string{"C", "D"} for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm[j] + vm[k] compareNoPass[key] = true //fmt.Println(key) } } } //fmt.Println("-------------------") //三个元素一致 [AB][AB][AB],分值最高 vm = []string{"A", "B"} for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm[j] + vm[k] compareAB[key] = true //fmt.Println(key) } } } //fmt.Println("-------------------", len(compareAB)) //---至少两个一致,其他可能不存在 //[AB][AB][ABD] //[AB][ABD][AB] vm = []string{"A", "B"} vm2 := []string{"A", "B", "D"} for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 3; k++ { key := vm[i] + vm[j] + vm2[k] if !compareAB[key] { compareAB2D[key] = true //fmt.Println(key) } } } } for i := 0; i < 2; i++ { for j := 0; j < 3; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm2[j] + vm[k] if !compareAB[key] { compareAB2D[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareAB2D)) //---至少一个一致,其他可能不存在 //[ABD][ABD][ABD] //已经删除DDD vm = []string{"A", "B", "D"} for i := 0; i < 3; i++ { for j := 0; j < 3; j++ { for k := 0; k < 3; k++ { key := vm[i] + vm[j] + vm[k] if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] { compareABD[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareABD)) //[AB][ABCD][AB] //[AB][AB][ABCD] vm = []string{"A", "B"} vm2 = []string{"A", "B", "C", "D"} for i := 0; i < 2; i++ { for j := 0; j < 4; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm2[j] + vm[k] if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] { compareAB2CD[key] = true //fmt.Println(key) } } } } for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 4; k++ { key := vm[i] + vm[j] + vm2[k] if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] { compareAB2CD[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareAB2CD)) //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论 vm = []string{"A", "B", "C", "D"} for i := 0; i < 4; i++ { for j := 0; j < 4; j++ { for k := 0; k < 4; k++ { key := vm[i] + vm[j] + vm[k] if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] { compareABCD[key] = true //fmt.Println(key) } } } } } func InitEs() { util.Debug(config.Conf.DB.Es.Addr, config.Conf.DB.Es.Index) Es = &elastic.Elastic{ S_esurl: config.Conf.DB.Es.Addr, I_size: config.Conf.DB.Es.Size, Username: config.Conf.DB.Es.User, Password: config.Conf.DB.Es.Password, } Es.InitElasticSize() } func InitMysql() { dbcfg := config.Conf.DB.Mysql MysqlTool = &mysqldb.Mysql{ Address: dbcfg.Addr, DBName: dbcfg.Dbname, UserName: dbcfg.User, PassWord: dbcfg.Password, } MysqlTool.Init() } func InitLog() { logcfg := config.Conf.Log err := log.InitLog( log.Path(logcfg.LogPath), log.Level(logcfg.LogLevel), log.Compress(logcfg.Compress), log.MaxSize(logcfg.MaxSize), log.MaxBackups(logcfg.MaxBackups), log.MaxAge(logcfg.MaxAge), log.Format(logcfg.Format), ) if err != nil { fmt.Printf("InitLog failed: %v\n", err) os.Exit(1) } } type Project struct { MsgId string `json:"msg_Id"` ProjectId string `json:"projectId"` FirstTime int64 `json:"firsttime,omitempty"` //项目的最早时间 LastTime int64 `json:"lasttime,omitempty"` //项目的最后时间 ProjectName string `json:"projectname,omitempty"` //项目名称 ProjectCode string `json:"projectcode,omitempty"` //项目代码 Buyer string `json:"buyer,omitempty"` //采购单位 Agency string `json:"agency"` //代理机构 Winners string `json:"s_winner,omitempty"` //中标人 Area string `json:"area"` //地区 City string `json:"city"` //地市 District string `json:"district"` //区县 Bidamount float64 `json:"bidamount,omitempty"` //中标金额 Budget float64 `json:"budget,omitempty"` //预算 score int comStr string resVal, pjVal int Topscopeclass []string `json:"topscopeclass,omitempty"` Subscopeclass []string `json:"subscopeclass,omitempty"` //子行业分类 Buyerperson string `json:"buyerperson"` //采购联系人 Buyertel string `json:"buyertel"` //采购联系人电话 AgencyPerson string `json:"agencyperson"` AgencyTel string `json:"agencytel"` WinnerPerson string `json:"winnerperson"` WinnerTel string `json:"winnertel"` Buyerclass string `json:"buyerclass"` //采购单位分类 Bidopentime int64 `json:"bidopentime,omitempty"` //开标时间 Jgtime int64 `json:"jgtime"` //结果中标时间 Zbtime int64 `json:"zbtime"` //招标时间 MultiPackage int `json:"multipackage"` // 多包标记 MultiPackageId string `json:"multipackageid"` ZbInfoId string `json:"zb_infoid"` //招标公告唯一标识 ZbUpdateTime int64 `json:"zb_updatetime"` //招标一级公告类型 ZbTopType string `json:"zb_toptype"` //招标二级公告类型 ZbHref string `json:"zb_href"` ZbJybxHref string `json:"zb_jybxhref"` InfoId string `json:"infoid"` //中标公告唯一标识 UpdateTime string `json:"updatetime"` //中标公告发布时间 TopType string `json:"toptype"` //中标一级公告类型 SubType string `json:"subtype"` //中标二级公告类型 Href string `json:"href"` JybxHref string `json:"jybxhref"` SceneTag string `json:"scenetag"` //业务场景 BuyerTag string `json:"buyertag"` //招标单位行业归类 TagName string `json:"tagname"` //招标单位一级标签 TagName2 string `json:"tagname2"` //招标单位二级行业 TagName3 string `json:"tagname3"` //中标单位行业归类 } type Info struct { MsgId int64 `json:"msg_id"` Id string `json:"id"` RuleName string `json:"rulename"` TagName string `json:"tagname"` TagName2 string `json:"tagname2"` TagName3 string `json:"tagname3"` Href string `json:"href"` //源地址 JybxHref string `json:"jybxhref"` Publishtime int64 `json:"publishtime"` Comeintime int64 `json:"comeintime"` Title string `json:"title"` TopType string `json:"toptype"` SubType string `json:"subtype"` ProjectName string `json:"projectname"` ProjectCode string `json:"projectcode"` Buyer string `json:"buyer"` Buyerperson string `json:"buyerperson"` Buyertel string `json:"buyertel"` WinnerTel string `json:"winnertel"` WinnerPerson string `json:"winnerperson"` Agency string `json:"agency"` AgencyPerson string `json:"agencyperson"` AgencyTel string `json:"agencytel"` Area string `json:"area"` City string `json:"city"` District string `json:"district"` Buyerclass string `json:"buyerclass"` Budget float64 `json:"budget"` Bidamount float64 `json:"bidamount"` MultiPackage int `json:"multipackage"` MultiPackageId string `json:"multipackage_id"` Nid string `json:"id_new"` Winners []string pnbval int //项目名称、编号、采购单位存在的个数 LenPC int //项目编号长度 LenPN int //项目名称长度 LenPTC int //标题抽的项目编号长度 }