123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384 |
- package main
- import (
- "fmt"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mysqldb"
- "os"
- "sync"
- "tieta_data/config"
- )
- var (
- MongoTool *mongodb.MongodbSim
- MysqlTool *mysqldb.Mysql
- Es *elastic.Elastic
- findLock sync.Mutex
- mapPb, mapPn, mapPc map[string]*Key
- AllIdsMapLock sync.Mutex
- AllIdsMap map[string]*ID
- IsProject bool
- //仅初始化使用
- compareNoPass = map[string]bool{}
- compareAB = map[string]bool{}
- compareAB2D = map[string]bool{}
- compareABD = map[string]bool{}
- compareAB2CD = map[string]bool{}
- compareABCD = map[string]bool{}
- FieldArr = []string{"项目id", "事件id", "业务场景", "事件标题", "招标单位", "中标单位", "省份", "地市", "县(区)", "项目编号", "招标infoId",
- "招标单位运营商标签", "招标单位一级行业", "招标单位二级行业", "招标单位联系人", "招标单位联系电话", "招标数据更新时间", "招标信息信息类型",
- "预算金额(元)", "招标代理机构", "招标代理机构联系人", "招标代理机构联系电话", "公告地址", "招标剑鱼标讯链接", "中标infoId", "中标单位运营商标签",
- "中标单位联系人", "中标单位联系电话", "中标数据更新时间", "中标信息信息类型", "中标金额(元)", "公告地址(URL)", "剑鱼标讯链接"}
- FieldMap = map[string]string{
- "项目id": "_id",
- "事件id": "sourceinfoid",
- "业务场景": "scenetag",
- "事件标题": "projectname",
- "招标单位": "buyer",
- "中标单位": "s_winner",
- "省份": "area",
- "地市": "city",
- "县(区)": "district",
- "项目编号": "projectcode",
- "招标infoId": "zb_infoid",
- "招标单位运营商标签": "buyertag",
- "招标单位一级行业": "tagname",
- "招标单位二级行业": "buyerclass",
- "招标单位联系人": "buyerperson",
- "招标单位联系电话": "buyertel",
- "招标数据更新时间": "zb_updatetime",
- "招标信息信息类型": "bidtype",
- "预算金额(元)": "budget",
- "招标代理机构": "agency",
- "招标代理机构联系人": "agencyperson",
- "招标代理机构联系电话": "agencytel",
- "公告地址": "zb_href",
- "招标剑鱼标讯链接": "zb_jybxhref",
- "中标infoId": "infoid",
- "中标单位运营商标签": "tagname_3",
- "中标单位联系人": "winnerperson",
- "中标单位联系电话": "winnertel",
- "中标数据更新时间": "updatetime",
- "中标信息信息类型": "bidstatus",
- "中标金额(元)": "bidamount",
- "公告地址(URL)": "href",
- "剑鱼标讯链接": "jybxhref",
- }
- FieldArr1 = []string{"信息id", "词包", "项目id", "事件id", "标文关键词", "业务场景", "招标方式", "信息类型", "二级信息类型",
- "信息标题", "省", "市", "县", "发布时间", "招标/项目编号", "招标单位", "招标单位行业归类", "招标单位一级标签",
- "招标单位二级行业", "招标预算", "招标单位联系人", "招标单位联系方式", "中标单位", "中标单位行业归类", "中标金额",
- "中标单位联系人", "中标单位联系方式", "代理机构", "代理联系人", "代理联系电话", "URL", "剑鱼标讯链接", "标书获取时间", "标书截止时间",
- "投标开始时间", "投标截止时间", "开标时间", "是否电子招标"}
- FieldMap1 = map[string]string{
- "信息id": "_id",
- "词包": "rulename",
- "项目id": "pid",
- "事件id": "id",
- "标文关键词": "matchkey",
- "业务场景": "scenetag",
- "招标方式": "subtype",
- "信息类型": "toptype",
- "二级信息类型": "subtype",
- "信息标题": "title",
- "省": "area",
- "市": "city",
- "县": "district",
- "发布时间": "publishtime",
- "招标/项目编号": "projectcode",
- "招标单位": "buyer",
- "招标单位行业归类": "buyertag",
- "招标单位一级标签": "tagname",
- "招标单位二级行业": "buyerclass",
- "招标预算": "budget",
- "招标单位联系人": "buyerperson",
- "招标单位联系方式": "buyertel",
- "中标单位": "s_winner",
- "中标单位行业归类": "tagname_3",
- "中标金额": "bidamount",
- "中标单位联系人": "winnerperson",
- "中标单位联系方式": "winnertel",
- "代理机构": "agency",
- "代理联系人": "agencyperson",
- "代理联系电话": "agencytel",
- "URL": "href",
- "剑鱼标讯链接": "jybxhref",
- "标书获取时间": "docstarttime",
- "标书截止时间": "bidendtime",
- "投标开始时间": "bidstarttime",
- "投标截止时间": "bidendtime",
- "开标时间": "bidopentime",
- "是否电子招标": "bidway",
- }
- )
- type ID struct {
- Id string
- Lock sync.Mutex
- P *Project
- }
// Key is the value type stored in the mapPb, mapPn and mapPc indexes. It holds
// a list of strings together with its own mutex.
type Key struct {
	Arr  []string   // collected string entries
	Lock sync.Mutex // per-entry lock; handle Key through a pointer so it is never copied
}
- func init() {
- wg = sync.WaitGroup{}
- mapPn = make(map[string]*Key, 5000000)
- AllIdsMap = make(map[string]*ID, 5000000)
- mapPb = make(map[string]*Key, 1500000)
- mapPn = make(map[string]*Key, 5000000)
- mapPc = make(map[string]*Key, 5000000)
- //加载项目数据
- //---不能通过
- vm := []string{"C", "D"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm[j] + vm[k]
- compareNoPass[key] = true
- //fmt.Println(key)
- }
- }
- }
- //fmt.Println("-------------------")
- //三个元素一致 [AB][AB][AB],分值最高
- vm = []string{"A", "B"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm[j] + vm[k]
- compareAB[key] = true
- //fmt.Println(key)
- }
- }
- }
- //fmt.Println("-------------------", len(compareAB))
- //---至少两个一致,其他可能不存在
- //[AB][AB][ABD]
- //[AB][ABD][AB]
- vm = []string{"A", "B"}
- vm2 := []string{"A", "B", "D"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 3; k++ {
- key := vm[i] + vm[j] + vm2[k]
- if !compareAB[key] {
- compareAB2D[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- for i := 0; i < 2; i++ {
- for j := 0; j < 3; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm2[j] + vm[k]
- if !compareAB[key] {
- compareAB2D[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- //fmt.Println("-------------------", len(compareAB2D))
- //---至少一个一致,其他可能不存在
- //[ABD][ABD][ABD] //已经删除DDD
- vm = []string{"A", "B", "D"}
- for i := 0; i < 3; i++ {
- for j := 0; j < 3; j++ {
- for k := 0; k < 3; k++ {
- key := vm[i] + vm[j] + vm[k]
- if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
- compareABD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- //fmt.Println("-------------------", len(compareABD))
- //[AB][ABCD][AB]
- //[AB][AB][ABCD]
- vm = []string{"A", "B"}
- vm2 = []string{"A", "B", "C", "D"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 4; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm2[j] + vm[k]
- if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
- compareAB2CD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 4; k++ {
- key := vm[i] + vm[j] + vm2[k]
- if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
- compareAB2CD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- //fmt.Println("-------------------", len(compareAB2CD))
- //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
- vm = []string{"A", "B", "C", "D"}
- for i := 0; i < 4; i++ {
- for j := 0; j < 4; j++ {
- for k := 0; k < 4; k++ {
- key := vm[i] + vm[j] + vm[k]
- if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
- compareABCD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- }
- func InitEs() {
- util.Debug(config.Conf.DB.Es.Addr, config.Conf.DB.Es.Index)
- Es = &elastic.Elastic{
- S_esurl: config.Conf.DB.Es.Addr,
- I_size: config.Conf.DB.Es.Size,
- Username: config.Conf.DB.Es.User,
- Password: config.Conf.DB.Es.Password,
- }
- Es.InitElasticSize()
- }
- func InitMysql() {
- dbcfg := config.Conf.DB.Mysql
- MysqlTool = &mysqldb.Mysql{
- Address: dbcfg.Addr,
- DBName: dbcfg.Dbname,
- UserName: dbcfg.User,
- PassWord: dbcfg.Password,
- }
- MysqlTool.Init()
- }
- func InitLog() {
- logcfg := config.Conf.Log
- err := log.InitLog(
- log.Path(logcfg.LogPath),
- log.Level(logcfg.LogLevel),
- log.Compress(logcfg.Compress),
- log.MaxSize(logcfg.MaxSize),
- log.MaxBackups(logcfg.MaxBackups),
- log.MaxAge(logcfg.MaxAge),
- log.Format(logcfg.Format),
- )
- if err != nil {
- fmt.Printf("InitLog failed: %v\n", err)
- os.Exit(1)
- }
- }
// Project is a project-level record. Fields whose tag has no omitempty are
// always written when the struct is marshalled to JSON, even when empty.
//
// NOTE(review): MsgId is a string tagged "msg_Id" here, whereas Info.MsgId is
// an int64 tagged "msg_id"; likewise "multipackageid" here versus
// "multipackage_id" on Info. Confirm these differences are intended.
type Project struct {
	// Identity.
	MsgId     string `json:"msg_Id"`
	ProjectId string `json:"projectId"`

	// Core project attributes.
	FirstTime   int64   `json:"firsttime,omitempty"`   // earliest time of the project
	LastTime    int64   `json:"lasttime,omitempty"`    // latest time of the project
	ProjectName string  `json:"projectname,omitempty"` // project name
	ProjectCode string  `json:"projectcode,omitempty"` // project code
	Buyer       string  `json:"buyer,omitempty"`       // purchasing unit
	Agency      string  `json:"agency"`                // agency
	Winners     string  `json:"s_winner,omitempty"`    // winning bidder
	Area        string  `json:"area"`                  // region
	City        string  `json:"city"`                  // city
	District    string  `json:"district"`              // district / county
	Bidamount   float64 `json:"bidamount,omitempty"`   // winning bid amount
	Budget      float64 `json:"budget,omitempty"`      // budget

	// Unexported working fields; encoding/json never reads or writes them.
	score  int
	comStr string
	resVal int
	pjVal  int

	// Classification.
	Topscopeclass []string `json:"topscopeclass,omitempty"`
	Subscopeclass []string `json:"subscopeclass,omitempty"` // sub-industry classification

	// Contacts.
	Buyerperson  string `json:"buyerperson"` // buyer contact person
	Buyertel     string `json:"buyertel"`    // buyer contact phone
	AgencyPerson string `json:"agencyperson"`
	AgencyTel    string `json:"agencytel"`
	WinnerPerson string `json:"winnerperson"`
	WinnerTel    string `json:"winnertel"`
	Buyerclass   string `json:"buyerclass"` // buyer classification

	// Timeline and packaging.
	Bidopentime    int64  `json:"bidopentime,omitempty"` // bid opening time
	Jgtime         int64  `json:"jgtime"`                // award (result) time
	Zbtime         int64  `json:"zbtime"`                // tender time
	MultiPackage   int    `json:"multipackage"`          // multi-package flag
	MultiPackageId string `json:"multipackageid"`

	// Tender notice.
	ZbInfoId     string `json:"zb_infoid"`     // unique id of the tender notice
	ZbUpdateTime int64  `json:"zb_updatetime"` // tender notice time, by analogy with UpdateTime - TODO confirm
	ZbTopType    string `json:"zb_toptype"`    // tender notice type; presumably the top-level type - TODO confirm
	ZbHref       string `json:"zb_href"`
	ZbJybxHref   string `json:"zb_jybxhref"`

	// Award notice.
	InfoId     string `json:"infoid"`     // unique id of the award notice
	UpdateTime string `json:"updatetime"` // publish time of the award notice (a string, unlike ZbUpdateTime)
	TopType    string `json:"toptype"`    // award notice top-level type
	SubType    string `json:"subtype"`    // award notice second-level type
	Href       string `json:"href"`
	JybxHref   string `json:"jybxhref"`

	// Tags.
	SceneTag string `json:"scenetag"` // business scenario
	BuyerTag string `json:"buyertag"` // industry category of the tendering unit
	TagName  string `json:"tagname"`  // first-level tag of the tendering unit
	TagName2 string `json:"tagname2"` // second-level industry of the tendering unit
	TagName3 string `json:"tagname3"` // industry category of the winning unit
}
// Info is a single notice record. All tagged fields are always written when
// the struct is marshalled to JSON; Winners and the Len* fields carry no tag
// and therefore use their Go field names as JSON keys.
type Info struct {
	// Identity and tagging.
	MsgId    int64  `json:"msg_id"`
	Id       string `json:"id"`
	RuleName string `json:"rulename"`
	TagName  string `json:"tagname"`
	TagName2 string `json:"tagname2"`
	TagName3 string `json:"tagname3"`

	// Notice metadata.
	Href        string `json:"href"` // source URL
	JybxHref    string `json:"jybxhref"`
	Publishtime int64  `json:"publishtime"`
	Comeintime  int64  `json:"comeintime"`
	Title       string `json:"title"`
	TopType     string `json:"toptype"`
	SubType     string `json:"subtype"`

	// Project attributes and contacts.
	ProjectName  string `json:"projectname"`
	ProjectCode  string `json:"projectcode"`
	Buyer        string `json:"buyer"`
	Buyerperson  string `json:"buyerperson"`
	Buyertel     string `json:"buyertel"`
	WinnerTel    string `json:"winnertel"`
	WinnerPerson string `json:"winnerperson"`
	Agency       string `json:"agency"`
	AgencyPerson string `json:"agencyperson"`
	AgencyTel    string `json:"agencytel"`

	// Location and classification.
	Area       string `json:"area"`
	City       string `json:"city"`
	District   string `json:"district"`
	Buyerclass string `json:"buyerclass"`

	// Amounts and packaging.
	Budget         float64 `json:"budget"`
	Bidamount      float64 `json:"bidamount"`
	MultiPackage   int     `json:"multipackage"`
	MultiPackageId string  `json:"multipackage_id"`
	Nid            string  `json:"id_new"`

	// Untagged helper fields.
	Winners []string
	pnbval  int // how many of project name, project code and buyer are present
	LenPC   int // length of the project code
	LenPN   int // length of the project name
	LenPTC  int // length of the project code extracted from the title
}
|