123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301 |
- package main
- import (
- "log"
- mu "mfw/util"
- "qfw/util"
- "qfw/util/mongodb"
- "regexp"
- "sync"
- "gopkg.in/mgo.v2/bson"
- )
const (
	// ProjectCache is the cache name under which each project's information
	// is stored; the cache key is the project ID.
	ProjectCache = "info"
)
// Process-wide configuration and shared state. Most of these are assigned
// in init from the values read into Sysconfig.
var (
	Sysconfig                map[string]interface{}  // settings read from the configuration file
	MongoTool                mongodb.MongodbSim      // MongoDB connection
	ExtractColl, ProjectColl string                  // extraction collection name, project collection name
	CurrentMegerTime         int64                   // time of the record that merging has currently reached
	CurrentMegerCount        int                     // running count of merged records
	MultiThread              = make(chan bool, 5)    // project-merge threads; init replaces this with a channel sized by the "thread" setting (default 5)
	AllIdsMap2               = map[string]*ID{}      // string key -> *ID; presumably keyed by info ID — TODO confirm against callers
	AllIdsMapLock            = sync.Mutex{}          // presumably guards AllIdsMap2 — TODO confirm against callers
)
- var (
- //判断是日期
- _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
- _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
- _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
- _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
- _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
- replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
- //判断带有分包、等特定词的
- pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
- //判断包含数值
- nreg1 = regexp.MustCompile("[0-9]{2,}")
- //判断包含字母
- zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
- //判断包含汉字
- hreg1 = regexp.MustCompile(`[\p{Han}]+`)
- //判断项目编号是在10以内的纯数字结构
- numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
- //存放项目名称
- mapPn = map[string]*Key{}
- //存放项目编号
- mapPc = map[string]*Key{}
- //存放采购单位
- mapPb = map[string]*Key{}
- //仅初始化使用
- compareNoPass = map[string]bool{}
- compareAB = map[string]bool{}
- compareAB2D = map[string]bool{}
- compareABD = map[string]bool{}
- compareAB2CD = map[string]bool{}
- compareABCD = map[string]bool{}
- )
- func init() {
- util.ReadConfig(&Sysconfig)
- MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
- MongoTool = mongodb.MongodbSim{
- MongodbAddr: Sysconfig["mongodbServers"].(string),
- Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
- DbName: Sysconfig["mongodbName"].(string),
- }
- MongoTool.InitPool()
- ExtractColl = Sysconfig["extractColl"].(string)
- ProjectColl = Sysconfig["projectColl"].(string)
- udpport, _ := Sysconfig["udpport"].(string)
- udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
- udpclient.Listen(processUdpMsg)
- log.Println("Udp服务监听", udpport)
- //---不能通过
- vm := []string{"C", "D"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm[j] + vm[k]
- compareNoPass[key] = true
- //fmt.Println(key)
- }
- }
- }
- //fmt.Println("-------------------")
- //三个元素一致 [AB][AB][AB],分值最高
- vm = []string{"A", "B"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm[j] + vm[k]
- compareAB[key] = true
- //fmt.Println(key)
- }
- }
- }
- //fmt.Println("-------------------", len(compareAB))
- //---至少两个一致,其他可能不存在
- //[AB][AB][ABD]
- //[AB][ABD][AB]
- vm = []string{"A", "B"}
- vm2 := []string{"A", "B", "D"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 3; k++ {
- key := vm[i] + vm[j] + vm2[k]
- if !compareAB[key] {
- compareAB2D[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- for i := 0; i < 2; i++ {
- for j := 0; j < 3; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm2[j] + vm[k]
- if !compareAB[key] {
- compareAB2D[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- //fmt.Println("-------------------", len(compareAB2D))
- //---至少一个一致,其他可能不存在
- //[ABD][ABD][ABD] //已经删除DDD
- vm = []string{"A", "B", "D"}
- for i := 0; i < 3; i++ {
- for j := 0; j < 3; j++ {
- for k := 0; k < 3; k++ {
- key := vm[i] + vm[j] + vm[k]
- if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
- compareABD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- //fmt.Println("-------------------", len(compareABD))
- //[AB][ABCD][AB]
- //[AB][AB][ABCD]
- vm = []string{"A", "B"}
- vm2 = []string{"A", "B", "C", "D"}
- for i := 0; i < 2; i++ {
- for j := 0; j < 4; j++ {
- for k := 0; k < 2; k++ {
- key := vm[i] + vm2[j] + vm[k]
- if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
- compareAB2CD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- for i := 0; i < 2; i++ {
- for j := 0; j < 2; j++ {
- for k := 0; k < 4; k++ {
- key := vm[i] + vm[j] + vm2[k]
- if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
- compareAB2CD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- //fmt.Println("-------------------", len(compareAB2CD))
- //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
- vm = []string{"A", "B", "C", "D"}
- for i := 0; i < 4; i++ {
- for j := 0; j < 4; j++ {
- for k := 0; k < 4; k++ {
- key := vm[i] + vm[j] + vm[k]
- if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
- compareABCD[key] = true
- //fmt.Println(key)
- }
- }
- }
- }
- }
- func CheckHanAndNum(str string) (b bool) {
- return nreg1.MatchString(str) && hreg1.MatchString(str)
- }
- func CheckZimuAndNum(str string) (b bool) {
- return zreg1.MatchString(str) && nreg1.MatchString(str)
- }
// KeyMap bundles a map from string to *Key with a mutex.
// Because it contains a sync.Mutex it must not be copied; use *KeyMap
// (see NewKeyMap).
type KeyMap struct {
	Lock sync.Mutex       // presumably guards Map — TODO confirm against callers
	Map  map[string]*Key
}
// ID is the value type of AllIdsMap2. It links an identifier to a
// *ProjectInfo and carries its own mutex, so it must not be copied by value.
type ID struct {
	Id       string
	Lock     sync.Mutex
	lastTime int64         // NOTE(review): meaning not visible here; presumably a timestamp — confirm
	pos      int           // NOTE(review): meaning not visible here — confirm against users
	P        *ProjectInfo  // associated project
}
// Key is the value type of the mapPn, mapPc and mapPb indexes and of
// KeyMap.Map. It holds a list of strings together with a mutex, so it must
// not be copied by value.
type Key struct {
	Arr  []string    // NOTE(review): presumably IDs sharing this key — confirm against callers
	Lock sync.Mutex
}
// IdAndLock pairs an identifier with a mutex. Because it contains a
// sync.Mutex it must not be copied by value.
type IdAndLock struct {
	Id   string
	Lock sync.Mutex
}
- func NewKeyMap() *KeyMap {
- return &KeyMap{
- Map: map[string]*Key{},
- Lock: sync.Mutex{},
- }
- }
// Info is the entity for a single tender (bidding) notice.
type Info struct {
	Id            string                 `json:"_id"`
	Href          string                 `json:"href"` // source URL
	Publishtime   int64                  `json:"publishtime"`
	Title         string                 `json:"title"`
	TopType       string                 `json:"toptype"`
	SubType       string                 `json:"subtype"`
	ProjectName   string                 `json:"projectname"`
	ProjectCode   string                 `json:"projectcode"`
	Buyer         string                 `json:"buyer"`
	Buyerperson   string                 `json:"buyerperson"`
	Buyertel      string                 `json:"buyertel"`
	Agency        string                 `json:"agency"`
	Area          string                 `json:"area"`
	City          string                 `json:"city"`
	District      string                 `json:"district"`
	HasPackage    bool                   `json:"haspackage"`
	Package       map[string]interface{} `json:"package"`
	PNum          string                 `json:"pnum"`
	Topscopeclass []string               `json:"topscopeclass"`
	Subscopeclass []string               `json:"subscopeclass"`
	Buyerclass    string                 `json:"buyerclass"`
	Bidopentime   int64                  `json:"bidopentime"`
	Budget        float64                `json:"budget"`
	Bidamount     float64                `json:"bidamount"`
	Winners       []string
	dealtype      int
	Winnerorder   []string
	PTC           string // project code extracted from the title
	pnbval        int    // how many of project name, project code and buyer are present
	LenPC         int    // length of the project code
	LenPN         int    // length of the project name
	LenPTC        int    // length of the project code extracted from the title
	// The following three fields are used during comparison, when computing
	// containment between values.
	PNBH  int // 0 = initial, positive = contains, negative = is contained
	PCBH  int
	PTCBH int
}
// ProjectInfo is the entity for a merged project.
//
// NOTE(review): only Id carries a bson tag; the other fields carry json
// tags. Confirm that the storage layer derives the intended field names for
// them.
type ProjectInfo struct {
	Id            bson.ObjectId          `bson:"_id"`
	FirstTime     int64                  `json:"firsttime"` // earliest time of the project
	LastTime      int64                  `json:"lasttime"`  // latest time of the project
	Ids           []string               `json:"ids"`
	Topscopeclass []string               `json:"topscopeclass"`
	Subscopeclass []string               `json:"subscopeclass"` // sub-industry classification
	Winners       []string               `json:"winners"`       // winning bidders
	ProjectName   string                 `json:"projectname"`   // project name
	ProjectCode   string                 `json:"projectcode"`   // project code, unique (purely numeric codes carry less weight)
	Buyer         string                 `json:"buyer"`         // buyer, unique
	MPN           []string               `json:"mpn"`           // extra project names left over after merging
	MPC           []string               `json:"mpc"`           // extra project codes left over after merging
	Buyerperson   string                 `json:"buyerperson"`   // buyer contact person
	Buyertel      string                 `json:"buyertel"`      // buyer contact phone
	Agency        string                 `json:"agency"`        // agency
	Area          string                 `json:"area"`          // region
	City          string                 `json:"city"`          // city
	District      string                 `json:"district"`      // district / county
	HasPackage    bool                   `json:"haspackage"`    // whether the project has sub-packages
	Package       map[string]interface{} `json:"package"`       // sub-package object used for comparison
	Buyerclass    string                 `json:"buyerclass"`    // buyer classification
	Bidopentime   int64                  `json:"bidopentime"`   // bid opening time
	Zbtime        int64                  `json:"zbtime"`        // tender time
	Jgtime        int64                  `json:"jgtime"`        // result (award) time
	Bidamount     float64                `json:"bidamount"`     // winning bid amount
	Budget        float64                `json:"budget"`        // budget
	Winnerorder   []string               `json:"winnerorder"`   // winning candidates
	score         int
	comStr        string
}
|