package main import ( "encoding/json" // "fmt" du "jy/util" qu "qfw/util" "regexp" "strings" "sync" "time" "gopkg.in/mgo.v2/bson" ) //抽取信息映射实体类 type Info struct { Id string `json:"_id"` Href string `json:"href"` Publishtime int64 `json:"publishtime"` Title string `json:"title"` TopType string `json:"toptype"` SubType string `json:"subtype"` ProjectName string `json:"projectname"` ProjectCode string `json:"projectcode"` Buyer string `json:"buyer"` Buyerperson string `json:"buyerperson"` Buyertel string `json:"buyertel"` Agency string `json:"agency"` Area string `json:"area"` City string `json:"city"` HasPackage bool `json:"haspackage"` Package map[string]interface{} `json:"package"` PNum string `json:"pnum"` Topscopeclass []string `json:"topscopeclass"` Subscopeclass []string `json:"subscopeclass"` Winners []string dealtype int Buyerclass string `json:"buyerclass"` Bidopentime int64 `json:"bidopentime"` District string `json:"district"` Winnerorder []string PTC string pnbval int LenPC int LenPN int LenPTC int } var ( PNKeyMap, PCKeyMap, PBKeyMap = sync.Map{}, sync.Map{}, sync.Map{} pnreg = regexp.MustCompile("^(及编号[::])|(项目|采购|招标|中标|成交|结果|[_]|公告)$") titleGetPn = regexp.MustCompile("^([\\[【((]?.?(资格预审|中标|招标|延期|成交|结果|合同|失败|询价|关于对?)(公告)?[\\]】))]?([::]|关于对?)?)?(.{4,70}?(采购|工程)?(项目)?)([((【]?(第?[一二三四五六七八九1-9再]次|重新|重招|公开|[预拟]).{0,3}?[))】]?)?(招标|采购|采购计划|发包|结果|变更|更正|成交|网上(竞价)?|电子化|电子反拍|询比?价|比价|竞争性(谈判|磋商)|流标|废标|邀请|合同|验收|违规|资格|预审|中标(结果)?|延期|澄清|暂停|补遗|终止|文件|标前|征求|报建|征集|论证|谈判|拟实施|中止|需求|比选|评标(过程)?及?|磋商|未入围|进口|投标|答疑|抽签|异常|质疑|答复|回复|应答|遴选|最高|拦标|推迟|开标|取消|延迟|撤销|控制价|场外|作废|候选人|采用|实施|预|不良记录|竞买|反拍|修正|调整|简称|小型)?(公告|记录|公示|预告|通知[函书]?|意见[函书]?|[函书])?([((【].*?[))】])?$") titleGetPc = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)") titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]") titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$") numreg = regexp.MustCompile("^[0-9]$") numreg2 = regexp.MustCompile("^[0-9]+$") numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$") TitleReg = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ、\\-~至]+(子|合同|分|施工|监理|标)?[包标段][号段]?[、]?)+|((子|合同|分|施工|监理|标)?[包标段][号段]?[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ、\\-~至]+[、]?)+|(子|合同|分|施工|监理|标)?[包标段][号段]?[a-zA-Z0-9]+[\\-~-至、](子|合同|分|施工|监理|标)?[包标段][号段]?[a-zA-Z0-9]+") ) type ProjectInfo struct { Id string `json:"id"` Publistime []int64 `json:"publistime"` //多条信息的发布时间、跨度 InfoType [][]string `json:"infotype"` //多条信息内的 toptype、subtype Ids []string `json:"ids"` Topscopeclass []string `json:"topscopeclass"` Subscopeclass []string `json:"subscopeclass"` Winners []string `json:"winners"` ProjectName string `json:"projectname"` ProjectCode string `json:"projectcode"` //项目代码唯一(纯数字的权重低) Buyer string `json:"buyer"` //采购单位唯一 MPN []string `json:"mpn"` //合并后多余的项目名称 MPC []string `json:"mpc"` //合并后多余的项目编号 Buyerperson string `json:"buyerperson"` Buyertel string `json:"buyertel"` Agency string `json:"agency"` //代理机构唯一 Area string `json:"area"` //地区唯一 City string `json:"city"` //地市 District string `json:"district"` //区县 HasPackage bool `json:"haspackage"` //是否有分包 Package map[string]interface{} `json:"package"` //分包的对比对象 Buyerclass string `json:"buyerclass"` //采购单位分类 Bidopentime int64 `json:"bidopentime"` //开标时间 Winnerorder []string //中标候选人 score int comStr string } type KeyMap struct { Lock sync.Mutex Map map[string]*Key } type Key struct { Arr []string Lock sync.Mutex } type IdAndLock struct { Id string Lock sync.Mutex } func NewKeyMap() *KeyMap { return &KeyMap{ Map: map[string]*Key{}, Lock: sync.Mutex{}, } } var size, idsMapSize = 30, 100 var AllPNMap = make([]*KeyMap, size) //存储 项目名称,值为id数组 var AllPCMap = make([]*KeyMap, size) //存储 项目编号,值为id数组 var AllPTCMap = make([]*KeyMap, size) //存储 项目编号,值为id数组 var AllPBMap = make([]*KeyMap, size) //存储 采购单位,值为id数组 type ID struct { Id string Lock sync.Mutex lastTime int64 pos int } //所有项目id对象,加锁,删除等用 var AllIdsMap = make([]map[string]*ID, idsMapSize) var AllIdsMap2 = map[string]*ID{} var AllIdsMapLock = sync.Mutex{} //预定义字符串 [ABCD][ABCD][ABCD] 项目名称/编号/标题编号 var compareNoPass = map[string]bool{} var compareAB = map[string]bool{} var compareAB2D = map[string]bool{} var compareABD = map[string]bool{} var compareAB2CD = map[string]bool{} var compareABCD = map[string]bool{} func init() { for i := 0; i < size; i++ { AllPNMap[i] = NewKeyMap() AllPCMap[i] = NewKeyMap() AllPTCMap[i] = NewKeyMap() AllPBMap[i] = NewKeyMap() } for i := 0; i < idsMapSize; i++ { AllIdsMap[i] = map[string]*ID{} } //---不能通过 vm := []string{"C", "D"} for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm[j] + vm[k] compareNoPass[key] = true //fmt.Println(key) } } } //fmt.Println("-------------------") //三个元素一致 [AB][AB][AB],分值最高 vm = []string{"A", "B"} for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm[j] + vm[k] compareAB[key] = true //fmt.Println(key) } } } //fmt.Println("-------------------", len(compareAB)) //---至少两个一致,其他可能不存在 //[AB][AB][ABD] //[AB][ABD][AB] vm = []string{"A", "B"} vm2 := []string{"A", "B", "D"} for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 3; k++ { key := vm[i] + vm[j] + vm2[k] if !compareAB[key] { compareAB2D[key] = true //fmt.Println(key) } } } } for i := 0; i < 2; i++ { for j := 0; j < 3; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm2[j] + vm[k] if !compareAB[key] { compareAB2D[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareAB2D)) //---至少一个一致,其他可能不存在 //[ABD][ABD][ABD] //已经删除DDD vm = []string{"A", "B", "D"} for i := 0; i < 3; i++ { for j := 0; j < 3; j++ { for k := 0; k < 3; k++ { key := vm[i] + vm[j] + vm[k] if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] { compareABD[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareABD)) //[AB][ABCD][AB] //[AB][AB][ABCD] vm = []string{"A", "B"} vm2 = []string{"A", "B", "C", "D"} for i := 0; i < 2; i++ { for j := 0; j < 4; j++ { for k := 0; k < 2; k++ { key := vm[i] + vm2[j] + vm[k] if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] { compareAB2CD[key] = true //fmt.Println(key) } } } } for i := 0; i < 2; i++ { for j := 0; j < 2; j++ { for k := 0; k < 4; k++ { key := vm[i] + vm[j] + vm2[k] if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] { compareAB2CD[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareAB2CD)) //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论 vm = []string{"A", "B", "C", "D"} for i := 0; i < 4; i++ { for j := 0; j < 4; j++ { for k := 0; k < 4; k++ { key := vm[i] + vm[j] + vm[k] if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] { compareABCD[key] = true //fmt.Println(key) } } } } //fmt.Println("-------------------", len(compareABCD)) } //预处理信息 func PreThisInfo(tmp map[string]interface{}) *Info { bys, _ := json.Marshal(tmp) var thisinfo *Info json.Unmarshal(bys, &thisinfo) if thisinfo == nil { return nil } if len(thisinfo.Topscopeclass) == 0 { thisinfo.Topscopeclass = []string{} } if len(thisinfo.Subscopeclass) == 0 { thisinfo.Subscopeclass = []string{} } //去重 thisinfo.Subscopeclass = RemoveDup(thisinfo.Subscopeclass) if len(thisinfo.Package) > 0 { //信息是否分包 thisinfo.HasPackage = true } else if thisinfo.TopType == "结果" && TitleReg.MatchString(thisinfo.Title) { //当信息类型是结果时,并且标题中包含分包字样,找到包号,用以后面比较打分 res := TitleReg.FindAllStringSubmatch(thisinfo.Title, -1) pnum := du.PackageNumberConvert(res[0][0]) //du.Debug(pnum, res) thisinfo.PNum = pnum } // if checkInfoAlter(tmp) { // thisinfo.SubType = "变更" // } //计算中标人 winner, _ := tmp["winner"].(string) m1 := map[string]bool{} winners := []string{} if winner != "" { m1[winner] = true winners = append(winners, winner) } if thisinfo.HasPackage { packageM, _ := tmp["package"].(bson.M) for _, p := range packageM { pm, _ := p.(map[string]interface{}) pw, _ := pm["winner"].(string) if pw != "" { m1[pw] = true winners = append(winners, pw) } } } thisinfo.Winners = winners m1 = nil //中标候选人 winnerorder := []string{} if winorders, ok := tmp["winnerorder"].([]interface{}); ok { for _, wins := range winorders { if win, ok := wins.(map[string]interface{}); ok { entname := qu.ObjToString(win["entname"]) if entname != "" && len([]rune(entname)) > 6 { winnerorder = append(winnerorder, entname) } } } } thisinfo.Winnerorder = winnerorder res := titleGetPc.FindStringSubmatch(thisinfo.Title) if len(res) > 1 && len(res[1]) > 8 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) { thisinfo.PTC = res[1] thisinfo.pnbval++ } else { res = titleGetPc1.FindStringSubmatch(thisinfo.Title) if len(res) > 3 && len(res[3]) > 8 && thisinfo.ProjectCode != res[3] && !numCheckPc.MatchString(res[3]) && !_zimureg1.MatchString(res[3]) { thisinfo.PTC = res[3] thisinfo.pnbval++ } else { res = titleGetPc2.FindStringSubmatch(thisinfo.Title) if len(res) > 1 && len(res[1]) > 8 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) { thisinfo.PTC = res[1] thisinfo.pnbval++ } } } if thisinfo.ProjectName != "" { thisinfo.pnbval++ } if thisinfo.ProjectCode != "" && !_zimureg1.MatchString(thisinfo.ProjectCode) { thisinfo.pnbval++ } else { thisinfo.ProjectCode = "" } if thisinfo.Buyer != "" { thisinfo.pnbval++ } thisinfo.LenPC = len([]rune(thisinfo.ProjectCode)) thisinfo.LenPTC = len([]rune(thisinfo.PTC)) thisinfo.LenPN = len([]rune(thisinfo.ProjectName)) return thisinfo } //移除数组中重复的元素 func RemoveDup(arr []string) (newarr []string) { m1 := map[string]bool{} newarr = []string{} for _, k := range arr { if !m1[k] { m1[k] = true newarr = append(newarr, k) } } return } //阻塞同名的pb、pc、pv并发 func lockPNCBMap(thisinfo *Info) { for { //等待其他任务完成 ok := true if thisinfo.LenPN > 0 { if _, b := PNKeyMap.Load(thisinfo.ProjectName); b { ok = false } } if thisinfo.LenPC > 0 { if _, b := PCKeyMap.Load(thisinfo.ProjectCode); b { ok = false } } if thisinfo.LenPTC > 0 { if _, b := PCKeyMap.Load(thisinfo.PTC); b { ok = false } } if len(thisinfo.Buyer) > 0 { if _, b := PBKeyMap.Load(thisinfo.Buyer); b { ok = false } } if ok { break } else { time.Sleep(30 * time.Millisecond) } } } //放行 func unlockPNCBMap(thisinfo *Info) { PNKeyMap.Delete(thisinfo.ProjectName) PCKeyMap.Delete(thisinfo.ProjectCode) PCKeyMap.Delete(thisinfo.PTC) PBKeyMap.Delete(thisinfo.Buyer) } //判断信息是否是变更 func checkInfoAlter(tmp map[string]interface{} /*新信息*/) bool { toptype := qu.ObjToString(tmp["toptype"]) subtype := qu.ObjToString(tmp["subtype"]) title := qu.ObjToString(tmp["title"]) if subtype == "变更" || strings.Index(title, "变更公告") > -1 || strings.Index(title, "更正公告") > -1 { //当信息类型是变更或标题中含变更时 if toptype == "招标" { //招标的变更公告,不作处理 } else if toptype == "结果" { subtype = "变更" } } return subtype == "变更" }