package main import ( "encoding/json" "log" mu "mfw/util" "net" "qfw/util" "regexp" "strings" "sync" "time" "github.com/robfig/cron" ) /** 任务入口 全量、增量合并 更新、插入,内存清理 转换成info对象 **/ //项目合并对象 type ProjectTask struct { InitMinTime int64 //最小时间,小于0的处理一次 name string thread int //线程数 //查找锁 findLock sync.Mutex wg sync.WaitGroup //map锁 AllIdsMapLock sync.Mutex //对应的id AllIdsMap map[string]*ID //采购单位、项目名称、项目编号 mapPb, mapPn, mapPc map[string]*Key //更新或新增通道 updatePool chan []map[string]interface{} //表名 coll string //当前状态是全量还是增量 currentType string //当前是跑全量还是跑增量 // clearContimes int //当前时间 currentTime int64 //保存长度 saveSize int } func NewPT() *ProjectTask { return &ProjectTask{ InitMinTime: int64(1325347200), name: "全/增量对象", thread: 3, updatePool: make(chan []map[string]interface{}, 2000), wg: sync.WaitGroup{}, AllIdsMap: make(map[string]*ID, 5000000), mapPb: make(map[string]*Key, 5000000), mapPn: make(map[string]*Key, 5000000), mapPc: make(map[string]*Key, 5000000), saveSize: 200, coll: ProjectColl, } } var P_QL *ProjectTask //初始化全量合并对象 func init() { P_QL = NewPT() go P_QL.updateQueue() go P_QL.clearMem() } //项目保存和更新通道 func (p *ProjectTask) updateQueue() { arr := make([][]map[string]interface{}, p.saveSize) index := 0 for { select { case v := <-p.updatePool: arr[index] = v index++ if index == p.saveSize { MongoTool.UpSertBulk(p.coll, arr...) arr = make([][]map[string]interface{}, p.saveSize) index = 0 } case <-time.After(2 * time.Second): if index > 0 { MongoTool.UpSertBulk(p.coll, arr[:index]...) arr = make([][]map[string]interface{}, p.saveSize) index = 0 } } } } //项目合并内存更新 func (p *ProjectTask) clearMem() { c := cron.New() //在内存中保留最近6个月的信息 validTime := int64(6 * 30 * 86400) //跑全量时每4分钟跑一次,跑增量时400分钟跑一次 c.AddFunc("50 0/4 * * * *", func() { if p.currentType == "ql" || p.clearContimes >= 100 { //跳过的次数清零 p.clearContimes = 0 //信息进入查找对比全局锁 p.findLock.Lock() defer p.findLock.Unlock() //合并进行的任务都完成 p.wg.Wait() //遍历id //所有内存中的项目信息 p.AllIdsMapLock.Lock() defer p.AllIdsMapLock.Unlock() //清除计数 clearNum := 0 for k, v := range p.AllIdsMap { if p.currentTime-v.P.LastTime > validTime { clearNum++ //删除id的map delete(p.AllIdsMap, k) //删除pb if v.P.Buyer != "" { ids := p.mapPb[v.P.Buyer] if ids != nil { ids.Lock.Lock() ids.Arr = deleteSlice(ids.Arr, k) if len(ids.Arr) == 0 { delete(p.mapPb, v.P.Buyer) } ids.Lock.Unlock() } } //删除mapPn for _, vn := range append([]string{v.P.ProjectName}, v.P.MPN...) { if vn != "" { ids := p.mapPn[vn] if ids != nil { ids.Lock.Lock() ids.Arr = deleteSlice(ids.Arr, k) if len(ids.Arr) == 0 { delete(p.mapPn, vn) } ids.Lock.Unlock() } } } //删除mapPc for _, vn := range append([]string{v.P.ProjectCode}, v.P.MPC...) { if vn != "" { ids := p.mapPc[vn] if ids != nil { ids.Lock.Lock() ids.Arr = deleteSlice(ids.Arr, k) if len(ids.Arr) == 0 { delete(p.mapPc, vn) } ids.Lock.Unlock() } } } v = nil } } log.Println("清除完成:", clearNum, len(p.AllIdsMap)) } else { p.clearContimes++ } }) c.Start() select {} } //全量合并 func (p *ProjectTask) taskQl(udpInfo map[string]interface{}) { defer util.Catch() //1、检查pubilshtime索引 db, _ := udpInfo["db"].(string) if db == "" { db = MongoTool.DbName } coll, _ := udpInfo["coll"].(string) if coll == "" { coll = ExtractColl } sess := MongoTool.GetMgoConn() bcon := false if sess.DB(db).C(coll).EnsureIndexKey("publishtime_1", "publishtime_-1") == nil { bcon = true } else { log.Println("publishtime_1索引不存在") } MongoTool.DestoryMongoConn(sess) thread := util.IntAllDef(udpInfo["thread"], 3) if thread > 0 { p.thread = thread } if bcon { //生成查询语句执行 p.enter(db, coll, map[string]interface{}{}) } } //增量合并 func (p *ProjectTask) taskZl(udpInfo map[string]interface{}) { defer util.Catch() //1、检查pubilshtime索引 db, _ := udpInfo["db"].(string) if db == "" { db = MongoTool.DbName } coll, _ := udpInfo["coll"].(string) if coll == "" { coll = ExtractColl } thread := util.IntAllDef(udpInfo["thread"], 3) if thread > 0 { p.thread = thread } //开始id和结束id q, _ := udpInfo["query"].(map[string]interface{}) gtid := udpInfo["gtid"].(string) lteid := udpInfo["lteid"].(string) if q == nil { q = map[string]interface{}{ "_id": map[string]interface{}{ "$gt": util.StringTOBsonId(gtid), //util.StringTOBsonId(udpInfo["gtid"].(string)), "$lte": util.StringTOBsonId(lteid), //util.StringTOBsonId(udpInfo["lteid"].(string)), }, } } pici := time.Now().Unix() if q != nil { //生成查询语句执行 p.enter(db, coll, q) } nextNode(gtid, lteid, "project", pici) } //通知下个节点nextNode func nextNode(gtid, lteid, stype string, pici int64) { by, _ := json.Marshal(map[string]interface{}{ "gtid": gtid, "lteid": lteid, "stype": stype, "query": map[string]interface{}{ "pici": pici, }, }) log.Println("nextnode", string(by)) for _, v := range NextNode { if node, ok := v.(map[string]interface{}); ok { udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{ IP: net.ParseIP(node["addr"].(string)), Port: util.IntAll(node["port"]), }) } } } func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) { defer util.Catch() sess := MongoTool.GetMgoConn() defer MongoTool.DestoryMongoConn(sess) query := sess.DB(db).C(coll).Find(q).Sort("publishtime").Iter() pool := make(chan bool, p.thread) count := 0 for tmp := make(map[string]interface{}); query.Next(tmp); count++ { info := ParseInfo(tmp) if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) { pool <- true go func(info *Info, tmp map[string]interface{}) { defer func() { p.currentTime = info.Publishtime <-pool }() p.startProjectMerge(info, tmp) }(info, tmp) } else { //信息错误,进行更新 } if count%1000 == 0 { log.Println("current", count) } tmp = make(map[string]interface{}) } //阻塞 for n := 0; n < p.thread; n++ { pool <- true } log.Println("所有线程执行完成...", count) } var ( //从标题获取项目编号 titleGetPc = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)") titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]") titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$") //项目编号过滤 pcReplace = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-;{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$|[ __]+|((采购)?项目|采购(项目)?)$") //项目编号只是数字或只是字母4个以下 StrOrNum = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$") //纯数字或纯字母 StrOrNum2 = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$") ) func ParseInfo(tmp map[string]interface{}) (info *Info) { bys, _ := json.Marshal(tmp) var thisinfo *Info json.Unmarshal(bys, &thisinfo) if thisinfo == nil { return nil } if len(thisinfo.Topscopeclass) == 0 { thisinfo.Topscopeclass = []string{} } if len(thisinfo.Subscopeclass) == 0 { thisinfo.Subscopeclass = []string{} } //从标题中查找项目编号 res := titleGetPc.FindStringSubmatch(thisinfo.Title) if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) { thisinfo.PTC = res[1] } else { res = titleGetPc1.FindStringSubmatch(thisinfo.Title) if len(res) > 3 && len(res[3]) > 6 && thisinfo.ProjectCode != res[3] && !numCheckPc.MatchString(res[3]) && !_zimureg1.MatchString(res[3]) { thisinfo.PTC = res[3] } else { res = titleGetPc2.FindStringSubmatch(thisinfo.Title) if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) { thisinfo.PTC = res[1] } } } if thisinfo.ProjectName != "" && len([]rune(thisinfo.ProjectName)) > 0 { thisinfo.ProjectName = pcReplace.ReplaceAllString(thisinfo.ProjectName, "") if thisinfo.ProjectName != "" { thisinfo.pnbval++ } } if thisinfo.ProjectCode != "" || thisinfo.PTC != "" { if thisinfo.ProjectCode != "" { thisinfo.ProjectCode = pcReplace.ReplaceAllString(thisinfo.ProjectCode, "") if thisinfo.pnbval == 0 && len([]rune(thisinfo.ProjectCode)) < 5 { thisinfo.ProjectCode = StrOrNum.ReplaceAllString(thisinfo.ProjectCode, "") } } else { thisinfo.PTC = pcReplace.ReplaceAllString(thisinfo.PTC, "") if thisinfo.pnbval == 0 && len([]rune(thisinfo.PTC)) < 5 { thisinfo.PTC = StrOrNum.ReplaceAllString(thisinfo.PTC, "") } } if thisinfo.ProjectCode != "" || thisinfo.PTC != "" { thisinfo.pnbval++ } } if thisinfo.ProjectCode == thisinfo.PTC || strings.Index(thisinfo.ProjectCode, thisinfo.PTC) > -1 { thisinfo.PTC = "" } if thisinfo.Buyer != "" && len([]rune(thisinfo.Buyer)) > 2 { thisinfo.pnbval++ } else { thisinfo.Buyer = "" } //winners整理 winner, _ := tmp["winner"].(string) m1 := map[string]bool{} winners := []string{} if winner != "" { m1[winner] = true winners = append(winners, winner) } if thisinfo.HasPackage { packageM, _ := tmp["package"].(map[string]interface{}) for _, p := range packageM { pm, _ := p.(map[string]interface{}) pw, _ := pm["winner"].(string) if pw != "" { m1[pw] = true winners = append(winners, pw) } } } thisinfo.Winners = winners thisinfo.LenPC = len([]rune(thisinfo.ProjectCode)) thisinfo.LenPTC = len([]rune(thisinfo.PTC)) thisinfo.LenPN = len([]rune(thisinfo.ProjectName)) return thisinfo } //从数组中删除元素 func deleteSlice(arr []string, v string) []string { for k, v1 := range arr { if v1 == v { return append(arr[:k], arr[k+1:]...) } } return arr }