package main import ( "encoding/json" "fmt" "log" mu "mfw/util" "qfw/util" "regexp" "strings" "sync" "time" "github.com/robfig/cron" "go.mongodb.org/mongo-driver/bson/primitive" ) /** 任务入口 全量、增量合并 更新、插入,内存清理 转换成info对象 **/ //项目合并对象 type ProjectTask struct { InitMinTime int64 //最小时间,小于0的处理一次 name string thread int //线程数 //查找锁 findLock sync.Mutex wg sync.WaitGroup //map锁 AllIdsMapLock sync.Mutex //对应的id AllIdsMap map[string]*ID //采购单位、项目名称、项目编号 mapPb, mapPn, mapPc map[string]*Key //流程数据 字段相同,直接合并 mapHref map[string]string mapHrefLock sync.Mutex //站点 mapSite map[string]*Site mapSiteLock sync.Mutex //更新或新增通道 updatePool chan []map[string]interface{} //savePool chan map[string]interface{} //saveSign, updateSign chan bool //表名 coll string //当前状态是全量还是增量 currentType string //当前是跑全量还是跑增量 // clearContimes int //当前时间 currentTime int64 //保存长度 saveSize int pici int64 validTime int64 // LockPool chan *sync.Mutex // LockPoolLock sync.Mutex // m1, m23, m4 map[int]int // l1, l23, l4 map[int]*sync.Mutex Brun bool } func NewPT() *ProjectTask { p := &ProjectTask{ InitMinTime: int64(1325347200), name: "全/增量对象", thread: 4, updatePool: make(chan []map[string]interface{}, 5000), //savePool: make(chan map[string]interface{}, 2000), wg: sync.WaitGroup{}, AllIdsMap: make(map[string]*ID, 5000000), mapPb: make(map[string]*Key, 1500000), mapPn: make(map[string]*Key, 5000000), mapPc: make(map[string]*Key, 5000000), mapHref: make(map[string]string, 1500000), mapSite: make(map[string]*Site, 1000000), saveSize: 400, //saveSign: make(chan bool, 1), //updateSign: make(chan bool, 1), coll: ProjectColl, validTime: int64(util.IntAllDef(Sysconfig["validdays"], 150) * 86400), } return p } var P_QL *ProjectTask var sp = make(chan bool, 5) //初始化全量合并对象 func init() { P_QL = NewPT() log.Println(len(P_QL.updatePool)) go P_QL.updateAllQueue() go P_QL.clearMem() } func (p *ProjectTask) updateAllQueue() { arru := make([][]map[string]interface{}, p.saveSize) indexu := 0 for { select { case v := <-p.updatePool: arru[indexu] = v indexu++ if indexu == p.saveSize { sp <- true go func(arru [][]map[string]interface{}) { defer func() { <-sp }() MongoTool.UpSertBulk(p.coll, arru...) }(arru) arru = make([][]map[string]interface{}, p.saveSize) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { sp <- true go func(arru [][]map[string]interface{}) { defer func() { <-sp }() MongoTool.UpSertBulk(p.coll, arru...) }(arru[:indexu]) arru = make([][]map[string]interface{}, p.saveSize) indexu = 0 } } } } //项目合并内存更新 func (p *ProjectTask) clearMem() { c := cron.New() //在内存中保留最近6个月的信息 //跑全量时每4分钟跑一次,跑增量时400分钟跑一次 _ = c.AddFunc("50 0/15 * * * *", func() { if p.currentType == "ql" || p.clearContimes >= 60 { //跳过的次数清零 p.clearContimes = 0 //信息进入查找对比全局锁 p.findLock.Lock() //defer p.findLock.Unlock() //合并进行的任务都完成 p.wg.Wait() //遍历id //所有内存中的项目信息 p.AllIdsMapLock.Lock() p.mapHrefLock.Lock() //清除计数 clearNum := 0 for k, v := range p.AllIdsMap { if p.currentTime-v.P.LastTime > p.validTime { clearNum++ //删除id的map delete(p.AllIdsMap, k) //删除pb if v.P.Buyer != "" { ids := p.mapPb[v.P.Buyer] if ids != nil { ids.Lock.Lock() ids.Arr = deleteSlice(ids.Arr, k) if len(ids.Arr) == 0 { delete(p.mapPb, v.P.Buyer) } ids.Lock.Unlock() } } //删除mapPn for _, vn := range append([]string{v.P.ProjectName}, v.P.MPN...) { if vn != "" { ids := p.mapPn[vn] if ids != nil { ids.Lock.Lock() ids.Arr = deleteSlice(ids.Arr, k) if len(ids.Arr) == 0 { delete(p.mapPn, vn) } ids.Lock.Unlock() } } } //删除mapPc for _, vn := range append([]string{v.P.ProjectCode}, v.P.MPC...) { if vn != "" { ids := p.mapPc[vn] if ids != nil { ids.Lock.Lock() ids.Arr = deleteSlice(ids.Arr, k) if len(ids.Arr) == 0 { delete(p.mapPc, vn) } ids.Lock.Unlock() } } } for kHref, pid := range p.mapHref{ if pid == k { delete(p.mapHref, kHref) } } v = nil } } p.mapHrefLock.Unlock() p.AllIdsMapLock.Unlock() p.findLock.Unlock() log.Println("清除完成:", clearNum, len(p.AllIdsMap), len(p.mapPn), len(p.mapPc), len(p.mapPb)) } else { p.clearContimes++ } }) c.Start() select {} } //全量合并 func (p *ProjectTask) taskQl(udpInfo map[string]interface{}) { defer util.Catch() //1、检查pubilshtime索引 db, _ := udpInfo["db"].(string) if db == "" { db = MongoTool.DbName } coll, _ := udpInfo["coll"].(string) if coll == "" { coll = ExtractColl } thread := util.IntAllDef(udpInfo["thread"], 4) if thread > 0 { p.thread = thread } q, _ := udpInfo["query"].(map[string]interface{}) if q == nil { q = map[string]interface{}{} lteid, _ := udpInfo["lteid"].(string) var idmap map[string]interface{} if len(lteid) > 15 { idmap = map[string]interface{}{ "$lte": StringTOBsonId(lteid), } } gtid, _ := udpInfo["gtid"].(string) if len(gtid) > 15 { if idmap == nil { idmap = map[string]interface{}{} } idmap["$gt"] = StringTOBsonId(gtid) } if idmap != nil { q["_id"] = idmap } } //生成查询语句执行 log.Println("查询语句:", q) p.enter(db, coll, q) } //增量合并 func (p *ProjectTask) taskZl(udpInfo map[string]interface{}) { defer util.Catch() //1、检查pubilshtime索引 db, _ := udpInfo["db"].(string) if db == "" { db = MongoTool.DbName } coll, _ := udpInfo["coll"].(string) if coll == "" { coll = ExtractColl } thread := util.IntAllDef(udpInfo["thread"], 4) if thread > 0 { p.thread = thread } //开始id和结束id q, _ := udpInfo["query"].(map[string]interface{}) gtid := udpInfo["gtid"].(string) lteid := udpInfo["lteid"].(string) if q == nil { q = map[string]interface{}{ "_id": map[string]interface{}{ "$gte": StringTOBsonId(gtid), "$lte": StringTOBsonId(lteid), }, } } if q != nil { //生成查询语句执行 p.enter(db, coll, q) } if udpInfo["stop"] == nil { for i := 0; i < 5; i++ { sp <- true } for i := 0; i < 5; i++ { <-sp } log.Println("保存完成,生索引", p.pici) time.Sleep(5 * time.Second) nextNode(udpInfo, p.pici) } } //招标字段更新 func (p *ProjectTask) taskUpdateInfo(udpInfo map[string]interface{}) { defer util.Catch() db, _ := udpInfo["db"].(string) if db == "" { db = MongoTool.DbName } coll, _ := udpInfo["coll"].(string) if coll == "" { coll = ExtractColl } thread := util.IntAllDef(udpInfo["thread"], 4) if thread > 0 { p.thread = thread } q, _ := udpInfo["query"].(map[string]interface{}) gtid := udpInfo["gtid"].(string) lteid := udpInfo["lteid"].(string) if q == nil { q = map[string]interface{}{ "_id": map[string]interface{}{ "$gt": StringTOBsonId(gtid), "$lte": StringTOBsonId(lteid), }, "is_m": 1, } } log.Println("查询语句:", q) p.enter(db, coll, q) } func StringTOBsonId(id string) primitive.ObjectID { objectId, _ := primitive.ObjectIDFromHex(id) return objectId } //通知下个节点nextNode func nextNode(mapInfo map[string]interface{}, pici int64) { mapInfo["stype"] = "project" mapInfo["query"] = map[string]interface{}{ "pici": pici, } for n, to := range toaddr { key := fmt.Sprintf("%d-%s-%d", pici, "project", n) mapInfo["key"] = key datas, _ := json.Marshal(mapInfo) node := &udpNode{datas, to, time.Now().Unix(), 0} udptaskmap.Store(key, node) _ = udpclient.WriteUdp(datas, mu.OP_TYPE_DATA, to) } } func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) { defer util.Catch() defer func() { p.Brun = false }() p.Brun = true count, taskcount := 0, 0 pool := make(chan bool, p.thread) log.Println("start project", q) sess := MongoTool.GetMgoConn() defer MongoTool.DestoryMongoConn(sess) infoPool := make(chan map[string]interface{}, 2000) over := make(chan bool) go func() { L: for { select { case tmp := <-infoPool: pool <- true taskcount++ go func(tmp map[string]interface{}) { defer func() { <-pool }() if util.IntAll(tmp["repeat"]) == 0 { p.fillInPlace(tmp) if p.currentType == "updateInfo" { //招标信息更改合并 p.updateJudge(tmp) }else { //普通合并 p.CommonMerge(tmp) } }else { //信息错误,进行更新 } }(tmp) case <-over: break L } } }() ms := sess.DB(db).C(coll).Find(q).Sort("publishtime") if Sysconfig["hints"] != nil { ms.Hint(Sysconfig["hints"]) } query := ms.Iter() // var lastid interface{} L: for { select { case <-queryClose: log.Println("receive interrupt sign") log.Println("close iter..", lastid, query.Cursor.Close(nil)) queryCloseOver <- true break L default: tmp := make(map[string]interface{}) if query.Next(&tmp) { lastid = tmp["_id"] if count%2000 == 0 { log.Println("current", count, lastid) } infoPool <- tmp count++ } else { break L } } } time.Sleep(5 * time.Second) over <- true //阻塞 for n := 0; n < p.thread; n++ { pool <- true } log.Println("所有线程执行完成...", count, taskcount) } var ( //从标题获取项目编号 titleGetPc = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)") titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]") titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$") //项目编号过滤 pcReplace = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-;{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$|[ __]+|((采购)?项目|采购(项目)?)$") //项目编号只是数字或只是字母4个以下 StrOrNum = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$") //纯数字或纯字母 StrOrNum2 = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$") ) func (p *ProjectTask) CommonMerge(tmp map[string]interface{}) { info := ParseInfo(tmp) if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) { if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok { if jsonData != nil && jsonData["projecthref"] != nil { //projectHref字段合并 proHref := jsonData["projecthref"].(string) tmp["projecthref"] = proHref p.mapHrefLock.Lock() defer p.mapHrefLock.Unlock() p.AllIdsMapLock.Lock() defer p.AllIdsMapLock.Unlock() if p.mapHref[proHref] != "" { pid := p.mapHref[proHref] comparePro := p.AllIdsMap[pid].P _, ex := CompareStatus(comparePro, info) p.UpdateProject(tmp, info, comparePro, -1, "AAAAAAAAAA", ex) } else { id, p1 := p.NewProject(tmp, info) p.mapHref[proHref] = id p.AllIdsMap[id] = &ID{Id: id, P: p1} } }else { //项目合并 p.currentTime = info.Publishtime p.startProjectMerge(info, tmp) } }else { //项目合并 p.currentTime = info.Publishtime p.startProjectMerge(info, tmp) } } } func ParseInfo(tmp map[string]interface{}) (info *Info) { bys, _ := json.Marshal(tmp) var thisinfo *Info _ = json.Unmarshal(bys, &thisinfo) if thisinfo == nil { return nil } if len(thisinfo.Topscopeclass) == 0 { thisinfo.Topscopeclass = []string{} } if len(thisinfo.Subscopeclass) == 0 { thisinfo.Subscopeclass = []string{} } //从标题中查找项目编号 res := titleGetPc.FindStringSubmatch(thisinfo.Title) if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) { thisinfo.PTC = res[1] } else { res = titleGetPc1.FindStringSubmatch(thisinfo.Title) if len(res) > 3 && len(res[3]) > 6 && thisinfo.ProjectCode != res[3] && !numCheckPc.MatchString(res[3]) && !_zimureg1.MatchString(res[3]) { thisinfo.PTC = res[3] } else { res = titleGetPc2.FindStringSubmatch(thisinfo.Title) if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) { thisinfo.PTC = res[1] } } } if thisinfo.ProjectName != "" && len([]rune(thisinfo.ProjectName)) > 0 { thisinfo.ProjectName = pcReplace.ReplaceAllString(thisinfo.ProjectName, "") if thisinfo.ProjectName != "" { thisinfo.pnbval++ } } if thisinfo.ProjectCode != "" || thisinfo.PTC != "" { if thisinfo.ProjectCode != "" { thisinfo.ProjectCode = pcReplace.ReplaceAllString(thisinfo.ProjectCode, "") if thisinfo.pnbval == 0 && len([]rune(thisinfo.ProjectCode)) < 5 { thisinfo.ProjectCode = StrOrNum.ReplaceAllString(thisinfo.ProjectCode, "") } } else { thisinfo.PTC = pcReplace.ReplaceAllString(thisinfo.PTC, "") if thisinfo.pnbval == 0 && len([]rune(thisinfo.PTC)) < 5 { thisinfo.PTC = StrOrNum.ReplaceAllString(thisinfo.PTC, "") } } if thisinfo.ProjectCode != "" || thisinfo.PTC != "" { thisinfo.pnbval++ } } if thisinfo.ProjectCode == thisinfo.PTC || strings.Index(thisinfo.ProjectCode, thisinfo.PTC) > -1 { thisinfo.PTC = "" } if thisinfo.Buyer != "" && len([]rune(thisinfo.Buyer)) > 2 { thisinfo.pnbval++ } else { thisinfo.Buyer = "" } //winners整理 winner, _ := tmp["winner"].(string) m1 := map[string]bool{} winners := []string{} if winner != "" { m1[winner] = true winners = append(winners, winner) } packageM, _ := tmp["package"].(map[string]interface{}) if packageM != nil { thisinfo.HasPackage = true for _, p := range packageM { pm, _ := p.(map[string]interface{}) pw, _ := pm["winner"].(string) if pw != "" && !m1[pw] { m1[pw] = true winners = append(winners, pw) } } } thisinfo.Winners = winners thisinfo.LenPC = len([]rune(thisinfo.ProjectCode)) thisinfo.LenPTC = len([]rune(thisinfo.PTC)) thisinfo.LenPN = len([]rune(thisinfo.ProjectName)) return thisinfo } func (p *ProjectTask) updateJudge(tmp map[string]interface{}) { index := -1 pInfoId := "" info := ParseInfo(tmp) p.AllIdsMapLock.Lock() F: for k, ID := range p.AllIdsMap { for i, id := range ID.P.Ids{ if info.Id == id { pInfoId = k index = i break F } } } p.AllIdsMapLock.Unlock() //未找到招标信息 if index == -1 { if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) { p.currentTime = info.Publishtime p.startProjectMerge(info, tmp) } }else { tmpPro := MongoTool.FindById(ProjectColl, pInfoId) infoList := []interface{}(tmpPro["list"].(primitive.A)) infoMap := infoList[index].(map[string]interface{}) modifyMap, f := modifyEle(infoMap, tmp) //projecthref字段 jsonData := tmp["jsondata"].(map[string]interface{}) if jsonData != nil && jsonData["projecthref"] != "" { proHref := jsonData["projecthref"].(string) tmp["projecthref"] = proHref p.mapHrefLock.Lock() defer p.mapHrefLock.Unlock() pid := p.mapHref[proHref] if pid == pInfoId { p.modifyUpdate(pInfoId, index, info, tmp, tmpPro, modifyMap) return } } if f { //合并、修改 log.Println("合并修改更新", "----------------------------") p.mergeAndModify(pInfoId, index, info, tmp, tmpPro, modifyMap) }else { //修改 log.Println("修改更新", "----------------------------") p.modifyUpdate(pInfoId, index, info, tmp, tmpPro, modifyMap) } } } var Elements = []string{ "projectname", "projectcode", "agency", "budget", "bidamount", "buyerperson", "area", "city", "publishtime", } /** 判断修改的字段是否是影响合并流程的要素字段 */ func modifyEle(tmpPro map[string]interface{}, tmp map[string]interface{}) (map[string]interface{}, bool) { modifyMap := map[string]interface{}{} for k, _ := range tmpPro { for k1, _ := range tmp { if k == k1 && tmpPro[k] != tmp[k1] { modifyMap[k] = tmp[k1] break } } } for k, _ := range modifyMap { for _, str := range Elements{ if k == str { return modifyMap, true } } } delete(modifyMap, "_id") return modifyMap, false } //补全位置信息 func (p *ProjectTask) fillInPlace(tmp map[string]interface{}) { area := tmp["area"].(string) city := tmp["city"].(string) district := tmp["district"].(string) if area != "" && city != "" && district != "" { return } p.mapSiteLock.Lock() defer p.mapSiteLock.Unlock() tmpSite := tmp["site"].(string) site := p.mapSite[tmpSite] if site != nil { if area != "" { if area == "全国" { tmp["area"] = site.Area tmp["city"] = site.City tmp["district"] = site.District return } if area != site.Area { return }else { if city == site.City { if district == "" { tmp["district"] = site.District return } }else if city == "" { tmp["city"] = site.City tmp["district"] = site.District return } } }else { tmp["area"] = site.Area tmp["city"] = site.City tmp["district"] = site.District return } } } //从数组中删除元素 func deleteSlice(arr []string, v string) []string { for k, v1 := range arr { if v1 == v { return append(arr[:k], arr[k+1:]...) } } return arr }