package main import ( "context" "encoding/json" "fmt" "log" qu "qfw/util" "qfw/util/redis" "regexp" . "sqlmodel" "strings" "sync" "time" mgoutil "util/mgodb" "gopkg.in/mgo.v2/bson" "github.com/cron" esv "es" "github.com/antonmedv/expr" "github.com/donnie4w/go-logger/logger" "go.mongodb.org/mongo-driver/bson/primitive" esV7 "github.com/olivere/elastic" ) func TimeTask() { StartTask() c := cron.New() //cronstr := "0 */" + fmt.Sprint(TaskTime) + " * * * ?" cronstr := "0 5 */" + fmt.Sprint(TaskTime) + " * * ?" //每TaskTime小时执行一次 c.AddFunc(cronstr, func() { StartTask() }) c.Start() } func StartTask() { GetCustomerData() } // 加载客户 func GetCustomerData() { defer qu.Catch() log.Println("Init Customer...") idRange, idRange2, ok, endTime := GetIdRange() //获取id区间 if !ok { return } logger.Debug("此次任务区间:", idRange, idRange2) //查询企业库开启推送的客户 customers, _ := MgoTag.Find("euser", map[string]interface{}{"i_push": 1, "b_delete": false}, nil, nil) for _, c := range customers { customerId := mgoutil.BsonTOStringId(c["_id"]) customer := qu.ObjToString(c["s_name"]) //客户名称 appId := qu.ObjToString(c["s_appid"]) //appid extends := qu.ObjToString(c["s_extends"]) //扩展信息 pushModel := qu.IntAll(c["i_pushmodel"]) //推送模式 dataSave := qu.ObjToString(c["s_dataSave"]) log.Println("当前客户 ", customer) cus := &Customer{} cus.SaveDataMap = map[string]map[string]interface{}{} cus.SaveDataArr = map[string]map[string]interface{}{} cus.IdRange = idRange cus.IdRanges = idRange2 cus.ID = customerId cus.Name = customer cus.PushModel = pushModel cus.AppId = appId cus.DataSave = dataSave for _, v := range strings.Split(extends, ",") { if v == "hospitalgrade" { cus.IsSearchHosp = true } else if v == "enterpise" { cus.IsSearchEnps = true } } // if projectAppidMap[appId] { start := time.Now().Unix() log.Println("加载projectId---开始") InitProjectId(appId) end := time.Now().Unix() log.Println("加载projectId---结束,耗时", end-start, "秒") } else { projectIdMap = sync.Map{} } // cus.GetTagRules() //获取客户打标签规则 cus.GetDepartments("") //获取客户信息 //PrintLog(cus) //打印查看初始化的信息 qu.Debug("customer:", cus.ID, cus.Name, cus.PushModel, cus.AppId, cus.IsTagRule, cus.IsTagRule2, cus.IsTagRule3, cus.IsSearchHosp, cus.IsSearchEnps, len(cus.TagRules), len(cus.TagRules2), len(cus.TagRules3), len(cus.Departments)) cus.GetData("") //获取数据 cus.RemoveRepeatData() //数据去重 cus.AssembelAndSaveData() //组装、保存数据 } Sysconfig.LatestId = LatestId Sysconfig.LatestTime = endTime qu.WriteSysConfig(Sysconfig) logger.Debug("定时任务结束-endId-Sysconfig.LatestTime ", endTime) } // 获取客户打标签规则 func (c *Customer) GetTagRules() { log.Println("开始加载标签规则...") defer qu.Catch() tagRules, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false}, nil, nil) if len(tagRules) > 0 { for _, tr := range tagRules { tagType := qu.ObjToString(tr["tagType"]) if tagType == "" || tagType == "1" { c.IsTagRule = true //查到打标签规则,表示打标签 TR := &TagRule{} TR.Fields = make(map[string]interface{}) TR.DepartRuleIds = make(map[string]bool) id := mgoutil.BsonTOStringId(tr["_id"]) name := qu.ObjToString(tr["s_name"]) TR.ID = id TR.Name = name TR.CustomerId = c.ID //部门规则id组 if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" { for _, drid := range strings.Split(departRuleIds, ",") { TR.DepartRuleIds[drid] = true } } //规则 if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 { TR.GetKeyAddNotKeyWord(o_list) } c.TagRules = append(c.TagRules, TR) } } } tagRules2, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false, "tagType": "2"}, nil, nil) if len(tagRules2) > 0 { c.IsTagRule2 = true //查到打标签规则,表示打标签 for _, tr := range tagRules2 { TR := &TagRule{} TR.Fields = make(map[string]interface{}) TR.DepartRuleIds = make(map[string]bool) id := mgoutil.BsonTOStringId(tr["_id"]) name := qu.ObjToString(tr["s_name"]) TR.ID = id TR.Name = name TR.CustomerId = c.ID //部门规则id组 if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" { for _, drid := range strings.Split(departRuleIds, ",") { TR.DepartRuleIds[drid] = true } } //规则 if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 { TR.GetKeyAddNotKeyWord(o_list) } c.TagRules2 = append(c.TagRules2, TR) } } tagRules3, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false, "tagType": "3"}, nil, nil) if len(tagRules3) > 0 { c.IsTagRule3 = true //查到打标签规则,表示打标签 for _, tr := range tagRules3 { TR := &TagRule{} TR.Fields = make(map[string]interface{}) TR.DepartRuleIds = make(map[string]bool) id := mgoutil.BsonTOStringId(tr["_id"]) name := qu.ObjToString(tr["s_name"]) TR.ID = id TR.Name = name TR.CustomerId = c.ID //部门规则id组 if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" { for _, drid := range strings.Split(departRuleIds, ",") { TR.DepartRuleIds[drid] = true } } //规则 if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 { TR.GetKeyAddNotKeyWord(o_list) } c.TagRules3 = append(c.TagRules3, TR) } } } // 获取部门信息 func (c *Customer) GetDepartments(stype string) { log.Println("开始获取部门信息...") defer qu.Catch() departments, _ := MgoTag.Find("euserdepart", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false}, nil, nil) if len(departments) > 0 { for _, ds := range departments { DM := &Department{} DM.DataLock = &sync.Mutex{} DM.DepartmentData = map[string][]map[string]interface{}{} DM.SaveDataMap = map[string]map[string]interface{}{} id := mgoutil.BsonTOStringId(ds["_id"]) name := qu.ObjToString(ds["s_name"]) DM.ID = id DM.Name = name DM.CustomerID = c.ID DM.GetSearchRules(c.ID, stype, c.IdRange, c.IdRanges) //获取某个部门的所有规则 c.Departments = append(c.Departments, DM) //qu.Debug("Departments---", DM.ID, DM.Name, DM.CustomerID, len(DM.Rules)) } } } // 获取数据 func (c *Customer) GetData(stype string) { log.Println("开始匹配数据...") defer qu.Catch() esConfig := Sysconfig.Es esversion := qu.ObjToString(esConfig["version"]) if esversion == "v1" { } else { esCon := esv.VarEs.(*esv.EsV7) c.EsConGetDataV7(stype, esCon) } } type MySource struct { Querys string } func (m *MySource) Source() (interface{}, error) { mp := make(map[string]interface{}) json.Unmarshal([]byte(m.Querys), &mp) return mp["query"], nil } func (c *Customer) EsConGetDataV7(stype string, esCon *esv.EsV7) { client := esCon.GetEsConn() defer esCon.DestoryEsConn(client) ctx, _ := context.WithTimeout(context.Background(), 30*time.Minute) for _, dm := range c.Departments { for _, sr := range dm.Rules { for { listLen := redis.GetInt("session", "es_status") if listLen == 0 { log.Println("es空闲!") break } else if listLen == 1 || listLen == 2 { log.Println("系统繁忙,请稍后再试 ", listLen) } time.Sleep(5 * time.Second) } //测试 // MgoDataTest(sr, dm, c) // return // ch := make(chan bool, 10) // wg := &sync.WaitGroup{} escount := Es.Count(Index, Itype, sr.EsQuery) log.Println("查询总数:", escount, "规则ID:", sr.ID, "EsQuery:", sr.EsQuery) if escount == 0 { continue } //查询条件类型转换 // var q esV7.Query //sr.EsQuery = `{"query":{"filtered":{"filter":{"bool":{"must":[{"bool":{"should":[{"terms":{"city":["上海市"]}}]}},{"terms":{"toptype":["招标","结果"]}},{"range":{"publishtime":{"gte":1588262400,"lt":1608825600}}}]}},"query":{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"terms":{"buyer":["上海城建职业学院","上海市第一人民医院","中国银联股份有限公司","上海立信会计金融学院法规处","中国东方航空股份有限公司","上海外国语大学","上海小昆山环卫服务有限公司","国家税务总局上海市税务局","中国浦东干部学院","上海市浦东新区老港镇人民政府","咪咕视讯科技有限公司","上海交通大学医学院附属新华医院","交通运输部上海打捞局","松江区体育局","复旦大学附属中山医院","上海交通大学医学院附属瑞金医院","中国科学院上海应用物理研究所"]}}]}}]}}]}}]}}],"must_not":[{"constant_score":{"filter":{"missing":{"field":"filetext"}}}}]}}}}}` cc := &MySource{ Querys: sr.EsQuery, } //游标查询,index不支持别名,只能写索引库的名称 res, err := client.Scroll(Index).Query(cc).Size(200).Do(ctx) //查询一条获取游标 if err == nil { numDocs := 0 scrollId := res.ScrollId count := 1 for { if scrollId == "" { log.Println("ScrollId Is Error") break } var searchResult *esV7.SearchResult var err error if count == 1 { searchResult = res } else { searchResult, err = client.Scroll(Index).Size(200).ScrollId(scrollId).Do(ctx) //查询 if err != nil { if err.Error() == "EOS" { //迭代完毕 log.Println("Es Search Data Over:", err) } else { log.Println("Es Search Data Error:", err) } break } } log.Println("此次处理条数 ", len(searchResult.Hits.Hits)) if err != nil { if err.Error() == "EOS" { //迭代完毕 log.Println("Es Search Data Over:", err) } else { log.Println("Es Search Data Error:", err) } break } for _, hit := range searchResult.Hits.Hits { //开始处理数据 tmp := make(map[string]interface{}) if json.Unmarshal(hit.Source, &tmp) == nil { if stype != "history" { if !SkipData(tmp) { qu.Debug("跳过该条数据,发布时间在入库时间7天之前,", qu.ObjToString(tmp["_id"])) continue } } id := qu.ObjToString(tmp["id"]) //亚信 if CheckBidOpenAppidMap[c.AppId] { if tmp["bidopentime"] != nil { bidopentime := qu.Int64All(tmp["bidopentime"]) comeintime := qu.Int64All(tmp["comeintime"]) if bidopentime-comeintime <= 7*24*60*60 { qu.Debug("跳过该条数据,开标时间-入库时间<=7天,", id) continue } } } //河南移动,过滤掉中国移动采购网招标数据 if CheckBidHrefRuleIdMap[dm.ID] { if strings.Contains(qu.ObjToString(tmp["href"]), "b2b.10086.cn") { qu.Debug("跳过该条数据,公告原网址中包含 b2b.10086.cn,", id) continue } } isExists, err := redis.Exists("datag", c.AppId+"_"+id) if err != nil { log.Println("redis信息id判重出错 ", err) } else if isExists { log.Println("信息id重复 ", id) continue } tmp["id"] = id //记录数据原有id delete(tmp, "_id") if sr.ExtFieldType == 2 { findwinner := "" s_winner := strings.Split(qu.ObjToString(tmp["s_winner"]), ",") if len(s_winner) > 0 { for i := 0; i < len(s_winner); i++ { findwinners := strings.TrimSpace(s_winner[i]) if findwinners != "" { for _, v := range Sysconfig.SWinnerFilter { strings.ReplaceAll(findwinners, v, "") } if findwinners != "" { findwinner = findwinners break } } } } // findwinner := strings.TrimSpace(qu.ObjToString(tmp["winner"])) if findwinner != "" { finddata := MgoEnps.FindOne(EnpsColl, bson.M{"company_name": findwinner}) if finddata != nil { if legal_person := qu.ObjToString(finddata["legal_person"]); legal_person != "" { tmp["legal_person"] = legal_person } if email := qu.ObjToString(finddata["company_email"]); email != "" { tmp["company_email"] = email } if phone := qu.ObjToString(finddata["company_phone"]); phone != "" { tmp["company_phone"] = phone } //从最新年报中获取 中标单位联系电话、中标单位邮箱 // if annual_reports, ok := finddata["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 { // anreport := Sort_year_report(annual_reports) // if len(anreport) > 0 { // if email := qu.ObjToString(anreport["company_email"]); email != "" { // tmp["company_email"] = email // } // if phone := qu.ObjToString(anreport["company_phone"]); phone != "" { // tmp["company_phone"] = phone // } // } // } } } } matchKey := map[string]bool{} //记录所有匹配上的关键词 matchKeyType := map[string]bool{} //记录关键词对应的匹配方式 //先获取用到的所有字段值 fieldText := map[string]interface{}{} for field, _ := range sr.Fields { text := qu.ObjToString(tmp[field]) text = ProcessData(text) //处理文本(字母转大写,删除一些符号) fieldText[field] = text } //清理词清理 for _, cwm := range sr.GCW.MatchType { if text := qu.ObjToString(fieldText[cwm]); text != "" { for _, gcw_reg := range sr.GCW.KeyReg { text = gcw_reg.ReplaceAllString(text, "") } fieldText[cwm] = text } } //精准筛选规则2022-10-19 if c.Exact == 1 && sr.ExactRule != "" { nameArr := []string{} data, _ := MgoTag.Find("groups", map[string]interface{}{"ruleId": sr.ID}, nil, nil) if data != nil && len(data) > 0 { for _, v := range data { nameArr = append(nameArr, qu.ObjToString(v["name"])) } } exactResult := exactMatchs(sr.ExactRule, qu.ObjToString(tmp["title"]), qu.ObjToString(tmp["detail"]), sr.Maths, nameArr) qu.Debug("-------------------精准匹配", id, exactResult) if !exactResult { continue } } /* 因为要记录所有匹配上的关键词,所有优先匹配附加词,在匹配关键词 */ //1.附加词匹配 IsMatch := false //qu.Debug("sr.AW---", len(sr.AW)) for i, aw := range sr.AW { //qu.Debug("-------------------------开始附加词匹配--------------------------") IsMatchAddKey := RegMatch(fieldText, aw.MatchType, aw.KeyReg, nil, nil, false, true) //qu.Debug(IsMatchAddKey, "------------------------------------------------------------") //2.关键词匹配 if IsMatchAddKey { kw := sr.KW[i] //qu.Debug("-------------------------开始关键词匹配--------------------------") IsMatchKey := RegMatch(fieldText, kw.MatchType, kw.KeyReg, matchKey, matchKeyType, true, false) //qu.Debug(IsMatchKey, "------------------------------------------------------------") if IsMatchKey { IsMatch = true } } } if len(sr.AW) == 0 { IsMatch = true } /* 到此已经匹配完数据 */ qu.Debug("---------------------", id, IsMatch, matchKey) if IsMatch { //匹配成功,数据上新增规则id,matchKey,item并临时保存数据 // tmpMatchKey := MapDataToArr(matchKey) tmpMatchKeyType := MapDataToArr(matchKeyType) tmp["matchkey"] = GetMactchKeys(sr.Maths, tmp) tmp["matchtype"] = strings.Join(tmpMatchKeyType, ",") tmp["ruleid"] = sr.ID tmp["rulename"] = sr.Name tmpBuyerClass := qu.ObjToString(tmp["buyerclass"]) //开始打标签 //qu.Debug("c.IsTagRule+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") if c.IsTagRule { tagNameMap := map[string]bool{} tagIdMap := map[string]bool{} //qu.Debug("c.TagRules---", len(c.TagRules)) //log.Println(c.TagRules,"=========",) for _, tr := range c.TagRules { if tr.DepartRuleIds[sr.ID] { // log.Println(tr.TagNames, "===========打标签") //先获取用到的所有字段值 for field, _ := range tr.Fields { if fieldText[field] == nil { //补充fieldText text := qu.ObjToString(tmp[field]) text = ProcessData(text) //处理文本(字母转大写,删除一些符号) fieldText[field] = text } } //qu.Debug("-------------------------开始排除词匹配--------------------------") //qu.Debug("tr.NW---", len(tr.NW)) matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词 matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式 for j, tag_nw := range tr.NW { //排除词匹配 IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false) //qu.Debug(IsMatchNotKey, "------------------------------------------------------------") if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词 // log.Println(j, tr.TagNames[j]) if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) { tagname := tr.TagNames[j] tagBuyerClass := tr.BuyerClass[j] if tagBuyerClass != "" { if strings.Contains(tagBuyerClass, tmpBuyerClass) { if tagname == "" { tempList := []string{} for k, _ := range matchKeyTag { tempList = append(tempList, k) } tagname = strings.Join(tempList, ",") log.Println("=====tagname为空取匹配词为标签名称", tagname) } //qu.Debug("tagname-----", tagname) tagNameMap[tagname] = true tagIdMap[tr.ID] = true } } else { if tagname == "" { tempList := []string{} for k, _ := range matchKeyTag { tempList = append(tempList, k) } tagname = strings.Join(tempList, ",") log.Println("=====tagname为空取匹配词为标签名称", tagname) } //qu.Debug("tagname-----", tagname) tagNameMap[tagname] = true tagIdMap[tr.ID] = true } } } } } } //tagname tagNameArr := MapDataToArr(tagNameMap) tagIdArr := MapDataToArr(tagIdMap) if len(tagNameArr) > 0 { tmp["tagname"] = strings.Join(tagNameArr, ",") if DisPackageAppidMap[c.AppId] { tmp["buyer_type"] = strings.Join(tagNameArr, ",") } if c.PushModel == 2 { tmp["item"] = strings.Join(tagNameArr, ",") } tmp["tagid"] = strings.Join(tagIdArr, ",") } } if c.IsTagRule2 { tagNameMap := map[string]bool{} tagIdMap := map[string]bool{} //qu.Debug("c.TagRules---", len(c.TagRules)) //log.Println(c.TagRules,"=========",) for _, tr := range c.TagRules2 { if tr.DepartRuleIds[sr.ID] { // log.Println(tr.TagNames, "===========打标签") //先获取用到的所有字段值 for field, _ := range tr.Fields { if fieldText[field] == nil { //补充fieldText text := qu.ObjToString(tmp[field]) text = ProcessData(text) //处理文本(字母转大写,删除一些符号) fieldText[field] = text } } //qu.Debug("-------------------------开始排除词匹配--------------------------") //qu.Debug("tr.NW---", len(tr.NW)) matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词 matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式 for j, tag_nw := range tr.NW { //排除词匹配 IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false) //qu.Debug(IsMatchNotKey, "------------------------------------------------------------") if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词 // log.Println(j, tr.TagNames[j]) if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) { tagname := tr.TagNames[j] tagBuyerClass := tr.BuyerClass[j] if tagBuyerClass != "" { if strings.Contains(tagBuyerClass, tmpBuyerClass) { if tagname == "" { tempList := []string{} for k, _ := range matchKeyTag { tempList = append(tempList, k) } tagname = strings.Join(tempList, ",") log.Println("=====tagname为空取匹配词为标签名称", tagname) } //qu.Debug("tagname-----", tagname) tagNameMap[tagname] = true tagIdMap[tr.ID] = true } } else { if tagname == "" { tempList := []string{} for k, _ := range matchKeyTag { tempList = append(tempList, k) } tagname = strings.Join(tempList, ",") log.Println("=====tagname为空取匹配词为标签名称", tagname) } //qu.Debug("tagname-----", tagname) tagNameMap[tagname] = true tagIdMap[tr.ID] = true } } } } } } //tagname tagNameArr := MapDataToArr(tagNameMap) tagIdArr := MapDataToArr(tagIdMap) if len(tagNameArr) > 0 { tmp["tagname2"] = strings.Join(tagNameArr, ",") if DisPackageAppidMap[c.AppId] { tmp["buyer_type2"] = strings.Join(tagNameArr, ",") } if c.PushModel == 2 { tmp["item2"] = strings.Join(tagNameArr, ",") } tmp["tagid2"] = strings.Join(tagIdArr, ",") } } if c.IsTagRule3 { tagNameMap := map[string]bool{} tagIdMap := map[string]bool{} //qu.Debug("c.TagRules---", len(c.TagRules)) //log.Println(c.TagRules,"=========",) for _, tr := range c.TagRules3 { if tr.DepartRuleIds[sr.ID] { // log.Println(tr.TagNames, "===========打标签") //先获取用到的所有字段值 for field, _ := range tr.Fields { if fieldText[field] == nil { //补充fieldText text := qu.ObjToString(tmp[field]) text = ProcessData(text) //处理文本(字母转大写,删除一些符号) fieldText[field] = text } } //qu.Debug("-------------------------开始排除词匹配--------------------------") //qu.Debug("tr.NW---", len(tr.NW)) matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词 matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式 for j, tag_nw := range tr.NW { //排除词匹配 IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false) //qu.Debug(IsMatchNotKey, "------------------------------------------------------------") if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词 // log.Println(j, tr.TagNames[j]) if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) { tagname := tr.TagNames[j] tagBuyerClass := tr.BuyerClass[j] if tagBuyerClass != "" { if strings.Contains(tagBuyerClass, tmpBuyerClass) { if tagname == "" { tempList := []string{} for k, _ := range matchKeyTag { tempList = append(tempList, k) } tagname = strings.Join(tempList, ",") log.Println("=====tagname为空取匹配词为标签名称", tagname) } //qu.Debug("tagname-----", tagname) tagNameMap[tagname] = true tagIdMap[tr.ID] = true } } else { if tagname == "" { tempList := []string{} for k, _ := range matchKeyTag { tempList = append(tempList, k) } tagname = strings.Join(tempList, ",") log.Println("=====tagname为空取匹配词为标签名称", tagname) } //qu.Debug("tagname-----", tagname) tagNameMap[tagname] = true tagIdMap[tr.ID] = true } } } } } } //tagname tagNameArr := MapDataToArr(tagNameMap) tagIdArr := MapDataToArr(tagIdMap) if len(tagNameArr) > 0 { tmp["tagname3"] = strings.Join(tagNameArr, ",") if DisPackageAppidMap[c.AppId] { tmp["buyer_type3"] = strings.Join(tagNameArr, ",") } if c.PushModel == 2 { tmp["item3"] = strings.Join(tagNameArr, ",") } tmp["tagid3"] = strings.Join(tagIdArr, ",") } } //item switch c.PushModel { case 0: tmp["item"] = "数据" case 1: tmp["item"] = dm.Name case 2: //tmp["item"] = sr.Name case 3: tmp["item"] = dm.Name + "_" + sr.Name case 4: tmp["item"] = sr.Name } //appid tmp["appid"] = c.AppId //部门名称 tmp["departname"] = dm.Name tmp["departid"] = dm.ID //存储数据 dm.DataLock.Lock() //qu.Debug("tmp---", tmp) tmpMap := map[string]interface{}{id: tmp} dm.DepartmentData[sr.ID] = append(dm.DepartmentData[sr.ID], tmpMap) dm.DataLock.Unlock() } else { qu.Debug("------------", id, IsMatch) } } numDocs += 1 if numDocs%500 == 0 { log.Println("Current:", numDocs) } } scrollId = searchResult.ScrollId count++ } // wg.Wait() client.ClearScroll().ScrollId(scrollId).Do(ctx) //清理游标 log.Println("SearchRule ID", sr.ID, "Result Data Count:", numDocs) } else { log.Println("Customer:", c.Name, "Departmnet", dm.Name, "TagName", sr.Name, "Es Search Data Error,Tag ID:", sr.ID) } time.Sleep(2 * time.Second) } } } // 数据去重 func (c *Customer) RemoveRepeatData() { log.Println("开始数据去重...") defer qu.Catch() for _, dm := range c.Departments { for _, dataMapArr := range dm.DepartmentData { //一个部门的所有数据 for _, dataMap := range dataMapArr { for dataId, data := range dataMap { tmp := data.(map[string]interface{}) if c.PushModel == 0 { //全局模式所有数据去重 if c.SaveDataMap[dataId] == nil { c.SaveDataMap[dataId] = tmp } else { //数据重复 cus_history := c.SaveDataMap[dataId] MergeData(cus_history, tmp, c.IsTagRule, true, c.PushModel) //合并字段 //c.SaveDataMap[dataId] = cus_history } } else if c.PushModel == 2 || c.PushModel == 3 { //部门内部去重 if dm.SaveDataMap[dataId] == nil { dm.SaveDataMap[dataId] = tmp } else { //数据重复 dm_history := dm.SaveDataMap[dataId] MergeData(dm_history, tmp, c.IsTagRule, false, c.PushModel) //合并字段 //dm.SaveDataMap[dataId] = dm_history } } else if c.PushModel == 4 { //规则模式不去重 //c.SaveDataArr = append(c.SaveDataArr, tmp) if c.SaveDataArr[dataId] == nil { tmp["itemdist"] = map[string]interface{}{qu.ObjToString(tmp["item"]): qu.ObjToString(tmp["matchkey"])} c.SaveDataArr[dataId] = tmp } else { //数据重复 dm_history := c.SaveDataArr[dataId] MergeDatas(dm_history, tmp, c.IsTagRule, false) //合并字段 //dm.SaveDataMap[dataId] = dm_history } } else if c.PushModel == 1 { if c.SaveDataMap[dataId] == nil { tmp["itemdist"] = map[string]interface{}{qu.ObjToString(tmp["item"]): qu.ObjToString(tmp["matchkey"])} c.SaveDataMap[dataId] = tmp } else { //数据重复 dm_history := c.SaveDataMap[dataId] MergeData(dm_history, tmp, c.IsTagRule, true, c.PushModel) //合并字段 } } } } } //将部门数据清空 dm.DepartmentData = map[string][]map[string]interface{}{} } } // 组装保存数据 func (c *Customer) AssembelAndSaveData() { log.Println("开始组装保存数据...") defer qu.Catch() ch := make(chan bool, 10) wg := &sync.WaitGroup{} n := 0 if (c.PushModel == 0 || c.PushModel == 1) && len(c.SaveDataMap) > 0 { for _, tmp := range c.SaveDataMap { wg.Add(1) ch <- true go func(data map[string]interface{}) { defer func() { <-ch wg.Done() }() ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave) if !ok { n-- } }(tmp) n++ if n%500 == 0 { log.Println("Current:", n) } } wg.Wait() } else if c.PushModel == 2 || c.PushModel == 3 { for _, dm := range c.Departments { if len(dm.SaveDataMap) > 0 { for _, tmp := range dm.SaveDataMap { wg.Add(1) ch <- true go func(data map[string]interface{}) { defer func() { <-ch wg.Done() }() ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave) if !ok { n-- } }(tmp) n++ if n%500 == 0 { log.Println("Current:", n) } } } } wg.Wait() } else if c.PushModel == 4 && len(c.SaveDataArr) > 0 { for _, tmp := range c.SaveDataArr { wg.Add(1) ch <- true go func(data map[string]interface{}) { defer func() { <-ch wg.Done() }() ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave) if !ok { n-- } }(tmp) n++ if n%500 == 0 { log.Println("Current:", n) } } wg.Wait() } log.Println("数据保存完毕... Save Number:", n) } // 获取用户所有规则 func (d *Department) GetSearchRules(cid, stype string, idRange, idRanges bson.M) { defer qu.Catch() searchRules, _ := MgoTag.Find("euserdepartrule", map[string]interface{}{"s_userid": cid, "s_departid": d.ID, "i_isuse": 1, "b_delete": false}, nil, nil) if len(searchRules) > 0 { for _, sr := range searchRules { SR := &SearchRule{} SR.Fields = make(map[string]interface{}) id := mgoutil.BsonTOStringId(sr["_id"]) name := qu.ObjToString(sr["s_name"]) SR.ID = id SR.Name = name SR.CustomerID = cid SR.DepartmentID = d.ID SR.ExtFieldType = qu.IntAll(sr["i_extfieldstype"]) //SR.RuleData = &sync.Map{} esquery := qu.ObjToString(sr["s_esquery"]) if IsNewSql != 0 { esquery = qu.ObjToString(sr["s_esquery_search"]) } clearKey := qu.ObjToString(sr["s_globalclearkey"]) clearKeyMatch := qu.ObjToString(sr["s_globalclearkeymatch"]) //获取es if stype == "history" { SR.EsQuery = esquery } else { SR.GetEs(d.Name, esquery, idRange, idRanges) } //获取关键词和附加词 if o_rules, ok := sr["o_rules"].(primitive.A); ok && len(o_rules) > 0 { SR.GetKeyAddWord(o_rules) for _, v := range o_rules { orule, _ := v.(map[string]interface{}) SR.Maths = append(SR.Maths, map[string]string{ "s_matchkey": qu.ObjToString(orule["s_matchkey"]), "s_keymatch": qu.ObjToString(orule["s_keymatch"]), "s_group": qu.ObjToString(orule["s_group"]), }) } } //获取全局清理词 SR.GetClearWord(clearKey, clearKeyMatch) d.Rules = append(d.Rules, SR) } } } // 获取转换后的es语句 func (sr *SearchRule) GetEs(department, esquery string, tmpRange, tmpRanges bson.M) { defer qu.Catch() query := map[string]*QueryObjecct{} if json.Unmarshal([]byte(esquery), &query) == nil { qb := query["query"] filter := qb.Bool if filter != nil { //有filter index := -1 //记录range的位置 for i, m := range filter.Must { mMap := m.(map[string]interface{}) if esRange, ok := mMap["range"].(map[string]interface{}); ok && esRange != nil { //有range if esRange["publishtime"] != nil { index = i break } } } if index > -1 { filter.Must[index] = tmpRange } else { filter.Must = append(filter.Must, tmpRange) } if len(tmpRanges) > 0 { filter.Must = append(filter.Must, tmpRanges) } } else { //无filter则添加 bo := &BoolObject{} bo.Must = append(bo.Must, tmpRange) if len(tmpRanges) > 0 { bo.Must = append(bo.Must, tmpRanges) } // tmpFilter := &Filter{ // Bool: bo, // } qb.Bool = bo } strquery, err := json.Marshal(query) if err == nil { sr.EsQuery = string(strquery) } else { log.Println("Department:", department, "Es Error,Tag ID:", sr.ID) } } else { log.Println("Department:", department, "Es Error,Tag ID:", sr.ID) } } // 全局清理词处理 func (sr *SearchRule) GetClearWord(key, match string) { defer qu.Catch() //匹配方式 cwmArr := []string{} for _, mv := range strings.Split(match, ",") { if field := qu.ObjToString(MatchType[mv]); field != "" { cwmArr = append(cwmArr, field) sr.Fields[field] = true } } //清理词正则 cwkArr := []*regexp.Regexp{} for _, kv := range strings.Split(key, ",") { if LetterCase.MatchString(kv) { //字母转大写 kv = strings.ToUpper(kv) } reg := regexp.MustCompile(kv) cwkArr = append(cwkArr, reg) } cw := &ClearWord{ KeyReg: cwkArr, MatchType: cwmArr, } sr.GCW = cw } // 关键词、附加词处理 func (sr *SearchRule) GetKeyAddWord(o_rules primitive.A) { defer qu.Catch() kw, aw, _, _, _ := GetNotkeyAndKeyAddWord(o_rules, sr.Fields, false) sr.KW = kw sr.AW = aw //sr.Fields = fields } // 排除词、关键词、附加词处理 func (tr *TagRule) GetKeyAddNotKeyWord(o_list primitive.A) { defer qu.Catch() kw, aw, nkw, tagnames, buyerclass := GetNotkeyAndKeyAddWord(o_list, tr.Fields, true) tr.NW = nkw tr.KW = kw tr.AW = aw //tr.Fields = fields tr.TagNames = tagnames tr.BuyerClass = buyerclass } func GetMactchKeys(match []map[string]string, data map[string]interface{}) string { keyWord := []string{} for _, keys := range match { types := keys["s_keymatch"] key := keys["s_matchkey"] if strings.Contains(types, "1") { title := qu.ObjToString(data["title"]) keyWord = KeyWordToDatas(types, title, key, keyWord) } if strings.Contains(types, "2") { detail := qu.ObjToString(data["detail"]) keyWord = KeyWordToDatas(types, detail, key, keyWord) } if strings.Contains(types, "3") { purchasing := qu.ObjToString(data["purchasing"]) keyWord = KeyWordToDatas(types, purchasing, key, keyWord) } if strings.Contains(types, "4") { filetext := qu.ObjToString(data["filetext"]) keyWord = KeyWordToDatas(types, filetext, key, keyWord) } if strings.Contains(types, "5") { projectname := qu.ObjToString(data["projectname"]) keyWord = KeyWordToDatas(types, projectname, key, keyWord) } if strings.Contains(types, "6") || strings.Contains(types, "8") { buyer := qu.ObjToString(data["buyer"]) keyWord = KeyWordToDatas(types, buyer, key, keyWord) } if strings.Contains(types, "7") || strings.Contains(types, "9") { winner := qu.ObjToString(data["s_winner"]) keyWord = KeyWordToDatas(types, winner, key, keyWord) } } keyMap := map[string]bool{} keyArr := []string{} for _, key := range keyWord { keyMap[key] = true } for k, _ := range keyMap { keyArr = append(keyArr, k) } return strings.Join(keyArr, ",") } func KeyWordToDatas(types, item, key string, keyWord []string) []string { for _, mk := range strings.Split(key, ",") { if strings.Contains(mk, "&&") { arr := strings.Split(mk, "&&") isok := true for _, s := range arr { if s != "" { if !strings.Contains(strings.ToUpper(item), strings.ToUpper(s)) { isok = false } } } if isok { keyWord = append(keyWord, mk) } } else { if strings.Contains(strings.ToUpper(item), strings.ToUpper(mk)) { keyWord = append(keyWord, mk) } } } return keyWord } func exactMatchs(rule, title, detail string, match []map[string]string, nameArr []string) bool { realdata := map[string]float64{} for _, v := range nameArr { realdata["title_"+v] = 0 realdata["content_"+v] = 0 } mapping := map[string]string{ " and ": " && ", " or ": " || ", " not ": " ! ", } for k, v := range mapping { rule = strings.ReplaceAll(rule, k, v) } //可以将编译后的表达式,存放在缓存中 program, err := expr.Compile(rule, expr.Env(realdata)) if err != nil { log.Println("表达式错误 ", err) return false } for _, keys := range match { types := keys["s_keymatch"] // 1,2,3 key := keys["s_matchkey"] //软件,工程 group := keys["s_group"] if strings.Contains(types, "1") { for _, v := range strings.Split(key, ",") { if strings.Contains(strings.ToUpper(title), strings.ToUpper(v)) { realdata["title_"+group] = realdata["title_"+group] + 1 } } } if strings.Contains(types, "2") { for _, v := range strings.Split(key, ",") { if strings.Contains(strings.ToUpper(detail), strings.ToUpper(v)) { realdata["content_"+group] = realdata["content_"+group] + 1 } } } } log.Println("匹配结果 ", realdata) output, err := expr.Run(program, realdata) if err != nil { log.Println("表达式执行错误 ", err) return false } return output.(bool) }