12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117 |
- package main
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"regexp"
	"strings"
	"sync"
	"time"

	esv "es"
	qu "qfw/util"
	"qfw/util/redis"
	. "sqlmodel"
	mgoutil "util/mgodb"

	"github.com/antonmedv/expr"
	"github.com/cron"
	"github.com/donnie4w/go-logger/logger"
	esV7 "github.com/olivere/elastic"
	"go.mongodb.org/mongo-driver/bson/primitive"
	"gopkg.in/mgo.v2/bson"
)
- func TimeTask() {
- StartTask()
- c := cron.New()
- //cronstr := "0 */" + fmt.Sprint(TaskTime) + " * * * ?"
- cronstr := "0 5 */" + fmt.Sprint(TaskTime) + " * * ?" //每TaskTime小时执行一次
- c.AddFunc(cronstr, func() { StartTask() })
- c.Start()
- }
- func StartTask() {
- GetCustomerData()
- }
- // 加载客户
- func GetCustomerData() {
- defer qu.Catch()
- log.Println("Init Customer...")
- idRange, idRange2, ok, endTime := GetIdRange() //获取id区间
- if !ok {
- return
- }
- logger.Debug("此次任务区间:", idRange, idRange2)
- //查询企业库开启推送的客户
- customers, _ := MgoTag.Find("euser", map[string]interface{}{"i_push": 1, "b_delete": false}, nil, nil)
- for _, c := range customers {
- customerId := mgoutil.BsonTOStringId(c["_id"])
- customer := qu.ObjToString(c["s_name"]) //客户名称
- appId := qu.ObjToString(c["s_appid"]) //appid
- extends := qu.ObjToString(c["s_extends"]) //扩展信息
- pushModel := qu.IntAll(c["i_pushmodel"]) //推送模式
- dataSave := qu.ObjToString(c["s_dataSave"])
- log.Println("当前客户 ", customer)
- cus := &Customer{}
- cus.SaveDataMap = map[string]map[string]interface{}{}
- cus.SaveDataArr = map[string]map[string]interface{}{}
- cus.IdRange = idRange
- cus.IdRanges = idRange2
- cus.ID = customerId
- cus.Name = customer
- cus.PushModel = pushModel
- cus.AppId = appId
- cus.DataSave = dataSave
- for _, v := range strings.Split(extends, ",") {
- if v == "hospitalgrade" {
- cus.IsSearchHosp = true
- } else if v == "enterpise" {
- cus.IsSearchEnps = true
- }
- }
- //
- if projectAppidMap[appId] {
- start := time.Now().Unix()
- log.Println("加载projectId---开始")
- InitProjectId(appId)
- end := time.Now().Unix()
- log.Println("加载projectId---结束,耗时", end-start, "秒")
- } else {
- projectIdMap = sync.Map{}
- }
- //
- cus.GetTagRules() //获取客户打标签规则
- cus.GetDepartments("") //获取客户信息
- //PrintLog(cus) //打印查看初始化的信息
- qu.Debug("customer:", cus.ID, cus.Name, cus.PushModel, cus.AppId, cus.IsTagRule, cus.IsTagRule2, cus.IsTagRule3, cus.IsSearchHosp, cus.IsSearchEnps, len(cus.TagRules), len(cus.TagRules2), len(cus.TagRules3), len(cus.Departments))
- cus.GetData("") //获取数据
- cus.RemoveRepeatData() //数据去重
- cus.AssembelAndSaveData() //组装、保存数据
- }
- Sysconfig.LatestId = LatestId
- Sysconfig.LatestTime = endTime
- qu.WriteSysConfig(Sysconfig)
- logger.Debug("定时任务结束-endId-Sysconfig.LatestTime ", endTime)
- }
- // 获取客户打标签规则
- func (c *Customer) GetTagRules() {
- log.Println("开始加载标签规则...")
- defer qu.Catch()
- tagRules, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false}, nil, nil)
- if len(tagRules) > 0 {
- for _, tr := range tagRules {
- tagType := qu.ObjToString(tr["tagType"])
- if tagType == "" || tagType == "1" {
- c.IsTagRule = true //查到打标签规则,表示打标签
- TR := &TagRule{}
- TR.Fields = make(map[string]interface{})
- TR.DepartRuleIds = make(map[string]bool)
- id := mgoutil.BsonTOStringId(tr["_id"])
- name := qu.ObjToString(tr["s_name"])
- TR.ID = id
- TR.Name = name
- TR.CustomerId = c.ID
- //部门规则id组
- if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" {
- for _, drid := range strings.Split(departRuleIds, ",") {
- TR.DepartRuleIds[drid] = true
- }
- }
- //规则
- if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 {
- TR.GetKeyAddNotKeyWord(o_list)
- }
- c.TagRules = append(c.TagRules, TR)
- }
- }
- }
- tagRules2, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false, "tagType": "2"}, nil, nil)
- if len(tagRules2) > 0 {
- c.IsTagRule2 = true //查到打标签规则,表示打标签
- for _, tr := range tagRules2 {
- TR := &TagRule{}
- TR.Fields = make(map[string]interface{})
- TR.DepartRuleIds = make(map[string]bool)
- id := mgoutil.BsonTOStringId(tr["_id"])
- name := qu.ObjToString(tr["s_name"])
- TR.ID = id
- TR.Name = name
- TR.CustomerId = c.ID
- //部门规则id组
- if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" {
- for _, drid := range strings.Split(departRuleIds, ",") {
- TR.DepartRuleIds[drid] = true
- }
- }
- //规则
- if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 {
- TR.GetKeyAddNotKeyWord(o_list)
- }
- c.TagRules2 = append(c.TagRules2, TR)
- }
- }
- tagRules3, _ := MgoTag.Find("eusertagrule", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false, "tagType": "3"}, nil, nil)
- if len(tagRules3) > 0 {
- c.IsTagRule3 = true //查到打标签规则,表示打标签
- for _, tr := range tagRules3 {
- TR := &TagRule{}
- TR.Fields = make(map[string]interface{})
- TR.DepartRuleIds = make(map[string]bool)
- id := mgoutil.BsonTOStringId(tr["_id"])
- name := qu.ObjToString(tr["s_name"])
- TR.ID = id
- TR.Name = name
- TR.CustomerId = c.ID
- //部门规则id组
- if departRuleIds := qu.ObjToString(tr["o_departruleids"]); departRuleIds != "" {
- for _, drid := range strings.Split(departRuleIds, ",") {
- TR.DepartRuleIds[drid] = true
- }
- }
- //规则
- if o_list, ok := tr["o_list"].(primitive.A); ok && len(o_list) > 0 {
- TR.GetKeyAddNotKeyWord(o_list)
- }
- c.TagRules3 = append(c.TagRules3, TR)
- }
- }
- }
- // 获取部门信息
- func (c *Customer) GetDepartments(stype string) {
- log.Println("开始获取部门信息...")
- defer qu.Catch()
- departments, _ := MgoTag.Find("euserdepart", map[string]interface{}{"s_userid": c.ID, "i_isuse": 1, "b_delete": false}, nil, nil)
- if len(departments) > 0 {
- for _, ds := range departments {
- DM := &Department{}
- DM.DataLock = &sync.Mutex{}
- DM.DepartmentData = map[string][]map[string]interface{}{}
- DM.SaveDataMap = map[string]map[string]interface{}{}
- id := mgoutil.BsonTOStringId(ds["_id"])
- name := qu.ObjToString(ds["s_name"])
- DM.ID = id
- DM.Name = name
- DM.CustomerID = c.ID
- DM.GetSearchRules(c.ID, stype, c.IdRange, c.IdRanges) //获取某个部门的所有规则
- c.Departments = append(c.Departments, DM)
- //qu.Debug("Departments---", DM.ID, DM.Name, DM.CustomerID, len(DM.Rules))
- }
- }
- }
- // 获取数据
- func (c *Customer) GetData(stype string) {
- log.Println("开始匹配数据...")
- defer qu.Catch()
- esConfig := Sysconfig.Es
- esversion := qu.ObjToString(esConfig["version"])
- if esversion == "v1" {
- } else {
- esCon := esv.VarEs.(*esv.EsV7)
- c.EsConGetDataV7(stype, esCon)
- }
- }
// MySource wraps a complete JSON search body (as stored on a search rule) so
// that its "query" part can be handed to an API expecting a Source() method.
type MySource struct {
	Querys string // full JSON body, e.g. {"query": {...}}
}

// Source decodes Querys and returns the value stored under its "query" key.
// It returns an error when Querys is not a valid JSON object; previously the
// decode error was dropped and a nil query was returned as if it were valid.
// A body without a "query" key still yields (nil, nil).
func (m *MySource) Source() (interface{}, error) {
	body := make(map[string]interface{})
	if err := json.Unmarshal([]byte(m.Querys), &body); err != nil {
		return nil, fmt.Errorf("decoding es query body: %w", err)
	}
	return body["query"], nil
}
- func (c *Customer) EsConGetDataV7(stype string, esCon *esv.EsV7) {
- client := esCon.GetEsConn()
- defer esCon.DestoryEsConn(client)
- ctx, _ := context.WithTimeout(context.Background(), 30*time.Minute)
- for _, dm := range c.Departments {
- for _, sr := range dm.Rules {
- for {
- listLen := redis.GetInt("session", "es_status")
- if listLen == 0 {
- log.Println("es空闲!")
- break
- } else if listLen == 1 || listLen == 2 {
- log.Println("系统繁忙,请稍后再试 ", listLen)
- }
- time.Sleep(5 * time.Second)
- }
- //测试
- // MgoDataTest(sr, dm, c)
- // return
- // ch := make(chan bool, 10)
- // wg := &sync.WaitGroup{}
- escount := Es.Count(Index, Itype, sr.EsQuery)
- log.Println("查询总数:", escount, "规则ID:", sr.ID, "EsQuery:", sr.EsQuery)
- if escount == 0 {
- continue
- }
- //查询条件类型转换
- // var q esV7.Query
- //sr.EsQuery = `{"query":{"filtered":{"filter":{"bool":{"must":[{"bool":{"should":[{"terms":{"city":["上海市"]}}]}},{"terms":{"toptype":["招标","结果"]}},{"range":{"publishtime":{"gte":1588262400,"lt":1608825600}}}]}},"query":{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"terms":{"buyer":["上海城建职业学院","上海市第一人民医院","中国银联股份有限公司","上海立信会计金融学院法规处","中国东方航空股份有限公司","上海外国语大学","上海小昆山环卫服务有限公司","国家税务总局上海市税务局","中国浦东干部学院","上海市浦东新区老港镇人民政府","咪咕视讯科技有限公司","上海交通大学医学院附属新华医院","交通运输部上海打捞局","松江区体育局","复旦大学附属中山医院","上海交通大学医学院附属瑞金医院","中国科学院上海应用物理研究所"]}}]}}]}}]}}]}}],"must_not":[{"constant_score":{"filter":{"missing":{"field":"filetext"}}}}]}}}}}`
- cc := &MySource{
- Querys: sr.EsQuery,
- }
- //游标查询,index不支持别名,只能写索引库的名称
- res, err := client.Scroll(Index).Query(cc).Size(200).Do(ctx) //查询一条获取游标
- if err == nil {
- numDocs := 0
- scrollId := res.ScrollId
- count := 1
- for {
- if scrollId == "" {
- log.Println("ScrollId Is Error")
- break
- }
- var searchResult *esV7.SearchResult
- var err error
- if count == 1 {
- searchResult = res
- } else {
- searchResult, err = client.Scroll(Index).Size(200).ScrollId(scrollId).Do(ctx) //查询
- if err != nil {
- if err.Error() == "EOS" { //迭代完毕
- log.Println("Es Search Data Over:", err)
- } else {
- log.Println("Es Search Data Error:", err)
- }
- break
- }
- }
- log.Println("此次处理条数 ", len(searchResult.Hits.Hits))
- if err != nil {
- if err.Error() == "EOS" { //迭代完毕
- log.Println("Es Search Data Over:", err)
- } else {
- log.Println("Es Search Data Error:", err)
- }
- break
- }
- for _, hit := range searchResult.Hits.Hits {
- //开始处理数据
- tmp := make(map[string]interface{})
- if json.Unmarshal(hit.Source, &tmp) == nil {
- if stype != "history" {
- if !SkipData(tmp) {
- qu.Debug("跳过该条数据,发布时间在入库时间7天之前,", qu.ObjToString(tmp["_id"]))
- continue
- }
- }
- id := qu.ObjToString(tmp["id"])
- //亚信
- if CheckBidOpenAppidMap[c.AppId] {
- if tmp["bidopentime"] != nil {
- bidopentime := qu.Int64All(tmp["bidopentime"])
- comeintime := qu.Int64All(tmp["comeintime"])
- if bidopentime-comeintime <= 7*24*60*60 {
- qu.Debug("跳过该条数据,开标时间-入库时间<=7天,", id)
- continue
- }
- }
- }
- //河南移动,过滤掉中国移动采购网招标数据
- if CheckBidHrefRuleIdMap[dm.ID] {
- if strings.Contains(qu.ObjToString(tmp["href"]), "b2b.10086.cn") {
- qu.Debug("跳过该条数据,公告原网址中包含 b2b.10086.cn,", id)
- continue
- }
- }
- isExists, err := redis.Exists("datag", c.AppId+"_"+id)
- if err != nil {
- log.Println("redis信息id判重出错 ", err)
- } else if isExists {
- log.Println("信息id重复 ", id)
- continue
- }
- tmp["id"] = id //记录数据原有id
- delete(tmp, "_id")
- if sr.ExtFieldType == 2 {
- findwinner := ""
- s_winner := strings.Split(qu.ObjToString(tmp["s_winner"]), ",")
- if len(s_winner) > 0 {
- for i := 0; i < len(s_winner); i++ {
- findwinners := strings.TrimSpace(s_winner[i])
- if findwinners != "" {
- for _, v := range Sysconfig.SWinnerFilter {
- strings.ReplaceAll(findwinners, v, "")
- }
- if findwinners != "" {
- findwinner = findwinners
- break
- }
- }
- }
- }
- // findwinner := strings.TrimSpace(qu.ObjToString(tmp["winner"]))
- if findwinner != "" {
- finddata := MgoEnps.FindOne(EnpsColl, bson.M{"company_name": findwinner})
- if finddata != nil {
- if legal_person := qu.ObjToString(finddata["legal_person"]); legal_person != "" {
- tmp["legal_person"] = legal_person
- }
- if email := qu.ObjToString(finddata["company_email"]); email != "" {
- tmp["company_email"] = email
- }
- if phone := qu.ObjToString(finddata["company_phone"]); phone != "" {
- tmp["company_phone"] = phone
- }
- //从最新年报中获取 中标单位联系电话、中标单位邮箱
- // if annual_reports, ok := finddata["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 {
- // anreport := Sort_year_report(annual_reports)
- // if len(anreport) > 0 {
- // if email := qu.ObjToString(anreport["company_email"]); email != "" {
- // tmp["company_email"] = email
- // }
- // if phone := qu.ObjToString(anreport["company_phone"]); phone != "" {
- // tmp["company_phone"] = phone
- // }
- // }
- // }
- }
- }
- }
- matchKey := map[string]bool{} //记录所有匹配上的关键词
- matchKeyType := map[string]bool{} //记录关键词对应的匹配方式
- //先获取用到的所有字段值
- fieldText := map[string]interface{}{}
- for field, _ := range sr.Fields {
- text := qu.ObjToString(tmp[field])
- text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
- fieldText[field] = text
- }
- //清理词清理
- for _, cwm := range sr.GCW.MatchType {
- if text := qu.ObjToString(fieldText[cwm]); text != "" {
- for _, gcw_reg := range sr.GCW.KeyReg {
- text = gcw_reg.ReplaceAllString(text, "")
- }
- fieldText[cwm] = text
- }
- }
- //精准筛选规则2022-10-19
- if c.Exact == 1 && sr.ExactRule != "" {
- nameArr := []string{}
- data, _ := MgoTag.Find("groups", map[string]interface{}{"ruleId": sr.ID}, nil, nil)
- if data != nil && len(data) > 0 {
- for _, v := range data {
- nameArr = append(nameArr, qu.ObjToString(v["name"]))
- }
- }
- exactResult := exactMatchs(sr.ExactRule, qu.ObjToString(tmp["title"]), qu.ObjToString(tmp["detail"]), sr.Maths, nameArr)
- qu.Debug("-------------------精准匹配", id, exactResult)
- if !exactResult {
- continue
- }
- }
- /*
- 因为要记录所有匹配上的关键词,所有优先匹配附加词,在匹配关键词
- */
- //1.附加词匹配
- IsMatch := false
- //qu.Debug("sr.AW---", len(sr.AW))
- for i, aw := range sr.AW {
- //qu.Debug("-------------------------开始附加词匹配--------------------------")
- IsMatchAddKey := RegMatch(fieldText, aw.MatchType, aw.KeyReg, nil, nil, false, true)
- //qu.Debug(IsMatchAddKey, "------------------------------------------------------------")
- //2.关键词匹配
- if IsMatchAddKey {
- kw := sr.KW[i]
- //qu.Debug("-------------------------开始关键词匹配--------------------------")
- IsMatchKey := RegMatch(fieldText, kw.MatchType, kw.KeyReg, matchKey, matchKeyType, true, false)
- //qu.Debug(IsMatchKey, "------------------------------------------------------------")
- if IsMatchKey {
- IsMatch = true
- }
- }
- }
- if len(sr.AW) == 0 {
- IsMatch = true
- }
- /*
- 到此已经匹配完数据
- */
- qu.Debug("---------------------", id, IsMatch, matchKey)
- if IsMatch { //匹配成功,数据上新增规则id,matchKey,item并临时保存数据
- // tmpMatchKey := MapDataToArr(matchKey)
- tmpMatchKeyType := MapDataToArr(matchKeyType)
- tmp["matchkey"] = GetMactchKeys(sr.Maths, tmp)
- tmp["matchtype"] = strings.Join(tmpMatchKeyType, ",")
- tmp["ruleid"] = sr.ID
- tmp["rulename"] = sr.Name
- tmpBuyerClass := qu.ObjToString(tmp["buyerclass"])
- //开始打标签
- //qu.Debug("c.IsTagRule+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
- if c.IsTagRule {
- tagNameMap := map[string]bool{}
- tagIdMap := map[string]bool{}
- //qu.Debug("c.TagRules---", len(c.TagRules))
- //log.Println(c.TagRules,"=========",)
- for _, tr := range c.TagRules {
- if tr.DepartRuleIds[sr.ID] {
- // log.Println(tr.TagNames, "===========打标签")
- //先获取用到的所有字段值
- for field, _ := range tr.Fields {
- if fieldText[field] == nil { //补充fieldText
- text := qu.ObjToString(tmp[field])
- text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
- fieldText[field] = text
- }
- }
- //qu.Debug("-------------------------开始排除词匹配--------------------------")
- //qu.Debug("tr.NW---", len(tr.NW))
- matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词
- matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式
- for j, tag_nw := range tr.NW { //排除词匹配
- IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
- //qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
- if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
- // log.Println(j, tr.TagNames[j])
- if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) {
- tagname := tr.TagNames[j]
- tagBuyerClass := tr.BuyerClass[j]
- if tagBuyerClass != "" {
- if strings.Contains(tagBuyerClass, tmpBuyerClass) {
- if tagname == "" {
- tempList := []string{}
- for k, _ := range matchKeyTag {
- tempList = append(tempList, k)
- }
- tagname = strings.Join(tempList, ",")
- log.Println("=====tagname为空取匹配词为标签名称", tagname)
- }
- //qu.Debug("tagname-----", tagname)
- tagNameMap[tagname] = true
- tagIdMap[tr.ID] = true
- }
- } else {
- if tagname == "" {
- tempList := []string{}
- for k, _ := range matchKeyTag {
- tempList = append(tempList, k)
- }
- tagname = strings.Join(tempList, ",")
- log.Println("=====tagname为空取匹配词为标签名称", tagname)
- }
- //qu.Debug("tagname-----", tagname)
- tagNameMap[tagname] = true
- tagIdMap[tr.ID] = true
- }
- }
- }
- }
- }
- }
- //tagname
- tagNameArr := MapDataToArr(tagNameMap)
- tagIdArr := MapDataToArr(tagIdMap)
- if len(tagNameArr) > 0 {
- tmp["tagname"] = strings.Join(tagNameArr, ",")
- if DisPackageAppidMap[c.AppId] {
- tmp["buyer_type"] = strings.Join(tagNameArr, ",")
- }
- if c.PushModel == 2 {
- tmp["item"] = strings.Join(tagNameArr, ",")
- }
- tmp["tagid"] = strings.Join(tagIdArr, ",")
- }
- }
- if c.IsTagRule2 {
- tagNameMap := map[string]bool{}
- tagIdMap := map[string]bool{}
- //qu.Debug("c.TagRules---", len(c.TagRules))
- //log.Println(c.TagRules,"=========",)
- for _, tr := range c.TagRules2 {
- if tr.DepartRuleIds[sr.ID] {
- // log.Println(tr.TagNames, "===========打标签")
- //先获取用到的所有字段值
- for field, _ := range tr.Fields {
- if fieldText[field] == nil { //补充fieldText
- text := qu.ObjToString(tmp[field])
- text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
- fieldText[field] = text
- }
- }
- //qu.Debug("-------------------------开始排除词匹配--------------------------")
- //qu.Debug("tr.NW---", len(tr.NW))
- matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词
- matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式
- for j, tag_nw := range tr.NW { //排除词匹配
- IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
- //qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
- if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
- // log.Println(j, tr.TagNames[j])
- if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) {
- tagname := tr.TagNames[j]
- tagBuyerClass := tr.BuyerClass[j]
- if tagBuyerClass != "" {
- if strings.Contains(tagBuyerClass, tmpBuyerClass) {
- if tagname == "" {
- tempList := []string{}
- for k, _ := range matchKeyTag {
- tempList = append(tempList, k)
- }
- tagname = strings.Join(tempList, ",")
- log.Println("=====tagname为空取匹配词为标签名称", tagname)
- }
- //qu.Debug("tagname-----", tagname)
- tagNameMap[tagname] = true
- tagIdMap[tr.ID] = true
- }
- } else {
- if tagname == "" {
- tempList := []string{}
- for k, _ := range matchKeyTag {
- tempList = append(tempList, k)
- }
- tagname = strings.Join(tempList, ",")
- log.Println("=====tagname为空取匹配词为标签名称", tagname)
- }
- //qu.Debug("tagname-----", tagname)
- tagNameMap[tagname] = true
- tagIdMap[tr.ID] = true
- }
- }
- }
- }
- }
- }
- //tagname
- tagNameArr := MapDataToArr(tagNameMap)
- tagIdArr := MapDataToArr(tagIdMap)
- if len(tagNameArr) > 0 {
- tmp["tagname2"] = strings.Join(tagNameArr, ",")
- if DisPackageAppidMap[c.AppId] {
- tmp["buyer_type2"] = strings.Join(tagNameArr, ",")
- }
- if c.PushModel == 2 {
- tmp["item2"] = strings.Join(tagNameArr, ",")
- }
- tmp["tagid2"] = strings.Join(tagIdArr, ",")
- }
- }
- if c.IsTagRule3 {
- tagNameMap := map[string]bool{}
- tagIdMap := map[string]bool{}
- //qu.Debug("c.TagRules---", len(c.TagRules))
- //log.Println(c.TagRules,"=========",)
- for _, tr := range c.TagRules3 {
- if tr.DepartRuleIds[sr.ID] {
- // log.Println(tr.TagNames, "===========打标签")
- //先获取用到的所有字段值
- for field, _ := range tr.Fields {
- if fieldText[field] == nil { //补充fieldText
- text := qu.ObjToString(tmp[field])
- text = ProcessData(text) //处理文本(字母转大写,删除一些符号)
- fieldText[field] = text
- }
- }
- //qu.Debug("-------------------------开始排除词匹配--------------------------")
- //qu.Debug("tr.NW---", len(tr.NW))
- matchKeyTag := map[string]bool{} //记录所有标签里的匹配上的关键词
- matchKeyTypeTag := map[string]bool{} //记录标签里的关键词对应的匹配方式
- for j, tag_nw := range tr.NW { //排除词匹配
- IsMatchNotKey := RegMatch(fieldText, tag_nw.MatchType, tag_nw.KeyReg, nil, nil, false, false)
- //qu.Debug(IsMatchNotKey, "------------------------------------------------------------")
- if !IsMatchNotKey { //排除词未匹配,匹配附加词关键词
- // log.Println(j, tr.TagNames[j])
- if RegMatch(fieldText, tr.AW[j].MatchType, tr.AW[j].KeyReg, nil, nil, false, true) && RegMatch(fieldText, tr.KW[j].MatchType, tr.KW[j].KeyReg, matchKeyTag, matchKeyTypeTag, true, false) {
- tagname := tr.TagNames[j]
- tagBuyerClass := tr.BuyerClass[j]
- if tagBuyerClass != "" {
- if strings.Contains(tagBuyerClass, tmpBuyerClass) {
- if tagname == "" {
- tempList := []string{}
- for k, _ := range matchKeyTag {
- tempList = append(tempList, k)
- }
- tagname = strings.Join(tempList, ",")
- log.Println("=====tagname为空取匹配词为标签名称", tagname)
- }
- //qu.Debug("tagname-----", tagname)
- tagNameMap[tagname] = true
- tagIdMap[tr.ID] = true
- }
- } else {
- if tagname == "" {
- tempList := []string{}
- for k, _ := range matchKeyTag {
- tempList = append(tempList, k)
- }
- tagname = strings.Join(tempList, ",")
- log.Println("=====tagname为空取匹配词为标签名称", tagname)
- }
- //qu.Debug("tagname-----", tagname)
- tagNameMap[tagname] = true
- tagIdMap[tr.ID] = true
- }
- }
- }
- }
- }
- }
- //tagname
- tagNameArr := MapDataToArr(tagNameMap)
- tagIdArr := MapDataToArr(tagIdMap)
- if len(tagNameArr) > 0 {
- tmp["tagname3"] = strings.Join(tagNameArr, ",")
- if DisPackageAppidMap[c.AppId] {
- tmp["buyer_type3"] = strings.Join(tagNameArr, ",")
- }
- if c.PushModel == 2 {
- tmp["item3"] = strings.Join(tagNameArr, ",")
- }
- tmp["tagid3"] = strings.Join(tagIdArr, ",")
- }
- }
- //item
- switch c.PushModel {
- case 0:
- tmp["item"] = "数据"
- case 1:
- tmp["item"] = dm.Name
- case 2:
- //tmp["item"] = sr.Name
- case 3:
- tmp["item"] = dm.Name + "_" + sr.Name
- case 4:
- tmp["item"] = sr.Name
- }
- //appid
- tmp["appid"] = c.AppId
- //部门名称
- tmp["departname"] = dm.Name
- tmp["departid"] = dm.ID
- //存储数据
- dm.DataLock.Lock()
- //qu.Debug("tmp---", tmp)
- tmpMap := map[string]interface{}{id: tmp}
- dm.DepartmentData[sr.ID] = append(dm.DepartmentData[sr.ID], tmpMap)
- dm.DataLock.Unlock()
- } else {
- qu.Debug("------------", id, IsMatch)
- }
- }
- numDocs += 1
- if numDocs%500 == 0 {
- log.Println("Current:", numDocs)
- }
- }
- scrollId = searchResult.ScrollId
- count++
- }
- // wg.Wait()
- client.ClearScroll().ScrollId(scrollId).Do(ctx) //清理游标
- log.Println("SearchRule ID", sr.ID, "Result Data Count:", numDocs)
- } else {
- log.Println("Customer:", c.Name, "Departmnet", dm.Name, "TagName", sr.Name, "Es Search Data Error,Tag ID:", sr.ID)
- }
- time.Sleep(2 * time.Second)
- }
- }
- }
- // 数据去重
- func (c *Customer) RemoveRepeatData() {
- log.Println("开始数据去重...")
- defer qu.Catch()
- for _, dm := range c.Departments {
- for _, dataMapArr := range dm.DepartmentData { //一个部门的所有数据
- for _, dataMap := range dataMapArr {
- for dataId, data := range dataMap {
- tmp := data.(map[string]interface{})
- if c.PushModel == 0 { //全局模式所有数据去重
- if c.SaveDataMap[dataId] == nil {
- c.SaveDataMap[dataId] = tmp
- } else { //数据重复
- cus_history := c.SaveDataMap[dataId]
- MergeData(cus_history, tmp, c.IsTagRule, true, c.PushModel) //合并字段
- //c.SaveDataMap[dataId] = cus_history
- }
- } else if c.PushModel == 2 || c.PushModel == 3 { //部门内部去重
- if dm.SaveDataMap[dataId] == nil {
- dm.SaveDataMap[dataId] = tmp
- } else { //数据重复
- dm_history := dm.SaveDataMap[dataId]
- MergeData(dm_history, tmp, c.IsTagRule, false, c.PushModel) //合并字段
- //dm.SaveDataMap[dataId] = dm_history
- }
- } else if c.PushModel == 4 { //规则模式不去重
- //c.SaveDataArr = append(c.SaveDataArr, tmp)
- if c.SaveDataArr[dataId] == nil {
- tmp["itemdist"] = map[string]interface{}{qu.ObjToString(tmp["item"]): qu.ObjToString(tmp["matchkey"])}
- c.SaveDataArr[dataId] = tmp
- } else { //数据重复
- dm_history := c.SaveDataArr[dataId]
- MergeDatas(dm_history, tmp, c.IsTagRule, false) //合并字段
- //dm.SaveDataMap[dataId] = dm_history
- }
- } else if c.PushModel == 1 {
- if c.SaveDataMap[dataId] == nil {
- tmp["itemdist"] = map[string]interface{}{qu.ObjToString(tmp["item"]): qu.ObjToString(tmp["matchkey"])}
- c.SaveDataMap[dataId] = tmp
- } else { //数据重复
- dm_history := c.SaveDataMap[dataId]
- MergeData(dm_history, tmp, c.IsTagRule, true, c.PushModel) //合并字段
- }
- }
- }
- }
- }
- //将部门数据清空
- dm.DepartmentData = map[string][]map[string]interface{}{}
- }
- }
- // 组装保存数据
- func (c *Customer) AssembelAndSaveData() {
- log.Println("开始组装保存数据...")
- defer qu.Catch()
- ch := make(chan bool, 10)
- wg := &sync.WaitGroup{}
- n := 0
- if (c.PushModel == 0 || c.PushModel == 1) && len(c.SaveDataMap) > 0 {
- for _, tmp := range c.SaveDataMap {
- wg.Add(1)
- ch <- true
- go func(data map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave)
- if !ok {
- n--
- }
- }(tmp)
- n++
- if n%500 == 0 {
- log.Println("Current:", n)
- }
- }
- wg.Wait()
- } else if c.PushModel == 2 || c.PushModel == 3 {
- for _, dm := range c.Departments {
- if len(dm.SaveDataMap) > 0 {
- for _, tmp := range dm.SaveDataMap {
- wg.Add(1)
- ch <- true
- go func(data map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave)
- if !ok {
- n--
- }
- }(tmp)
- n++
- if n%500 == 0 {
- log.Println("Current:", n)
- }
- }
- }
- }
- wg.Wait()
- } else if c.PushModel == 4 && len(c.SaveDataArr) > 0 {
- for _, tmp := range c.SaveDataArr {
- wg.Add(1)
- ch <- true
- go func(data map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- ok := AssembelSave(data, c.IsSearchHosp, c.IsSearchEnps, c.AppId, c.DataSave)
- if !ok {
- n--
- }
- }(tmp)
- n++
- if n%500 == 0 {
- log.Println("Current:", n)
- }
- }
- wg.Wait()
- }
- log.Println("数据保存完毕... Save Number:", n)
- }
- // 获取用户所有规则
- func (d *Department) GetSearchRules(cid, stype string, idRange, idRanges bson.M) {
- defer qu.Catch()
- searchRules, _ := MgoTag.Find("euserdepartrule", map[string]interface{}{"s_userid": cid, "s_departid": d.ID, "i_isuse": 1, "b_delete": false}, nil, nil)
- if len(searchRules) > 0 {
- for _, sr := range searchRules {
- SR := &SearchRule{}
- SR.Fields = make(map[string]interface{})
- id := mgoutil.BsonTOStringId(sr["_id"])
- name := qu.ObjToString(sr["s_name"])
- SR.ID = id
- SR.Name = name
- SR.CustomerID = cid
- SR.DepartmentID = d.ID
- SR.ExtFieldType = qu.IntAll(sr["i_extfieldstype"])
- //SR.RuleData = &sync.Map{}
- esquery := qu.ObjToString(sr["s_esquery"])
- if IsNewSql != 0 {
- esquery = qu.ObjToString(sr["s_esquery_search"])
- }
- clearKey := qu.ObjToString(sr["s_globalclearkey"])
- clearKeyMatch := qu.ObjToString(sr["s_globalclearkeymatch"])
- //获取es
- if stype == "history" {
- SR.EsQuery = esquery
- } else {
- SR.GetEs(d.Name, esquery, idRange, idRanges)
- }
- //获取关键词和附加词
- if o_rules, ok := sr["o_rules"].(primitive.A); ok && len(o_rules) > 0 {
- SR.GetKeyAddWord(o_rules)
- for _, v := range o_rules {
- orule, _ := v.(map[string]interface{})
- SR.Maths = append(SR.Maths, map[string]string{
- "s_matchkey": qu.ObjToString(orule["s_matchkey"]),
- "s_keymatch": qu.ObjToString(orule["s_keymatch"]),
- "s_group": qu.ObjToString(orule["s_group"]),
- })
- }
- }
- //获取全局清理词
- SR.GetClearWord(clearKey, clearKeyMatch)
- d.Rules = append(d.Rules, SR)
- }
- }
- }
- // 获取转换后的es语句
- func (sr *SearchRule) GetEs(department, esquery string, tmpRange, tmpRanges bson.M) {
- defer qu.Catch()
- query := map[string]*QueryObjecct{}
- if json.Unmarshal([]byte(esquery), &query) == nil {
- qb := query["query"]
- filter := qb.Bool
- if filter != nil { //有filter
- index := -1 //记录range的位置
- for i, m := range filter.Must {
- mMap := m.(map[string]interface{})
- if esRange, ok := mMap["range"].(map[string]interface{}); ok && esRange != nil { //有range
- if esRange["publishtime"] != nil {
- index = i
- break
- }
- }
- }
- if index > -1 {
- filter.Must[index] = tmpRange
- } else {
- filter.Must = append(filter.Must, tmpRange)
- }
- if len(tmpRanges) > 0 {
- filter.Must = append(filter.Must, tmpRanges)
- }
- } else { //无filter则添加
- bo := &BoolObject{}
- bo.Must = append(bo.Must, tmpRange)
- if len(tmpRanges) > 0 {
- bo.Must = append(bo.Must, tmpRanges)
- }
- // tmpFilter := &Filter{
- // Bool: bo,
- // }
- qb.Bool = bo
- }
- strquery, err := json.Marshal(query)
- if err == nil {
- sr.EsQuery = string(strquery)
- } else {
- log.Println("Department:", department, "Es Error,Tag ID:", sr.ID)
- }
- } else {
- log.Println("Department:", department, "Es Error,Tag ID:", sr.ID)
- }
- }
- // 全局清理词处理
- func (sr *SearchRule) GetClearWord(key, match string) {
- defer qu.Catch()
- //匹配方式
- cwmArr := []string{}
- for _, mv := range strings.Split(match, ",") {
- if field := qu.ObjToString(MatchType[mv]); field != "" {
- cwmArr = append(cwmArr, field)
- sr.Fields[field] = true
- }
- }
- //清理词正则
- cwkArr := []*regexp.Regexp{}
- for _, kv := range strings.Split(key, ",") {
- if LetterCase.MatchString(kv) { //字母转大写
- kv = strings.ToUpper(kv)
- }
- reg := regexp.MustCompile(kv)
- cwkArr = append(cwkArr, reg)
- }
- cw := &ClearWord{
- KeyReg: cwkArr,
- MatchType: cwmArr,
- }
- sr.GCW = cw
- }
- // 关键词、附加词处理
- func (sr *SearchRule) GetKeyAddWord(o_rules primitive.A) {
- defer qu.Catch()
- kw, aw, _, _, _ := GetNotkeyAndKeyAddWord(o_rules, sr.Fields, false)
- sr.KW = kw
- sr.AW = aw
- //sr.Fields = fields
- }
- // 排除词、关键词、附加词处理
- func (tr *TagRule) GetKeyAddNotKeyWord(o_list primitive.A) {
- defer qu.Catch()
- kw, aw, nkw, tagnames, buyerclass := GetNotkeyAndKeyAddWord(o_list, tr.Fields, true)
- tr.NW = nkw
- tr.KW = kw
- tr.AW = aw
- //tr.Fields = fields
- tr.TagNames = tagnames
- tr.BuyerClass = buyerclass
- }
- func GetMactchKeys(match []map[string]string, data map[string]interface{}) string {
- keyWord := []string{}
- for _, keys := range match {
- types := keys["s_keymatch"]
- key := keys["s_matchkey"]
- if strings.Contains(types, "1") {
- title := qu.ObjToString(data["title"])
- keyWord = KeyWordToDatas(types, title, key, keyWord)
- }
- if strings.Contains(types, "2") {
- detail := qu.ObjToString(data["detail"])
- keyWord = KeyWordToDatas(types, detail, key, keyWord)
- }
- if strings.Contains(types, "3") {
- purchasing := qu.ObjToString(data["purchasing"])
- keyWord = KeyWordToDatas(types, purchasing, key, keyWord)
- }
- if strings.Contains(types, "4") {
- filetext := qu.ObjToString(data["filetext"])
- keyWord = KeyWordToDatas(types, filetext, key, keyWord)
- }
- if strings.Contains(types, "5") {
- projectname := qu.ObjToString(data["projectname"])
- keyWord = KeyWordToDatas(types, projectname, key, keyWord)
- }
- if strings.Contains(types, "6") || strings.Contains(types, "8") {
- buyer := qu.ObjToString(data["buyer"])
- keyWord = KeyWordToDatas(types, buyer, key, keyWord)
- }
- if strings.Contains(types, "7") || strings.Contains(types, "9") {
- winner := qu.ObjToString(data["s_winner"])
- keyWord = KeyWordToDatas(types, winner, key, keyWord)
- }
- }
- keyMap := map[string]bool{}
- keyArr := []string{}
- for _, key := range keyWord {
- keyMap[key] = true
- }
- for k, _ := range keyMap {
- keyArr = append(keyArr, k)
- }
- return strings.Join(keyArr, ",")
- }
// KeyWordToDatas appends to keyWord every keyword from the comma-separated
// list key that occurs in item (compared case-insensitively via upper-casing)
// and returns the extended slice. A keyword of the form "a&&b" matches only
// when all of its non-empty parts occur in item; it is appended unchanged.
//
// Empty keywords (for example from a trailing comma or an empty key) and
// "&&" keywords without any non-empty part are skipped; they used to match
// every item. types is accepted for interface compatibility and is not used.
func KeyWordToDatas(types, item, key string, keyWord []string) []string {
	upperItem := strings.ToUpper(item) // upper-case once, not once per keyword
	for _, mk := range strings.Split(key, ",") {
		if mk == "" {
			continue
		}
		if !strings.Contains(mk, "&&") {
			if strings.Contains(upperItem, strings.ToUpper(mk)) {
				keyWord = append(keyWord, mk)
			}
			continue
		}
		parts, matched := 0, true
		for _, part := range strings.Split(mk, "&&") {
			if part == "" {
				continue
			}
			parts++
			if !strings.Contains(upperItem, strings.ToUpper(part)) {
				matched = false
				break
			}
		}
		if matched && parts > 0 {
			keyWord = append(keyWord, mk)
		}
	}
	return keyWord
}
- func exactMatchs(rule, title, detail string, match []map[string]string, nameArr []string) bool {
- realdata := map[string]float64{}
- for _, v := range nameArr {
- realdata["title_"+v] = 0
- realdata["content_"+v] = 0
- }
- mapping := map[string]string{
- " and ": " && ",
- " or ": " || ",
- " not ": " ! ",
- }
- for k, v := range mapping {
- rule = strings.ReplaceAll(rule, k, v)
- }
- //可以将编译后的表达式,存放在缓存中
- program, err := expr.Compile(rule, expr.Env(realdata))
- if err != nil {
- log.Println("表达式错误 ", err)
- return false
- }
- for _, keys := range match {
- types := keys["s_keymatch"] // 1,2,3
- key := keys["s_matchkey"] //软件,工程
- group := keys["s_group"]
- if strings.Contains(types, "1") {
- for _, v := range strings.Split(key, ",") {
- if strings.Contains(strings.ToUpper(title), strings.ToUpper(v)) {
- realdata["title_"+group] = realdata["title_"+group] + 1
- }
- }
- }
- if strings.Contains(types, "2") {
- for _, v := range strings.Split(key, ",") {
- if strings.Contains(strings.ToUpper(detail), strings.ToUpper(v)) {
- realdata["content_"+group] = realdata["content_"+group] + 1
- }
- }
- }
- }
- log.Println("匹配结果 ", realdata)
- output, err := expr.Run(program, realdata)
- if err != nil {
- log.Println("表达式执行错误 ", err)
- return false
- }
- return output.(bool)
- }
|