12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811 |
- package main
- import (
- "fmt"
- "log"
- qu "qfw/util"
- "regexp"
- "strings"
- "sync"
- "time"
- "github.com/donnie4w/go-logger/logger"
- "go.mongodb.org/mongo-driver/bson/primitive"
- "gopkg.in/mgo.v2/bson"
- )
// Package-level patterns and lookup tables used by the keyword matchers.
var (
	// LetterCase matches any single ASCII letter; it is used to detect
	// whether a configured word contains Latin letters.
	LetterCase = regexp.MustCompile("[A-Za-z]")

	// LetterCase2 matches any single ASCII letter or digit; CheckLetter uses
	// it to inspect the bytes adjacent to a match.
	LetterCase2 = regexp.MustCompile("[A-Za-z0-9]")

	// FilteReg matches runs of bracket characters that ProcessData strips
	// from the text before matching.
	FilteReg = regexp.MustCompile("[()(){}]*")

	// MatchType maps a match-mode code (as stored in the rule configuration)
	// to the name of the document field that the words are matched against.
	MatchType = map[string]interface{}{
		"1": "title",
		"2": "detail",
		"3": "purchasing",
		"4": "filetext",
		"5": "projectname",
		"6": "buyer",
		"7": "s_winner",
	}
)
- //加载排除词、附加词、关键词
- func GetNotkeyAndKeyAddWord(list primitive.A, fieldMap map[string]interface{}, hasNotKey bool) (kws []*KeyWord, aws []*AddWord, nkws []*NotKeyWord, tagnames []string) {
- defer qu.Catch()
- for _, rules := range list {
- ruleMap := rules.(map[string]interface{})
- if hasNotKey { //是否处理排除词
- //排除词匹配方式
- nkm := qu.ObjToString(ruleMap["s_notkeymatch"])
- nkmArr := []string{}
- for _, nv := range strings.Split(nkm, ",") {
- if field := qu.ObjToString(MatchType[nv]); field != "" {
- nkmArr = append(nkmArr, field)
- fieldMap[field] = true
- }
- }
- //排除词
- nkw := &NotKeyWord{}
- nkw.MatchType = nkmArr
- notkeyword := qu.ObjToString(ruleMap["s_notkey"])
- nkw_commaArr := strings.Split(notkeyword, ",")
- nkw_reg := &Reg{}
- for _, comma := range nkw_commaArr {
- nkw_addArr := strings.Split(comma, "&&")
- if len(nkw_addArr) == 1 { //,
- tmp_nkw := nkw_addArr[0]
- if tmp_nkw != "" {
- cr := &CommonReg{}
- cr.CsVal = tmp_nkw //记录原值
- if LetterCase.MatchString(tmp_nkw) { //判断附加词中是否有英文
- tmp_nkw = strings.ToUpper(tmp_nkw) //附加词中有英文全部转为大写
- cr.IsLetter = true //含字母
- }
- cr.CrVal = regexp.MustCompile(tmp_nkw) //记录reg值
- nkw_reg.CReg = append(nkw_reg.CReg, cr)
- }
- } else { //&&
- arp := &AndRegPre{}
- arp.CsVal = comma //记录原值
- for _, and := range nkw_addArr {
- if and != "" {
- ar := &AndReg{}
- if LetterCase.MatchString(and) { //判断附加词中是否有英文
- and = strings.ToUpper(and) //附加词中有英文全部转为大写
- ar.IsLetter = true //含字母
- }
- ar.CrVal = regexp.MustCompile(and)
- arp.AndRegKid = append(arp.AndRegKid, ar)
- }
- }
- nkw_reg.AReg = append(nkw_reg.AReg, arp)
- }
- }
- nkw.KeyReg = nkw_reg
- nkws = append(nkws, nkw)
- //获取每组关键词的标签名称
- tagname := qu.ObjToString(ruleMap["s_tagname"])
- tagnames = append(tagnames, tagname)
- }
- //附加词匹配方式
- awm := qu.ObjToString(ruleMap["s_addkeymatch"])
- awmArr := []string{}
- for _, av := range strings.Split(awm, ",") {
- if field := qu.ObjToString(MatchType[av]); field != "" {
- awmArr = append(awmArr, field)
- fieldMap[field] = true
- }
- }
- //附加词
- aw := &AddWord{}
- aw.MatchType = awmArr
- aw_reg := &Reg{}
- addword := qu.ObjToString(ruleMap["s_addkey"])
- aw_commaArr := strings.Split(addword, ",")
- for _, comma := range aw_commaArr {
- aw_addArr := strings.Split(comma, "&&")
- if len(aw_addArr) == 1 { //,
- tmp_aw := aw_addArr[0]
- if tmp_aw != "" {
- cr := &CommonReg{}
- cr.CsVal = tmp_aw //记录原值
- if LetterCase.MatchString(tmp_aw) { //判断附加词中是否有英文
- tmp_aw = strings.ToUpper(tmp_aw) //附加词中有英文全部转为大写
- cr.IsLetter = true //含字母
- }
- cr.CrVal = regexp.MustCompile(tmp_aw) //记录reg值
- aw_reg.CReg = append(aw_reg.CReg, cr)
- }
- } else { //&&
- arp := &AndRegPre{}
- arp.CsVal = comma //记录原值
- for _, and := range aw_addArr {
- if and != "" {
- ar := &AndReg{}
- if LetterCase.MatchString(and) { //判断附加词中是否有英文
- and = strings.ToUpper(and) //附加词中有英文全部转为大写
- ar.IsLetter = true //含字母
- }
- ar.CrVal = regexp.MustCompile(and)
- arp.AndRegKid = append(arp.AndRegKid, ar)
- }
- }
- aw_reg.AReg = append(aw_reg.AReg, arp)
- }
- }
- aw.KeyReg = aw_reg
- aws = append(aws, aw)
- //关键词匹配方式
- kwm := qu.ObjToString(ruleMap["s_keymatch"])
- kwmArr := []string{}
- for _, kv := range strings.Split(kwm, ",") {
- if field := qu.ObjToString(MatchType[kv]); field != "" {
- kwmArr = append(kwmArr, field)
- fieldMap[field] = true
- }
- }
- //关键词
- kw := &KeyWord{}
- kw.MatchType = kwmArr
- kw_reg := &Reg{}
- keyword := qu.ObjToString(ruleMap["s_matchkey"])
- kw_commaArr := strings.Split(keyword, ",")
- for _, comma := range kw_commaArr {
- kw_addArr := strings.Split(comma, "&&")
- if len(kw_addArr) == 1 { //,
- tmp_kw := kw_addArr[0]
- if tmp_kw != "" {
- cr := &CommonReg{}
- cr.CsVal = tmp_kw //记录原值
- if LetterCase.MatchString(tmp_kw) {
- tmp_kw = strings.ToUpper(tmp_kw)
- cr.IsLetter = true //含字母
- }
- cr.CrVal = regexp.MustCompile(tmp_kw) //记录reg值
- kw_reg.CReg = append(kw_reg.CReg, cr)
- }
- } else { //&&
- arp := &AndRegPre{}
- arp.CsVal = comma //记录原值
- for _, and := range kw_addArr {
- if and != "" {
- ar := &AndReg{}
- if LetterCase.MatchString(and) {
- and = strings.ToUpper(and) //附加词中有英文全部转为大写
- ar.IsLetter = true //含字母
- }
- ar.CrVal = regexp.MustCompile(and)
- arp.AndRegKid = append(arp.AndRegKid, ar)
- }
- }
- kw_reg.AReg = append(kw_reg.AReg, arp)
- }
- }
- kw.KeyReg = kw_reg
- kws = append(kws, kw)
- }
- return
- }
- //根据时间获取起始和终止ID范围
- func GetIdRange() (bson.M, bool) {
- defer qu.Catch()
- now := time.Now().Unix()
- for { //当前时间一直向前推半小时,直到取到数据
- now = now - 600 //10分钟前
- endTime := time.Unix(now, 0)
- endId := bson.NewObjectIdWithTime(endTime).Hex()
- if endId > LatestId {
- esquery := `{"query": {"bool": {"must": [{"range": {"id": {"gt": "` + LatestId + `" , "lte": "` + endId + `"}}}]}}, "sort": [{"comeintime": "desc"}]}`
- if Es.Count(Index, Itype, esquery) > 0 { //有数据返回id区间
- list := Es.Get(Index, Itype, esquery)
- tmpRange := bson.M{
- "range": bson.M{
- "id": bson.M{
- "lte": endId,
- "gt": LatestId,
- },
- },
- }
- LatestId = qu.ObjToString((*list)[0]["_id"])
- return tmpRange, true
- }
- } else { //结束id不大于起始id 退出
- logger.Debug("Search End ID Range Error. Sid:", LatestId, "Eid:", endId)
- break
- }
- }
- return bson.M{}, false
- // now := time.Now()
- // end := now.Unix() - int64(60*now.Minute()) - int64(now.Second())
- // start := end - TaskTime*3600
- // endTime := time.Unix(end, 0)
- // startTime := time.Unix(start, 0)
- // eid := bson.NewObjectIdWithTime(endTime).Hex()
- // sid := bson.NewObjectIdWithTime(startTime).Hex()
- // query := bson.M{
- // "_id": bson.M{
- // "$gt": mongodb.StringTOBsonId(LatestId),
- // },
- // }
- // sort := bson.M{
- // "_id": -1,
- // }
- // fields := bson.M{
- // "_id": 1,
- // }
- // //查抽取表最后一个id
- // extData, err := MgoExt.FindByLimit(ExtColl, query, sort, fields, 0, 1)
- // if len(extData) == 1 && err == nil {
- // endId := mongodb.BsonTOStringId(extData[0]["_id"])
- // if endId > LatestId {
- // tmpRange := bson.M{
- // "range": bson.M{
- // "id": bson.M{
- // "lte": endId,
- // "gt": LatestId,
- // },
- // },
- // }
- // LatestId = endId
- // return tmpRange, true
- // } else {
- // logger.Debug("ID Range Error,Start ID:", LatestId, "End ID:", endId)
- // return bson.M{}, false
- // }
- // }
- // logger.Debug("Search End ID No Data", query, "Error:", err)
- }
- //处理文本
- func ProcessData(text string) string {
- defer qu.Catch()
- text = strings.ToUpper(text) //文本中的英文全转为大写
- text = FilteReg.ReplaceAllString(text, "") //去除一些特殊符号
- return text
- }
- //校验字母
- func CheckLetter(text string, reg *regexp.Regexp, indexArr [][]int) (flag bool) {
- defer qu.Catch()
- for _, tmpArr := range indexArr {
- sIndex := tmpArr[0]
- eIndex := tmpArr[1]
- sbyte := ""
- ebyte := ""
- //log.Println("---", sIndex, eIndex)
- if sIndex != 0 {
- sbyte = text[sIndex-1 : sIndex]
- if eIndex != len(text) { //BAIB
- ebyte = text[eIndex : eIndex+1]
- } /*else { //BAI
- }*/
- } else {
- if eIndex != len(text) { //AIB
- ebyte = text[eIndex : eIndex+1]
- } /*else { //AI
- }*/
- }
- //log.Println("sssss", "s:", sbyte, "e:", ebyte, LetterCase2.Match([]byte(sbyte)), LetterCase2.Match([]byte(ebyte)))
- if !LetterCase2.Match([]byte(sbyte)) && !LetterCase2.Match([]byte(ebyte)) {
- flag = true
- break
- }
- }
- return
- }
- //匹配
- func RegMatch(fieldText map[string]interface{}, matchType []string, matchReg *Reg, matchKey map[string]bool, matchKeyType map[string]bool, goon, isAddWord bool) (match bool) {
- defer qu.Catch()
- if len(matchType) == 0 && isAddWord { //特殊处理附加词为空的情况
- match = true
- return
- }
- for _, mt := range matchType {
- if text := qu.ObjToString(fieldText[mt]); text != "" {
- for _, cr := range matchReg.CReg { //逗号分隔,任意一个匹配表示匹配成功
- if goon && matchKey[cr.CsVal] { //matchkey已存在不在匹配
- continue
- }
- if indexArr := cr.CrVal.FindAllStringIndex(text, -1); len(indexArr) > 0 { //匹配成功
- if !cr.IsLetter { //reg无字母
- if goon {
- matchKey[cr.CsVal] = true
- matchKeyType[mt] = true
- match = true
- } else {
- match = true
- return
- }
- } else if cr.IsLetter && CheckLetter(text, cr.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI){//
- if goon {
- matchKey[cr.CsVal] = true
- matchKeyType[mt] = true
- match = true
- } else {
- match = true
- return
- }
- }
- }
- }
- for _, ar := range matchReg.AReg { //&&分割,所有匹配表示匹配成功
- if goon && matchKey[ar.CsVal] {
- continue
- }
- IsAregMatch := false
- for n, arc := range ar.AndRegKid { //ar.AndRegKid若有值必不小于2
- if indexArr := arc.CrVal.FindAllStringIndex(text, -1); len(indexArr) < 1 { //匹配失败(ar.AndRegKid中任意一个未匹配则失败)
- break
- } else { //匹配成功,判断字母
- if arc.IsLetter && !CheckLetter(text, arc.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI)
- break
- }
- }
- if n == len(ar.AndRegKid)-1 {
- IsAregMatch = true
- }
- }
- if IsAregMatch {
- if goon {
- matchKey[ar.CsVal] = true
- matchKeyType[mt] = true
- match = true
- } else {
- match = true
- return
- }
- }
- }
- }
- }
- return
- }
// MapDataToArr returns the keys of tmpMap as a slice. The order follows map
// iteration and is therefore unspecified; an empty map yields a nil slice.
func MapDataToArr(tmpMap map[string]bool) (tmpArr []string) {
	for key := range tmpMap {
		tmpArr = append(tmpArr, key)
	}
	return tmpArr
}
- //合并数据
- func MergeData(history, tmp map[string]interface{}, isTagRule, isDepartRmvRep bool) {
- //matchkey、matchtype、ruleid均非空
- matchkey1 := qu.ObjToString(history["matchkey"])
- matchkey2 := qu.ObjToString(tmp["matchkey"])
- history["matchkey"] = MergeField(matchkey1, matchkey2)
- matchkeytype1 := qu.ObjToString(history["matchtype"])
- matchkeytype2 := qu.ObjToString(tmp["matchtype"])
- history["matchtype"] = MergeField(matchkeytype1, matchkeytype2)
- ruleid1 := qu.ObjToString(history["ruleid"])
- ruleid2 := qu.ObjToString(tmp["ruleid"])
- history["ruleid"] = MergeField(ruleid1, ruleid2)
- rulename1 := qu.ObjToString(history["rulename"])
- rulename2 := qu.ObjToString(tmp["rulename"])
- history["rulename"] = MergeField(rulename1, rulename2)
- if isTagRule { //标签模式 tagname、tagid合并
- tagname1 := qu.ObjToString(history["tagname"])
- tagname2 := qu.ObjToString(tmp["tagname"])
- if tagNameResult := MergeField(tagname1, tagname2); tagNameResult != "" {
- history["tagname"] = tagNameResult
- }
- tagid1 := qu.ObjToString(history["tagid"])
- tagid2 := qu.ObjToString(tmp["tagid"])
- if tagIdResult := MergeField(tagid1, tagid2); tagIdResult != "" {
- history["tagid"] = tagIdResult
- }
- }
- if isDepartRmvRep { //全局模式 部门合并
- departname1 := qu.ObjToString(history["departname"])
- departname2 := qu.ObjToString(tmp["departname"])
- history["departname"] = MergeField(departname1, departname2)
- departid1 := qu.ObjToString(history["departid"])
- departid2 := qu.ObjToString(tmp["departid"])
- history["departid"] = MergeField(departid1, departid2)
- }
- }
// MergeField merges two comma-separated lists. If either argument is empty
// the other one is returned unchanged. Otherwise the result is str1 followed
// by every element of str2 that does not already occur in the result, in
// order of first appearance, so an element repeated inside str2 is added only
// once.
func MergeField(str1, str2 string) string {
	if str1 == "" {
		return str2
	}
	if str2 == "" {
		return str1
	}
	merged := strings.Split(str1, ",")
	seen := make(map[string]struct{}, len(merged))
	for _, v := range merged {
		seen[v] = struct{}{}
	}
	for _, v := range strings.Split(str2, ",") {
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{} // also guards against repeats within str2
		merged = append(merged, v)
	}
	return strings.Join(merged, ",")
}
- //补充信息并保存
- func AssembelSave(tmp map[string]interface{}, IsSearchHosp, IsSearchEnps bool, appid string) bool {
- if IsSearchHosp { //医院信息
- SearchHospInfo(tmp)
- }
- if IsSearchEnps { //企业信息
- SearchEnterpriseInfo(tmp)
- }
- tmp["createtime"] = time.Now().Unix()
- id := qu.ObjToString(tmp["id"])
- tmp["jybxhref"] = `https://www.jianyu360.com/article/content/` + qu.CommonEncodeArticle("content", id) + `.html`
- if publishtime, ok := tmp["publishtime"].(float64); ok && publishtime > 0 {
- tmp["publishtime"] = qu.Int64All(publishtime)
- }
- if bidopentime, ok := tmp["bidopentime"].(float64); ok && bidopentime > 0 {
- tmp["bidopentime"] = qu.Int64All(bidopentime)
- }
- //
- if appid == "jyOh1XQgUJBQ5bTUlKCyZ1" {
- projectId := GetProjectId(id)
- if projectId != "" {
- tmp["projectId"] = projectId
- if _, ok := projectIdMap.Load(projectId); ok {
- MgoSave.Save(SaveCollProject, tmp)
- return false
- }
- projectIdMap.Store(projectId, true)
- }
- }
- //
- MgoSaveCache <- tmp
- return true
- }
- //查询第三方医院等级信息和招标客户的社会征信代码
- func SearchHospInfo(tmp map[string]interface{}) {
- if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" { //buyer存在
- //医院等级
- hospData := MgoBuyer.FindOne(HospColl, bson.M{"name": buyer})
- if hospData != nil && len(hospData) > 0 {
- if rank := qu.ObjToString(hospData["rank"]); rank != "" {
- tmp["rank"] = hospData["rank"]
- }
- }
- //招标客户的社会征信代码
- buyerEntData := MgoBuyer.FindOne(BuyerEntColl, bson.M{"company_name": buyer})
- if buyerEntData != nil && len(buyerEntData) > 0 {
- if credit_no := qu.ObjToString(buyerEntData["credit_no"]); credit_no != "" {
- tmp["buyer_credit_no"] = credit_no
- }
- }
- }
- }
- //查询第三方中标企业信息
- func SearchEnterpriseInfo(tmp map[string]interface{}) {
- if s_winner := qu.ObjToString(tmp["s_winner"]); s_winner != "" { //buyer存在
- winner := strings.Split(s_winner, ",")[0] //取第一个中标单位
- data := MgoEnps.FindOne(EnpsColl, bson.M{"company_name": winner})
- if data != nil && len(data) > 0 {
- //中标单位联系人
- if legal_person := qu.ObjToString(data["legal_person"]); legal_person != "" {
- tmp["legal_person"] = legal_person
- }
- //中标单位的注册地址
- if company_address := qu.ObjToString(data["company_address"]); company_address != "" {
- tmp["company_address"] = company_address
- }
- //注册资金"capital" : "324 万","capital" : 124.8,
- if capital := qu.ObjToString(data["capital"]); capital != "" {
- tmp["capital"] = capital
- //if capitalStr, ok := data["capital"].(string); ok && capitalStr != "" {
- // if capital := ObjToMoney(capitalStr); capital != 0 { //金额转换
- // tmp["capital"] = capital
- // }
- //} else if capitalFloat, ok := data["capital"].(float64); ok && capitalFloat != 0 {
- // tmp["capital"] = capitalFloat
- //}
- }
- //注册时间"establish_date" : ISODate("1949-10-01T00:00:00.000+0000")
- if data["establish_date"] != nil {
- if establish_date, ok := data["establish_date"].(primitive.DateTime); ok {
- t := establish_date.Time()
- tmp["establish_date"] = qu.FormatDate(&t, qu.Date_Short_Layout)
- }
- }
- //经营范围
- if business_scope := qu.ObjToString(data["business_scope"]); business_scope != "" {
- tmp["business_scope"] = business_scope
- }
- //中标单位的社会征信代码
- if credit_no := qu.ObjToString(data["credit_no"]); credit_no != "" {
- tmp["winner_credit_no"] = credit_no
- }
- //股东名单
- if partners, ok := data["partners"].(primitive.A); ok && len(partners) > 0 {
- stock_name_arr := []string{}
- for _, partner := range partners {
- p := partner.(map[string]interface{})
- if stock_name := qu.ObjToString(p["stock_name"]); stock_name != "" {
- stock_name_arr = append(stock_name_arr, stock_name)
- }
- }
- if len(stock_name_arr) > 0 {
- tmp["stock_name"] = strings.Join(stock_name_arr, ",")
- }
- }
- //从最新年报中获取 中标单位联系电话、中标单位邮箱
- if annual_reports, ok := data["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 {
- if anreport, ok := annual_reports[0].(map[string]interface{}); ok { //最新年报
- if email := qu.ObjToString(anreport["company_email"]); email != "" {
- tmp["company_email"] = email
- }
- if phone := qu.ObjToString(anreport["company_phone"]); phone != "" {
- tmp["company_phone"] = phone
- }
- }
- }
- }
- }
- }
- //数据存库
- func SaveMgo() {
- log.Println("Mgo Save...")
- arru := make([]map[string]interface{}, 500)
- indexu := 0
- for {
- select {
- case v := <-MgoSaveCache:
- arru[indexu] = v
- indexu++
- if indexu == 500 {
- SP <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-SP
- }()
- MgoSave.SaveBulk(SaveColl, arru...)
- }(arru)
- arru = make([]map[string]interface{}, 500)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- SP <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-SP
- }()
- MgoSave.SaveBulk(SaveColl, arru...)
- }(arru[:indexu])
- arru = make([]map[string]interface{}, 500)
- indexu = 0
- }
- }
- }
- }
- //打印初始化信息
- func PrintLog(cus *Customer) {
- qu.Debug("----------------------打标签规则----------------------------")
- for i, tr := range cus.TagRules {
- qu.Debug("tagrules:", i, tr.ID, tr.Name, tr.CustomerId, tr.DepartRuleIds, tr.Fields)
- for j, kw := range tr.KW {
- qu.Debug("kw_matchkey---", kw.MatchType, len(kw.MatchType))
- qu.Debug("------------CReg--------------")
- for ck1, cr := range kw.KeyReg.CReg {
- qu.Debug("CR---", ck1, cr.CrVal, cr.CsVal, cr.IsLetter)
- }
- qu.Debug("------------AReg--------------")
- for ck1, ar := range kw.KeyReg.AReg {
- qu.Debug("AR---", ck1, ar.CsVal)
- for _, arc := range ar.AndRegKid {
- qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
- }
- }
- qu.Debug("+++++++++++++++++++++AW++++++++++++++++++++++")
- qu.Debug("aw_matchkey---", tr.AW[j].MatchType, len(tr.AW[j].MatchType))
- qu.Debug("------------CReg--------------")
- for ck2, cr := range tr.AW[j].KeyReg.CReg {
- qu.Debug("CR---", ck2, cr.CrVal, cr.CsVal, cr.IsLetter)
- }
- qu.Debug("------------AReg--------------")
- for ck2, ar := range tr.AW[j].KeyReg.AReg {
- qu.Debug("AR---", ck2, ar.CsVal)
- for _, arc := range ar.AndRegKid {
- qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
- }
- }
- qu.Debug("++++++++++++++++++++NW+++++++++++++++++++++++")
- qu.Debug("nw_matchkey---", tr.NW[j].MatchType, len(tr.NW[j].MatchType))
- qu.Debug("------------CReg--------------")
- for ck1, cr := range tr.NW[j].KeyReg.CReg {
- qu.Debug("CR---", ck1, cr.CrVal, cr.CsVal, cr.IsLetter)
- }
- qu.Debug("------------AReg--------------")
- for ck1, ar := range tr.NW[j].KeyReg.AReg {
- qu.Debug("AR---", ck1, ar.CsVal)
- for _, arc := range ar.AndRegKid {
- qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
- }
- }
- qu.Debug("tagname-------------", tr.TagNames[j])
- qu.Debug("-------------------------------------------------")
- }
- }
- qu.Debug("----------------------查询规则----------------------------")
- for i, dm := range cus.Departments {
- qu.Debug("deparment:", i, dm.ID, dm.Name, dm.CustomerID)
- for j, sr := range dm.Rules {
- qu.Debug("ck---", j, sr.ID, sr.Name, sr.CustomerID, sr.DepartmentID, sr.GCW.KeyReg, len(sr.GCW.KeyReg), sr.GCW.MatchType, len(sr.GCW.MatchType), sr.Fields, len(sr.Fields), sr.EsQuery)
- for j, kw := range sr.KW {
- qu.Debug("kw_matchkey---", kw.MatchType, len(kw.MatchType))
- qu.Debug("------------CReg--------------")
- for ck1, cr := range kw.KeyReg.CReg {
- qu.Debug("CR---", ck1, cr.CrVal, cr.CsVal, cr.IsLetter)
- }
- qu.Debug("------------AReg--------------")
- for ck1, ar := range kw.KeyReg.AReg {
- qu.Debug("AR---", ck1, ar.CsVal)
- for _, arc := range ar.AndRegKid {
- qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
- }
- }
- qu.Debug("+++++++++++++++++++++AW++++++++++++++++++++++")
- qu.Debug("aw_matchkey---", sr.AW[j].MatchType, len(sr.AW[j].MatchType))
- qu.Debug("------------CReg--------------")
- for ck2, cr := range sr.AW[j].KeyReg.CReg {
- qu.Debug("CR---", ck2, cr.CrVal, cr.CsVal, cr.IsLetter)
- }
- qu.Debug("------------AReg--------------")
- for ck2, ar := range sr.AW[j].KeyReg.AReg {
- qu.Debug("AR---", ck2, ar.CsVal)
- for _, arc := range ar.AndRegKid {
- qu.Debug("ARC---", arc.CrVal, arc.IsLetter)
- }
- }
- qu.Debug("--------------------------------------------------------------------------------------")
- }
- }
- }
- }
- //匹配
- func RegMatchTest(fieldText map[string]interface{}, matchType []string, matchReg *Reg, matchKey map[string]bool, matchKeyType map[string]bool, goon, isAddWord bool) (match bool) {
- defer qu.Catch()
- qu.Debug("matchType---", matchType)
- if len(matchType) == 0 && isAddWord { //特殊处理附加词为空的情况
- match = true
- return
- }
- for _, mt := range matchType {
- if text := qu.ObjToString(fieldText[mt]); text != "" {
- qu.Debug("匹配方式---", mt, "text---", text)
- qu.Debug("--------------开始查找逗号分隔----------------", len(matchReg.CReg))
- for i, cr := range matchReg.CReg { //逗号分隔,任意一个匹配表示匹配成功
- qu.Debug("iiii---", i, cr.CrVal, goon, matchKey)
- if goon && matchKey[cr.CsVal] { //matchkey已存在不在匹配
- continue
- }
- if indexArr := cr.CrVal.FindAllStringIndex(text, -1); len(indexArr) > 0 { //匹配成功
- if !cr.IsLetter { //reg无字母
- qu.Debug("goon---", goon)
- if goon {
- qu.Debug("key++++++++++ ++++++++++", cr.CsVal)
- matchKey[cr.CsVal] = true
- matchKeyType[mt] = true
- match = true
- } else {
- match = true
- return
- }
- } else if cr.IsLetter && CheckLetter(text, cr.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI){//
- qu.Debug("goon---", goon)
- if goon {
- qu.Debug("key++++++++++++++++++++", cr.CsVal)
- matchKey[cr.CsVal] = true
- matchKeyType[mt] = true
- match = true
- } else {
- match = true
- return
- }
- }
- }
- }
- qu.Debug("--------------开始查找&&分隔----------------", len(matchReg.AReg))
- for j, ar := range matchReg.AReg { //&&分割,所有匹配表示匹配成功
- qu.Debug("jjjj---", j, ar.CsVal, goon, matchKey)
- if goon && matchKey[ar.CsVal] {
- continue
- }
- IsAregMatch := false
- qu.Debug("ar.AndRegKid---", j, ar.AndRegKid, len(ar.AndRegKid))
- for n, arc := range ar.AndRegKid { //ar.AndRegKid若有值必不小于2
- qu.Debug("nnnn---", n, arc.CrVal, arc.IsLetter)
- if indexArr := arc.CrVal.FindAllStringIndex(text, -1); len(indexArr) < 1 { //匹配失败(ar.AndRegKid中任意一个未匹配则失败)
- break
- } else { //匹配成功,判断字母
- if arc.IsLetter && !CheckLetter(text, arc.CrVal, indexArr) { //reg有字母,判断是否是包含关系(AAAIBBB or AI)
- break
- }
- }
- if n == len(ar.AndRegKid)-1 {
- IsAregMatch = true
- }
- }
- qu.Debug("IsAregMatch---", IsAregMatch)
- if IsAregMatch {
- qu.Debug("goon---", goon)
- if goon {
- qu.Debug("key++++++++++++++++++++", ar.CsVal)
- matchKey[ar.CsVal] = true
- matchKeyType[mt] = true
- match = true
- } else {
- match = true
- return
- }
- }
- }
- }
- }
- return
- }
- //发布时间不在范围内(7天)不要这条数据
- func SkipData(tmp map[string]interface{}) bool {
- comeIn := qu.Int64All(tmp["comeintime"])
- if qu.Int64All(tmp["publishtime"]) > (comeIn - 7*24*60*60) {
- return true
- }
- return false
- }
- func GetProjectId(id string) string {
- query := `{"query": {"bool": {"must": [{"term": {"projectset.ids": "%s"}}],"must_not": [],"should": []}},"size": 1}`
- querys := fmt.Sprintf(query, id)
- projectId := ""
- data := Es.Get("projectset", "projectset", querys)
- if data != nil && *data != nil {
- projectId = qu.ObjToString((*data)[0]["id"])
- }
- return projectId
- }
- func InitProjectId() {
- session := MgoSave.GetMgoConn()
- count := 0
- defer func() {
- MgoSave.DestoryMongoConn(session)
- log.Printf("本次共取到%d个projectId\n", count)
- }()
- save := Sysconfig["save"].(map[string]interface{})
- query := map[string]interface{}{"appid": "jyOh1XQgUJBQ5bTUlKCyZ1"}
- field := map[string]interface{}{"projectId": 1}
- iter := session.DB(qu.ObjToString(save["db"])).C(SaveColl).Find(query).Select(field).Sort("_id").Iter()
- thisData := map[string]interface{}{}
- for {
- if !iter.Next(&thisData) {
- break
- }
- projectId := qu.ObjToString(thisData["projectId"])
- projectIdMap.Store(projectId, true)
- count++
- thisData = map[string]interface{}{}
- }
- }
|