package extract import ( "fmt" "jy/clear" db "jy/mongodbutil" "jy/pretreated" ju "jy/util" "log" qu "qfw/util" redis "qfw/util/redis" "regexp" "strconv" "strings" "sync" "time" "gopkg.in/mgo.v2/bson" ) var ( lock sync.RWMutex cut = ju.NewCut() //获取正文并清理 ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志 TaskList map[string]*ExtractTask //任务列表 saveLimit = 200 //抽取日志批量保存 PageSize = 5000 //查询分页 Fields = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"area":1,"city":1}` AuditFields = []string{} //需要审核的字段名称 ) //启动测试抽取 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool { defer qu.Catch() ext := &ExtractTask{} ext.Id = taskId ext.IsRun = true ext.InitTestTaskInfo(resultcoll, trackcoll) ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB) ext.InitRulePres() ext.InitRuleBacks() ext.InitRuleCore() ext.InitTag() ext.InitClearFn() //城市 ext.InitProvince() ext.InitCityAll() ext.InitCitySim() InitDFA() //质量审核 InitAuditRule() InitAuditClass() InitAuditRecogField() return RunExtractTestTask(ext, startId, num) } func IdTrans(startId string) bson.ObjectId { defer qu.Catch() return bson.ObjectIdHex(startId) } //开始测试任务抽取 func RunExtractTestTask(ext *ExtractTask, startId, num string) bool { n, _ := strconv.Atoi(num) id := IdTrans(startId) if id.Valid() { query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(startId)}} list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, n) for _, v := range *list { j := PreInfo(v) ext.TaskInfo.ProcessPool <- true go ext.ExtractProcess(j) } return true } else { return false } } //启动抽取 func StartExtractTaskId(taskId string) bool { isgo := false ext := TaskList[taskId] if ext == nil { ext = &ExtractTask{} ext.Id = taskId ext.InitTaskInfo() isgo = true } else { ext.Id = taskId ext.InitTaskInfo() } ext.TaskInfo.DB = db.MgoFactory(10, 30, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB) ext.InitRulePres() ext.InitRuleBacks() ext.InitRuleCore() ext.InitTag() ext.InitClearFn() //城市 ext.InitProvince() ext.InitCityAll() ext.InitCitySim() InitDFA() //质量审核 InitAuditRule() InitAuditClass() InitAuditRecogField() ext.IsRun = true if isgo { go RunExtractTask(taskId) } TaskList[taskId] = ext return true } //停止抽取 func StopExtractTaskId(taskId string) bool { ext := TaskList[taskId] if ext != nil { ext.IsRun = false TaskList[taskId] = ext } //更新task.s_extlastid db.Mgo.UpdateById("task", taskId, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`) return true } //开始抽取 func RunExtractTask(taskId string) { ext := TaskList[taskId] query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}} count := ext.TaskInfo.DB.Count(ext.TaskInfo.FromColl, query) pageNum := (count + PageSize - 1) / PageSize log.Printf("count=%d,pageNum=%d,query=%v", count, pageNum, query) for i := 0; i < pageNum; i++ { query = bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}} log.Printf("page=%d,query=%v", i+1, query) list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, PageSize) for _, v := range *list { log.Println(v["_id"]) if !ext.IsRun { break } j := PreInfo(v) ext.TaskInfo.ProcessPool <- true go ext.ExtractProcess(j) ext.TaskInfo.LastExtId = qu.BsonIdToSId(v["_id"]) } db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`) if !ext.IsRun { break } } //更新task.s_extlastid time.AfterFunc(1*time.Minute, func() { RunExtractTask(taskId) }) } //信息预处理 func PreInfo(doc map[string]interface{}) *ju.Job { detail := "" d1, _ := doc["detail"].(string) d2, _ := doc["contenthtml"].(string) if len(d1) >= len(d2) || d2 == "" { detail = d1 } else { detail = d2 } detail = ju.CutLableStr(detail) detail = cut.ClearHtml(detail) doc["detail"] = detail href := qu.ObjToString(doc["href"]) if strings.HasPrefix(href, "http://") { href = href[7:] } else if strings.HasPrefix(href, "https://") { href = href[8:] } pos := strings.Index(href, "/") if pos > 0 { href = href[:pos] } doc["domain"] = href toptype := qu.ObjToString(doc["toptype"]) if qu.ObjToString(doc["type"]) == "bid" { toptype = "结果" } if toptype == "" { toptype = "*" } j := &ju.Job{ SourceMid: qu.BsonIdToSId(doc["_id"]), Category: toptype, Content: qu.ObjToString(doc["detail"]), SpiderCode: qu.ObjToString(doc["spidercode"]), Domain: qu.ObjToString(doc["domain"]), Href: qu.ObjToString(doc["href"]), Title: qu.ObjToString(doc["title"]), Data: &doc, City: qu.ObjToString(doc["city"]), Province: qu.ObjToString(doc["area"]), Result: map[string][]*ju.ExtField{}, BuyerAddr: qu.ObjToString(doc["buyeraddr"]), } pretreated.AnalyStart(j) return j } //抽取 func (e *ExtractTask) ExtractProcess(j *ju.Job) { // for _, bl := range j.Block { // log.Println(bl.ColonKV.Kv) // } // for k, v := range j.BlockPackage { // //bs, _ := json.Marshal(v.TableKV) // log.Println(k, v.WinnerOrder) // } //log.Println("Winnerorder", j.Winnerorder) qu.Try(func() { doc := *j.Data //全局前置规则,结果覆盖doc属性 for _, v := range e.RulePres { doc = ExtRegPre(doc, j, v, e.TaskInfo) } //抽取规则 for _, vc := range e.RuleCores { tmp := ju.DeepCopy(doc).(map[string]interface{}) //是否进入逻辑 if !ju.Logic(vc.LuaLogic, tmp) { continue } //抽取-前置规则 for _, v := range vc.RulePres { tmp = ExtRegPre(tmp, j, v, e.TaskInfo) } //log.Println("抽取-前置规则", tmp) //抽取-规则 for _, v := range vc.RuleCores { ExtRegCore(vc.ExtFrom, tmp, j, v, e) } //log.Println("抽取-规则", tmp) //抽取-后置规则 for _, v := range vc.RuleBacks { ExtRegBack(j, v, e.TaskInfo) } //log.Println("抽取-后置规则", tmp) } //全局后置规则 for _, v := range e.RuleBacks { ExtRegBack(j, v, e.TaskInfo) } //函数清理 for key, val := range j.Result { for _, v := range val { data := clear.DoClearFn(e.ClearFn[key], []interface{}{v.Value, j.Content}) v.Value = data[0] } } PackageDetail(j, e) //处理分包信息 // bs, _ := json.Marshal(j.Result) // log.Println("抽取结果", j.Title, j.SourceMid, string(bs)) //分析抽取结果并保存 todo AnalysisSaveResult(j, e.TaskInfo) }, func(err interface{}) { log.Println(err) <-e.TaskInfo.ProcessPool }) <-e.TaskInfo.ProcessPool } //前置过滤 func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInfo) map[string]interface{} { before := ju.DeepCopy(doc).(map[string]interface{}) extinfo := map[string]interface{}{} if in.IsLua { lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText} if j != nil { lua.Block = j.Block } extinfo = lua.RunScript("pre") for k, v := range extinfo { //结果覆盖原doc doc[k] = v } AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志 } else { key := qu.If(in.Field == "", "detail", in.Field).(string) text := qu.ObjToString(doc[key]) extinfo[key] = in.RegPreBac.Reg.ReplaceAllString(text, "") doc[key] = extinfo[key] //结果覆盖原doc AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志 } return doc } //抽取-规则 func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) { if in.IsLua { lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText} lua.KvMap = getKvByLuaFields(extfrom, j, in, et.Tag) lua.Block = j.Block extinfo := lua.RunScript("core") for k, v := range extinfo { if k == in.Field { if j.Result[k] == nil { j.Result[k] = [](*ju.ExtField){} } if tmps, ok := v.([]map[string]interface{}); ok { for _, tmp := range tmps { j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"], 0}) } } } } if len(extinfo) > 0 { AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志 } } else { //全文正则 text := qu.ObjToString(doc[extfrom]) if in.Field != "" { extinfo := extRegCoreToResult(extfrom, text, j, in) if len(extinfo) > 0 { AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志 } } } } //lua脚本根据属性设置提取kv值 func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} { kvmap := map[string][]map[string]interface{}{} for fieldname, field := range in.LFields { tags := t[field] //获取对应标签库 for _, bl := range j.Block { //冒号kv if bl.ColonKV != nil { kvs := bl.ColonKV.Kvs kvs2 := bl.ColonKV.Kvs_2 //log.Println("ColonKV1", kvs) //log.Println("ColonKV2", kvs2) for _, tag := range tags { for _, kv := range kvs { if tag.Type == "string" { if kv.Key == tag.Key { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon1", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(kv.Key) { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon1", "matchtype": "tag_regexp", }) } break } } } for _, kv := range kvs2 { if tag.Type == "string" { if kv.Key == tag.Key { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon2", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(kv.Key) { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon2", "matchtype": "tag_regexp", }) } break } } } } } //空格kv if bl.SpaceKV != nil { kvs := bl.SpaceKV.Kvs //log.Println("SpaceKV", kvs) for _, tag := range tags { for _, kv := range kvs { if tag.Type == "string" { if kv.Key == tag.Key { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "space", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(kv.Key) { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "space", "matchtype": "tag_regexp", }) } break } } } } } //表格kv if bl.TableKV != nil { tkv := bl.TableKV //log.Println("tkv", tkv) for k, v := range tkv.Kv { if k == fieldname { if len(tags) > -tkv.KvIndex[fieldname] { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tags[-tkv.KvIndex[fieldname]].Key, "extfrom": "table", "value": v, "type": "table", "matchtype": "tag_string", }) } else { //涉及其他待处理 //log.Println(tags) } } } } } } return kvmap } //正则提取结果 func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[string][]map[string]interface{} { extinfo := map[string][]map[string]interface{}{} if v.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线) apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1) if len(apos) > 0 { pos := apos[0] for k, p := range v.RegCore.ExtractPos { if len(pos) > p { if pos[p] == -1 || pos[p+1] == -1 { continue } val := text[pos[p]:pos[p+1]] tmps := []map[string]interface{}{} tmp := map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "extfrom": extfrom, "value": val, "type": "regexp", "matchtype": "regcontent", } tmps = append(tmps, tmp) extinfo[k] = tmps if val != "" { if j.Result[v.Field] == nil { j.Result[k] = [](*ju.ExtField){} } j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0}) } } } } } else { pos := v.RegCore.Reg.FindStringIndex(text) val := "" if len(pos) == 2 { text = text[pos[1]:] rs := regexp.MustCompile("[^\r\n\t]+") tmp := rs.FindAllString(text, -1) if len(tmp) > 0 { val = tmp[0] } } if val != "" { tmps := []map[string]interface{}{} tmp := map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "extfrom": extfrom, "value": val, "type": "regexp", "matchtype": "regcontent", } tmps = append(tmps, tmp) extinfo[v.Field] = tmps if j.Result[v.Field] == nil { j.Result[v.Field] = [](*ju.ExtField){} } j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0}) } } return extinfo } //后置过滤 func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) { if in.IsLua { result := GetResultMapForLua(j) lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText} if j != nil { lua.Block = j.Block } extinfo := lua.RunScript("back") for k, v := range extinfo { if tmps, ok := v.([]map[string]interface{}); ok { j.Result[k] = [](*ju.ExtField){} for _, tmp := range tmps { j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0}) } } } if len(extinfo) > 0 { AddExtLog("clear", j.SourceMid, result, extinfo, in, t) //抽取日志 } } else { extinfo := map[string]interface{}{} if in.Field != "" { if j.Result[in.Field] != nil { tmp := j.Result[in.Field] exts := []interface{}{} for k, v := range tmp { if v.Type == "table" { //table抽取到的数据不清理 continue } text := qu.ObjToString(v.Value) if text != "" { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } j.Result[in.Field][k].Value = text exts = append(exts, map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "type": v.Type, "matchtype": v.MatchType, "extfrom": v.ExtFrom, "value": text, }) } extinfo[in.Field] = exts if len(extinfo) > 0 { AddExtLog("clear", j.SourceMid, tmp, extinfo, in, t) //抽取日志 } } } else { for key, tmp := range j.Result { exts := []interface{}{} for k, v := range tmp { if v.Type == "table" { //table抽取到的数据不清理 continue } text := qu.ObjToString(v.Value) if text != "" { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } j.Result[key][k].Value = text exts = append(exts, map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "type": v.Type, "matchtype": v.MatchType, "extfrom": v.ExtFrom, "value": text, }) } extinfo[key] = exts } if len(extinfo) > 0 { AddExtLog("clear", j.SourceMid, j.Result, extinfo, in, t) //抽取日志 } } } } //获取抽取结果map[string][]interface{},lua脚本使用 func GetResultMapForLua(j *ju.Job) map[string][]map[string]interface{} { result := map[string][]map[string]interface{}{} for key, val := range j.Result { if result[key] == nil { result[key] = []map[string]interface{}{} } for _, v := range val { tmp := map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "value": v.Value, "type": v.Type, "matchtype": v.MatchType, "extfrom": v.ExtFrom, } result[key] = append(result[key], tmp) } } return result } //抽取日志 func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *RegLuaInfo, t *TaskInfo) { if !t.IsEtxLog { return } logdata := map[string]interface{}{ "code": v.Code, "name": v.Name, "type": ftype, "ruletext": v.RuleText, "islua": v.IsLua, "field": v.Field, "version": t.Version, "taskname": t.Name, "before": before, "extinfo": extinfo, "sid": sid, "comeintime": time.Now().Unix(), } lock.Lock() ExtLogs[t] = append(ExtLogs[t], logdata) lock.Unlock() } //保存抽取日志 func SaveExtLog() { tmpLogs := map[*TaskInfo][]map[string]interface{}{} lock.Lock() tmpLogs = ExtLogs ExtLogs = map[*TaskInfo][]map[string]interface{}{} lock.Unlock() for k, v := range tmpLogs { if len(v) < saveLimit { db.Mgo.SaveBulk(k.TrackColl, v...) } else { for { if len(v) > saveLimit { tmp := v[:saveLimit] db.Mgo.SaveBulk(k.TrackColl, tmp...) v = v[saveLimit:] } else { db.Mgo.SaveBulk(k.TrackColl, v...) break } } } } time.AfterFunc(10*time.Second, SaveExtLog) } type FieldValue struct { Value interface{} Count int } //分析抽取结果并保存 func AnalysisSaveResult(j *ju.Job, task *TaskInfo) { doc := j.Data result := j.Result _id := qu.BsonIdToSId((*doc)["_id"]) iscore, _ := ju.Config["fieldscore"].(bool) if iscore { //打分 result = ScoreFields(result) } //结果排序 values := map[string][]*ju.SortObject{} for key, val := range result { fieldValue := map[string][]interface{}{} if iscore { //走打分 for _, v := range val { if len(fmt.Sprint(v.Value)) < 1 { continue //去除空串 } fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value} } } else { //不走打分,按出现频次 for _, v := range val { if len(fmt.Sprint(v.Value)) < 1 { continue //去除空串 } if fieldValue[fmt.Sprint(v.Value)] == nil { fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value} } else { fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1 } } } objects := []*ju.SortObject{} for k, v := range fieldValue { tmp := &ju.SortObject{ Key: k, Value: qu.IntAll(v[0]), Object: v[1], } objects = append(objects, tmp) } values[key] = ju.ExtSort(objects) } //从排序结果中取值 tmp := map[string]interface{}{} //抽取值 for key, val := range values { for _, v := range val { //取第一个 if v.Key != "" { tmp[key] = v.Object break } } } //resulttmp := tmp resulttmp, _ := ju.DeepCopy(tmp).(map[string]interface{}) //保存结果 resulttmp["result"] = result if len(j.PackageInfo) > 0 { //分包信息 resulttmp["package"] = j.PackageInfo } if len(j.Winnerorder) > 0 { //候选人信息 resulttmp["winnerorder"] = j.Winnerorder } for k, v := range *doc { if resulttmp[k] == nil { //&& (k != "detail" || k != "contenthtml") { resulttmp[k] = v } } //质量审核 if ju.Config["qualityaudit"].(bool) { QualityAudit(resulttmp) } b, p, c, d := TransmitData(resulttmp, _id) //抽取省份城市 //log.Println("抽取省份,城市,县结果=====", b, p, c, d) resulttmp["district"] = d if b { resulttmp["city"] = c resulttmp["area"] = p } if task.TestColl == "" { if len(tmp) > 0 { //保存抽取结果 b := task.DB.Update(task.SaveColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false) if !b { log.Println(task.SaveColl, _id) } } if b, ok := ju.Config["saveresult"].(bool); ok && b { b := db.Mgo.Update("extract_result", `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": resulttmp}, true, false) if !b { log.Println("extract_result", _id) } } } else { //测试结果 if len(j.BlockPackage) > 0 { //分包详情 resulttmp["epackage"] = j.BlockPackage } b := db.Mgo.Update(task.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": resulttmp}, true, false) if !b { log.Println(task.TestColl, _id) } } } func QualityAudit(resulttmp map[string]interface{}) { //获取审核字段 //log.Println("需要审核的字段-----", AuditFields) if len(AuditFields) == 0 { v, _ := db.Mgo.FindOne("version", `{"isuse":true,"delete":false}`) //查找当前使用版本 if len(*v) > 0 { //查找当前使用版本中属性配置需要审核的字段 vid := qu.BsonIdToSId((*v)["_id"]) query := map[string]interface{}{ "isaudit": true, "delete": false, "vid": vid, } data, _ := db.Mgo.Find("versioninfo", query, `{"_id":-1}`, `{"s_field":1}`, false, -1, -1) for _, d := range *data { field := qu.ObjToString(d["s_field"]) AuditFields = append(AuditFields, field) } } } for _, field := range AuditFields { //1.分包 if resulttmp["package"] != nil { packagedata := resulttmp["package"].(map[string]map[string]interface{}) for _, val := range packagedata { if val[field] != nil { fv := qu.ObjToString(val[field]) if fv != "" { if field == "buyer" || field == "winner" { //field为buyer和winner时特殊处理,先从Redis中查,有直接通过,没有走匹配规则 RedisMatch(field, fv, val) //redis匹配 } else { //除了buyer和winner,其他字段走规则匹配 fv := qu.ObjToString(resulttmp[field]) //resulttmp[field+"_isredis"] = false RuleMatch(field, fv, resulttmp) } } } } } //2.外围 if resulttmp[field] != nil { fv := qu.ObjToString(resulttmp[field]) if fv != "" { if field == "buyer" || field == "winner" { //field为buyer和winner时特殊处理,先从Redis中查,有直接通过,没有走匹配规则 RedisMatch(field, fv, resulttmp) //redis匹配 } else { //除了buyer和winner,其他字段走规则匹配 fv := qu.ObjToString(resulttmp[field]) //resulttmp[field+"_isredis"] = false RuleMatch(field, fv, resulttmp) } } } } } //Redis匹配 func RedisMatch(field, fv string, val map[string]interface{}) { i := redis.GetInt(field, field+"_"+fv) //查找redis if i == 0 { //reids未找到,执行规则匹配 val[field+"_isredis"] = false RuleMatch(field, fv, val) //规则匹配 } else { //redis找到,打标识存库 val[field+"_isredis"] = true } } //规则匹配 func RuleMatch(field, fieldval string, tmpMap map[string]interface{}) { if fieldval != "" { SMap := StartMatch(field, fieldval) //SMap.AddKey(field+"_isaudit", false) for _, k := range SMap.Keys { tmpMap[k] = SMap.Map[k] } tmpMap[field+"_isaudit"] = false } } //开始规则匹配 func StartMatch(field, text string) *pretreated.SortMap { SMap := pretreated.NewSortMap() f := RecogFieldMap[field] if len(f) > 0 { fid := qu.BsonIdToSId(f["_id"]) recogFieldPreRule := qu.ObjToString(f["s_recogfield_prerule"]) textAfterRecogFieldPrerule := ju.PreFilter(text, recogFieldPreRule) //识别字段的前置过滤 if textAfterRecogFieldPrerule != "" { classMap := FidClassMap[fid] L: for _, c := range classMap { //class classid := qu.BsonIdToSId(c["_id"]) classPrerule := qu.ObjToString(c["s_class_prerule"]) savefield := qu.ObjToString(c["s_savefield"]) //保存字段 textAfterClassPrerule := ju.PreFilter(textAfterRecogFieldPrerule, classPrerule) //class的前置过滤 if textAfterClassPrerule != "" { ruleMap := CidRuleMap[classid] for _, r := range ruleMap { //rule rulePrerule := qu.ObjToString(r["s_rule_prerule"]) s_code := qu.ObjToString(r["s_code"]) rule := r["rule"].([]interface{}) textAfterRulePrerule := ju.PreFilter(textAfterClassPrerule, rulePrerule) //class的前置过滤 if textAfterRulePrerule != "" { b, _ := ju.RecogAnalyRules(textAfterRulePrerule, rule) if b { //匹配到一个分类下某个规则时,不再继续匹配 if savefield != "" { //保存字段不为空,存储代码信息 SMap.AddKey(field+"_"+savefield, s_code) } break L } } } } } } } return SMap }