// Package extract wires extraction tasks together: it loads the rule
// configuration of a task, pulls source documents from MongoDB, pre-processes
// them into jobs, runs the extraction rules and clean-up functions over each
// job, and audits the extracted fields.
package extract

import (
	"fmt"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	log "github.com/donnie4w/go-logger/logger"
	"gopkg.in/mgo.v2/bson"
	"jy/clear"
	db "jy/mongodbutil"
	"jy/pretreated"
	ju "jy/util"
	qu "qfw/util"
	"qfw/util/redis"
)

// initExtractTaskRules runs the configuration loaders shared by the test and
// the production start-up paths, in the order both paths used originally.
// When reloadCity is false the city/area/post-code tables are only loaded if
// ext.ProvinceMap is still nil; when true they are reloaded on every call.
func initExtractTaskRules(ext *ExtractTask, reloadCity bool) {
	ext.InitSite()
	ext.InitRulePres()
	ext.InitRuleBacks(false)
	ext.InitRuleBacks(true)
	ext.InitRuleCore(false)
	ext.InitRuleCore(true)
	ext.InitPkgCore()
	ext.InitBlockRule()
	ext.InfoTypeList()
	ext.InitTag(false)
	ext.InitTag(true)
	ext.InitClearFn(false)
	ext.InitClearFn(true)

	// City extraction is switched per version via IsExtractCity. The unlock is
	// deferred so a panic in a loader cannot leave the task locked.
	func() {
		ext.Lock()
		defer ext.Unlock()
		if ext.IsExtractCity && (reloadCity || ext.ProvinceMap == nil) {
			ext.InitCityInfo()
			ext.InitAreaCode()
			ext.InitPostCode()
		}
	}()

	// Quality audit configuration.
	ext.InitAuditFields()
	ext.InitAuditRule()
	ext.InitAuditClass()
	ext.InitAuditRecogField()

	// Feature switches; a missing or non-bool config entry leaves them false.
	ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)   // brand extraction
	ju.IsPriceNumber, _ = ju.Config["pricenumber"].(bool) // price/quantity extraction

	// Attachment extraction configuration.
	ext.InitFile()
}

// StartExtractTestTask prepares the task registered under taskId for a
// result-tracking debug run (creating it when it is not in TaskList yet) and
// then runs the extraction over at most num documents starting at startId.
// It returns the result of RunExtractTestTask, or false when a panic was
// recovered by qu.Catch.
func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool {
	defer qu.Catch()
	ext := TaskList[taskId]
	if ext == nil {
		ext = &ExtractTask{}
		ext.Id = taskId
		ext.InitTestTaskInfo(resultcoll, trackcoll)
		ext.IsRun = true
		ext.TaskInfo.FDB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
	}
	initExtractTaskRules(ext, false)
	ext.TaskInfo.TestColl = resultcoll
	TaskList[taskId] = ext
	return RunExtractTestTask(ext, startId, num)
}

// IdTrans converts a hex string into a bson.ObjectId. bson.ObjectIdHex panics
// on malformed input; the panic is recovered by qu.Catch and the zero
// ObjectId is returned, which callers detect through Valid().
func IdTrans(startId string) bson.ObjectId {
	defer qu.Catch()
	return bson.ObjectIdHex(startId)
}

// RunExtractTestTask extracts up to num documents whose _id is >= startId
// from the task's source collection. It returns false when startId is not a
// valid ObjectId or the query yields no result list, true otherwise.
func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
	id := IdTrans(startId)
	if !id.Valid() {
		return false
	}
	// A non-numeric num yields 0 and is passed through as the limit unchanged.
	n, _ := strconv.Atoi(num)
	query := bson.M{"_id": bson.M{"$gte": id}}
	list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, n)
	if list == nil {
		// Previously this dereferenced nil and relied on the caller's recover.
		return false
	}
	for _, v := range *list {
		if spidercode[qu.ObjToString(v["spidercode"])] { // temporary bid-opening records
			continue
		}
		j, _, isSite := ext.PreInfo(v)
		// Extract, score and save. The slot pushed below is released by
		// ExtractProcess when it finishes.
		go ext.ExtractProcess(j, nil, isSite)
		ext.TaskInfo.ProcessPool <- true
	}
	return true
}

// StartExtractTaskId (re)loads the configuration of the task identified by
// taskId, opens its source and target database pools, starts the save
// goroutines and, for a task that was not registered before, starts the
// extraction loop. It returns true unless a panic was recovered by qu.Catch.
func StartExtractTaskId(taskId string) bool {
	defer qu.Catch()
	ext := TaskList[taskId]
	isNew := ext == nil
	if isNew {
		ext = &ExtractTask{}
	}
	ext.Id = taskId
	ext.InitTaskInfo()
	ext.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
	ext.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
	initExtractTaskRules(ext, true)
	ext.IsRun = true

	// Register the task before any goroutine is started: RunExtractTask looks
	// the task up in TaskList and would otherwise race with this assignment.
	TaskList[taskId] = ext

	go ext.ResultSave(true)
	go ext.BidSave(true)
	if isNew {
		// An already registered task still has its RunExtractTask timer loop,
		// which picks up IsRun again on its next tick.
		go RunExtractTask(taskId)
	}
	return true
}

// StopExtractTaskId flags the task as stopped and persists the id of the last
// extracted document into task.s_extlastid. It returns false when no task is
// registered under taskId.
func StopExtractTaskId(taskId string) bool {
	defer qu.Catch()
	ext := TaskList[taskId]
	if ext == nil {
		// Previously this fell through to a nil dereference that was recovered
		// and also produced false.
		return false
	}
	ext.IsRun = false
	// Persist task.s_extlastid.
	db.Mgo.UpdateById("task", taskId, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
	return true
}

// RunExtractTask processes, page by page, every source document whose _id is
// >= the task's LastExtId, persists the progress after each page and then
// re-schedules itself one minute later. The re-scheduling also happens while
// the task is stopped, so a later StartExtractTaskId resumes through it.
//
// NOTE(review): every page is queried with "$gte LastExtId" and offset 0, so
// the last document of a page is fetched again by the next page — confirm
// this is intended.
func RunExtractTask(taskId string) {
	defer qu.Catch()
	ext := TaskList[taskId]
	if ext == nil {
		log.Debug("RunExtractTask: task not registered", taskId)
		return
	}
	query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
	count := ext.TaskInfo.FDB.Count(ext.TaskInfo.FromColl, query)
	pageNum := (count + PageSize - 1) / PageSize
	limit := PageSize
	if count < PageSize {
		limit = count
	}
	fmt.Printf("count=%d,pageNum=%d,query=%v", count, pageNum, query)
	for i := 0; i < pageNum; i++ {
		query = bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
		list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
		if list == nil {
			// Give up on this round but keep the polling loop alive.
			log.Debug("RunExtractTask: find returned no list", taskId)
			break
		}
		fmt.Printf("page=%d,query=%v,size=%d", i+1, query, len(*list))
		for _, v := range *list {
			// Decide from the title whether the document is extracted at all.
			if !IsExtract("title", qu.ObjToString(v["title"]), "") {
				continue
			}
			sid := qu.BsonIdToSId(v["_id"])
			if !ext.IsRun {
				break
			}
			var j, jf *ju.Job
			var isSite bool
			if ext.IsFileField && (v["projectinfo"] != nil || v["attach_text"] != nil) {
				v["isextFile"] = true
				j, jf, isSite = ext.PreInfo(v)
			} else {
				j, _, isSite = ext.PreInfo(v)
			}
			go ext.ExtractProcess(j, jf, isSite)
			ext.TaskInfo.LastExtId = sid
			ext.TaskInfo.ProcessPool <- true
		}
		// Persist task.s_extlastid after every page.
		db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
		if !ext.IsRun {
			break
		}
	}
	// Poll again in one minute.
	time.AfterFunc(1*time.Minute, func() { RunExtractTask(taskId) })
}

// PreInfo pre-processes a document without binding it to a task version: it
// delegates to the method of the same name on an empty ExtractTask.
func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
	return (&ExtractTask{}).PreInfo(doc)
}

// PreInfo turns a raw document into the job(s) used by the extraction.
//
// It cleans and truncates doc["detail"] (doc is modified in place),
// normalises the category pair, filters doc["jsondata"] down to scalar
// values, builds the main job j and, when attachment extraction is active,
// the attachment job jf, loads per-site overrides into e and finally runs the
// block pre-analysis. isSite reports whether site-specific rules are enabled
// for the document's spider code. If a panic is recovered by qu.Catch the
// results set so far are returned (possibly all nil/false).
func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
	defer qu.Catch()

	// Callers flag documents that carry attachments through doc["isextFile"].
	isextFile, _ := doc["isextFile"].(bool)
	// NOTE(review): attachment extraction is forced off here, which makes jf
	// always nil and every isextFile branch below dead. Kept as found —
	// confirm whether this override is still wanted.
	isextFile = false

	summary := qu.ObjToString(doc["summary"])
	detail := CleanDetailText(qu.ObjToString(doc["detail"]), summary)
	// Extraction works on detail; cap it at 10000 runes.
	if utf8.RuneCountInString(detail) > 10000 {
		detail = string(([]rune(detail))[:10000])
	}
	doc["detail"] = detail

	// "T" when neither the detail nor the title matches clearMoneyReg.
	isClearnMoney := !clearMoneyReg.MatchString(detail)
	if isClearnMoney {
		isClearnMoney = !clearMoneyReg.MatchString(qu.ObjToString(doc["title"]))
	}
	isClearnMoneystr := qu.ObjToString(qu.If(isClearnMoney, "T", ""))

	if isextFile {
		// Concatenates the attachment texts; the result ends up in
		// doc["detailfile"], which the attachment job reads below.
		file2text(&doc)
	}

	toptype := qu.ObjToString(doc["toptype"])
	subtype := qu.ObjToString(doc["subtype"])
	if qu.ObjToString(doc["type"]) == "bid" {
		toptype = "结果"
	}
	if subtype == "其他" {
		subtype = "其它"
	}
	if toptype == "" || subtype == "" {
		toptype, subtype = "all", "all"
	}
	if toptype == "采购意向" || subtype == "采购意向" {
		// Procurement intentions are treated as tender notices for now.
		toptype, subtype = "招标", "招标"
	}

	toMap := qu.ObjToMap(doc["jsondata"])
	if toMap != nil && *toMap != nil {
		if (*toMap)["extweight"] == nil {
			(*toMap)["extweight"] = ju.Config["jsondata_extweight"]
		}
		delete(*toMap, "jsoncontent")
		// Keep only float64, int64 and string values.
		for k, v := range *toMap {
			switch v.(type) {
			case float64, int64, string:
			default:
				delete(*toMap, k)
			}
		}
	}

	// newJob builds a job over doc; the main and the attachment job differ
	// only in their content and the IsFile marker.
	newJob := func(content string, isFile bool) *ju.Job {
		return &ju.Job{
			SourceMid:      qu.BsonIdToSId(doc["_id"]),
			Category:       toptype,
			CategorySecond: subtype,
			Content:        content,
			SpiderCode:     qu.ObjToString(doc["spidercode"]),
			Site:           qu.ObjToString(doc["site"]),
			Title:          qu.ObjToString(doc["title"]),
			Data:           &doc,
			City:           qu.ObjToString(doc["city"]),
			Province:       qu.ObjToString(doc["area"]),
			Jsondata:       toMap,
			Result:         map[string][]*ju.ExtField{},
			BuyerAddr:      qu.ObjToString(doc["buyeraddr"]),
			RuleBlock:      e.RuleBlock,
			IsFile:         isFile,
			Dataging:       qu.IntAll(doc["dataging"]),
			IsClearnMoney:  isClearnMoneystr,
			IsUnRulesTab:   false,
		}
	}
	j = newJob(qu.ObjToString(doc["detail"]), false)
	if isextFile {
		jf = newJob(qu.ObjToString(doc["detailfile"]), isextFile)
	}

	codeSite := j.SpiderCode
	// Is site-specific extraction enabled for this spider code?
	if value, ok := e.SiteMerge.Load(codeSite); ok {
		isSite = value.(bool)
	}
	if isSite {
		// Load the site's configured overrides into the task. isSite itself
		// stays true even when no configuration is stored for the site.
		if exp, found := e.Luacodes.Load(codeSite); found {
			cfg := exp.(map[string]interface{})
			if cfg["e.SiteClearFn"] != nil {
				e.SiteClearFn = cfg["e.SiteClearFn"].(map[string][]string)
			}
			if cfg["e.SiteTag"] != nil {
				e.SiteTag = cfg["e.SiteTag"].(map[string][]*Tag)
			}
			if cfg["e.SiteRuleCores"] != nil {
				e.SiteRuleCores = cfg["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)
			}
			if cfg["e.SiteRuleBacks"] != nil {
				e.SiteRuleBacks = cfg["e.SiteRuleBacks"].([]*RegLuaInfo)
			}
		}
	}

	qu.Try(func() {
		// Block analysis without table parsing.
		pretreated.AnalyStartNoTable(j, isSite, codeSite)
		if isextFile && strings.TrimSpace(jf.Content) != "" {
			pretreated.AnalyStartNoTable(jf, isSite, codeSite)
		}
	}, func(err interface{}) {
		log.Debug("pretreated.AnalyStart", err, j.SourceMid)
	})
	return j, jf, isSite
}

// ExtractProcess runs the extraction for one document: the main job j, the
// optional attachment job jf (merged into j), and — for sites with merging
// enabled — a second pass with the general rules. The merged result is handed
// to AnalysisSaveResult. One slot of e.TaskInfo.ProcessPool is released when
// the method returns; callers push that slot after starting the goroutine.
func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
	defer func() { <-e.TaskInfo.ProcessPool }()
	if j == nil {
		// PreInfo recovered from a panic before building the job; there is
		// nothing to extract, and dereferencing j here would crash the process.
		log.Debug("ExtractProcess skipped: no job")
		return
	}

	e.ExtractDetail(j, isSite, j.SpiderCode) // body text

	if jf != nil && jf.IsFile { // merge attachment results jf → j
		e.ExtractDetail(jf, isSite, j.SpiderCode)
		// Tender notices other than "单一" never take winner information from
		// the attachment.
		skipWinner := j.Category == "招标" && j.CategorySecond != "单一"
		for tmpk, xs := range jf.Result {
			if len(j.Result[tmpk]) == 0 {
				if tmpk == "budget" || tmpk == "bidamount" {
					// Only plausible amounts are taken over.
					for _, v := range xs {
						if fv, ok := v.Value.(float64); ok && fv > 100 && fv < 50000000000 {
							j.Result[tmpk] = append(j.Result[tmpk], v)
						}
					}
					continue
				}
				if tmpk == "winner" && skipWinner {
					continue
				}
				j.Result[tmpk] = append(j.Result[tmpk], xs...)
				continue
			}
			if tmpk != "winner" {
				continue
			}
			// The body produced winner candidates; use the attachment's only
			// when none of them carries a value.
			isUsed := false
			for _, v := range j.Result[tmpk] {
				if v.Value != "" {
					isUsed = true
					break
				}
			}
			if !isUsed && !skipWinner {
				j.Result[tmpk] = append(j.Result[tmpk], xs...)
			}
		}
		if len(j.Winnerorder) == 0 && len(jf.Winnerorder) > 0 && !skipWinner {
			j.Winnerorder = append(j.Winnerorder, jf.Winnerorder...)
		}
		if len(j.PackageInfo) == 0 && isUsedPackageJF(jf.PackageInfo) {
			j.PackageInfo = jf.PackageInfo
		}
	}

	if isSite {
		ismerge, ok := e.SiteMerge.Load(j.SpiderCode)
		if ok && ismerge.(bool) {
			// Second pass over the same document with the general rules.
			tmpj := &ju.Job{
				SourceMid:      j.SourceMid,
				Category:       j.Category,
				CategorySecond: j.CategorySecond,
				Content:        j.Content,
				SpiderCode:     j.SpiderCode,
				Title:          j.Title,
				Data:           j.Data,
				City:           j.City,
				Province:       j.Province,
				Jsondata:       j.Jsondata,
				Result:         map[string][]*ju.ExtField{},
				BuyerAddr:      j.BuyerAddr,
				RuleBlock:      e.RuleBlock,
			}
			qu.Try(func() {
				pretreated.AnalyStart(tmpj, false, "") // block analysis
			}, func(err interface{}) {
				log.Debug("pretreated.AnalyStart.ExtractProcess", err, j.SourceMid)
			})
			e.ExtractDetail(tmpj, false, "")

			// Merge the general-rule results into the site results.
			j.Block = append(j.Block, tmpj.Block...)
			j.Winnerorder = append(j.Winnerorder, tmpj.Winnerorder...)
			for tmpk := range j.Result {
				if len(tmpj.Result[tmpk]) > 0 {
					j.Result[tmpk] = append(j.Result[tmpk], tmpj.Result[tmpk]...)
				}
			}
			for tmpk := range tmpj.Result {
				if len(j.Result[tmpk]) == 0 {
					j.Result[tmpk] = append(j.Result[tmpk], tmpj.Result[tmpk]...)
				}
			}
		}
	}

	// Analyse the extraction result and save it.
	AnalysisSaveResult(j, jf, e)
}

// selectClearFn returns the clean-up function names configured for key. With
// isSite set the site-specific list wins and the general list is the
// fallback. The lookup runs under lockclear.
func selectClearFn(e *ExtractTask, key string, isSite bool) []string {
	lockclear.Lock()
	defer lockclear.Unlock()
	if isSite {
		if cfn := e.SiteClearFn[key]; len(cfn) > 0 {
			return cfn
		}
	}
	return e.ClearFn[key]
}

// ExtractDetail applies the extraction rules to job j: it selects the rule
// set for the job's category, runs core / back / kv rules per field, then the
// global back rules and the clean-up functions, and finally the package
// handling. isSite selects the site-specific configuration; codeSite is
// forwarded to PackageDetail. Panics are recovered by qu.Try and logged.
func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
	qu.Try(func() {
		doc := *j.Data

		// Select the rules for the job's category under lockrule. Lookup
		// order: category-specific rules, then "all_all" of the same family
		// (site or general), then "all_all" of the other family.
		tmprules := func() map[string][]*RuleCore {
			lockrule.Lock()
			defer lockrule.Unlock()
			rules := map[string][]*RuleCore{}
			merge := func(src map[string][]*RuleCore) {
				for k, vc1 := range src {
					rules[k] = vc1
				}
			}
			key := "all_all"
			if j.Category != "all" && j.CategorySecond != "all" {
				key = j.Category + "_" + j.CategorySecond
			}
			if isSite {
				merge(e.SiteRuleCores[key])
				if len(rules) == 0 {
					merge(e.SiteRuleCores["all_all"])
				}
				if len(rules) == 0 {
					merge(e.RuleCores["all_all"])
				}
			} else {
				merge(e.RuleCores[key])
				if len(rules) == 0 {
					merge(e.RuleCores["all_all"])
				}
				if len(rules) == 0 {
					merge(e.SiteRuleCores["all_all"])
				}
			}
			return rules
		}()

		// Extraction rules.
		for _, vc1 := range tmprules {
			for _, vc := range vc1 {
				tmp := ju.DeepCopy(doc).(map[string]interface{})
				// Does the rule's entry logic accept this document?
				if !ju.Logic(vc.LuaLogic, tmp) {
					continue
				}
				// Core rule.
				ExtRuleCore(tmp, e, vc, j, isSite)
				// Back rules of the field.
				for _, v := range vc.RuleBacks {
					ExtRegBack(j, v, e.TaskInfo, vc)
				}
				// kv rules.
				for _, v := range vc.KVRuleCores {
					ExtRuleKV(j, v, e.TaskInfo)
				}
				if vc.Field != "projectname" {
					continue
				}
				// No usable project name extracted: let the title stand in.
				if vc.ExtFrom == "title" {
					isextitle := true
					for _, v := range j.Result[vc.Field] {
						if utf8.RuneCountInString(qu.ObjToString(v.Value)) > 5 {
							isextitle = false
							break
						}
					}
					if isextitle {
						// The title joins the candidates.
						field := &ju.ExtField{
							Field:       vc.Field,
							Code:        vc.Id + "_title",
							RuleText:    "title",
							Type:        "title",
							MatchType:   "title",
							ExtFrom:     vc.ExtFrom,
							SourceValue: j.Title,
							Value:       j.Title,
						}
						if isSite {
							field.Score = 1
						}
						j.Result[vc.Field] = append(j.Result[vc.Field], field)
					}
				}
				// The back rules are applied three more times for projectname.
				for i := 0; i < 3; i++ {
					for _, v := range vc.RuleBacks {
						ExtRegBack(j, v, e.TaskInfo, vc)
					}
				}
			}
		}

		// Global back rules.
		if isSite {
			for _, v := range e.SiteRuleBacks {
				ExtRegBack(j, v, e.TaskInfo, nil)
			}
		} else {
			for _, v := range e.RuleBacks {
				ExtRegBack(j, v, e.TaskInfo, nil)
			}
		}

		// Clean-up functions.
		for key, val := range j.Result {
			isAmount := key == "budget" || key == "bidamount"
			for _, v := range val {
				if v.Field == "projectname" && v.Type == "table" {
					break
				}
				if isAmount {
					// Amounts that are already numeric and not yet confirmed
					// are left alone.
					if _, ok := v.Value.(float64); ok && !v.IsTrue {
						continue
					}
				}
				cfn := selectClearFn(e, key, isSite)
				if len(cfn) == 0 {
					continue
				}
				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content}, j.SpiderCode, j.IsClearnMoney)
				if isAmount {
					// The last element of data flags whether the amount was
					// accepted; otherwise only the value is stored.
					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
						v.IsTrue = true
					} else {
						v.Value = data[0]
						continue
					}
				}
				before, _ := v.Value.(string)
				v.Value = data[0]
				joined := strings.Join(cfn, ",")
				// Log the clean-up function step.
				BeforeAddClearFnLog(joined, "函数清理"+joined, j.SourceMid, before, v.MatchType, v, e)

				// Special-symbol clean-up. The unlock is deferred so a panic
				// recovered by qu.Try cannot leave lockclear held forever.
				func() {
					lockclear.Lock()
					defer lockclear.Unlock()
					if clear.AsyField[key] == nil && clear.SymField[key] == nil && clear.MesField[key] == nil {
						return
					}
					text := qu.ObjToString(v.Value)
					before = text
					// Extra stripping for organisation-name fields.
					if key == "winner" || key == "agency" || key == "buyer" {
						text = strings.ReplaceAll(text, "【", "")
						text = strings.ReplaceAll(text, "】", "")
					}
					v.Value = clear.OtherClean(key, text)
					BeforeAddClearFnLog("clear.OtherClean", "特殊符号清理clear.OtherClean", j.SourceMid, before, v.MatchType, v, e)
				}()
			}
		}

		// Package information handling / de-duplication.
		PackageDetail(j, e, isSite, codeSite)
	}, func(err interface{}) {
		log.Debug("ExtractProcess err", err, j.SourceMid)
	})
}

// ExtractFile applies the general extraction rules to an attachment job.
// Core and back rules only run for fields whose entry in e.FileFields is
// present and evaluates to a value > 0 through qu.IntAllDef(value, 1).
// Afterwards the clean-up functions and the package handling run as in
// ExtractDetail. Panics are recovered by qu.Try and logged.
func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
	// fileFieldEnabled reports whether field is switched on for attachments.
	fileFieldEnabled := func(field interface{}) bool {
		value, ok := e.FileFields.Load(field)
		return ok && qu.IntAllDef(value, 1) > 0
	}
	qu.Try(func() {
		doc := *j.Data

		// Select the rules for the job's category.
		tmprules := map[string][]*RuleCore{}
		lockrule.Lock()
		if j.Category == "all" || j.CategorySecond == "all" {
			for k, vc1 := range e.RuleCores["all_all"] {
				tmprules[k] = vc1
			}
		} else {
			for k, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
				tmprules[k] = vc1
			}
		}
		lockrule.Unlock()

		for _, vc1 := range tmprules {
			for _, vc := range vc1 {
				tmp := ju.DeepCopy(doc).(map[string]interface{})
				// Does the rule's entry logic accept this document?
				if !ju.Logic(vc.LuaLogic, tmp) {
					continue
				}
				// Core rule.
				if fileFieldEnabled(vc.Field) {
					ExtRuleCore(tmp, e, vc, j, isSite)
				}
				// Back rules of the field.
				for _, v := range vc.RuleBacks {
					if fileFieldEnabled(v.Field) {
						ExtRegBack(j, v, e.TaskInfo, vc)
					}
				}
			}
		}

		// Global back rules.
		for _, v := range e.RuleBacks {
			if fileFieldEnabled(v.Field) {
				ExtRegBack(j, v, e.TaskInfo, nil)
			}
		}

		// Clean-up functions.
		for key, val := range j.Result {
			for _, v := range val {
				cfn := selectClearFn(e, key, isSite)
				if len(cfn) == 0 {
					continue
				}
				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content}, j.SpiderCode, j.IsClearnMoney)
				v.Value = data[0]

				// Special-symbol clean-up; unlock deferred for panic safety.
				func() {
					lockclear.Lock()
					defer lockclear.Unlock()
					if clear.AsyField[key] != nil || clear.SymField[key] != nil || clear.MesField[key] != nil {
						v.Value = clear.OtherClean(key, qu.ObjToString(v.Value))
					}
				}()
			}
		}

		// Package information handling.
		PackageDetail(j, e, isSite, codeSite)
	}, func(err interface{}) {
		log.Debug("ExtractProcess err", err, j.SourceMid)
	})
}

// QualityAudit audits every field listed in e.AuditFields, first inside each
// package of resulttmp["package"] and then on the top level of resulttmp.
// The audit markers are written into the audited maps themselves.
func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
	defer qu.Catch()

	// audit checks one non-empty field value of target. buyer and winner are
	// looked up in redis first and only fall back to the rules on a miss;
	// every other field goes straight to the rules.
	audit := func(field, fv string, target map[string]interface{}) {
		if field == "buyer" || field == "winner" {
			e.RedisMatch(field, fv, target)
		} else {
			e.RuleMatch(field, fv, target)
		}
	}

	// A missing or differently typed "package" entry yields a nil map, so the
	// package pass is skipped instead of aborting the whole audit by panic.
	packagedata, _ := resulttmp["package"].(map[string]map[string]interface{})

	for _, field := range e.AuditFields {
		// 1. Packages.
		for _, val := range packagedata {
			if val[field] == nil {
				continue
			}
			if fv := qu.ObjToString(val[field]); fv != "" {
				audit(field, fv, val)
			}
		}
		// 2. Top level.
		if resulttmp[field] == nil {
			continue
		}
		if fv := qu.ObjToString(resulttmp[field]); fv != "" {
			audit(field, fv, resulttmp)
		}
	}
}

// RedisMatch looks the value fv up in redis under the key field+"_"+fv. On a
// hit val[field+"_isredis"] is set to true; on a miss (GetInt returned 0) it
// is set to false and the rule matching runs instead.
func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
	defer qu.Catch()
	if redis.GetInt(field, field+"_"+fv) != 0 {
		// Found in redis: only mark it.
		val[field+"_isredis"] = true
		return
	}
	// Not in redis: fall back to the rules.
	val[field+"_isredis"] = false
	e.RuleMatch(field, fv, val)
}

// RuleMatch runs the audit rules for a non-empty field value, copies every
// key produced by StartMatch into tmpMap and marks the field as not yet
// audited through tmpMap[field+"_isaudit"] = false.
func (e *ExtractTask) RuleMatch(field, fieldval string, tmpMap map[string]interface{}) {
	defer qu.Catch()
	if fieldval == "" {
		return
	}
	// StartMatch returns nil when it recovered from a panic; the audit marker
	// below must still be written in that case.
	if SMap := e.StartMatch(field, fieldval); SMap != nil {
		for _, k := range SMap.Keys {
			tmpMap[k] = SMap.Map[k]
		}
	}
	tmpMap[field+"_isaudit"] = false // field has not been audited yet
}

// StartMatch matches text against the recognition rules configured for
// field. The text passes the pre-filters of the recognition field, of each
// class and of each rule in turn; matching stops at the first rule that
// matches. If that rule's class defines a save field, the rule name is stored
// under field+"_"+savefield. The returned map is empty when nothing matched.
func (e *ExtractTask) StartMatch(field, text string) *pretreated.SortMap {
	defer qu.Catch()
	SMap := pretreated.NewSortMap()

	lock.Lock()
	f := e.RecogFieldMap[field]
	lock.Unlock()
	if len(f) == 0 {
		return SMap
	}

	fid := qu.BsonIdToSId(f["_id"])
	// Pre-filter of the recognition field.
	fieldText := ju.PreFilter(text, qu.ObjToString(f["s_recogfield_prerule"]))
	if fieldText == "" {
		return SMap
	}

	lock.Lock()
	classMap := e.FidClassMap[fid]
	lock.Unlock()

classes:
	for _, c := range classMap {
		classid := qu.BsonIdToSId(c["_id"])
		savefield := qu.ObjToString(c["s_savefield"]) // key suffix used when saving
		// Pre-filter of the class.
		classText := ju.PreFilter(fieldText, qu.ObjToString(c["s_class_prerule"]))
		if classText == "" {
			continue
		}

		lock.Lock()
		ruleMap := e.CidRuleMap[classid]
		lock.Unlock()

		for _, r := range ruleMap {
			// A rule without a well-formed rule list is skipped instead of
			// aborting the whole match by panic.
			rule, ok := r["rule"].([]interface{})
			if !ok {
				continue
			}
			s_name := qu.ObjToString(r["s_name"])
			// Pre-filter of the rule.
			ruleText := ju.PreFilter(classText, qu.ObjToString(r["s_rule_prerule"]))
			if ruleText == "" {
				continue
			}
			if b, _ := ju.RecogAnalyRules(ruleText, rule); b {
				// First matching rule wins; no further class is tried.
				if savefield != "" {
					SMap.AddKey(field+"_"+savefield, s_name)
				}
				break classes
			}
		}
	}
	return SMap
}