// extractInit package extract import ( db "jy/mongodbutil" ju "jy/util" qu "qfw/util" "regexp" "sort" "strconv" "strings" "sync" "time" log "github.com/donnie4w/go-logger/logger" ) type RegLuaInfo struct { //正则或脚本信息 Code, Name, Field string // RuleText string // IsLua bool // RegPreBac *ExtReg // RegCore *ExtReg // LFields map[string]string //lua抽取字段属性组 } type ExtReg struct { Reg *regexp.Regexp Replace string Bextract bool ExtractPos map[string]int NumSign int //正负修正标记,例如浮动率(上浮正1、下浮负-1) } type RuleCore struct { Field string //逻辑字段 LuaLogic string //进入逻辑 ExtFrom string //从哪个字段抽取 RulePres []*RegLuaInfo //抽取前置规则 RuleBacks []*RegLuaInfo //抽取后置规则 RuleCores []*RegLuaInfo //抽取规则 } type Tag struct { Type string //标签类型 string 字符串、regexp 正则 Key string // Reg *regexp.Regexp // } type TaskInfo struct { Name, Version, VersionId, TrackColl string //名称、版本、版本id、追踪记录表 FromDbAddr, FromDB, FromColl string //抽取数据库地址、库名、表名 ToDbAddr, ToDB, ToColl string //结果数据库地址、库名、表名 TestColl, LastExtId string //测试结果表、上次抽取信息id FDB *db.Pool //数据库连接池 TDB *db.Pool //数据库连接池 IsEtxLog bool //是否开启抽取日志 ProcessPool chan bool //任务进程池 TestLua bool //检查测试用 } type ExtractTask struct { Id string //任务id IsRun bool //是否启动 Content string //信息内容 TaskInfo *TaskInfo //任务信息 RulePres []*RegLuaInfo //通用前置规则 RuleBacks []*RegLuaInfo //通用后置规则 RuleBlock *ju.RuleBlock //RuleCores []*RuleCore //抽取规则 RuleCores map[string]map[string][]*RuleCore //分类抽取规则 PkgRuleCores []*RuleCore //分包抽取规则 Tag map[string][]*Tag //标签库 ClearFn map[string][]string //清理函数 IsExtractCity bool //是否开启城市抽取 Fields map[string]int //抽取属性组 IsFileField bool //是否开启附件抽取 FileFields *sync.Map //抽取附件属性组 ResultChanel chan bool //抽取结果详情 ResultArr [][]map[string]interface{} //抽取结果详情 BidChanel chan bool //抽取结果 BidArr [][]map[string]interface{} //抽取结果 BidTotal int //结果数量 RecogFieldMap map[string]map[string]interface{} //识别字段 FidClassMap map[string][]map[string]interface{} //分类 CidRuleMap map[string][]map[string]interface{} //规则 AuditFields []string //需要审核的字段名称 ProvinceMap map[string]string //省全称简称(key:浙江省 val:浙江) ProvinceBriefMap map[string]*Province //省简称对应的省信息(key:浙江 val:&Province{}) CityMap map[string]string //市全称简称(key:杭州市 val:杭州) CityBriefMap map[string]*City //市简称对应的市信息(key:杭州 val:&City{}) CityFullMap map[string]*City //市全称对应的市信息(key:杭州市 val:&City{}) DistrictCityMap map[string]*City //区或县对应的city DistrictSimAndAll map[string]string //区或县(key:简称 val:全称) StreetDistrictMap map[string]*District //街道对应的区或县 ProvinceAllGet *ju.DFA //省全称 ProvinceSimGet *ju.DFA //省简称 CityAllGet *ju.DFA //市全称 CitySimGet *ju.DFA //市简称 DistrictAllGet *ju.DFA //区或县全称 DistrictSimGet *ju.DFA //区或县简称 StreetGet *ju.DFA //街道 PostCodeMap map[string]*PostCode //邮编 AreaCodeMap map[string]*AreaCode //区号 InfoType []map[string]interface{} } type ClearTaskInfo struct { Name, Version, VersionId string //名称、版本、版本id FromDbAddr, FromDB, FromColl string //清理数据库地址、库名、表名 FDB *db.Pool //数据库连接池 TDB *db.Pool //数据库连接池 IsCltLog bool //是否开启清理日志 ProcessPool chan bool //任务进程池 } type ClearLua struct { Field string //字段字段 Code string //代码 Name string //名称 LuaText string //LuaLogic string //进入逻辑 //ExtFrom string //从哪个字段抽取 LFields map[string]string //lua抽取字段属性组 } type ClearTask struct { Id string //任务id Content string //信息内容 ClearTaskInfo *ClearTaskInfo //任务信息 ClearLuas map[string][]*ClearLua //清理脚本 UpdateResult [][]map[string]interface{} //清理后结果 ClearChannel chan bool } func init() { TaskList = make(map[string]*ExtractTask) ClearTaskList = make(map[string]*ClearTask) go SaveExtLog() go SaveCltLog() //保存清理日志 } //加载任务信息 func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) { task, _ := db.Mgo.FindById("task", e.Id, nil) if len(*task) > 1 { v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`) e.TaskInfo = &TaskInfo{ Name: (*task)["s_taskname"].(string), Version: (*task)["s_version"].(string), VersionId: qu.BsonIdToSId((*v)["_id"]), TrackColl: trackcoll, FromDbAddr: (*task)["s_mgoaddr"].(string), FromDB: (*task)["s_mgodb"].(string), FromColl: (*task)["s_mgocoll"].(string), TestColl: resultcoll, IsEtxLog: true, ProcessPool: make(chan bool, 1), } if (*v)["isextractcity"] != nil { e.IsExtractCity = (*v)["isextractcity"].(bool) } } else { return } } //加载任务信息 func (e *ExtractTask) InitTaskInfo() { task, _ := db.Mgo.FindById("task", e.Id, nil) log.Debug("task", task) if len(*task) > 1 { v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`) strs := strings.Split((*task)["s_mgosavecoll"].(string), "/") log.Debug("s_mgosavecoll", strs) if len(strs) < 3 { return } else { e.TaskInfo = &TaskInfo{ Name: (*task)["s_taskname"].(string), Version: (*task)["s_version"].(string), VersionId: qu.BsonIdToSId((*v)["_id"]), //TrackColl: (*task)["s_trackcoll"].(string), FromDbAddr: (*task)["s_mgoaddr"].(string), FromDB: (*task)["s_mgodb"].(string), FromColl: (*task)["s_mgocoll"].(string), ToDbAddr: strs[0], ToDB: strs[1], ToColl: strs[2], IsEtxLog: false, //qu.If(qu.IntAll((*task)["i_track"]) == 1, true, false).(bool), LastExtId: qu.ObjToString((*task)["s_extlastid"]), ProcessPool: make(chan bool, qu.IntAllDef((*task)["i_process"], 1)), } if (*v)["isextractcity"] != nil { e.IsExtractCity = (*v)["isextractcity"].(bool) } } log.Debug(e.TaskInfo.Name, "thread:", qu.IntAllDef((*task)["i_process"], 1)) } else { return } } //加载通用前置规则 func (e *ExtractTask) InitRulePres() { defer qu.Catch() e.RulePres = []*RegLuaInfo{} list, _ := db.Mgo.Find("rule_pre", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { rinfo := &RegLuaInfo{ Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) e.RulePres = append(e.RulePres, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } e.RulePres = append(e.RulePres, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } } //加载通用后置规则 func (e *ExtractTask) InitRuleBacks() { defer qu.Catch() e.RuleBacks = []*RegLuaInfo{} list, _ := db.Mgo.Find("rule_back", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { rinfo := &RegLuaInfo{ Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) e.RuleBacks = append(e.RuleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } e.RuleBacks = append(e.RuleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } } func (e *ExtractTask) InfoTypeList() { infolist1, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1) infolist := *infolist1 for _, v := range infolist { e.InfoType = append(e.InfoType, v) } } //加载抽取规则 func (e *ExtractTask) InitRuleCore() { defer qu.Catch() e.Fields = map[string]int{} e.RuleCores = make(map[string]map[string][]*RuleCore) fieldrules := map[string][]*RuleCore{} vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1) for _, vinfo := range *vinfos { if b, _ := vinfo["isuse"].(bool); !b { continue } s_field := qu.ObjToString(vinfo["s_field"]) pid := qu.BsonIdToSId(vinfo["_id"]) list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *list { if b, _ := vv["isuse"].(bool); !b { continue } rcore := &RuleCore{} rcore.Field = s_field rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本 rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string) //前置规则 rulePres := []*RegLuaInfo{} plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *plist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) rulePres = append(rulePres, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } rulePres = append(rulePres, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RulePres = rulePres //后置规则 ruleBacks := []*RegLuaInfo{} blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *blist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) ruleBacks = append(ruleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } ruleBacks = append(ruleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleBacks = ruleBacks //抽取规则 ruleCores := []*RegLuaInfo{} clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *clist { if b, _ := v["isuse"].(bool); !b { continue } field := qu.ObjToString(v["s_field"]) e.Fields[field] = 1 //加入抽取属性组备用 rinfo := &RegLuaInfo{ Field: field, Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) //提取全部属性 rinfo.LFields = getALLFields() ruleCores = append(ruleCores, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { epos := strings.Split(tmp[1], ",") posm := map[string]int{} for _, v := range epos { ks := strings.Split(v, ":") if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area posm[ks[1]] = qu.IntAll(ks[0]) } else { //(.*)招标公告__2 posm[rinfo.Field] = qu.IntAll(ks[0]) } } rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm} } else { rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false} } ruleCores = append(ruleCores, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleCores = ruleCores // if fieldrules[s_field] == nil { fieldrules[s_field] = []*RuleCore{} } fieldrules[s_field] = append(fieldrules[s_field], rcore) } } //属性配置 infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1) for _, v := range *infolist { topclass := qu.ObjToString(v["topclass"]) if v["subclass"] == nil { e.RuleCores[topclass] = make(map[string][]*RuleCore) for attr, _ := range v["fields"].(map[string]interface{}) { e.RuleCores[topclass][attr] = fieldrules[attr] } } else { for ca, fs := range v["subclass"].(map[string]interface{}) { e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore) for field, _ := range fs.(map[string]interface{}) { e.RuleCores[topclass+"_"+ca][field] = fieldrules[field] } } } } } //加载抽取规则 func (e *ExtractTask) InitRuleCore2() { defer qu.Catch() e.Fields = map[string]int{} infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1) e.RuleCores = make(map[string]map[string][]*RuleCore) for _, v := range *infolist { topclass := qu.ObjToString(v["topclass"]) if v["subclass"] == nil { e.RuleCores[topclass] = make(map[string][]*RuleCore) for attr, _ := range v["fields"].(map[string]interface{}) { vinfo, _ := db.Mgo.FindOneByField("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false,"s_field":"`+attr+`"}`, `{}`) e.RuleCores[topclass][attr] = append(e.RuleCores[topclass][attr], e.InfoRole(*vinfo)...) } } else { for ca, fs := range v["subclass"].(map[string]interface{}) { e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore) for field, _ := range fs.(map[string]interface{}) { vinfo, _ := db.Mgo.FindOneByField("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false,"s_field":"`+field+`"}`, `{}`) e.RuleCores[topclass+"_"+ca][field] = append(e.RuleCores[topclass+"_"+ca][field], e.InfoRole(*vinfo)...) } } } } } func (e *ExtractTask) InfoRole(vinfo map[string]interface{}) []*RuleCore { maps := []*RuleCore{} if b, _ := vinfo["isuse"].(bool); !b { return nil } s_field := qu.ObjToString(vinfo["s_field"]) pid := qu.BsonIdToSId(vinfo["_id"]) list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *list { if b, _ := vv["isuse"].(bool); !b { continue } rcore := &RuleCore{} rcore.Field = s_field rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本 rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string) //前置规则 rulePres := []*RegLuaInfo{} plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *plist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) rulePres = append(rulePres, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } rulePres = append(rulePres, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RulePres = rulePres //后置规则 ruleBacks := []*RegLuaInfo{} blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *blist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) ruleBacks = append(ruleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } ruleBacks = append(ruleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleBacks = ruleBacks //抽取规则 ruleCores := []*RegLuaInfo{} clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *clist { if b, _ := v["isuse"].(bool); !b { continue } field := qu.ObjToString(v["s_field"]) e.Fields[field] = 1 //加入抽取属性组备用 rinfo := &RegLuaInfo{ Field: field, Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) //提取全部属性 rinfo.LFields = getALLFields() ruleCores = append(ruleCores, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) ptmp := strings.Split(rinfo.RuleText, "#") sign := 0 if len(ptmp) == 2 { if ptmp[1] == "正" { sign = 1 } else if ptmp[1] == "负" { sign = -1 } } tmp := strings.Split(ptmp[0], "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { epos := strings.Split(tmp[1], ",") posm := map[string]int{} for _, v := range epos { ks := strings.Split(v, ":") if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area posm[ks[1]] = qu.IntAll(ks[0]) } else { //(.*)招标公告__2 posm[rinfo.Field] = qu.IntAll(ks[0]) } } rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm, NumSign: sign} } else { rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false} } ruleCores = append(ruleCores, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleCores = ruleCores // maps = append(maps, rcore) } return maps } //加载分包抽取规则 func (e *ExtractTask) InitPkgCore() { defer qu.Catch() e.PkgRuleCores = []*RuleCore{} pkginfos, _ := db.Mgo.Find("pkg_info", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1) for _, pkginfo := range *pkginfos { if b, _ := pkginfo["isuse"].(bool); !b { continue } s_field := qu.ObjToString(pkginfo["s_field"]) pid := qu.BsonIdToSId(pkginfo["_id"]) logicList, _ := db.Mgo.Find("pkg_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *logicList { if b, _ := vv["isuse"].(bool); !b { continue } rcore := &RuleCore{} rcore.Field = s_field rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本 rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string) //后置规则 ruleBacks := []*RegLuaInfo{} blist, _ := db.Mgo.Find("pkg_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *blist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) ruleBacks = append(ruleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1) tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1) pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } ruleBacks = append(ruleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleBacks = ruleBacks e.PkgRuleCores = append(e.PkgRuleCores, rcore) } } } //加载标签库 func (e *ExtractTask) InitTag() { defer qu.Catch() e.Tag = map[string][]*Tag{} //字符串标签库 list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"string","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { field := qu.ObjToString(v["s_field"]) if tmp, ok := v["content"].([]interface{}); ok { fname := qu.ObjToString(v["s_name"]) tab := ju.TagFile{Name: fname} //用于表格kv tab.Items = make([]*ju.Tag, len(tmp)) for k, key := range tmp { tag := &Tag{Type: "string", Key: key.(string)} e.Tag[field] = append(e.Tag[field], tag) tab.Items[k] = &ju.Tag{key.(string), 0 - k, nil} } sort.Sort(tab.Items) ju.TagdbTable[fname] = &tab } } //正则标签库 list, _ = db.Mgo.Find("tagdetailinfo", `{"s_type":"reg","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { field := qu.ObjToString(v["s_field"]) if tmp, ok := v["content"].([]interface{}); ok { fname := qu.ObjToString(v["s_name"]) tab := ju.TagFile{Name: fname, Type: "reg"} //用于表格kv tab.Items = make([]*ju.Tag, len(tmp)) for k, key := range tmp { tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))} e.Tag[field] = append(e.Tag[field], tag) tab.Items[k] = &ju.Tag{key.(string), 0 - k, regexp.MustCompile(key.(string))} } sort.Sort(tab.Items) ju.TagdbTable[fname+"_reg"] = &tab } } } //获取fields func getALLFields() map[string]string { fields := map[string]string{} list, _ := db.Mgo.Find("fields", `{}`, nil, `{"s_field":1,"s_name"}`, false, -1, -1) for _, v := range *list { fields[qu.ObjToString(v["s_name"])] = qu.ObjToString(v["s_field"]) } return fields } //加载clear函数 func (e *ExtractTask) InitClearFn() { defer qu.Catch() list, _ := db.Mgo.Find("cleanup", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) fn := map[string][]string{} for _, tmp := range *list { field := tmp["s_field"].(string) fns := tmp["clear"].([]interface{}) if fn[field] == nil { fn[field] = []string{} } for _, v := range fns { fn[field] = append(fn[field], v.(string)) } } e.ClearFn = fn } //加载省份 func InitProvince(version string) map[string]interface{} { defer qu.Catch() fn := map[string]interface{}{} list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { name := qu.ObjToString(v["s_name"]) content := v["content"] switch content.(type) { case string: fn[name] = []interface{}{content.(string)} case []interface{}: fn[name] = content } } return fn } //加载城市简称 func InitCitySim(version string) map[string]map[string]interface{} { defer qu.Catch() list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1) fn := map[string]map[string]interface{}{} for _, v := range *list { name := qu.ObjToString(v["s_name"]) tmp := v["content"].(map[string]interface{}) fn[name] = tmp } return fn } //加载城市全称 func InitCityAll(version string) map[string]map[string]interface{} { defer qu.Catch() list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1) fn := map[string]map[string]interface{}{} for _, v := range *list { name := qu.ObjToString(v["s_name"]) tmp := v["content"].(map[string]interface{}) fn[name] = tmp } return fn } //初始化城市省份敏感词 func (e *ExtractTask) InitCityDFA() { defer qu.Catch() e.CityAllGet = &ju.DFA{} e.CitySimGet = &ju.DFA{} e.DistrictAllGet = &ju.DFA{} e.DistrictSimGet = &ju.DFA{} e.ProvinceAllGet = &ju.DFA{} e.ProvinceSimGet = &ju.DFA{} e.StreetGet = &ju.DFA{} //初始化map if e.ProvinceMap == nil { e.ProvinceMap = make(map[string]string) } if e.CityMap == nil { e.CityMap = make(map[string]string) } if e.DistrictSimAndAll == nil { e.DistrictSimAndAll = make(map[string]string) } if e.CityBriefMap == nil { e.CityBriefMap = make(map[string]*City) } if e.CityFullMap == nil { e.CityFullMap = make(map[string]*City) } if e.ProvinceBriefMap == nil { e.ProvinceBriefMap = make(map[string]*Province) } if e.DistrictCityMap == nil { e.DistrictCityMap = make(map[string]*City) } if e.StreetDistrictMap == nil { e.StreetDistrictMap = make(map[string]*District) } //初始化省 fn1 := InitProvince(e.TaskInfo.Version) for k, v := range fn1 { for _, p := range v.([]interface{}) { p1, _ := p.(string) e.ProvinceAllGet.AddWord(p1) //华中科技大学 e.ProvinceMap[p1] = k //华中科技大学:湖北 } } //初始化城市全称 fn2 := InitCityAll(e.TaskInfo.Version) for k, v := range fn2 { //加载省信息 e.ProvinceAllGet.AddWord(k) //加入省全称dfa(k:浙江省) p := &Province{} p.Name = k //省全称:浙江省 p.Brief = v["brief"].(string) //省简称:浙江 e.ProvinceSimGet.AddWord(p.Brief) //加入省简称dfa(k:浙江) e.ProvinceMap[k] = p.Brief //浙江省:浙江 e.ProvinceBriefMap[p.Brief] = p //浙江:省信息{} p.Cap = v["captial"].(string) //省会(杭州) //加载市信息 city, _ := v["city"].(map[string]interface{}) for k1, v1 := range city { e.CityAllGet.AddWord(k1) //加入市全称dfa(k:杭州市) v1m, _ := v1.(map[string]interface{}) c := &City{} c.Name = k1 //市全称:杭州市 c.Brief = v1m["brief"].(string) //市简称:杭州 e.CitySimGet.AddWord(c.Brief) //加入市简称dfa(k:杭州) e.CityMap[k1] = c.Brief //杭州市:杭州 e.CityBriefMap[c.Brief] = c //杭州:市信息{} e.CityFullMap[k1] = c //杭州市:市信息{} c.P = p if c.Name == p.Cap { p.Captial = c //加载province中的省会市信息{} } //区县 districtmap := v1m["area"].(map[string]interface{}) //区或县 for district, streetarr := range districtmap { d := &District{} d.Name = district d.C = c //省直辖市,河南济源市没有区一级,目前区一级写的还是济源市 //匹配时,如果匹配到区,拿区和市比对,相同则代表是省直辖市,不要区一级? e.DistrictAllGet.AddWord(district) //加入区或县全称dfa ctmp := e.DistrictCityMap[district] if ctmp == nil { e.DistrictCityMap[district] = c } //街道 for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) { e.StreetGet.AddWord(s) //加入街道敏感词 dtmp := e.StreetDistrictMap[s] if dtmp == nil { e.StreetDistrictMap[s] = d } } } } } //初始化城市简称 fn3 := InitCitySim(e.TaskInfo.Version) for _, v := range fn3 { city, _ := v["city"].(map[string]interface{}) for _, v1 := range city { v1m, _ := v1.(map[string]interface{}) cb := v1m["brief"].(string) //市简称 arr := v1m["area"].(map[string]interface{}) //区或县简称 for districtsim, districtall := range arr { e.DistrictSimAndAll[districtsim] = districtall.(string) d := &District{} d.Name = districtsim d.C = e.CityBriefMap[cb] e.DistrictSimGet.AddWord(districtsim) //加入区或县简称敏感词 ctmp := e.DistrictCityMap[districtsim] if ctmp == nil { e.DistrictCityMap[districtsim] = e.CityBriefMap[cb] } } } } } //初始化邮编库 func (e *ExtractTask) InitPostCode() { defer qu.Catch() if e.PostCodeMap == nil { e.PostCodeMap = make(map[string]*PostCode) } list, _ := db.Mgo.Find("postcode", nil, nil, nil, false, -1, -1) for _, l := range *list { pc := &PostCode{} pc.Code = qu.ObjToString(l["code"]) pc.P = qu.ObjToString(l["province"]) pc.C = qu.ObjToString(l["city"]) pc.D = qu.ObjArrToStringArr(l["district"].([]interface{})) e.PostCodeMap[pc.Code] = pc } } //初始化区号库 func (e *ExtractTask) InitAreaCode() { defer qu.Catch() if e.AreaCodeMap == nil { e.AreaCodeMap = make(map[string]*AreaCode) } list, _ := db.Mgo.Find("areacode", nil, nil, nil, false, -1, -1) for _, l := range *list { ac := &AreaCode{} ac.Code = qu.ObjToString(l["code"]) ac.P = qu.ObjToString(l["province"]) ac.C = qu.ObjArrToStringArr(l["city"].([]interface{})) e.AreaCodeMap[ac.Code] = ac } } //初始化城市省份敏感词 //func (e *ExtractTask) InitCityDFA() { // defer qu.Catch() // e.CityAllGet = &ju.DFA{} // e.DistrictGet = &ju.DFA{} // e.AreaProvinceGet = &ju.DFA{} // e.StreetGet = &ju.DFA{} // //初始化map // if e.ProvinceMap == nil { // e.ProvinceMap = make(map[string]string) // } // if e.CityBriefMap == nil { // e.CityBriefMap = make(map[string]*City) // } // if e.ProvinceBriefMap == nil { // e.ProvinceBriefMap = make(map[string]*Province) // } // if e.AreaToCityMap == nil { // e.AreaToCityMap = make(map[string][]*City) // } // if e.DistrictCityMap == nil { // e.DistrictCityMap = make(map[string]*City) // } // if e.StreetDistrictMap == nil { // e.StreetDistrictMap = make(map[string]*District) // } // //初始化省 // fn1 := InitProvince(e.TaskInfo.Version) // for k, v := range fn1 { // for _, p := range v.([]interface{}) { // p1, _ := p.(string) // e.AreaProvinceGet.AddWord(p1) //华中科技大学 // e.ProvinceMap[p1] = k //华中科技大学:湖北 // } // } // //初始化城市全称 // fn2 := InitCityAll(e.TaskInfo.Version) // for k, v := range fn2 { // e.AreaProvinceGet.AddWord(k) //加入省全称dfa(k:浙江省) // p := &Province{} // p.Name = k //省全称 // p.Brief = v["brief"].(string) //省简称 // e.ProvinceMap[k] = p.Brief //浙江省:浙江 // e.ProvinceBriefMap[p.Brief] = p //浙江:省信息 // p.Cap = v["captial"].(string) //省会(杭州) // city, _ := v["city"].(map[string]interface{}) // // // for k1, v1 := range city { // v1m, _ := v1.(map[string]interface{}) // c := &City{} // c.Name = k1 // c.Brief = v1m["brief"].(string) // e.CityBriefMap[c.Brief] = c // c.P = p // if c.Brief == p.Cap { // p.Captial = c // } // //加入到城市map中 // // // cs := e.AreaToCityMap[k1] // e.CityAllGet.AddWord(k1) //市全称 // if cs != nil { // cs = append(cs, c) // } else { // cs = []*City{c} // } // e.AreaToCityMap[k1] = cs // //区县 // districtmap := v1m["area"].(map[string]interface{}) //区或县 // for district, streetarr := range districtmap { // d := &District{} // d.Name = district // d.C = c // e.DistrictGet.AddWord(district) //加入区或县敏感词 // ctmp := e.DistrictCityMap[district] // if ctmp == nil { // e.DistrictCityMap[district] = c // } // //街道 // for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) { // e.StreetGet.AddWord(s) //加入街道敏感词 // dtmp := e.StreetDistrictMap[s] // if dtmp == nil { // e.StreetDistrictMap[s] = d // } // } // } // } // } // //初始化城市简称 // fn3 := InitCitySim(e.TaskInfo.Version) // e.CitySimGet = &ju.DFA{} // for k, v := range fn3 { // pb := v["brief"].(string) // p := e.ProvinceBriefMap[pb] // //加载 // for _, ss := range []string{k, pb} { //省全称和省简称 // cs := e.AreaToCityMap[ss] // if cs != nil { // cs = append(cs, p.Captial) // } else { // cs = []*City{p.Captial} // } // e.AreaToCityMap[ss] = cs // e.CitySimGet.AddWord(ss) // } // city, _ := v["city"].(map[string]interface{}) // for k1, v1 := range city { // v1m, _ := v1.(map[string]interface{}) // if v1m["brief"] == nil { // } // cb := v1m["brief"].(string) // c := e.AreaToCityMap[k1][0] // //加入到城市map中 // for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州 浙江杭州 // e.CitySimGet.AddWord(ss) // cs := e.AreaToCityMap[ss] // if cs != nil { // cs = append(cs, c) // } else { // cs = []*City{c} // } // e.AreaToCityMap[ss] = cs // } // arr := v1m["area"].([]interface{}) // for _, k2 := range arr { // s := k2.(string) // for n, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安 // cs := e.AreaToCityMap[ss] // e.CitySimGet.AddWord(ss) // if cs != nil { // cs = append(cs, c) // } else { // cs = []*City{c} // } // e.AreaToCityMap[ss] = cs // //只加入简称 // if n == 0 { // d := &District{} // d.Name = ss // d.C = c // e.DistrictGet.AddWord(ss) //加入区或县简称敏感词 // ctmp := e.DistrictCityMap[ss] // if ctmp == nil { // e.DistrictCityMap[ss] = c // } // } // } // } // } // } //} //保存抽取详情数据 func (e *ExtractTask) ResultSave(init bool) { defer qu.Catch() if e.ResultArr == nil { e.ResultArr = [][]map[string]interface{}{} } if init { go func() { for { if len(e.ResultArr) > 500 { arr := e.ResultArr[:500] e.ResultArr = e.ResultArr[500:] qu.Try(func() { db.Mgo.UpSertBulk("extract_result", arr...) }, func(err interface{}) { log.Debug(err) }) } else { arr := e.ResultArr e.ResultArr = [][]map[string]interface{}{} qu.Try(func() { db.Mgo.UpSertBulk("extract_result", arr...) }, func(err interface{}) { log.Debug(err) }) } time.Sleep(10 * time.Second) } }() } else { arr := e.ResultArr e.ResultArr = [][]map[string]interface{}{} qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) } } //保存抽取数据 func (e *ExtractTask) BidSave(init bool) { defer qu.Catch() if e.BidArr == nil { e.BidArr = [][]map[string]interface{}{} } if init { go func() { for { if len(e.BidArr) > 500 { arr := e.BidArr[:500] e.BidArr = e.BidArr[500:] qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) } else { arr := e.BidArr e.BidArr = [][]map[string]interface{}{} qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) } time.Sleep(10 * time.Second) } }() } else { arr := e.BidArr e.BidArr = [][]map[string]interface{}{} qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) time.Sleep(1 * time.Second) } } func (e *ExtractTask) InitAuditRecogField() { defer qu.Catch() e.RecogFieldMap = make(map[string]map[string]interface{}) recogFieldList, _ := db.Mgo.Find("rc_field", `{"delete":false}`, `{"_id":1}`, `{"s_recogfield":1,"s_recogfield_prerule":1}`, false, -1, -1) for _, f := range *recogFieldList { field := qu.ObjToString(f["s_recogfield"]) e.RecogFieldMap[field] = f } } func (e *ExtractTask) InitAuditClass() { defer qu.Catch() e.FidClassMap = make(map[string][]map[string]interface{}) class, _ := db.Mgo.Find("rc_class", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1) for _, c := range *class { classList := []map[string]interface{}{} fid := qu.ObjToString(c["s_fid"]) if len(e.FidClassMap[fid]) > 0 { //追加 classList = e.FidClassMap[fid] } classList = append(classList, c) e.FidClassMap[fid] = classList } } //加载规则 func (e *ExtractTask) InitAuditRule() { defer qu.Catch() var rureg *regexp.Regexp var rs []rune var ru string var err error e.CidRuleMap = make(map[string][]map[string]interface{}) rule, _ := db.Mgo.Find("rc_rule", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1) for _, v := range *rule { i_rule := []interface{}{} ss, _ := (v["s_rule"].([]interface{})) for _, r := range qu.ObjArrToStringArr(ss) { if strings.HasPrefix(r, "'") && strings.HasSuffix(r, "'") { //正则 rs = []rune(r) ru = string(rs[1 : len(rs)-1]) rureg, err = regexp.Compile(ru) if err != nil { log.Debug("error---rule:", r) continue } i_rule = append(i_rule, []interface{}{rureg}...) } else { //规则 i_rule = append(i_rule, r) } } v["rule"] = i_rule ruleList := []map[string]interface{}{} classid := qu.ObjToString(v["s_classid"]) if len(e.CidRuleMap[classid]) > 0 { //追加 ruleList = e.CidRuleMap[classid] } ruleList = append(ruleList, v) e.CidRuleMap[classid] = ruleList } } // func (e *ExtractTask) InitAuditFields() { if len(e.AuditFields) == 0 { v, _ := db.Mgo.FindOne("version", `{"isuse":true,"delete":false}`) //查找当前使用版本 if v != nil && len(*v) > 0 { //查找当前使用版本中属性配置需要审核的字段 vid := qu.BsonIdToSId((*v)["_id"]) query := map[string]interface{}{ "isaudit": true, "delete": false, "vid": vid, } data, _ := db.Mgo.Find("versioninfo", query, `{"_id":-1}`, `{"s_field":1}`, false, -1, -1) for _, d := range *data { field := qu.ObjToString(d["s_field"]) e.AuditFields = append(e.AuditFields, field) } } } } //加载附件抽取 func (e *ExtractTask) InitFile() { defer qu.Catch() //query:=bson.M{"version":e.TaskInfo.Version,"delete":false} ve, _ := db.Mgo.FindOne("version", `{"version":"`+e.TaskInfo.Version+`","delete":false}`) //ve, _ := db.Mgo.FindOne("version", query) if ve == nil { return } if (*ve)["isfiles"] != nil && (*ve)["isfiles"].(bool) { e.IsFileField = true } syscefiled := new(sync.Map) if (*ve)["s_filefileds"] != nil { for _, vff := range (*ve)["s_filefileds"].([]interface{}) { syscefiled.Store(vff.(string), 1) } } e.FileFields = syscefiled } //加载清理任务信息 func (c *ClearTask) InitClearTaskInfo() { cleartask, _ := db.Mgo.FindById("cleartask", c.Id, nil) if len(*cleartask) > 1 { v, _ := db.Mgo.FindOne("clearversion", `{"clearversion":"`+(*cleartask)["s_version"].(string)+`","delete":false}`) c.ClearTaskInfo = &ClearTaskInfo{ Name: (*cleartask)["s_taskname"].(string), Version: (*cleartask)["s_version"].(string), VersionId: qu.BsonIdToSId((*v)["_id"]), FromDbAddr: (*cleartask)["s_mgoaddr"].(string), FromDB: (*cleartask)["s_mgodb"].(string), FromColl: (*cleartask)["s_mgocoll"].(string), IsCltLog: ju.Config["iscltlog"].(bool), ProcessPool: make(chan bool, qu.IntAllDef((*cleartask)["i_process"], 1)), } log.Debug(c.ClearTaskInfo.Name, "thread:", qu.IntAllDef((*cleartask)["i_process"], 1)) } else { return } } //加载清理脚本 func (c *ClearTask) InitClearLuas() { defer qu.Catch() c.ClearLuas = make(map[string][]*ClearLua) list, _ := db.Mgo.Find("clearversioninfo", `{"vid":"`+c.ClearTaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1) for _, l := range *list { if b, _ := l["isuse"].(bool); !b { //仅使用启用的属性 continue } s_field := qu.ObjToString(l["s_field"]) pid := qu.BsonIdToSId(l["_id"]) luas, _ := db.Mgo.Find("clearulelogic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *luas { if b, _ := vv["isuse"].(bool); !b { continue } clearLua := &ClearLua{ Field: s_field, Code: vv["s_code"].(string), Name: vv["s_name"].(string), LuaText: vv["s_luascript"].(string), LFields: getALLFields(), } c.ClearLuas[s_field] = append(c.ClearLuas[s_field], clearLua) } } } //加载分块规则 func (e *ExtractTask) InitBlockRule() { datas, _ := db.Mgo.Find("block_info", map[string]interface{}{ "vid": e.TaskInfo.VersionId, "delete": false, }, `{"index":-1}`, `{"block_reg":1,"title_reg":1}`, false, -1, -1) brs, trs := []*regexp.Regexp{}, []*regexp.Regexp{} for _, v := range *datas { block_reg, _ := v["block_reg"].(string) block_reg, _ = strconv.Unquote(`"` + block_reg + `"`) title_reg, _ := v["title_reg"].(string) title_reg, _ = strconv.Unquote(`"` + title_reg + `"`) if block_reg == "" || title_reg == "" { continue } b_reg, b_err := regexp.Compile(block_reg) t_reg, t_err := regexp.Compile(title_reg) if b_err != nil || t_err != nil { continue } brs = append(brs, b_reg) trs = append(trs, t_reg) } e.RuleBlock = &ju.RuleBlock{ BlockRegs: brs, TitleRegs: trs, Classify: e.InitBlockClassify(), } } //加载分块规则 func (e *ExtractTask) InitBlockClassify() *ju.BlockClassify { classify, _ := db.Mgo.Find("block_classify", map[string]interface{}{ "vid": e.TaskInfo.VersionId, "delete": false, }, nil, `{"name":1}`, false, -1, -1) classify_info, _ := db.Mgo.Find("block_classify_info", map[string]interface{}{ "vid": e.TaskInfo.VersionId, "delete": false, }, nil, `{"name":1,"code":1,"pid":1}`, false, -1, -1) classify_tag, _ := db.Mgo.Find("block_classify_tag", map[string]interface{}{ "vid": e.TaskInfo.VersionId, "delete": false, }, nil, `{"name":1,"pid":1}`, false, -1, -1) tag_map := map[string]ju.Tags{} for _, v := range *classify_tag { pid := qu.ObjToString(v["pid"]) tag_map[pid] = append(tag_map[pid], &ju.Tag{Value: qu.ObjToString(v["name"])}) } // info_map := map[string][]*ju.NameCode{} info_tag := map[string]*ju.TagFile{} for _, v := range *classify_info { pid := qu.ObjToString(v["pid"]) _id := qu.BsonIdToSId(v["_id"]) name := qu.ObjToString(v["name"]) info_tag[name] = &ju.TagFile{ Name: name, Items: tag_map[_id], } info_map[pid] = append(info_map[pid], &ju.NameCode{ Name: name, Code: qu.ObjToString(v["code"]), }) } classify_map := map[string][]*ju.NameCode{} for _, v := range *classify { _id := qu.BsonIdToSId(v["_id"]) if info_map[_id] == nil { continue } for _, vv := range strings.Split(qu.ObjToString(v["name"]), ",") { classify_map[vv] = append(classify_map[vv], info_map[_id]...) } } return &ju.BlockClassify{ Type: classify_map, Classify: info_tag, } }