// extractInit package extract import ( db "jy/mongodbutil" ju "jy/util" qu "qfw/util" "regexp" "sort" "strconv" "strings" "sync" "time" log "github.com/donnie4w/go-logger/logger" ) type RegLuaInfo struct { //正则或脚本信息 Code, Name, Field string // RuleText string // IsLua bool // RegPreBac *ExtReg // RegCore *ExtReg // LFields map[string]string //lua抽取字段属性组 } type ExtReg struct { Reg *regexp.Regexp Replace string Bextract bool ExtractPos map[string]int } type RuleCore struct { Field string //逻辑字段 LuaLogic string //进入逻辑 ExtFrom string //从哪个字段抽取 RulePres []*RegLuaInfo //抽取前置规则 RuleBacks []*RegLuaInfo //抽取后置规则 RuleCores []*RegLuaInfo //抽取规则 } type Tag struct { Type string //标签类型 string 字符串、regexp 正则 Key string // Reg *regexp.Regexp // } type TaskInfo struct { Name, Version, VersionId, TrackColl string //名称、版本、版本id、追踪记录表 FromDbAddr, FromDB, FromColl string //抽取数据库地址、库名、表名 ToDbAddr, ToDB, ToColl string //结果数据库地址、库名、表名 TestColl, LastExtId string //测试结果表、上次抽取信息id FDB *db.Pool //数据库连接池 TDB *db.Pool //数据库连接池 IsEtxLog bool //是否开启抽取日志 ProcessPool chan bool //任务进程池 TestLua bool //检查测试用 } type ExtractTask struct { Id string //任务id IsRun bool //是否启动 Content string //信息内容 TaskInfo *TaskInfo //任务信息 RulePres []*RegLuaInfo //通用前置规则 RuleBacks []*RegLuaInfo //通用后置规则 RuleCores []*RuleCore //抽取规则 PkgRuleCores []*RuleCore //分包抽取规则 Tag map[string][]*Tag //标签库 ClearFn map[string][]string //清理函数 IsExtractCity bool //是否开启城市抽取 Fields map[string]int //抽取属性组 IsFileField bool //是否开启附件抽取 FileFields *sync.Map //抽取附件属性组 ResultChanel chan bool //抽取结果详情 ResultArr [][]map[string]interface{} //抽取结果详情 BidChanel chan bool //抽取结果 BidArr [][]map[string]interface{} //抽取结果 RecogFieldMap map[string]map[string]interface{} //识别字段 FidClassMap map[string][]map[string]interface{} //分类 CidRuleMap map[string][]map[string]interface{} //规则 AuditFields []string //需要审核的字段名称 ProvinceMap map[string]string CityBrief map[string]*City //只加载一次即可 ProvinceBrief map[string]*Province //只加载一次 AreaToCity map[string][]*City //两个文件共用 DistrictCityMap map[string]*City StreetDistrictMap map[string]*District AreaGet *ju.DFA //市全称 AreaDistrict *ju.DFA //区或县 AreaProvinceGet *ju.DFA //省 AreaSimGet *ju.DFA //市简称 AreaStreet *ju.DFA //街道 } type ClearTaskInfo struct { Name, Version, VersionId string //名称、版本、版本id FromDbAddr, FromDB, FromColl string //清理数据库地址、库名、表名 FDB *db.Pool //数据库连接池 TDB *db.Pool //数据库连接池 IsCltLog bool //是否开启清理日志 ProcessPool chan bool //任务进程池 } type ClearLua struct { Field string //字段字段 Code string //代码 Name string //名称 LuaText string //LuaLogic string //进入逻辑 //ExtFrom string //从哪个字段抽取 LFields map[string]string //lua抽取字段属性组 } type ClearTask struct { Id string //任务id Content string //信息内容 ClearTaskInfo *ClearTaskInfo //任务信息 ClearLuas map[string][]*ClearLua //清理脚本 UpdateResult [][]map[string]interface{} //清理后结果 ClearChannel chan bool } func init() { TaskList = make(map[string]*ExtractTask) ClearTaskList = make(map[string]*ClearTask) go SaveExtLog() go SaveCltLog() //保存清理日志 } //加载任务信息 func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) { task, _ := db.Mgo.FindById("task", e.Id, nil) if len(*task) > 1 { v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`) e.TaskInfo = &TaskInfo{ Name: (*task)["s_taskname"].(string), Version: (*task)["s_version"].(string), VersionId: qu.BsonIdToSId((*v)["_id"]), TrackColl: trackcoll, FromDbAddr: (*task)["s_mgoaddr"].(string), FromDB: (*task)["s_mgodb"].(string), FromColl: (*task)["s_mgocoll"].(string), TestColl: resultcoll, IsEtxLog: true, ProcessPool: make(chan bool, 1), } if (*v)["isextractcity"] != nil { e.IsExtractCity = (*v)["isextractcity"].(bool) } } else { return } } //加载任务信息 func (e *ExtractTask) InitTaskInfo() { task, _ := db.Mgo.FindById("task", e.Id, nil) log.Debug("task", task) if len(*task) > 1 { v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`) strs := strings.Split((*task)["s_mgosavecoll"].(string), "/") log.Debug("s_mgosavecoll", strs) if len(strs) < 3 { return } else { e.TaskInfo = &TaskInfo{ Name: (*task)["s_taskname"].(string), Version: (*task)["s_version"].(string), VersionId: qu.BsonIdToSId((*v)["_id"]), //TrackColl: (*task)["s_trackcoll"].(string), FromDbAddr: (*task)["s_mgoaddr"].(string), FromDB: (*task)["s_mgodb"].(string), FromColl: (*task)["s_mgocoll"].(string), ToDbAddr: strs[0], ToDB: strs[1], ToColl: strs[2], IsEtxLog: false, //qu.If(qu.IntAll((*task)["i_track"]) == 1, true, false).(bool), LastExtId: qu.ObjToString((*task)["s_extlastid"]), ProcessPool: make(chan bool, qu.IntAllDef((*task)["i_process"], 1)), } if (*v)["isextractcity"] != nil { e.IsExtractCity = (*v)["isextractcity"].(bool) } } log.Debug(e.TaskInfo.Name, "thread:", qu.IntAllDef((*task)["i_process"], 1)) } else { return } } //加载通用前置规则 func (e *ExtractTask) InitRulePres() { defer qu.Catch() e.RulePres = []*RegLuaInfo{} list, _ := db.Mgo.Find("rule_pre", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { rinfo := &RegLuaInfo{ Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) e.RulePres = append(e.RulePres, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } e.RulePres = append(e.RulePres, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } } //加载通用后置规则 func (e *ExtractTask) InitRuleBacks() { defer qu.Catch() e.RuleBacks = []*RegLuaInfo{} list, _ := db.Mgo.Find("rule_back", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { rinfo := &RegLuaInfo{ Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) e.RuleBacks = append(e.RuleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } e.RuleBacks = append(e.RuleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } } //加载抽取规则 func (e *ExtractTask) InitRuleCore() { defer qu.Catch() e.Fields = map[string]int{} e.RuleCores = []*RuleCore{} vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1) for _, vinfo := range *vinfos { if b, _ := vinfo["isuse"].(bool); !b { continue } s_field := qu.ObjToString(vinfo["s_field"]) pid := qu.BsonIdToSId(vinfo["_id"]) list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *list { if b, _ := vv["isuse"].(bool); !b { continue } rcore := &RuleCore{} rcore.Field = s_field rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本 rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string) //前置规则 rulePres := []*RegLuaInfo{} plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *plist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) rulePres = append(rulePres, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } rulePres = append(rulePres, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RulePres = rulePres //后置规则 ruleBacks := []*RegLuaInfo{} blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *blist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) ruleBacks = append(ruleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } ruleBacks = append(ruleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleBacks = ruleBacks //抽取规则 ruleCores := []*RegLuaInfo{} clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *clist { if b, _ := v["isuse"].(bool); !b { continue } field := qu.ObjToString(v["s_field"]) e.Fields[field] = 1 //加入抽取属性组备用 rinfo := &RegLuaInfo{ Field: field, Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) //提取全部属性 rinfo.LFields = getALLFields() ruleCores = append(ruleCores, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { epos := strings.Split(tmp[1], ",") posm := map[string]int{} for _, v := range epos { ks := strings.Split(v, ":") if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area posm[ks[1]] = qu.IntAll(ks[0]) } else { //(.*)招标公告__2 posm[rinfo.Field] = qu.IntAll(ks[0]) } } rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm} } else { rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false} } ruleCores = append(ruleCores, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleCores = ruleCores // e.RuleCores = append(e.RuleCores, rcore) } } } //加载分包抽取规则 func (e *ExtractTask) InitPkgCore() { defer qu.Catch() e.PkgRuleCores = []*RuleCore{} pkginfos, _ := db.Mgo.Find("pkg_info", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1) for _, pkginfo := range *pkginfos { if b, _ := pkginfo["isuse"].(bool); !b { continue } s_field := qu.ObjToString(pkginfo["s_field"]) pid := qu.BsonIdToSId(pkginfo["_id"]) logicList, _ := db.Mgo.Find("pkg_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *logicList { if b, _ := vv["isuse"].(bool); !b { continue } rcore := &RuleCore{} rcore.Field = s_field rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本 rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string) //后置规则 ruleBacks := []*RegLuaInfo{} blist, _ := db.Mgo.Find("pkg_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *blist { rinfo := &RegLuaInfo{ Field: qu.ObjToString(v["s_field"]), Code: v["s_code"].(string), Name: v["s_name"].(string), IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool), } if rinfo.IsLua { rinfo.RuleText = v["s_luascript"].(string) ruleBacks = append(ruleBacks, rinfo) } else { qu.Try(func() { rinfo.RuleText = v["s_rule"].(string) tmp := strings.Split(rinfo.RuleText, "__") var pattern string if strings.Contains(tmp[0], "\\u") { pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`) } else { pattern = tmp[0] } if len(tmp) == 2 { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]} } else { rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""} } ruleBacks = append(ruleBacks, rinfo) }, func(err interface{}) { log.Debug(rinfo.Code, rinfo.Field, err) }) } } rcore.RuleBacks = ruleBacks e.PkgRuleCores = append(e.PkgRuleCores, rcore) } } } //加载标签库 func (e *ExtractTask) InitTag() { defer qu.Catch() e.Tag = map[string][]*Tag{} //字符串标签库 list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"string","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { field := qu.ObjToString(v["s_field"]) if tmp, ok := v["content"].([]interface{}); ok { fname := qu.ObjToString(v["s_name"]) tab := ju.TagFile{Name: fname} //用于表格kv tab.Items = make([]*ju.Tag, len(tmp)) for k, key := range tmp { tag := &Tag{Type: "string", Key: key.(string)} e.Tag[field] = append(e.Tag[field], tag) tab.Items[k] = &ju.Tag{key.(string), 0 - k, nil} } sort.Sort(tab.Items) ju.TagdbTable[fname] = &tab } } //正则标签库 list, _ = db.Mgo.Find("tagdetailinfo", `{"s_type":"reg","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { field := qu.ObjToString(v["s_field"]) if tmp, ok := v["content"].([]interface{}); ok { fname := qu.ObjToString(v["s_name"]) tab := ju.TagFile{Name: fname, Type: "reg"} //用于表格kv tab.Items = make([]*ju.Tag, len(tmp)) for k, key := range tmp { tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))} e.Tag[field] = append(e.Tag[field], tag) tab.Items[k] = &ju.Tag{key.(string), 0 - k, regexp.MustCompile(key.(string))} } sort.Sort(tab.Items) ju.TagdbTable[fname+"_reg"] = &tab } } } //获取fields func getALLFields() map[string]string { fields := map[string]string{} list, _ := db.Mgo.Find("fields", `{}`, nil, `{"s_field":1,"s_name"}`, false, -1, -1) for _, v := range *list { fields[qu.ObjToString(v["s_name"])] = qu.ObjToString(v["s_field"]) } return fields } //加载clear函数 func (e *ExtractTask) InitClearFn() { defer qu.Catch() list, _ := db.Mgo.Find("cleanup", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1) fn := map[string][]string{} for _, tmp := range *list { field := tmp["s_field"].(string) fns := tmp["clear"].([]interface{}) if fn[field] == nil { fn[field] = []string{} } for _, v := range fns { fn[field] = append(fn[field], v.(string)) } } e.ClearFn = fn } //加载省份 func InitProvince(version string) map[string]interface{} { defer qu.Catch() fn := map[string]interface{}{} list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1) for _, v := range *list { name := qu.ObjToString(v["s_name"]) content := v["content"] switch content.(type) { case string: fn[name] = []interface{}{content.(string)} case []interface{}: fn[name] = content } } return fn } //加载城市简称 func InitCitySim(version string) map[string]map[string]interface{} { defer qu.Catch() list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1) fn := map[string]map[string]interface{}{} for _, v := range *list { name := qu.ObjToString(v["s_name"]) tmp := v["content"].(map[string]interface{}) fn[name] = tmp } return fn } //加载城市全称 func InitCityAll(version string) map[string]map[string]interface{} { defer qu.Catch() list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1) fn := map[string]map[string]interface{}{} for _, v := range *list { name := qu.ObjToString(v["s_name"]) tmp := v["content"].(map[string]interface{}) fn[name] = tmp } return fn } //初始化城市省份敏感词 func (e *ExtractTask) InitDFA() { defer qu.Catch() e.AreaGet = &ju.DFA{} e.AreaDistrict = &ju.DFA{} e.AreaProvinceGet = &ju.DFA{} e.AreaStreet = &ju.DFA{} //初始化map if e.ProvinceMap == nil { e.ProvinceMap = make(map[string]string) } if e.CityBrief == nil { e.CityBrief = make(map[string]*City) } if e.ProvinceBrief == nil { e.ProvinceBrief = make(map[string]*Province) } if e.AreaToCity == nil { e.AreaToCity = make(map[string][]*City) } if e.DistrictCityMap == nil { e.DistrictCityMap = make(map[string]*City) } if e.StreetDistrictMap == nil { e.StreetDistrictMap = make(map[string]*District) } //初始化省 fn1 := InitProvince(e.TaskInfo.Version) for k, v := range fn1 { for _, p := range v.([]interface{}) { p1, _ := p.(string) e.AreaProvinceGet.AddWord(p1) e.ProvinceMap[p1] = k } } //初始化城市全称 fn2 := InitCityAll(e.TaskInfo.Version) for k, v := range fn2 { e.AreaProvinceGet.AddWord(k) //省全称 p := &Province{} p.Name = k p.Brief = v["brief"].(string) e.ProvinceMap[k] = p.Brief // e.ProvinceBrief[p.Brief] = p p.Cap = v["captial"].(string) city, _ := v["city"].(map[string]interface{}) for k1, v1 := range city { v1m, _ := v1.(map[string]interface{}) c := &City{} c.Name = k1 // if v1m["brief"] == nil { // } c.Brief = v1m["brief"].(string) // e.CityBrief[c.Brief] = c c.P = p if c.Brief == p.Cap { p.Captial = c } //加入到城市map中 // cs := e.AreaToCity[k1] e.AreaGet.AddWord(k1) //市全称 if cs != nil { cs = append(cs, c) } else { cs = []*City{c} } e.AreaToCity[k1] = cs //区县 districtmap := v1m["area"].(map[string]interface{}) //区或县 for district, streetarr := range districtmap { d := &District{} d.Name = district d.C = c e.AreaDistrict.AddWord(district) //加入区或县敏感词 ctmp := e.DistrictCityMap[district] if ctmp == nil { e.DistrictCityMap[district] = c } //街道 for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) { e.AreaStreet.AddWord(s) //加入街道敏感词 dtmp := e.StreetDistrictMap[s] if dtmp == nil { e.StreetDistrictMap[s] = d } } } } } //初始化城市简称 fn3 := InitCitySim(e.TaskInfo.Version) e.AreaSimGet = &ju.DFA{} for k, v := range fn3 { pb := v["brief"].(string) p := e.ProvinceBrief[pb] //加载 for _, ss := range []string{k, pb} { cs := e.AreaToCity[ss] if cs != nil { cs = append(cs, p.Captial) } else { cs = []*City{p.Captial} } e.AreaToCity[ss] = cs e.AreaSimGet.AddWord(ss) //省全称和省简称 } city, _ := v["city"].(map[string]interface{}) for k1, v1 := range city { v1m, _ := v1.(map[string]interface{}) if v1m["brief"] == nil { } cb := v1m["brief"].(string) c := e.AreaToCity[k1][0] //加入到城市map中 for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州 浙江杭州 e.AreaSimGet.AddWord(ss) cs := e.AreaToCity[ss] if cs != nil { cs = append(cs, c) } else { cs = []*City{c} } e.AreaToCity[ss] = cs } arr := v1m["area"].([]interface{}) for _, k2 := range arr { s := k2.(string) for n, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安 cs := e.AreaToCity[ss] e.AreaSimGet.AddWord(ss) if cs != nil { cs = append(cs, c) } else { cs = []*City{c} } e.AreaToCity[ss] = cs //只加入简称 if n == 0 { d := &District{} d.Name = ss d.C = c e.AreaDistrict.AddWord(ss) //加入区或县简称敏感词 ctmp := e.DistrictCityMap[ss] if ctmp == nil { e.DistrictCityMap[ss] = c } } } } } } } //保存抽取详情数据 func (e *ExtractTask) ResultSave(init bool) { defer qu.Catch() if e.ResultArr == nil { e.ResultArr = [][]map[string]interface{}{} } if init { go func() { for { if len(e.ResultArr) > 500 { arr := e.ResultArr[:500] qu.Try(func() { db.Mgo.UpSertBulk("extract_result", arr...) }, func(err interface{}) { log.Debug(err) }) e.ResultArr = e.ResultArr[500:] } else { arr := e.ResultArr qu.Try(func() { db.Mgo.UpSertBulk("extract_result", arr...) }, func(err interface{}) { log.Debug(err) }) e.ResultArr = [][]map[string]interface{}{} } time.Sleep(10 * time.Second) } }() } else { arr := e.ResultArr qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) e.ResultArr = [][]map[string]interface{}{} } } //保存抽取数据 func (e *ExtractTask) BidSave(init bool) { defer qu.Catch() if e.BidArr == nil { e.BidArr = [][]map[string]interface{}{} } if init { go func() { for { if len(e.BidArr) > 500 { arr := e.BidArr[:500] qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) e.BidArr = e.BidArr[500:] } else { arr := e.BidArr qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) e.BidArr = [][]map[string]interface{}{} } time.Sleep(10 * time.Second) } }() } else { arr := e.BidArr qu.Try(func() { e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...) }, func(err interface{}) { log.Debug(err) }) e.BidArr = [][]map[string]interface{}{} time.Sleep(1 * time.Second) } } func (e *ExtractTask) InitAuditRecogField() { defer qu.Catch() e.RecogFieldMap = make(map[string]map[string]interface{}) recogFieldList, _ := db.Mgo.Find("rc_field", `{"delete":false}`, `{"_id":1}`, `{"s_recogfield":1,"s_recogfield_prerule":1}`, false, -1, -1) for _, f := range *recogFieldList { field := qu.ObjToString(f["s_recogfield"]) e.RecogFieldMap[field] = f } } func (e *ExtractTask) InitAuditClass() { defer qu.Catch() e.FidClassMap = make(map[string][]map[string]interface{}) class, _ := db.Mgo.Find("rc_class", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1) for _, c := range *class { classList := []map[string]interface{}{} fid := qu.ObjToString(c["s_fid"]) if len(e.FidClassMap[fid]) > 0 { //追加 classList = e.FidClassMap[fid] } classList = append(classList, c) e.FidClassMap[fid] = classList } } //加载规则 func (e *ExtractTask) InitAuditRule() { defer qu.Catch() var rureg *regexp.Regexp var rs []rune var ru string var err error e.CidRuleMap = make(map[string][]map[string]interface{}) rule, _ := db.Mgo.Find("rc_rule", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1) for _, v := range *rule { i_rule := []interface{}{} ss, _ := (v["s_rule"].([]interface{})) for _, r := range qu.ObjArrToStringArr(ss) { if strings.HasPrefix(r, "'") && strings.HasSuffix(r, "'") { //正则 rs = []rune(r) ru = string(rs[1 : len(rs)-1]) rureg, err = regexp.Compile(ru) if err != nil { log.Debug("error---rule:", r) continue } i_rule = append(i_rule, []interface{}{rureg}...) } else { //规则 i_rule = append(i_rule, r) } } v["rule"] = i_rule ruleList := []map[string]interface{}{} classid := qu.ObjToString(v["s_classid"]) if len(e.CidRuleMap[classid]) > 0 { //追加 ruleList = e.CidRuleMap[classid] } ruleList = append(ruleList, v) e.CidRuleMap[classid] = ruleList } } // func (e *ExtractTask) InitAuditFields() { if len(e.AuditFields) == 0 { v, _ := db.Mgo.FindOne("version", `{"isuse":true,"delete":false}`) //查找当前使用版本 if v != nil && len(*v) > 0 { //查找当前使用版本中属性配置需要审核的字段 vid := qu.BsonIdToSId((*v)["_id"]) query := map[string]interface{}{ "isaudit": true, "delete": false, "vid": vid, } data, _ := db.Mgo.Find("versioninfo", query, `{"_id":-1}`, `{"s_field":1}`, false, -1, -1) for _, d := range *data { field := qu.ObjToString(d["s_field"]) e.AuditFields = append(e.AuditFields, field) } } } } //加载附件抽取 func (e *ExtractTask) InitFile() { defer qu.Catch() //query:=bson.M{"version":e.TaskInfo.Version,"delete":false} ve, _ := db.Mgo.FindOne("version", `{"version":"`+e.TaskInfo.Version+`","delete":false}`) //ve, _ := db.Mgo.FindOne("version", query) if ve == nil { return } if (*ve)["isfiles"] != nil && (*ve)["isfiles"].(bool) { e.IsFileField = true } syscefiled := new(sync.Map) if (*ve)["s_filefileds"] != nil { for _, vff := range (*ve)["s_filefileds"].([]interface{}) { syscefiled.Store(vff.(string),1) } } e.FileFields = syscefiled } //加载清理任务信息 func (c *ClearTask) InitClearTaskInfo() { cleartask, _ := db.Mgo.FindById("cleartask", c.Id, nil) if len(*cleartask) > 1 { v, _ := db.Mgo.FindOne("clearversion", `{"clearversion":"`+(*cleartask)["s_version"].(string)+`","delete":false}`) c.ClearTaskInfo = &ClearTaskInfo{ Name: (*cleartask)["s_taskname"].(string), Version: (*cleartask)["s_version"].(string), VersionId: qu.BsonIdToSId((*v)["_id"]), FromDbAddr: (*cleartask)["s_mgoaddr"].(string), FromDB: (*cleartask)["s_mgodb"].(string), FromColl: (*cleartask)["s_mgocoll"].(string), IsCltLog: ju.Config["iscltlog"].(bool), ProcessPool: make(chan bool, qu.IntAllDef((*cleartask)["i_process"], 1)), } log.Debug(c.ClearTaskInfo.Name, "thread:", qu.IntAllDef((*cleartask)["i_process"], 1)) } else { return } } //加载清理脚本 func (c *ClearTask) InitClearLuas() { defer qu.Catch() c.ClearLuas = make(map[string][]*ClearLua) list, _ := db.Mgo.Find("clearversioninfo", `{"vid":"`+c.ClearTaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1) for _, l := range *list { if b, _ := l["isuse"].(bool); !b { //仅使用启用的属性 continue } s_field := qu.ObjToString(l["s_field"]) pid := qu.BsonIdToSId(l["_id"]) luas, _ := db.Mgo.Find("clearulelogic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1) for _, vv := range *luas { if b, _ := vv["isuse"].(bool); !b { continue } clearLua := &ClearLua{ Field: s_field, Code: vv["s_code"].(string), Name: vv["s_name"].(string), LuaText: vv["s_luascript"].(string), LFields: getALLFields(), } c.ClearLuas[s_field] = append(c.ClearLuas[s_field], clearLua) } } }