|
@@ -50,25 +50,39 @@ type Tag struct {
|
|
|
}
|
|
|
|
|
|
type ExtractTask struct {
|
|
|
- Id string //任务id
|
|
|
- IsRun bool //是否启动
|
|
|
- Content string //信息内容
|
|
|
- TaskInfo *TaskInfo //任务信息
|
|
|
- RulePres []*RegLuaInfo //通用前置规则
|
|
|
- RuleBacks []*RegLuaInfo //通用后置规则
|
|
|
- RuleCores []*RuleCore //抽取规则
|
|
|
- Tag map[string][]*Tag //标签库
|
|
|
- ClearFn map[string][]string //清理函数
|
|
|
+ Id string //任务id
|
|
|
+ IsRun bool //是否启动
|
|
|
+ Content string //信息内容
|
|
|
+ TaskInfo *TaskInfo //任务信息
|
|
|
+ RulePres []*RegLuaInfo //通用前置规则
|
|
|
+ RuleBacks []*RegLuaInfo //通用后置规则
|
|
|
+ RuleCores []*RuleCore //抽取规则
|
|
|
+ Tag map[string][]*Tag //标签库
|
|
|
+ ClearFn map[string][]string //清理函数
|
|
|
+ IsExtractCity bool //是否开启城市抽取
|
|
|
|
|
|
ResultChanel chan bool //抽取结果详情
|
|
|
ResultArr [][]map[string]interface{} //抽取结果详情
|
|
|
BidChanel chan bool //抽取结果
|
|
|
BidArr [][]map[string]interface{} //抽取结果
|
|
|
-}
|
|
|
|
|
|
-var RecogFieldMap map[string]map[string]interface{}
|
|
|
-var FidClassMap map[string][]map[string]interface{}
|
|
|
-var CidRuleMap map[string][]map[string]interface{}
|
|
|
+ RecogFieldMap map[string]map[string]interface{} //识别字段
|
|
|
+ FidClassMap map[string][]map[string]interface{} //分类
|
|
|
+ CidRuleMap map[string][]map[string]interface{} //规则
|
|
|
+ AuditFields []string //需要审核的字段名称
|
|
|
+
|
|
|
+ ProvinceMap map[string]string
|
|
|
+ CityBrief map[string]*City //只加载一次即可
|
|
|
+ ProvinceBrief map[string]*Province //只加载一次
|
|
|
+ AreaToCity map[string][]*City //两个文件共用
|
|
|
+ DistrictCityMap map[string]*City
|
|
|
+ StreetDistrictMap map[string]*District
|
|
|
+ AreaGet DFA //市全称
|
|
|
+ AreaDistrict DFA //区或县
|
|
|
+ AreaProvinceGet DFA //省
|
|
|
+ AreaSimGet DFA //市简称
|
|
|
+ AreaStreet DFA //街道
|
|
|
+}
|
|
|
|
|
|
func init() {
|
|
|
TaskList = make(map[string]*ExtractTask)
|
|
@@ -92,6 +106,7 @@ func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) {
|
|
|
IsEtxLog: true,
|
|
|
ProcessPool: make(chan bool, 1),
|
|
|
}
|
|
|
+ e.IsExtractCity = (*v)["isextractcity"].(bool)
|
|
|
} else {
|
|
|
return
|
|
|
}
|
|
@@ -116,6 +131,7 @@ func (e *ExtractTask) InitTaskInfo() {
|
|
|
LastExtId: qu.ObjToString((*task)["s_extlastid"]),
|
|
|
ProcessPool: make(chan bool, qu.IntAllDef((*task)["i_process"], 1)),
|
|
|
}
|
|
|
+ e.IsExtractCity = (*v)["isextractcity"].(bool)
|
|
|
log.Println(e.TaskInfo.Name, "thread:", qu.IntAllDef((*task)["i_process"], 1))
|
|
|
} else {
|
|
|
return
|
|
@@ -329,7 +345,6 @@ func (e *ExtractTask) InitTag() {
|
|
|
for k, key := range tmp {
|
|
|
tag := &Tag{Type: "string", Key: key.(string)}
|
|
|
e.Tag[field] = append(e.Tag[field], tag)
|
|
|
-
|
|
|
tab.Items[k] = &ju.Tag{key.(string), 0 - k, nil}
|
|
|
}
|
|
|
sort.Sort(tab.Items)
|
|
@@ -347,7 +362,6 @@ func (e *ExtractTask) InitTag() {
|
|
|
for k, key := range tmp {
|
|
|
tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))}
|
|
|
e.Tag[field] = append(e.Tag[field], tag)
|
|
|
-
|
|
|
tab.Items[k] = &ju.Tag{key.(string), 0 - k, regexp.MustCompile(key.(string))}
|
|
|
}
|
|
|
sort.Sort(tab.Items)
|
|
@@ -386,10 +400,10 @@ func (e *ExtractTask) InitClearFn() {
|
|
|
}
|
|
|
|
|
|
//加载省份
|
|
|
-func (e *ExtractTask) InitProvince() {
|
|
|
+func InitProvince(version string) map[string]interface{} {
|
|
|
defer qu.Catch()
|
|
|
fn := map[string]interface{}{}
|
|
|
- list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
for _, v := range *list {
|
|
|
name := qu.ObjToString(v["s_name"])
|
|
|
content := v["content"]
|
|
@@ -400,124 +414,143 @@ func (e *ExtractTask) InitProvince() {
|
|
|
fn[name] = content
|
|
|
}
|
|
|
}
|
|
|
- ProviceConfig = fn
|
|
|
+ return fn
|
|
|
}
|
|
|
|
|
|
//加载城市简称
|
|
|
-func (e *ExtractTask) InitCitySim() {
|
|
|
+func InitCitySim(version string) map[string]map[string]interface{} {
|
|
|
defer qu.Catch()
|
|
|
- list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
fn := map[string]map[string]interface{}{}
|
|
|
for _, v := range *list {
|
|
|
name := qu.ObjToString(v["s_name"])
|
|
|
tmp := v["content"].(map[string]interface{})
|
|
|
fn[name] = tmp
|
|
|
}
|
|
|
- CitySimConfig = fn
|
|
|
+ return fn
|
|
|
}
|
|
|
|
|
|
//加载城市全称
|
|
|
-func (e *ExtractTask) InitCityAll() {
|
|
|
+func InitCityAll(version string) map[string]map[string]interface{} {
|
|
|
defer qu.Catch()
|
|
|
- list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- // if len(*list) != 34 {
|
|
|
- // fmt.Println("加载城市配置文件出错", len(*list))
|
|
|
- // }
|
|
|
+ list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
fn := map[string]map[string]interface{}{}
|
|
|
for _, v := range *list {
|
|
|
name := qu.ObjToString(v["s_name"])
|
|
|
tmp := v["content"].(map[string]interface{})
|
|
|
fn[name] = tmp
|
|
|
}
|
|
|
- CityAllConfig = fn
|
|
|
+ return fn
|
|
|
}
|
|
|
|
|
|
//初始化城市省份敏感词
|
|
|
-func InitDFA() {
|
|
|
+func (e *ExtractTask) InitDFA() {
|
|
|
defer qu.Catch()
|
|
|
- AreaGet = DFA{}
|
|
|
- AreaProvinceGet = DFA{}
|
|
|
- AreaStreet = DFA{}
|
|
|
- for k, v := range ProviceConfig {
|
|
|
+ e.AreaGet = DFA{}
|
|
|
+ e.AreaProvinceGet = DFA{}
|
|
|
+ e.AreaStreet = DFA{}
|
|
|
+ //初始化map
|
|
|
+ if e.ProvinceMap == nil {
|
|
|
+ e.ProvinceMap = make(map[string]string)
|
|
|
+ }
|
|
|
+ if e.CityBrief == nil {
|
|
|
+ e.CityBrief = make(map[string]*City)
|
|
|
+ }
|
|
|
+ if e.ProvinceBrief == nil {
|
|
|
+ e.ProvinceBrief = make(map[string]*Province)
|
|
|
+ }
|
|
|
+ if e.AreaToCity == nil {
|
|
|
+ e.AreaToCity = make(map[string][]*City)
|
|
|
+ }
|
|
|
+ if e.DistrictCityMap == nil {
|
|
|
+ e.DistrictCityMap = make(map[string]*City)
|
|
|
+ }
|
|
|
+ if e.StreetDistrictMap == nil {
|
|
|
+ e.StreetDistrictMap = make(map[string]*District)
|
|
|
+ }
|
|
|
+ //初始化省
|
|
|
+ fn1 := InitProvince(e.TaskInfo.Version)
|
|
|
+ for k, v := range fn1 {
|
|
|
for _, p := range v.([]interface{}) {
|
|
|
p1, _ := p.(string)
|
|
|
- AreaProvinceGet.AddWord(p1)
|
|
|
- ProvinceMap[p1] = k
|
|
|
+ e.AreaProvinceGet.AddWord(p1)
|
|
|
+ e.ProvinceMap[p1] = k
|
|
|
}
|
|
|
}
|
|
|
- // ProvinceMap["新疆省"] = "新疆"
|
|
|
- // ProvinceMap["新疆兵团"] = "新疆"
|
|
|
- // provinceMap["广西省"] = "广西"
|
|
|
- for k, v := range CityAllConfig {
|
|
|
- AreaProvinceGet.AddWord(k) //省全称
|
|
|
+
|
|
|
+ //初始化城市全称
|
|
|
+ fn2 := InitCityAll(e.TaskInfo.Version)
|
|
|
+ for k, v := range fn2 {
|
|
|
+ e.AreaProvinceGet.AddWord(k) //省全称
|
|
|
p := &Province{}
|
|
|
p.Name = k
|
|
|
p.Brief = v["brief"].(string)
|
|
|
- ProvinceMap[k] = p.Brief
|
|
|
- ProvinceBrief[p.Brief] = p
|
|
|
+ e.ProvinceMap[k] = p.Brief
|
|
|
+ //
|
|
|
+ e.ProvinceBrief[p.Brief] = p
|
|
|
p.Cap = v["captial"].(string)
|
|
|
city, _ := v["city"].(map[string]interface{})
|
|
|
for k1, v1 := range city {
|
|
|
v1m, _ := v1.(map[string]interface{})
|
|
|
c := &City{}
|
|
|
c.Name = k1
|
|
|
- if v1m["brief"] == nil {
|
|
|
- }
|
|
|
+ // if v1m["brief"] == nil {
|
|
|
+ // }
|
|
|
c.Brief = v1m["brief"].(string)
|
|
|
- CityBrief[c.Brief] = c
|
|
|
+ //
|
|
|
+ e.CityBrief[c.Brief] = c
|
|
|
c.P = p
|
|
|
if c.Brief == p.Cap {
|
|
|
p.Captial = c
|
|
|
}
|
|
|
//加入到城市map中
|
|
|
- cs := AreaToCity[k1]
|
|
|
- AreaGet.AddWord(k1) //市全称
|
|
|
+ //
|
|
|
+ cs := e.AreaToCity[k1]
|
|
|
+ e.AreaGet.AddWord(k1) //市全称
|
|
|
if cs != nil {
|
|
|
cs = append(cs, c)
|
|
|
} else {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
- AreaToCity[k1] = cs
|
|
|
-
|
|
|
+ e.AreaToCity[k1] = cs
|
|
|
//区县
|
|
|
districtmap := v1m["area"].(map[string]interface{}) //区或县
|
|
|
for district, streetarr := range districtmap {
|
|
|
d := &District{}
|
|
|
d.Name = district
|
|
|
d.C = c
|
|
|
- AreaDistrict.AddWord(district) //加入区或县敏感词
|
|
|
- ctmp := DistrictCityMap[district]
|
|
|
+ e.AreaDistrict.AddWord(district) //加入区或县敏感词
|
|
|
+ ctmp := e.DistrictCityMap[district]
|
|
|
if ctmp == nil {
|
|
|
- DistrictCityMap[district] = c
|
|
|
+ e.DistrictCityMap[district] = c
|
|
|
}
|
|
|
-
|
|
|
//街道
|
|
|
for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) {
|
|
|
- AreaStreet.AddWord(s) //加入街道敏感词
|
|
|
- dtmp := StreetDistrictMap[s]
|
|
|
+ e.AreaStreet.AddWord(s) //加入街道敏感词
|
|
|
+ dtmp := e.StreetDistrictMap[s]
|
|
|
if dtmp == nil {
|
|
|
- StreetDistrictMap[s] = d
|
|
|
+ e.StreetDistrictMap[s] = d
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- //加载简称
|
|
|
- AreaSimGet = DFA{}
|
|
|
- for k, v := range CitySimConfig {
|
|
|
+ //初始化城市简称
|
|
|
+ fn3 := InitCitySim(e.TaskInfo.Version)
|
|
|
+ e.AreaSimGet = DFA{}
|
|
|
+ for k, v := range fn3 {
|
|
|
pb := v["brief"].(string)
|
|
|
- p := ProvinceBrief[pb]
|
|
|
+ p := e.ProvinceBrief[pb]
|
|
|
//加载
|
|
|
for _, ss := range []string{k, pb} {
|
|
|
- cs := AreaToCity[ss]
|
|
|
+ cs := e.AreaToCity[ss]
|
|
|
if cs != nil {
|
|
|
cs = append(cs, p.Captial)
|
|
|
} else {
|
|
|
cs = []*City{p.Captial}
|
|
|
}
|
|
|
- AreaToCity[ss] = cs
|
|
|
- AreaSimGet.AddWord(ss) //省全称和省简称
|
|
|
+ e.AreaToCity[ss] = cs
|
|
|
+ e.AreaSimGet.AddWord(ss) //省全称和省简称
|
|
|
}
|
|
|
city, _ := v["city"].(map[string]interface{})
|
|
|
for k1, v1 := range city {
|
|
@@ -525,40 +558,40 @@ func InitDFA() {
|
|
|
if v1m["brief"] == nil {
|
|
|
}
|
|
|
cb := v1m["brief"].(string)
|
|
|
- c := AreaToCity[k1][0]
|
|
|
+ c := e.AreaToCity[k1][0]
|
|
|
//加入到城市map中
|
|
|
for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州 浙江杭州
|
|
|
- AreaSimGet.AddWord(ss)
|
|
|
- cs := AreaToCity[ss]
|
|
|
+ e.AreaSimGet.AddWord(ss)
|
|
|
+ cs := e.AreaToCity[ss]
|
|
|
if cs != nil {
|
|
|
cs = append(cs, c)
|
|
|
} else {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
- AreaToCity[ss] = cs
|
|
|
+ e.AreaToCity[ss] = cs
|
|
|
}
|
|
|
arr := v1m["area"].([]interface{})
|
|
|
for _, k2 := range arr {
|
|
|
s := k2.(string)
|
|
|
for n, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安
|
|
|
- cs := AreaToCity[ss]
|
|
|
- AreaSimGet.AddWord(ss)
|
|
|
+ cs := e.AreaToCity[ss]
|
|
|
+ e.AreaSimGet.AddWord(ss)
|
|
|
if cs != nil {
|
|
|
cs = append(cs, c)
|
|
|
} else {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
- AreaToCity[ss] = cs
|
|
|
+ e.AreaToCity[ss] = cs
|
|
|
|
|
|
//只加入简称
|
|
|
if n == 0 {
|
|
|
d := &District{}
|
|
|
d.Name = ss
|
|
|
d.C = c
|
|
|
- AreaDistrict.AddWord(ss) //加入区或县简称敏感词
|
|
|
- ctmp := DistrictCityMap[ss]
|
|
|
+ e.AreaDistrict.AddWord(ss) //加入区或县简称敏感词
|
|
|
+ ctmp := e.DistrictCityMap[ss]
|
|
|
if ctmp == nil {
|
|
|
- DistrictCityMap[ss] = c
|
|
|
+ e.DistrictCityMap[ss] = c
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -648,39 +681,39 @@ func (e *ExtractTask) BidSave() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-func InitAuditRecogField() {
|
|
|
+func (e *ExtractTask) InitAuditRecogField() {
|
|
|
defer qu.Catch()
|
|
|
- RecogFieldMap = make(map[string]map[string]interface{})
|
|
|
+ e.RecogFieldMap = make(map[string]map[string]interface{})
|
|
|
recogFieldList, _ := db.Mgo.Find("rc_field", `{"delete":false}`, `{"_id":1}`, `{"s_recogfield":1,"s_recogfield_prerule":1}`, false, -1, -1)
|
|
|
for _, f := range *recogFieldList {
|
|
|
field := qu.ObjToString(f["s_recogfield"])
|
|
|
- RecogFieldMap[field] = f
|
|
|
+ e.RecogFieldMap[field] = f
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-func InitAuditClass() {
|
|
|
+func (e *ExtractTask) InitAuditClass() {
|
|
|
defer qu.Catch()
|
|
|
- FidClassMap = make(map[string][]map[string]interface{})
|
|
|
+ e.FidClassMap = make(map[string][]map[string]interface{})
|
|
|
class, _ := db.Mgo.Find("rc_class", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
|
|
|
for _, c := range *class {
|
|
|
classList := []map[string]interface{}{}
|
|
|
fid := qu.ObjToString(c["s_fid"])
|
|
|
- if len(FidClassMap[fid]) > 0 { //追加
|
|
|
- classList = FidClassMap[fid]
|
|
|
+ if len(e.FidClassMap[fid]) > 0 { //追加
|
|
|
+ classList = e.FidClassMap[fid]
|
|
|
}
|
|
|
classList = append(classList, c)
|
|
|
- FidClassMap[fid] = classList
|
|
|
+ e.FidClassMap[fid] = classList
|
|
|
}
|
|
|
}
|
|
|
|
|
|
//加载规则
|
|
|
-func InitAuditRule() {
|
|
|
+func (e *ExtractTask) InitAuditRule() {
|
|
|
defer qu.Catch()
|
|
|
var rureg *regexp.Regexp
|
|
|
var rs []rune
|
|
|
var ru string
|
|
|
var err error
|
|
|
- CidRuleMap = make(map[string][]map[string]interface{})
|
|
|
+ e.CidRuleMap = make(map[string][]map[string]interface{})
|
|
|
rule, _ := db.Mgo.Find("rc_rule", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
|
|
|
for _, v := range *rule {
|
|
|
i_rule := []interface{}{}
|
|
@@ -704,11 +737,10 @@ func InitAuditRule() {
|
|
|
|
|
|
ruleList := []map[string]interface{}{}
|
|
|
classid := qu.ObjToString(v["s_classid"])
|
|
|
- if len(CidRuleMap[classid]) > 0 { //追加
|
|
|
- ruleList = CidRuleMap[classid]
|
|
|
-
|
|
|
+ if len(e.CidRuleMap[classid]) > 0 { //追加
|
|
|
+ ruleList = e.CidRuleMap[classid]
|
|
|
}
|
|
|
ruleList = append(ruleList, v)
|
|
|
- CidRuleMap[classid] = ruleList
|
|
|
+ e.CidRuleMap[classid] = ruleList
|
|
|
}
|
|
|
}
|