|
@@ -85,15 +85,20 @@ type ExtractTask struct {
|
|
|
|
|
|
ResultChanel chan bool //抽取结果详情
|
|
|
sync.RWMutex
|
|
|
- ResultArr [][]map[string]interface{} //抽取结果详情
|
|
|
- BidChanel chan bool //抽取结果
|
|
|
- BidArr [][]map[string]interface{} //抽取结果
|
|
|
- BidTotal int //结果数量
|
|
|
+ ResultArr [][]map[string]interface {
|
|
|
+ } //抽取结果详情
|
|
|
+ BidChanel chan bool //抽取结果
|
|
|
+ BidArr [][]map[string]interface {
|
|
|
+ } //抽取结果
|
|
|
+ BidTotal int //结果数量
|
|
|
|
|
|
- RecogFieldMap map[string]map[string]interface{} //识别字段
|
|
|
- FidClassMap map[string][]map[string]interface{} //分类
|
|
|
- CidRuleMap map[string][]map[string]interface{} //规则
|
|
|
- AuditFields []string //需要审核的字段名称
|
|
|
+ RecogFieldMap map[string]map[string]interface {
|
|
|
+ } //识别字段
|
|
|
+ FidClassMap map[string][]map[string]interface {
|
|
|
+ } //分类
|
|
|
+ CidRuleMap map[string][]map[string]interface {
|
|
|
+ } //规则
|
|
|
+ AuditFields []string //需要审核的字段名称
|
|
|
|
|
|
SiteCityMap map[string]*SiteCity //站点对应的省市区
|
|
|
ProvinceMap map[string]string //省全称简称(key:浙江省 val:浙江)
|
|
@@ -119,7 +124,8 @@ type ExtractTask struct {
|
|
|
PostCodeMap map[string]*PostCode //邮编
|
|
|
AreaCodeMap map[string]*AreaCode //区号
|
|
|
|
|
|
- InfoType []map[string]interface{}
|
|
|
+ InfoType []map[string]interface {
|
|
|
+ }
|
|
|
|
|
|
Trie_Full_Province *ju.Trie //省全称 省、直辖市、自治区
|
|
|
Trie_Full_City *ju.Trie //市全称 地级市
|
|
@@ -385,7 +391,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
|
|
|
defer qu.Catch()
|
|
|
allFields := getALLFields()
|
|
|
e.Fields = map[string]int{}
|
|
|
- var versioninfodb, rule_logicdb, rule_logicpredb, rule_logicbackdb, rule_logicoredb,rule_logickvdb string
|
|
|
+ var versioninfodb, rule_logicdb, rule_logicpredb, rule_logicbackdb, rule_logicoredb, rule_logickvdb string
|
|
|
eSiteRuleCores := make(map[string]map[string][]*RuleCore)
|
|
|
if isSite {
|
|
|
versioninfodb = "site_versioninfo"
|
|
@@ -393,7 +399,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
|
|
|
rule_logicpredb = "site_rule_logicpre"
|
|
|
rule_logicbackdb = "site_rule_logicback"
|
|
|
rule_logicoredb = "site_rule_logicore"
|
|
|
- rule_logickvdb= "site_rule_logickv"
|
|
|
+ rule_logickvdb = "site_rule_logickv"
|
|
|
e.SiteRuleCores = make(map[string]map[string][]*RuleCore)
|
|
|
} else {
|
|
|
versioninfodb = "versioninfo"
|
|
@@ -401,7 +407,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
|
|
|
rule_logicpredb = "rule_logicpre"
|
|
|
rule_logicbackdb = "rule_logicback"
|
|
|
rule_logicoredb = "rule_logicore"
|
|
|
- rule_logickvdb= "rule_logickv"
|
|
|
+ rule_logickvdb = "rule_logickv"
|
|
|
e.RuleCores = make(map[string]map[string][]*RuleCore)
|
|
|
}
|
|
|
|
|
@@ -679,55 +685,101 @@ func (e *ExtractTask) InitPkgCore() {
|
|
|
continue
|
|
|
}
|
|
|
s_field := qu.ObjToString(pkginfo["s_field"])
|
|
|
- pid := qu.BsonIdToSId(pkginfo["_id"])
|
|
|
- logicList, _ := db.Mgo.Find("pkg_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- for _, vv := range *logicList {
|
|
|
- if b, _ := vv["isuse"].(bool); !b {
|
|
|
- continue
|
|
|
+ sid := qu.BsonIdToSId(pkginfo["_id"])
|
|
|
+ rcore := &RuleCore{}
|
|
|
+ rcore.Field = s_field
|
|
|
+ rcore.ExtFrom = "detail"
|
|
|
+ //后置规则
|
|
|
+ ruleBacks := []*RegLuaInfo{}
|
|
|
+ blist, _ := db.Mgo.Find("pkg_logicback", `{"sid":"`+sid+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, v := range *blist {
|
|
|
+ rinfo := &RegLuaInfo{
|
|
|
+ Field: qu.ObjToString(v["s_field"]),
|
|
|
+ Code: v["s_code"].(string),
|
|
|
+ Name: v["s_name"].(string),
|
|
|
+ IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
}
|
|
|
- rcore := &RuleCore{}
|
|
|
- rcore.Field = s_field
|
|
|
- rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
|
|
|
- rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
|
|
|
- //后置规则
|
|
|
- ruleBacks := []*RegLuaInfo{}
|
|
|
- blist, _ := db.Mgo.Find("pkg_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- for _, v := range *blist {
|
|
|
- rinfo := &RegLuaInfo{
|
|
|
- Field: qu.ObjToString(v["s_field"]),
|
|
|
- Code: v["s_code"].(string),
|
|
|
- Name: v["s_name"].(string),
|
|
|
- IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
- }
|
|
|
- if rinfo.IsLua {
|
|
|
- rinfo.RuleText = v["s_luascript"].(string)
|
|
|
+ if rinfo.IsLua {
|
|
|
+ rinfo.RuleText = v["s_luascript"].(string)
|
|
|
+ ruleBacks = append(ruleBacks, rinfo)
|
|
|
+ } else {
|
|
|
+ qu.Try(func() {
|
|
|
+ rinfo.RuleText = v["s_rule"].(string)
|
|
|
+ tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
+ var pattern string
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
+ } else {
|
|
|
+ pattern = tmp[0]
|
|
|
+ }
|
|
|
+ if len(tmp) == 2 {
|
|
|
+ rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
|
|
|
+ } else {
|
|
|
+ rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
|
|
|
+ }
|
|
|
ruleBacks = append(ruleBacks, rinfo)
|
|
|
- } else {
|
|
|
- qu.Try(func() {
|
|
|
- rinfo.RuleText = v["s_rule"].(string)
|
|
|
- tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
- var pattern string
|
|
|
- if strings.Contains(tmp[0], "\\u") {
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
- pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
- } else {
|
|
|
- pattern = tmp[0]
|
|
|
- }
|
|
|
- if len(tmp) == 2 {
|
|
|
- rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
|
|
|
- } else {
|
|
|
- rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rcore.RuleBacks = ruleBacks
|
|
|
+ //抽取规则
|
|
|
+ ruleCores := []*RegLuaInfo{}
|
|
|
+ clist, _ := db.Mgo.Find("pkg_logicore", `{"sid":"`+sid+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, v := range *clist {
|
|
|
+ if b, _ := v["isuse"].(bool); !b {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ field := qu.ObjToString(v["s_field"])
|
|
|
+ e.Fields[field] = 1 //加入抽取属性组备用
|
|
|
+ rinfo := &RegLuaInfo{
|
|
|
+ Field: field,
|
|
|
+ Code: v["s_code"].(string),
|
|
|
+ Name: v["s_name"].(string),
|
|
|
+ IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
+ }
|
|
|
+ if rinfo.IsLua {
|
|
|
+ rinfo.RuleText = v["s_luascript"].(string)
|
|
|
+ //提取全部属性
|
|
|
+ ruleCores = append(ruleCores, rinfo)
|
|
|
+ } else {
|
|
|
+ qu.Try(func() {
|
|
|
+ rinfo.RuleText = v["s_rule"].(string)
|
|
|
+ tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
+ var pattern string
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
+ } else {
|
|
|
+ pattern = tmp[0]
|
|
|
+ }
|
|
|
+ if len(tmp) == 2 {
|
|
|
+ epos := strings.Split(tmp[1], ",")
|
|
|
+ posm := map[string]int{}
|
|
|
+ for _, v := range epos {
|
|
|
+ ks := strings.Split(v, ":")
|
|
|
+ if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
|
|
|
+ posm[ks[1]] = qu.IntAll(ks[0])
|
|
|
+ } else { //(.*)招标公告__2
|
|
|
+ posm[rinfo.Field] = qu.IntAll(ks[0])
|
|
|
+ }
|
|
|
}
|
|
|
- ruleBacks = append(ruleBacks, rinfo)
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
- })
|
|
|
- }
|
|
|
+ rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm}
|
|
|
+ } else {
|
|
|
+ rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false}
|
|
|
+ }
|
|
|
+ ruleCores = append(ruleCores, rinfo)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
+ })
|
|
|
}
|
|
|
- rcore.RuleBacks = ruleBacks
|
|
|
- e.PkgRuleCores = append(e.PkgRuleCores, rcore)
|
|
|
}
|
|
|
+ rcore.RuleCores = ruleCores
|
|
|
+ e.PkgRuleCores = append(e.PkgRuleCores, rcore)
|
|
|
}
|
|
|
}
|
|
|
|