|
@@ -471,192 +471,6 @@ func (e *ExtractTask) InitRuleCore() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//加载抽取规则
|
|
|
-func (e *ExtractTask) InitRuleCore2() {
|
|
|
- defer qu.Catch()
|
|
|
- e.Fields = map[string]int{}
|
|
|
- infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
|
|
|
- e.RuleCores = make(map[string]map[string][]*RuleCore)
|
|
|
- for _, v := range *infolist {
|
|
|
- topclass := qu.ObjToString(v["topclass"])
|
|
|
- if v["subclass"] == nil {
|
|
|
- e.RuleCores[topclass] = make(map[string][]*RuleCore)
|
|
|
- for attr, _ := range v["fields"].(map[string]interface{}) {
|
|
|
- vinfo, _ := db.Mgo.FindOneByField("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false,"s_field":"`+attr+`"}`, `{}`)
|
|
|
- e.RuleCores[topclass][attr] = append(e.RuleCores[topclass][attr], e.InfoRole(*vinfo)...)
|
|
|
- }
|
|
|
- } else {
|
|
|
- for ca, fs := range v["subclass"].(map[string]interface{}) {
|
|
|
- e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
|
|
|
- for field, _ := range fs.(map[string]interface{}) {
|
|
|
- vinfo, _ := db.Mgo.FindOneByField("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false,"s_field":"`+field+`"}`, `{}`)
|
|
|
- e.RuleCores[topclass+"_"+ca][field] = append(e.RuleCores[topclass+"_"+ca][field], e.InfoRole(*vinfo)...)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-func (e *ExtractTask) InfoRole(vinfo map[string]interface{}) []*RuleCore {
|
|
|
- maps := []*RuleCore{}
|
|
|
- if b, _ := vinfo["isuse"].(bool); !b {
|
|
|
- return nil
|
|
|
- }
|
|
|
- s_field := qu.ObjToString(vinfo["s_field"])
|
|
|
- pid := qu.BsonIdToSId(vinfo["_id"])
|
|
|
- list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- for _, vv := range *list {
|
|
|
- if b, _ := vv["isuse"].(bool); !b {
|
|
|
- continue
|
|
|
- }
|
|
|
- rcore := &RuleCore{}
|
|
|
- rcore.Field = s_field
|
|
|
- rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
|
|
|
- rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
|
|
|
- //前置规则
|
|
|
- rulePres := []*RegLuaInfo{}
|
|
|
- plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- for _, v := range *plist {
|
|
|
- rinfo := &RegLuaInfo{
|
|
|
- Field: qu.ObjToString(v["s_field"]),
|
|
|
- Code: v["s_code"].(string),
|
|
|
- Name: v["s_name"].(string),
|
|
|
- IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
- }
|
|
|
- if rinfo.IsLua {
|
|
|
- rinfo.RuleText = v["s_luascript"].(string)
|
|
|
- rulePres = append(rulePres, rinfo)
|
|
|
- } else {
|
|
|
- qu.Try(func() {
|
|
|
- rinfo.RuleText = v["s_rule"].(string)
|
|
|
- tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
- var pattern string
|
|
|
- if strings.Contains(tmp[0], "\\u") {
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
- pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
- } else {
|
|
|
- pattern = tmp[0]
|
|
|
- }
|
|
|
- if len(tmp) == 2 {
|
|
|
- rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
|
|
|
- } else {
|
|
|
- rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
|
|
|
- }
|
|
|
- rulePres = append(rulePres, rinfo)
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
- rcore.RulePres = rulePres
|
|
|
-
|
|
|
- //后置规则
|
|
|
- ruleBacks := []*RegLuaInfo{}
|
|
|
- blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- for _, v := range *blist {
|
|
|
- rinfo := &RegLuaInfo{
|
|
|
- Field: qu.ObjToString(v["s_field"]),
|
|
|
- Code: v["s_code"].(string),
|
|
|
- Name: v["s_name"].(string),
|
|
|
- IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
- }
|
|
|
- if rinfo.IsLua {
|
|
|
- rinfo.RuleText = v["s_luascript"].(string)
|
|
|
- ruleBacks = append(ruleBacks, rinfo)
|
|
|
- } else {
|
|
|
- qu.Try(func() {
|
|
|
- rinfo.RuleText = v["s_rule"].(string)
|
|
|
- tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
- var pattern string
|
|
|
- if strings.Contains(tmp[0], "\\u") {
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
- pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
- } else {
|
|
|
- pattern = tmp[0]
|
|
|
- }
|
|
|
- if len(tmp) == 2 {
|
|
|
- rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
|
|
|
- } else {
|
|
|
- rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
|
|
|
- }
|
|
|
- ruleBacks = append(ruleBacks, rinfo)
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
- rcore.RuleBacks = ruleBacks
|
|
|
-
|
|
|
- //抽取规则
|
|
|
- ruleCores := []*RegLuaInfo{}
|
|
|
- clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- for _, v := range *clist {
|
|
|
- if b, _ := v["isuse"].(bool); !b {
|
|
|
- continue
|
|
|
- }
|
|
|
- field := qu.ObjToString(v["s_field"])
|
|
|
- e.Fields[field] = 1 //加入抽取属性组备用
|
|
|
- rinfo := &RegLuaInfo{
|
|
|
- Field: field,
|
|
|
- Code: v["s_code"].(string),
|
|
|
- Name: v["s_name"].(string),
|
|
|
- IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
- }
|
|
|
- if rinfo.IsLua {
|
|
|
- rinfo.RuleText = v["s_luascript"].(string)
|
|
|
- //提取全部属性
|
|
|
- rinfo.LFields = getALLFields()
|
|
|
- ruleCores = append(ruleCores, rinfo)
|
|
|
- } else {
|
|
|
- qu.Try(func() {
|
|
|
- rinfo.RuleText = v["s_rule"].(string)
|
|
|
- ptmp := strings.Split(rinfo.RuleText, "#")
|
|
|
- sign := 0
|
|
|
- if len(ptmp) == 2 {
|
|
|
- if ptmp[1] == "正" {
|
|
|
- sign = 1
|
|
|
- } else if ptmp[1] == "负" {
|
|
|
- sign = -1
|
|
|
- }
|
|
|
- }
|
|
|
- tmp := strings.Split(ptmp[0], "__")
|
|
|
- var pattern string
|
|
|
- if strings.Contains(tmp[0], "\\u") {
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
- tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
- pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
- } else {
|
|
|
- pattern = tmp[0]
|
|
|
- }
|
|
|
- if len(tmp) == 2 {
|
|
|
- epos := strings.Split(tmp[1], ",")
|
|
|
- posm := map[string]int{}
|
|
|
- for _, v := range epos {
|
|
|
- ks := strings.Split(v, ":")
|
|
|
- if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
|
|
|
- posm[ks[1]] = qu.IntAll(ks[0])
|
|
|
- } else { //(.*)招标公告__2
|
|
|
- posm[rinfo.Field] = qu.IntAll(ks[0])
|
|
|
- }
|
|
|
- }
|
|
|
- rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm, NumSign: sign}
|
|
|
- } else {
|
|
|
- rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false}
|
|
|
- }
|
|
|
- ruleCores = append(ruleCores, rinfo)
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
- rcore.RuleCores = ruleCores
|
|
|
- //
|
|
|
- maps = append(maps, rcore)
|
|
|
- }
|
|
|
- return maps
|
|
|
-}
|
|
|
-
|
|
|
//加载分包抽取规则
|
|
|
func (e *ExtractTask) InitPkgCore() {
|
|
|
defer qu.Catch()
|