|
@@ -291,6 +291,187 @@ func (e *ExtractTask) InfoTypeList() {
|
|
|
|
|
|
//加载抽取规则
|
|
|
func (e *ExtractTask) InitRuleCore() {
|
|
|
+ defer qu.Catch()
|
|
|
+ e.Fields = map[string]int{}
|
|
|
+ e.RuleCores = make(map[string]map[string][]*RuleCore)
|
|
|
+
|
|
|
+ fieldrules := map[string][]*RuleCore{}
|
|
|
+ vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, vinfo := range *vinfos {
|
|
|
+ if b, _ := vinfo["isuse"].(bool); !b {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ s_field := qu.ObjToString(vinfo["s_field"])
|
|
|
+ pid := qu.BsonIdToSId(vinfo["_id"])
|
|
|
+ list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, vv := range *list {
|
|
|
+ if b, _ := vv["isuse"].(bool); !b {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ rcore := &RuleCore{}
|
|
|
+ rcore.Field = s_field
|
|
|
+ rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
|
|
|
+ rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
|
|
|
+ //前置规则
|
|
|
+ rulePres := []*RegLuaInfo{}
|
|
|
+ plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, v := range *plist {
|
|
|
+ rinfo := &RegLuaInfo{
|
|
|
+ Field: qu.ObjToString(v["s_field"]),
|
|
|
+ Code: v["s_code"].(string),
|
|
|
+ Name: v["s_name"].(string),
|
|
|
+ IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
+ }
|
|
|
+ if rinfo.IsLua {
|
|
|
+ rinfo.RuleText = v["s_luascript"].(string)
|
|
|
+ rulePres = append(rulePres, rinfo)
|
|
|
+ } else {
|
|
|
+ qu.Try(func() {
|
|
|
+ rinfo.RuleText = v["s_rule"].(string)
|
|
|
+ tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
+ var pattern string
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
+ } else {
|
|
|
+ pattern = tmp[0]
|
|
|
+ }
|
|
|
+ if len(tmp) == 2 {
|
|
|
+ rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
|
|
|
+ } else {
|
|
|
+ rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
|
|
|
+ }
|
|
|
+ rulePres = append(rulePres, rinfo)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rcore.RulePres = rulePres
|
|
|
+
|
|
|
+ //后置规则
|
|
|
+ ruleBacks := []*RegLuaInfo{}
|
|
|
+ blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, v := range *blist {
|
|
|
+ rinfo := &RegLuaInfo{
|
|
|
+ Field: qu.ObjToString(v["s_field"]),
|
|
|
+ Code: v["s_code"].(string),
|
|
|
+ Name: v["s_name"].(string),
|
|
|
+ IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
+ }
|
|
|
+ if rinfo.IsLua {
|
|
|
+ rinfo.RuleText = v["s_luascript"].(string)
|
|
|
+ ruleBacks = append(ruleBacks, rinfo)
|
|
|
+ } else {
|
|
|
+ qu.Try(func() {
|
|
|
+ rinfo.RuleText = v["s_rule"].(string)
|
|
|
+ tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
+ var pattern string
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
+ } else {
|
|
|
+ pattern = tmp[0]
|
|
|
+ }
|
|
|
+ if len(tmp) == 2 {
|
|
|
+ rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
|
|
|
+ } else {
|
|
|
+ rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
|
|
|
+ }
|
|
|
+ ruleBacks = append(ruleBacks, rinfo)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rcore.RuleBacks = ruleBacks
|
|
|
+
|
|
|
+ //抽取规则
|
|
|
+ ruleCores := []*RegLuaInfo{}
|
|
|
+ clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
+ for _, v := range *clist {
|
|
|
+ if b, _ := v["isuse"].(bool); !b {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ field := qu.ObjToString(v["s_field"])
|
|
|
+ e.Fields[field] = 1 //加入抽取属性组备用
|
|
|
+ rinfo := &RegLuaInfo{
|
|
|
+ Field: field,
|
|
|
+ Code: v["s_code"].(string),
|
|
|
+ Name: v["s_name"].(string),
|
|
|
+ IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
|
|
|
+ }
|
|
|
+ if rinfo.IsLua {
|
|
|
+ rinfo.RuleText = v["s_luascript"].(string)
|
|
|
+ //提取全部属性
|
|
|
+ rinfo.LFields = getALLFields()
|
|
|
+ ruleCores = append(ruleCores, rinfo)
|
|
|
+ } else {
|
|
|
+ qu.Try(func() {
|
|
|
+ rinfo.RuleText = v["s_rule"].(string)
|
|
|
+ tmp := strings.Split(rinfo.RuleText, "__")
|
|
|
+ var pattern string
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
+ } else {
|
|
|
+ pattern = tmp[0]
|
|
|
+ }
|
|
|
+ if len(tmp) == 2 {
|
|
|
+ epos := strings.Split(tmp[1], ",")
|
|
|
+ posm := map[string]int{}
|
|
|
+ for _, v := range epos {
|
|
|
+ ks := strings.Split(v, ":")
|
|
|
+ if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
|
|
|
+ posm[ks[1]] = qu.IntAll(ks[0])
|
|
|
+ } else { //(.*)招标公告__2
|
|
|
+ posm[rinfo.Field] = qu.IntAll(ks[0])
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm}
|
|
|
+ } else {
|
|
|
+ rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false}
|
|
|
+ }
|
|
|
+ ruleCores = append(ruleCores, rinfo)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rcore.RuleCores = ruleCores
|
|
|
+ //
|
|
|
+ if fieldrules[s_field] == nil {
|
|
|
+ fieldrules[s_field] = []*RuleCore{}
|
|
|
+ }
|
|
|
+ fieldrules[s_field] = append(fieldrules[s_field], rcore)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //属性配置
|
|
|
+ infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
|
|
|
+ for _, v := range *infolist {
|
|
|
+ topclass := qu.ObjToString(v["topclass"])
|
|
|
+ if v["subclass"] == nil {
|
|
|
+ e.RuleCores[topclass] = make(map[string][]*RuleCore)
|
|
|
+ for attr, _ := range v["fields"].(map[string]interface{}) {
|
|
|
+ e.RuleCores[topclass][attr] = fieldrules[attr]
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ for ca, fs := range v["subclass"].(map[string]interface{}) {
|
|
|
+ e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
|
|
|
+ for field, _ := range fs.(map[string]interface{}) {
|
|
|
+ e.RuleCores[topclass+"_"+ca][field] = fieldrules[field]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+//加载抽取规则
|
|
|
+func (e *ExtractTask) InitRuleCore2() {
|
|
|
defer qu.Catch()
|
|
|
e.Fields = map[string]int{}
|
|
|
infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
|