zhangjinkun пре 6 година
родитељ
комит
337ada7c94
1 измењених фајлова са 181 додато и 0 уклоњено
  1. 181 0
      src/jy/extract/extractInit.go

+ 181 - 0
src/jy/extract/extractInit.go

@@ -291,6 +291,187 @@ func (e *ExtractTask) InfoTypeList() {
 
 // InitRuleCore loads the extraction rules of the task's rule version.
 //
 // It resets e.Fields and e.RuleCores, then
 //  1. builds, per versioninfo "s_field", the list of RuleCore groups found in
 //     the in-use "rule_logic" records (each with its pre-processing,
 //     post-processing and extraction rules), and
 //  2. walks the "infotype" records and fills e.RuleCores, keyed by topclass
 //     (or topclass+"_"+subclass), with the RuleCore lists of the configured
 //     fields.
 //
 // A malformed rule record is logged and skipped rather than aborting the
 // whole load; any remaining panic is handled by the deferred qu.Catch.
 func (e *ExtractTask) InitRuleCore() {
+	defer qu.Catch()
+	e.Fields = map[string]int{}
+	e.RuleCores = make(map[string]map[string][]*RuleCore)
+
+	fieldrules := e.loadFieldRules()
+
+	// Attribute configuration: map every (top class / sub class, field) pair
+	// onto the rule groups collected for that field.
+	infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
+	if infolist == nil {
+		// Guard against a nil result instead of panicking on the dereference.
+		return
+	}
+	for _, info := range *infolist {
+		topclass := qu.ObjToString(info["topclass"])
+		if info["subclass"] == nil {
+			e.RuleCores[topclass] = make(map[string][]*RuleCore)
+			// Ranging over a nil map is a no-op, so a missing "fields" entry
+			// simply leaves the top class without rules.
+			fields, _ := info["fields"].(map[string]interface{})
+			for attr := range fields {
+				e.RuleCores[topclass][attr] = fieldrules[attr]
+			}
+			continue
+		}
+		subclasses, ok := info["subclass"].(map[string]interface{})
+		if !ok {
+			log.Debug(topclass, "subclass", "unexpected type, record skipped")
+			continue
+		}
+		for ca, fs := range subclasses {
+			key := topclass + "_" + ca
+			e.RuleCores[key] = make(map[string][]*RuleCore)
+			fields, _ := fs.(map[string]interface{})
+			for field := range fields {
+				e.RuleCores[key][field] = fieldrules[field]
+			}
+		}
+	}
+}
+
+// loadFieldRules reads the in-use "versioninfo" records of the task's version
+// and, for each in-use "rule_logic" record below them, assembles a RuleCore.
+// The result maps the versioninfo "s_field" to its RuleCore groups.
+func (e *ExtractTask) loadFieldRules() map[string][]*RuleCore {
+	fieldrules := map[string][]*RuleCore{}
+	vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1)
+	if vinfos == nil {
+		return fieldrules
+	}
+	for _, vinfo := range *vinfos {
+		if inUse, _ := vinfo["isuse"].(bool); !inUse {
+			continue
+		}
+		sField := qu.ObjToString(vinfo["s_field"])
+		pid := qu.BsonIdToSId(vinfo["_id"])
+		logics, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
+		if logics == nil {
+			continue
+		}
+		for _, logic := range *logics {
+			if inUse, _ := logic["isuse"].(bool); !inUse {
+				continue
+			}
+			rcore := &RuleCore{}
+			rcore.Field = sField
+			rcore.LuaLogic = qu.ObjToString(logic["s_luascript"]) // logic script; presumably empty means "no script" - confirm with the consumer
+			// A missing or non-bool "extfrom" falls back to "detail" instead
+			// of panicking and aborting the whole load.
+			rcore.ExtFrom = "detail"
+			if fromTitle, _ := logic["extfrom"].(bool); fromTitle {
+				rcore.ExtFrom = "title"
+			}
+			sid := qu.BsonIdToSId(logic["_id"])
+			rcore.RulePres = loadReplaceRules("rule_logicpre", sid)   // pre-processing rules
+			rcore.RuleBacks = loadReplaceRules("rule_logicback", sid) // post-processing rules
+			rcore.RuleCores = e.loadExtractRules(sid)                 // extraction rules
+			// append on a missing key starts from a nil slice, no init needed.
+			fieldrules[sField] = append(fieldrules[sField], rcore)
+		}
+	}
+	return fieldrules
+}
+
+// loadReplaceRules reads the pre-/post-processing rules stored in collection
+// coll for the rule_logic record sid. Lua rules keep their script text; regex
+// rules are compiled into RegPreBac together with their replacement string.
+// Records that cannot be parsed are logged and skipped.
+func loadReplaceRules(coll, sid string) []*RegLuaInfo {
+	rules := []*RegLuaInfo{}
+	rows, _ := db.Mgo.Find(coll, `{"sid":"`+sid+`","delete":false}`, nil, nil, false, -1, -1)
+	if rows == nil {
+		return rules
+	}
+	for _, row := range *rows {
+		rinfo := &RegLuaInfo{
+			Field: qu.ObjToString(row["s_field"]),
+			Code:  qu.ObjToString(row["s_code"]),
+			Name:  qu.ObjToString(row["s_name"]),
+			IsLua: qu.ObjToString(row["s_type"]) == "1",
+		}
+		if rinfo.IsLua {
+			script, ok := row["s_luascript"].(string)
+			if !ok {
+				log.Debug(rinfo.Code, rinfo.Field, "s_luascript is missing or not a string")
+				continue
+			}
+			rinfo.RuleText = script
+			rules = append(rules, rinfo)
+			continue
+		}
+		text, ok := row["s_rule"].(string)
+		if !ok {
+			log.Debug(rinfo.Code, rinfo.Field, "s_rule is missing or not a string")
+			continue
+		}
+		rinfo.RuleText = text
+		re, replacement, _, err := compileRuleText(text)
+		if err != nil {
+			log.Debug(rinfo.Code, rinfo.Field, err)
+			continue
+		}
+		// replacement is "" when the rule has no "__" suffix.
+		rinfo.RegPreBac = &ExtReg{Reg: re, Replace: replacement}
+		rules = append(rules, rinfo)
+	}
+	return rules
+}
+
+// loadExtractRules reads the in-use "rule_logicore" records of the rule_logic
+// record sid. Every in-use record's field is registered in e.Fields, even if
+// the rule itself turns out to be unusable. Records that cannot be parsed are
+// logged and skipped.
+func (e *ExtractTask) loadExtractRules(sid string) []*RegLuaInfo {
+	rules := []*RegLuaInfo{}
+	rows, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+sid+`","delete":false}`, nil, nil, false, -1, -1)
+	if rows == nil {
+		return rules
+	}
+	for _, row := range *rows {
+		if inUse, _ := row["isuse"].(bool); !inUse {
+			continue
+		}
+		field := qu.ObjToString(row["s_field"])
+		e.Fields[field] = 1 // remember the field in the extraction attribute set for later use
+		rinfo := &RegLuaInfo{
+			Field: field,
+			Code:  qu.ObjToString(row["s_code"]),
+			Name:  qu.ObjToString(row["s_name"]),
+			IsLua: qu.ObjToString(row["s_type"]) == "1",
+		}
+		if rinfo.IsLua {
+			script, ok := row["s_luascript"].(string)
+			if !ok {
+				log.Debug(rinfo.Code, rinfo.Field, "s_luascript is missing or not a string")
+				continue
+			}
+			rinfo.RuleText = script
+			// Lua rules are given the full attribute list.
+			rinfo.LFields = getALLFields()
+			rules = append(rules, rinfo)
+			continue
+		}
+		text, ok := row["s_rule"].(string)
+		if !ok {
+			log.Debug(rinfo.Code, rinfo.Field, "s_rule is missing or not a string")
+			continue
+		}
+		rinfo.RuleText = text
+		re, posSpec, hasPos, err := compileRuleText(text)
+		if err != nil {
+			log.Debug(rinfo.Code, rinfo.Field, err)
+			continue
+		}
+		if hasPos {
+			rinfo.RegCore = &ExtReg{Reg: re, Bextract: true, ExtractPos: parseExtractPos(posSpec, field)}
+		} else {
+			rinfo.RegCore = &ExtReg{Reg: re, Bextract: false}
+		}
+		rules = append(rules, rinfo)
+	}
+	return rules
+}
+
+// compileRuleText splits a stored rule of the form "<pattern>" or
+// "<pattern>__<tail>" and compiles the pattern.
+//
+// hasTail is true only when the text splits into exactly two parts on "__";
+// with more separators the tail is ignored. If the pattern contains "\u"
+// escapes they are decoded first: every backslash is doubled except the one
+// introducing "\u", and the result is run through strconv.Unquote.
+//
+// An error is returned when decoding or compiling fails, so the caller can
+// skip the rule instead of installing an empty, match-everything regexp.
+func compileRuleText(rule string) (re *regexp.Regexp, tail string, hasTail bool, err error) {
+	parts := strings.Split(rule, "__")
+	pattern := parts[0]
+	if strings.Contains(pattern, "\\u") {
+		escaped := strings.Replace(pattern, "\\", "\\\\", -1)
+		escaped = strings.Replace(escaped, "\\\\u", "\\u", -1)
+		pattern, err = strconv.Unquote(`"` + escaped + `"`)
+		if err != nil {
+			return nil, "", false, err
+		}
+	}
+	re, err = regexp.Compile(pattern)
+	if err != nil {
+		return nil, "", false, err
+	}
+	if len(parts) == 2 {
+		return re, parts[1], true, nil
+	}
+	return re, "", false, nil
+}
+
+// parseExtractPos turns the tail of an extraction rule into a field -> capture
+// position map. Items are comma separated; "2:projectname" assigns position 2
+// to field "projectname", while a bare "2" (or any item that does not split
+// into exactly two parts on ":") assigns its first part to defaultField.
+// Example tails: "2:projectname,4:area" and "2".
+func parseExtractPos(spec, defaultField string) map[string]int {
+	posm := map[string]int{}
+	for _, item := range strings.Split(spec, ",") {
+		ks := strings.Split(item, ":")
+		if len(ks) == 2 {
+			posm[ks[1]] = qu.IntAll(ks[0])
+		} else {
+			posm[defaultField] = qu.IntAll(ks[0])
+		}
+	}
+	return posm
+}
+
+//加载抽取规则
+func (e *ExtractTask) InitRuleCore2() {
 	defer qu.Catch()
 	e.Fields = map[string]int{}
 	infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)