wangji 6 anni fa
parent
commit
419526d74f

+ 35 - 12
src/jy/admin/rulecheck.go

@@ -313,21 +313,44 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 		doc = extract.ExtRegPre(doc, j, v, e.TaskInfo)
 	}
 	//抽取规则
-	for _, vc := range e.RuleCores {
-		tmp := ju.DeepCopy(doc).(map[string]interface{})
-		//是否进入逻辑
-		if !ju.Logic(vc.LuaLogic, tmp) {
-			continue
-		}
-		//抽取-前置规则
-		for _, v := range vc.RulePres {
-			tmp = extract.ExtRegPre(tmp, j, v, e.TaskInfo)
+	if j.CategorySecond==""{
+		for _, vc1 := range e.RuleCores[j.Category] {
+			for _, vc := range vc1 {
+				tmp := ju.DeepCopy(doc).(map[string]interface{})
+				//是否进入逻辑
+				if !ju.Logic(vc.LuaLogic, tmp) {
+					continue
+				}
+				//抽取-前置规则
+				for _, v := range vc.RulePres {
+					tmp = extract.ExtRegPre(tmp, j, v, e.TaskInfo)
+				}
+				//抽取-规则
+				for _, v := range vc.RuleCores {
+					extract.ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+				}
+			}
 		}
-		//抽取-规则
-		for _, v := range vc.RuleCores {
-			extract.ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+	}else{
+		for _, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
+			for _, vc := range vc1 {
+				tmp := ju.DeepCopy(doc).(map[string]interface{})
+				//是否进入逻辑
+				if !ju.Logic(vc.LuaLogic, tmp) {
+					continue
+				}
+				//抽取-前置规则
+				for _, v := range vc.RulePres {
+					tmp = extract.ExtRegPre(tmp, j, v, e.TaskInfo)
+				}
+				//抽取-规则
+				for _, v := range vc.RuleCores {
+					extract.ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+				}
+			}
 		}
 	}
+
 	result := extract.GetResultMapForLua(j)
 	lua := ju.LuaScript{Code: code, Name: name, Result: result, Script: script}
 	lua.Block = j.Block

+ 1 - 0
src/jy/extract/exportask.go

@@ -56,6 +56,7 @@ func extractAndExport(v string, t map[string]interface{}) {
 	e.InitRuleCore()
 	e.InitTag()
 	e.InitClearFn()
+	e.InfoTypeList()
 	//品牌抽取是否开启
 	ju.IsBrandGoods = ju.Config["brandgoods"].(bool)
 

+ 38 - 12
src/jy/extract/extpackage.go

@@ -146,23 +146,49 @@ func PackageDetail(j *ju.Job, e *ExtractTask) {
 func extRegBackPack(j *ju.Job, e *ExtractTask) {
 	defer qu.Catch()
 	//正则清理
-	for _, rc := range e.RuleCores {
-		for pk, pack := range j.PackageInfo {
-			clear, _ := pack["clear"].(map[string]interface{})
-			for k, val := range pack {
-				if b, ok := clear[k].(bool); ok && b {
-					if rc.Field == k {
-						text := qu.ObjToString(val)
-						for _, in := range rc.RuleBacks {
-							if text != "" && !in.IsLua {
-								text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
+	if j.CategorySecond == "" {
+		for _, rc1 := range e.RuleCores[j.Category] {
+			for _, rc := range rc1 {
+				for pk, pack := range j.PackageInfo {
+					clear, _ := pack["clear"].(map[string]interface{})
+					for k, val := range pack {
+						if b, ok := clear[k].(bool); ok && b {
+							if rc.Field == k {
+								text := qu.ObjToString(val)
+								for _, in := range rc.RuleBacks {
+									if text != "" && !in.IsLua {
+										text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
+									}
+								}
+								pack[k] = text
 							}
 						}
-						pack[k] = text
 					}
+					j.PackageInfo[pk] = pack
+				}
+			}
+		}
+	} else {
+		for _, rc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
+			for _, rc := range rc1 {
+				for pk, pack := range j.PackageInfo {
+					clear, _ := pack["clear"].(map[string]interface{})
+					for k, val := range pack {
+						if b, ok := clear[k].(bool); ok && b {
+							if rc.Field == k {
+								text := qu.ObjToString(val)
+								for _, in := range rc.RuleBacks {
+									if text != "" && !in.IsLua {
+										text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
+									}
+								}
+								pack[k] = text
+							}
+						}
+					}
+					j.PackageInfo[pk] = pack
 				}
 			}
-			j.PackageInfo[pk] = pack
 		}
 	}
 	//通用正则清理

+ 134 - 47
src/jy/extract/extract.go

@@ -225,6 +225,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 		file2text(&doc) //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
 	}
 	toptype := qu.ObjToString(doc["toptype"])
+	subtype := qu.ObjToString(doc["subtype"])
 	if qu.ObjToString(doc["type"]) == "bid" {
 		toptype = "结果"
 	}
@@ -234,6 +235,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 	j = &ju.Job{
 		SourceMid:  qu.BsonIdToSId(doc["_id"]),
 		Category:   toptype,
+		CategorySecond:subtype,
 		Content:    qu.ObjToString(doc["detail"]),
 		SpiderCode: qu.ObjToString(doc["spidercode"]),
 		//Domain:     qu.ObjToString(doc["domain"]),
@@ -321,37 +323,84 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 		for _, v := range e.RulePres {
 			doc = ExtRegPre(doc, j, v, e.TaskInfo)
 		}
-		//抽取规则
-		for _, vc := range e.RuleCores {
-			tmp := ju.DeepCopy(doc).(map[string]interface{})
-			//是否进入逻辑
-			if !ju.Logic(vc.LuaLogic, tmp) {
-				continue
-			}
-			//抽取-前置规则
-			for _, v := range vc.RulePres {
-				tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
+		if j.CategorySecond=="" {
+			//抽取规则
+			tmprules:= map[string][]*RuleCore{}
+			lock.Lock()
+			for k, vc1 := range e.RuleCores[j.Category] {
+				tmprules[k]=vc1
 			}
-			// log.Debug("抽取-前置规则", tmp)
+			lock.Unlock()
+			for _, vc1 := range tmprules {
+				for _, vc := range vc1 {
+					tmp := ju.DeepCopy(doc).(map[string]interface{})
+					//是否进入逻辑
+					if !ju.Logic(vc.LuaLogic, tmp) {
+						continue
+					}
+					//抽取-前置规则
+					for _, v := range vc.RulePres {
+						tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
+					}
+					// log.Debug("抽取-前置规则", tmp)
 
-			//抽取-规则
-			for _, v := range vc.RuleCores {
-				ExtRegCore(vc.ExtFrom, tmp, j, v, e)
-			}
-			// log.Debug("抽取-规则", tmp)
+					//抽取-规则
+					for _, v := range vc.RuleCores {
+						ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+					}
+					// log.Debug("抽取-规则", tmp)
+
+					//项目名称未能抽取到,标题来凑
+					if vc.Field == "projectname" {
+						if len(j.Result[vc.Field]) < 1 {
+							j.Result[vc.Field] = append(j.Result[vc.Field], &ju.ExtField{vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, j.Title, 0})
+						}
+					}
 
-			//项目名称未能抽取到,标题来凑
-			if vc.Field == "projectname" {
-				if len(j.Result[vc.Field]) < 1 {
-					j.Result[vc.Field] = append(j.Result[vc.Field], &ju.ExtField{vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, j.Title, 0})
+					//抽取-后置规则
+					for _, v := range vc.RuleBacks {
+						ExtRegBack(j, v, e.TaskInfo)
+					}
+					// log.Debug("抽取-后置规则", tmp)
 				}
 			}
+		}else{
+			fmt.Println(e.RuleCores)
+			fmt.Println("++++++++++++++++")
+			fmt.Println(e.RuleCores[j.Category+"_"+j.CategorySecond])
+			for _, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
+				for _, vc := range vc1 {
+					tmp := ju.DeepCopy(doc).(map[string]interface{})
+					//是否进入逻辑
+					if !ju.Logic(vc.LuaLogic, tmp) {
+						continue
+					}
+					//抽取-前置规则
+					for _, v := range vc.RulePres {
+						tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
+					}
+					// log.Debug("抽取-前置规则", tmp)
 
-			//抽取-后置规则
-			for _, v := range vc.RuleBacks {
-				ExtRegBack(j, v, e.TaskInfo)
+					//抽取-规则
+					for _, v := range vc.RuleCores {
+						ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+					}
+					// log.Debug("抽取-规则", tmp)
+
+					//项目名称未能抽取到,标题来凑
+					if vc.Field == "projectname" {
+						if len(j.Result[vc.Field]) < 1 {
+							j.Result[vc.Field] = append(j.Result[vc.Field], &ju.ExtField{vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, j.Title, 0})
+						}
+					}
+
+					//抽取-后置规则
+					for _, v := range vc.RuleBacks {
+						ExtRegBack(j, v, e.TaskInfo)
+					}
+					// log.Debug("抽取-后置规则", tmp)
+				}
 			}
-			// log.Debug("抽取-后置规则", tmp)
 		}
 
 		//全局后置规则
@@ -418,37 +467,75 @@ func (e *ExtractTask) ExtractFile(j *ju.Job) {
 			}
 		}
 		//抽取规则
-		for _, vc := range e.RuleCores {
-			tmp := ju.DeepCopy(doc).(map[string]interface{})
-			//是否进入逻辑
-			if !ju.Logic(vc.LuaLogic, tmp) {
-				continue
-			}
-			//抽取-前置规则
-			for _, v := range vc.RulePres {
-				if e.FileFields[vc.Field] > 0 {
-					tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
-				}
-			}
-			// log.Debug("抽取-前置规则", tmp)
+		if j.CategorySecond==""{
+			for _, vc1 := range e.RuleCores[j.Category] {
+				for _, vc := range vc1 {
+					tmp := ju.DeepCopy(doc).(map[string]interface{})
+					//是否进入逻辑
+					if !ju.Logic(vc.LuaLogic, tmp) {
+						continue
+					}
+					//抽取-前置规则
+					for _, v := range vc.RulePres {
+						if e.FileFields[vc.Field] > 0 {
+							tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
+						}
+					}
+					// log.Debug("抽取-前置规则", tmp)
+
+					//抽取-规则
+					for _, v := range vc.RuleCores {
+						if e.FileFields[vc.Field] > 0 {
+							ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+						}
+					}
+					// log.Debug("抽取-规则", tmp)
 
-			//抽取-规则
-			for _, v := range vc.RuleCores {
-				if e.FileFields[vc.Field] > 0 {
-					ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+					//抽取-后置规则
+					for _, v := range vc.RuleBacks {
+						if e.FileFields[vc.Field] > 0 {
+							ExtRegBack(j, v, e.TaskInfo)
+						}
+					}
+					// log.Debug("抽取-后置规则", tmp)
 				}
 			}
-			// log.Debug("抽取-规则", tmp)
+		}else{
+			for _, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
+				for _, vc := range vc1 {
+					tmp := ju.DeepCopy(doc).(map[string]interface{})
+					//是否进入逻辑
+					if !ju.Logic(vc.LuaLogic, tmp) {
+						continue
+					}
+					//抽取-前置规则
+					for _, v := range vc.RulePres {
+						if e.FileFields[vc.Field] > 0 {
+							tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
+						}
+					}
+					// log.Debug("抽取-前置规则", tmp)
 
-			//抽取-后置规则
-			for _, v := range vc.RuleBacks {
-				if e.FileFields[vc.Field] > 0 {
-					ExtRegBack(j, v, e.TaskInfo)
+					//抽取-规则
+					for _, v := range vc.RuleCores {
+						if e.FileFields[vc.Field] > 0 {
+							ExtRegCore(vc.ExtFrom, tmp, j, v, e)
+						}
+					}
+					// log.Debug("抽取-规则", tmp)
+
+					//抽取-后置规则
+					for _, v := range vc.RuleBacks {
+						if e.FileFields[vc.Field] > 0 {
+							ExtRegBack(j, v, e.TaskInfo)
+						}
+					}
+					// log.Debug("抽取-后置规则", tmp)
 				}
 			}
-			// log.Debug("抽取-后置规则", tmp)
 		}
 
+
 		//全局后置规则
 		for _, v := range e.RuleBacks {
 			if e.FileFields[v.Field] > 0 {

+ 174 - 144
src/jy/extract/extractInit.go

@@ -54,18 +54,19 @@ type TaskInfo struct {
 	TestLua                             bool      //检查测试用
 }
 type ExtractTask struct {
-	Id            string              //任务id
-	IsRun         bool                //是否启动
-	Content       string              //信息内容
-	TaskInfo      *TaskInfo           //任务信息
-	RulePres      []*RegLuaInfo       //通用前置规则
-	RuleBacks     []*RegLuaInfo       //通用后置规则
-	RuleCores     []*RuleCore         //抽取规则
-	PkgRuleCores  []*RuleCore         //分包抽取规则
-	Tag           map[string][]*Tag   //标签库
-	ClearFn       map[string][]string //清理函数
-	IsExtractCity bool                //是否开启城市抽取
-	Fields        map[string]int      //抽取属性组
+	Id        string        //任务id
+	IsRun     bool          //是否启动
+	Content   string        //信息内容
+	TaskInfo  *TaskInfo     //任务信息
+	RulePres  []*RegLuaInfo //通用前置规则
+	RuleBacks []*RegLuaInfo //通用后置规则
+	//RuleCores      []*RuleCore         //抽取规则
+	RuleCores     map[string]map[string][]*RuleCore //分类抽取规则
+	PkgRuleCores  []*RuleCore                       //分包抽取规则
+	Tag           map[string][]*Tag                 //标签库
+	ClearFn       map[string][]string               //清理函数
+	IsExtractCity bool                              //是否开启城市抽取
+	Fields        map[string]int                    //抽取属性组
 
 	IsFileField bool           //是否开启附件抽取
 	FileFields  map[string]int //抽取附件属性组
@@ -91,6 +92,8 @@ type ExtractTask struct {
 	AreaProvinceGet   *ju.DFA //省
 	AreaSimGet        *ju.DFA //市简称
 	AreaStreet        *ju.DFA //街道
+
+	InfoType []map[string]interface{}
 }
 
 type ClearTaskInfo struct {
@@ -262,156 +265,183 @@ func (e *ExtractTask) InitRuleBacks() {
 		}
 	}
 }
+// InfoTypeList loads every document of the "infotype" collection and appends
+// them to e.InfoType.
+//
+// The second value returned by db.Mgo.Find is ignored, as before. A nil result
+// pointer is treated as "nothing to load" instead of being dereferenced:
+// dereferencing nil panics, and this method installs no deferred recover.
+func (e *ExtractTask) InfoTypeList() {
+	infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
+	if infolist == nil {
+		return
+	}
+	e.InfoType = append(e.InfoType, *infolist...)
+}
 
 //加载抽取规则
 func (e *ExtractTask) InitRuleCore() {
 	defer qu.Catch()
 	e.Fields = map[string]int{}
-	e.RuleCores = []*RuleCore{}
-	vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1)
-	for _, vinfo := range *vinfos {
-		if b, _ := vinfo["isuse"].(bool); !b {
+	infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
+	e.RuleCores=make(map[string]map[string][]*RuleCore)
+	for _, v := range *infolist {
+		topclass := qu.ObjToString(v["topclass"])
+		if v["subclass"] == nil {
+			e.RuleCores[topclass]=make(map[string][]*RuleCore)
+			for attr, _ := range v["fields"].(map[string]interface{}) {
+				vinfo, _ := db.Mgo.FindOneByField("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false,"s_field":"`+attr+`"}`, `{}`)
+				e.RuleCores[topclass][attr] = append(e.RuleCores[topclass][attr], e.InfoRole(*vinfo)...)
+			}
+		} else {
+			for ca, fs := range v["subclass"].(map[string]interface{}) {
+				e.RuleCores[topclass+"_"+ca]=make(map[string][]*RuleCore)
+				for field, _ := range fs.(map[string]interface{}) {
+					vinfo, _ := db.Mgo.FindOneByField("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false,"s_field":"`+field+`"}`, `{}`)
+					e.RuleCores[topclass+"_"+ca][field] = append(e.RuleCores[topclass+"_"+ca][field], e.InfoRole(*vinfo)...)
+				}
+			}
+		}
+	}
+}
+func (e *ExtractTask) InfoRole(vinfo map[string]interface{}) []*RuleCore {
+	maps := []*RuleCore{}
+	if b, _ := vinfo["isuse"].(bool); !b {
+		return nil
+	}
+	s_field := qu.ObjToString(vinfo["s_field"])
+	pid := qu.BsonIdToSId(vinfo["_id"])
+	list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
+	for _, vv := range *list {
+		if b, _ := vv["isuse"].(bool); !b {
 			continue
 		}
-		s_field := qu.ObjToString(vinfo["s_field"])
-		pid := qu.BsonIdToSId(vinfo["_id"])
-		list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
-		for _, vv := range *list {
-			if b, _ := vv["isuse"].(bool); !b {
-				continue
+		rcore := &RuleCore{}
+		rcore.Field = s_field
+		rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
+		rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
+		//前置规则
+		rulePres := []*RegLuaInfo{}
+		plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
+		for _, v := range *plist {
+			rinfo := &RegLuaInfo{
+				Field: qu.ObjToString(v["s_field"]),
+				Code:  v["s_code"].(string),
+				Name:  v["s_name"].(string),
+				IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
 			}
-			rcore := &RuleCore{}
-			rcore.Field = s_field
-			rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
-			rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
-			//前置规则
-			rulePres := []*RegLuaInfo{}
-			plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
-			for _, v := range *plist {
-				rinfo := &RegLuaInfo{
-					Field: qu.ObjToString(v["s_field"]),
-					Code:  v["s_code"].(string),
-					Name:  v["s_name"].(string),
-					IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
-				}
-				if rinfo.IsLua {
-					rinfo.RuleText = v["s_luascript"].(string)
+			if rinfo.IsLua {
+				rinfo.RuleText = v["s_luascript"].(string)
+				rulePres = append(rulePres, rinfo)
+			} else {
+				qu.Try(func() {
+					rinfo.RuleText = v["s_rule"].(string)
+					tmp := strings.Split(rinfo.RuleText, "__")
+					var pattern string
+					if strings.Contains(tmp[0], "\\u") {
+						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
+					} else {
+						pattern = tmp[0]
+					}
+					if len(tmp) == 2 {
+						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
+					} else {
+						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
+					}
 					rulePres = append(rulePres, rinfo)
-				} else {
-					qu.Try(func() {
-						rinfo.RuleText = v["s_rule"].(string)
-						tmp := strings.Split(rinfo.RuleText, "__")
-						var pattern string
-						if strings.Contains(tmp[0], "\\u") {
-							pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
-						} else {
-							pattern = tmp[0]
-						}
-						if len(tmp) == 2 {
-							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
-						} else {
-							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
-						}
-						rulePres = append(rulePres, rinfo)
-					}, func(err interface{}) {
-						log.Debug(rinfo.Code, rinfo.Field, err)
-					})
-				}
+				}, func(err interface{}) {
+					log.Debug(rinfo.Code, rinfo.Field, err)
+				})
 			}
-			rcore.RulePres = rulePres
-
-			//后置规则
-			ruleBacks := []*RegLuaInfo{}
-			blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
-			for _, v := range *blist {
-				rinfo := &RegLuaInfo{
-					Field: qu.ObjToString(v["s_field"]),
-					Code:  v["s_code"].(string),
-					Name:  v["s_name"].(string),
-					IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
-				}
-				if rinfo.IsLua {
-					rinfo.RuleText = v["s_luascript"].(string)
+		}
+		rcore.RulePres = rulePres
+
+		//后置规则
+		ruleBacks := []*RegLuaInfo{}
+		blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
+		for _, v := range *blist {
+			rinfo := &RegLuaInfo{
+				Field: qu.ObjToString(v["s_field"]),
+				Code:  v["s_code"].(string),
+				Name:  v["s_name"].(string),
+				IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
+			}
+			if rinfo.IsLua {
+				rinfo.RuleText = v["s_luascript"].(string)
+				ruleBacks = append(ruleBacks, rinfo)
+			} else {
+				qu.Try(func() {
+					rinfo.RuleText = v["s_rule"].(string)
+					tmp := strings.Split(rinfo.RuleText, "__")
+					var pattern string
+					if strings.Contains(tmp[0], "\\u") {
+						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
+					} else {
+						pattern = tmp[0]
+					}
+					if len(tmp) == 2 {
+						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
+					} else {
+						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
+					}
 					ruleBacks = append(ruleBacks, rinfo)
-				} else {
-					qu.Try(func() {
-						rinfo.RuleText = v["s_rule"].(string)
-						tmp := strings.Split(rinfo.RuleText, "__")
-						var pattern string
-						if strings.Contains(tmp[0], "\\u") {
-							pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
-						} else {
-							pattern = tmp[0]
-						}
-						if len(tmp) == 2 {
-							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
-						} else {
-							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
-						}
-						ruleBacks = append(ruleBacks, rinfo)
-					}, func(err interface{}) {
-						log.Debug(rinfo.Code, rinfo.Field, err)
-					})
-				}
+				}, func(err interface{}) {
+					log.Debug(rinfo.Code, rinfo.Field, err)
+				})
 			}
-			rcore.RuleBacks = ruleBacks
+		}
+		rcore.RuleBacks = ruleBacks
 
-			//抽取规则
-			ruleCores := []*RegLuaInfo{}
-			clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
-			for _, v := range *clist {
-				if b, _ := v["isuse"].(bool); !b {
-					continue
-				}
-				field := qu.ObjToString(v["s_field"])
-				e.Fields[field] = 1 //加入抽取属性组备用
-				rinfo := &RegLuaInfo{
-					Field: field,
-					Code:  v["s_code"].(string),
-					Name:  v["s_name"].(string),
-					IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
-				}
-				if rinfo.IsLua {
-					rinfo.RuleText = v["s_luascript"].(string)
-					//提取全部属性
-					rinfo.LFields = getALLFields()
-					ruleCores = append(ruleCores, rinfo)
-				} else {
-					qu.Try(func() {
-						rinfo.RuleText = v["s_rule"].(string)
-						tmp := strings.Split(rinfo.RuleText, "__")
-						var pattern string
-						if strings.Contains(tmp[0], "\\u") {
-							pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
-						} else {
-							pattern = tmp[0]
-						}
-						if len(tmp) == 2 {
-							epos := strings.Split(tmp[1], ",")
-							posm := map[string]int{}
-							for _, v := range epos {
-								ks := strings.Split(v, ":")
-								if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
-									posm[ks[1]] = qu.IntAll(ks[0])
-								} else { //(.*)招标公告__2
-									posm[rinfo.Field] = qu.IntAll(ks[0])
-								}
+		//抽取规则
+		ruleCores := []*RegLuaInfo{}
+		clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
+		for _, v := range *clist {
+			if b, _ := v["isuse"].(bool); !b {
+				continue
+			}
+			field := qu.ObjToString(v["s_field"])
+			e.Fields[field] = 1 //加入抽取属性组备用
+			rinfo := &RegLuaInfo{
+				Field: field,
+				Code:  v["s_code"].(string),
+				Name:  v["s_name"].(string),
+				IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
+			}
+			if rinfo.IsLua {
+				rinfo.RuleText = v["s_luascript"].(string)
+				//提取全部属性
+				rinfo.LFields = getALLFields()
+				ruleCores = append(ruleCores, rinfo)
+			} else {
+				qu.Try(func() {
+					rinfo.RuleText = v["s_rule"].(string)
+					tmp := strings.Split(rinfo.RuleText, "__")
+					var pattern string
+					if strings.Contains(tmp[0], "\\u") {
+						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
+					} else {
+						pattern = tmp[0]
+					}
+					if len(tmp) == 2 {
+						epos := strings.Split(tmp[1], ",")
+						posm := map[string]int{}
+						for _, v := range epos {
+							ks := strings.Split(v, ":")
+							if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
+								posm[ks[1]] = qu.IntAll(ks[0])
+							} else { //(.*)招标公告__2
+								posm[rinfo.Field] = qu.IntAll(ks[0])
 							}
-							rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm}
-						} else {
-							rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false}
 						}
-						ruleCores = append(ruleCores, rinfo)
-					}, func(err interface{}) {
-						log.Debug(rinfo.Code, rinfo.Field, err)
-					})
-				}
+						rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: true, ExtractPos: posm}
+					} else {
+						rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(pattern), Bextract: false}
+					}
+					ruleCores = append(ruleCores, rinfo)
+				}, func(err interface{}) {
+					log.Debug(rinfo.Code, rinfo.Field, err)
+				})
 			}
-			rcore.RuleCores = ruleCores
-			//
-			e.RuleCores = append(e.RuleCores, rcore)
 		}
+		rcore.RuleCores = ruleCores
+		//
+		maps = append(maps, rcore)
 	}
+	return maps
 }
 
 //加载分包抽取规则

+ 1 - 0
src/jy/util/article.go

@@ -4,6 +4,7 @@ package util
 type Job struct {
 	SourceMid    string                            //数据源的MongoId
 	Category     string                            //类别
+	CategorySecond string							//二级分类
 	Content      string                            //正文
 	Title        string                            //标题
 	SpiderCode   string                            //爬虫代码

+ 302 - 0
src/web/templates/admin/class.html

@@ -0,0 +1,302 @@
+{{template "inc"}}
+<!-- Main Header -->
+{{template "header"}}
+<!-- Left side column. 权限菜单 -->
+{{template "memu"}}
+<head>
+    <style>
+
+        #selectclear2 select {
+            width:190px;
+            height:167px;
+            padding:5px;
+        }
+        #selectclear2{
+            display: flex;
+            flex-direction: row;
+        }
+        #selectclear2 .move{
+            display: flex;
+            flex-direction: column;
+            margin: 20px 25px
+        }
+        #selectclear2 .move button{
+            margin: 1px 0px;
+            padding: 4px 6px;
+        }
+        #selectclear2 .doublebox {
+            text-align:center;
+        }
+
+
+    </style>
+</head>
+
+<!-- Content Wrapper. Contains page content -->
+<div class="content-wrapper">
+    <section class="content-header">
+        <h1>
+            <small><button type="button" class="btn btn-primary" data-toggle="modal" data-target="#modal-info" onclick="formReset()">新增分类抽取</button></small>
+        </h1>
+        <ol class="breadcrumb">
+            <li><a href="/admin/infotype"><i class="fa fa-dashboard"></i> 分类抽取</a></li>
+        </ol>
+    </section>
+    <!-- Main content -->
+    <section class="content">
+        <div class="row">
+            <div class="col-xs-12">
+                <div class="box">
+                    <div class="box-body">
+                        <table id="dataTable" class="table table-bordered table-hover">
+                            <thead>
+                            <tr>
+                                <th>一级分类</th>
+                                <th>二级分类</th>
+                                <th>操作</th>
+                            </tr>
+                            </thead>
+                        </table>
+                    </div>
+                    <!-- /.box-body -->
+                </div>
+                <!-- /.box -->
+            </div>
+        </div>
+    </section>
+</div>
+
+<div class="modal fade" id="modal-info">
+    <div class="modal-dialog">
+        <form id="userform" class="form-horizontal" role="form">
+            <div class="modal-content">
+                <div class="modal-header">
+                    <button type="button" class="close" data-dismiss="modal" aria-label="Close">
+                        <span aria-hidden="true">&times;</span></button>
+                    <h4 class="modal-title">分类信息</h4>
+                </div>
+                <div class="modal-body">
+                    <div class="form-group">
+                        <label for="code" class="col-sm-2 control-label" style="width:20% !important;">一级分类名称:</label>
+                        <div class="col-sm-10" style="width:80% !important;">
+                            <select id="topclass" name="topclass" class="form-control" onclick="topfunc()">
+                            </select>
+                        </div>
+                    </div>
+                    <div class="form-group">
+                        <label for="code" class="col-sm-2 control-label" style="width:20% !important;">二级分类名称:</label>
+                        <div class="col-sm-10" style="width:80% !important;">
+                            <select id="subclass" name="subclass" class="form-control" >
+                            </select>
+                        </div>
+                    </div>
+                    <div class="form-group">
+                        <label for="code" class="col-sm-2 control-label" style="width:20% !important;">属性:</label>
+                        <div class="col-sm-10" id="selectclear2" style="width:75% !important;">
+                            <div class="doublebox">
+                                <select multiple="multiple" id="select3" style="overflow-x: scroll;"></select>
+                            </div>
+                            <div class="move" style="margin: 10px;">
+                                <button type="button" id="up2" class="btn btn-primary">上移</button>
+                                <button type="button" id="right2" class="btn btn-primary">右移</button>
+                                <button type="button" id="left2" class="btn btn-primary">左移</button>
+                                <button type="button" id="down2" class="btn btn-primary">下移</button>
+                            </div>
+                            <div class="doublebox">
+                                <select multiple="multiple" id="select4" style="overflow-x: scroll;"></select>
+                            </div>
+                        </div>
+                    </div>
+                    <!--<div class="form-group">
+                        <label for="modify" class="col-sm-2 control-label">二级菜单:</label>
+                        <div id="secondmenu" class="col-sm-10">
+                            <input type="button" value="+" onclick="append()"></input>
+                        </div>
+                    </div>-->
+                </div>
+                <div class="modal-footer">
+                    <button type="button" class="btn btn-default" data-dismiss="modal" onclick="formReset()">取消</button>
+                    <button type="button" class="btn btn-primary" onclick="save()">保存</button>
+                </div>
+            </div>
+            <!-- /.modal-content -->
+        </form>
+        <input type="hidden" id="_id">
+    </div>
+    <!-- /.modal-dialog -->
+</div>
+<!-- /.modal -->
+
+<!-- footer -->
+{{template "footer"}}
+
+<script>
+    menuActive("infotype")
+    $(function () {
+        ttable=$('#dataTable').DataTable({
+            "paging"      : true,
+            "lengthChange": false,
+            "searching"   : true,
+            "ordering"    : true,
+            "info"        : true,
+            "autoWidth"   : false,
+            "ajax": {
+                "url": "/admin/infotype/data",
+                "type": "post",
+                "data":{}
+            },
+            "language": {
+                "url": "/res/dist/js/dataTables.chinese.lang"
+            },
+            "columns": [
+                { "data": "topclass",render:function(val,a,row){
+                        return row.topclass}},
+                { "data": "subclass",render:function(val,a,row){
+                        return row.subclass}},
+                {"data":"_id",render:function(val,a,row){
+                        return "<a href='#' onclick='edit(\""+val+"\",\""+row.subclass+"\")'><i class='fa fa-fw fa-edit text-yellow'></i></a> &nbsp;"+
+                                "<a href='#' onclick='del(\""+val+"\",\""+row.subclass+"\")'><i class='fa fa-fw fa-trash text-red'></i></a>"
+                    }}
+            ]
+        });
+        //ttable.on('init.dt', function () {});
+    })
+
+    function save(){
+        _id=$("#_id").val()
+        topclass=$("#topclass").val()
+        var clearArr = [];
+        var clearArr2 = [];
+        $("#select4 option").each(function(i,val){
+            clearArr[i] = this.value
+        })
+        $("#select3 option").each(function(i,val){
+            clearArr2[i] = this.value
+        })
+        var subclass=$("#subclass").val()
+        var fields = JSON.stringify(clearArr)
+        var fields2 = JSON.stringify(clearArr2)
+        if(topclass == "" || clearArr.length==0){
+            alert("表单填写不完整!");
+            return false;
+        }
+        $.ajax({
+            url:"/admin/infotype/save",
+            type:"post",
+            data:{"topclass":topclass,"subclass":subclass,"fields":fields,"fields2":fields2,"_id":_id},
+            success:function(r){
+                if(r.rep){
+                    $("#userform")[0].reset();
+                    $("#modal-info").modal("hide");
+                    ttable.ajax.reload();
+                }else{
+                    alert("保存失败,可能是要添加的分类已存在");
+                }
+            }
+        })
+    }
+    function del(_id,subclass){
+        showConfirm("确定删除?", function() {
+            $.ajax({
+                url:"/admin/infotype/del",
+                type:"post",
+                data:{"_id":_id,"subclass":subclass},
+                success:function(r){
+                    if(r.rep){
+                        ttable.ajax.reload();
+                    }else{
+                        showTip("删除失败", 1000, function() {});
+                    }
+                }
+            })
+        });
+    }
+    function edit(_id,subclass){
+        $("#topclass").empty()
+        $("#subclass").empty()
+        $("#select3").empty();
+        $("#select4").empty();
+        $("#_id").val(_id)
+        console.log(_id,subclass)
+        $.ajax({
+            url:"/admin/infotype/select",
+            type:"post",
+            data:{"_id":_id,"subclass":subclass},
+            success:function(r){
+                console.log(r)
+                if(r){
+                    $("#topclass").append("<option value="+r.topclass+">"+r.topclass+"</option>")
+                    $("#topclass").attr("disabled",true);
+                    $("#subclass").append("<option value="+r.subclass+">"+r.subclass+"</option>")
+                    $("#subclass").attr("disabled",true);
+                    for(var a=0;a<r.fields.length;a++){
+                        $("#select4").append("<option  value='"+r.fields[a].s_field+"'>"+r.fields[a].s_name+"</option>");
+                    }
+                    for(var a=0;a<r.fields2.length;a++){
+                        $("#select3").append("<option  value='"+r.fields2[a].s_field+"'>"+r.fields2[a].s_name+"</option>");
+                    }
+                }
+            }
+        })
+        $("#modal-info").modal("show");
+    }
+    function topfunc() {
+        $("#subclass").empty()
+        var top=$("#topclass").val()
+        var topmap={"top":top}
+        $.post("/admin/subclass/data",topmap,function (data,status) {
+            if(data.length!=0){
+                for(var a=0;a<data.data.subclass.length;a++) {
+                    $("#subclass").append("<option value="+data.data.subclass[a]+">"+data.data.subclass[a]+"</option>")
+                }
+            }
+        })
+    }
+    function formReset(){
+        $("#_id").val("")
+        $("#topclass").empty()
+        $("#topclass").attr("disabled",false);
+        $("#subclass").empty()
+        $("#subclass").attr("disabled",false);
+        $("#select3").empty();
+        $("#select4").empty();
+        $.post("/admin/topclass/data",'',function (data,status) {
+            $("#topclass").append("<option value=''>--请选择--</option>")
+            for(var a=0;a<data.data.length;a++) {
+                $("#topclass").append("<option value="+data.data[a].topclass+">"+data.data[a].topclass+"</option>")
+            }
+        })
+        $.post("/admin/fields/data",'',function (data,status) {
+            for(var a=0;a<data.data.length;a++) {
+                $("#select3").append("<option value="+data.data[a].s_field+">"+data.data[a].s_name+"</option>")
+            }
+        })
+        $("#modal-info-addclear").modal("show");
+    }
+    $("#selectclear2 #right2").click(function(){
+        $("#select3 option:selected").appendTo("#select4");
+    });
+    //左移
+    $("#selectclear2 #left2").click(function(){
+        $("#select4 option:selected").appendTo("#select3");
+    });
+    $("#selectclear2 #up2,#selectclear2 #down2").click(function() {
+        var $opt = $("#select4 option:selected:first");
+        if (!$opt.length){
+            return;
+        }
+        if (this.id == "up2"){
+            $opt.prev().before($opt);
+        }else{
+            $opt.next().after($opt);
+        }
+    });
+    //双击右移
+    $("#selectclear2 #select3").dblclick(function(){
+        $("#selectclear2 #select3 option:selected").appendTo("#select4");
+    });
+    //双击左移
+    $("#selectclear2 #select4").dblclick(function(){
+        $("#selectclear2 #select4 option:selected").appendTo("#select3");
+    });
+</script>

+ 0 - 1
src/web/templates/admin/com_memu.html

@@ -51,7 +51,6 @@ $(function () {
 	$.post('/admin/menu','',function (data,status) {
 		for(var a=0;a<data.data.length;a++) {
             var info=data.data[a]
-            console.log(info)
 		    if (info.secondmenu){
                 var str=""
                 for(var sec=1;sec<=Object.keys(info.secondmenu).length;sec++){

+ 1 - 1
src/web/templates/admin/secondmenu.html

@@ -86,7 +86,7 @@
 {{template "footer"}}
 
 <script>
-    menuActive("secondmenu")
+    menuActive("menu")
     $(function () {
         ttable=$('#dataTable').DataTable({
             "paging"      : true,