浏览代码

分块规则前端配置

wcj 6 年之前
父节点
当前提交
1300787264

+ 3 - 3
src/jy/admin/rulecheck.go

@@ -286,7 +286,7 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 //lua脚本前置过滤验证
 func checkPreScript(code, name, infoid, script string) map[string]interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j,_ := extract.PreInfo(*doc)
+	j, _ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block
@@ -306,7 +306,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 	e.InitRuleCore()
 	e.InitTag()
 	tmp, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j,_ := extract.PreInfo(*tmp)
+	j, _ := extract.PreInfo(*tmp)
 	doc := *j.Data
 	//全局前置规则,结果覆盖doc属性
 	for _, v := range e.RulePres {
@@ -350,7 +350,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 //lua脚本抽取验证
 func checkCoreScript(code, name, infoid, script string) interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j ,_ := extract.PreInfo(*doc)
+	j, _ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block

+ 75 - 9
src/jy/admin/version.go

@@ -2,15 +2,17 @@
 package admin
 
 import (
-	"github.com/gin-contrib/sessions"
-	"github.com/gin-gonic/gin"
-	"gopkg.in/mgo.v2/bson"
 	. "jy/mongodbutil"
 	"jy/util"
+	"log"
 	"net/http"
 	qu "qfw/util"
 	"strings"
 	"time"
+
+	"github.com/gin-contrib/sessions"
+	"github.com/gin-gonic/gin"
+	"gopkg.in/mgo.v2/bson"
 )
 
 func init() {
@@ -27,20 +29,20 @@ func init() {
 	})
 	//根据_id查询版本详细信息
 	Admin.GET("/version/dataById", func(c *gin.Context) {
-		gid ,b :=c.GetQuery("_id")
-		if !b || !bson.IsObjectIdHex(gid){
-			c.JSON(400,gin.H{"req":false})
+		gid, b := c.GetQuery("_id")
+		if !b || !bson.IsObjectIdHex(gid) {
+			c.JSON(400, gin.H{"req": false})
 			return
 		}
 
 		data, _ := Mgo.FindOne("version", `{"_id":"`+gid+`","delete":false}`)
-		c.JSON(200, gin.H{"req":true,"data": data})
+		c.JSON(200, gin.H{"req": true, "data": data})
 	})
 	Admin.POST("/version/save", func(c *gin.Context) {
 		_id, _ := c.GetPostForm("_id")
 		data := GetPostForm(c)
-		if data["s_filefileds"]!=nil{
-			data["s_filefileds"] = strings.Split(data["s_filefileds"].(string),",")
+		if data["s_filefileds"] != nil {
+			data["s_filefileds"] = strings.Split(data["s_filefileds"].(string), ",")
 		}
 		if _id != "" {
 			Mgo.UpdateById("version", _id, map[string]interface{}{"$set": data})
@@ -249,6 +251,70 @@ func init() {
 		//b := Mgo.Del("versioninfo", `{"_id":"`+_id+`"}`)
 		c.JSON(200, gin.H{"rep": b})
 	})
+	// Block-rule configuration page; the vid query parameter is handed to the template.
+	Admin.GET("/version/blockinfo", func(c *gin.Context) {
+		c.HTML(http.StatusOK, "blockinfo.html", gin.H{"vid": c.Query("vid")})
+	})
+	// List the non-deleted block rules of the posted vid (query sorted by index
+	// descending). Each row additionally gets an "id" field derived from its _id.
+	Admin.POST("/version/blockinfo_list", func(c *gin.Context) {
+		vid, _ := c.GetPostForm("vid")
+		data, _ := Mgo.Find("block_info", bson.M{"vid": vid, "delete": false}, `{"index":-1}`, `{"block_reg":1,"title_reg":1,"index":1}`, false, -1, -1)
+		if data == nil {
+			// The lookup produced nothing to dereference: answer with an empty
+			// list so the handler does not panic and the table still renders.
+			c.JSON(http.StatusOK, gin.H{"data": []interface{}{}})
+			return
+		}
+		for _, v := range *data {
+			v["id"] = qu.BsonIdToSId(v["_id"])
+		}
+		c.JSON(http.StatusOK, gin.H{"data": data})
+	})
+	//分块配置保存
+	Admin.POST("/version/blockinfo_save", func(c *gin.Context) {
+		status := false
+		_id, _ := c.GetPostForm("_id")
+		block_reg, _ := c.GetPostForm("block_reg")
+		title_reg, _ := c.GetPostForm("title_reg")
+		if _id != "" {
+			status = Mgo.UpdateById("block_info", _id, bson.M{
+				"$set": bson.M{
+					"l_updatetime": time.Now().Unix(),
+					"block_reg":    block_reg,
+					"title_reg":    title_reg,
+				},
+			})
+		} else {
+			vid, _ := c.GetPostForm("vid")
+			list, flag := Mgo.Find("block_info", bson.M{"vid": vid}, `{"index": 1}`, `{"index":1}`, false, 0, 1)
+			index := -1
+			if flag && len(*list) == 1 {
+				index = qu.IntAllDef((*list)[0]["index"], 1) - 1
+			}
+			status = Mgo.Save("block_info", bson.M{
+				"delete":       false,
+				"index":        index,
+				"block_reg":    block_reg,
+				"title_reg":    title_reg,
+				"vid":          vid,
+				"l_createtime": time.Now().Unix(),
+				"s_username":   sessions.Default(c).Get("username"),
+			}) != ""
+		}
+		c.JSON(http.StatusOK, gin.H{"status": status})
+	})
+	// Persist a new rule order: the rule _ids[k] gets indexs[k] as its index.
+	// The two posted arrays are expected to be parallel; ids without a matching
+	// index are left untouched instead of panicking on an out-of-range access.
+	Admin.POST("/version/blockinfo_updateindex", func(c *gin.Context) {
+		ids := c.PostFormArray("_ids")
+		indexes := c.PostFormArray("indexs")
+		if len(ids) != len(indexes) {
+			log.Println("blockinfo_updateindex: _ids/indexs length mismatch", len(ids), len(indexes))
+		}
+		for k, id := range ids {
+			if k >= len(indexes) {
+				break // no index was posted for the remaining ids
+			}
+			Mgo.UpdateById("block_info", id, bson.M{
+				"$set": bson.M{
+					"index": qu.IntAll(indexes[k]),
+				},
+			})
+		}
+		c.JSON(http.StatusOK, gin.H{})
+	})
+	// Mark a block rule as deleted: only its "delete" flag is set to true.
+	Admin.POST("/version/blockinfo_delete", func(c *gin.Context) {
+		id, _ := c.GetPostForm("_id")
+		markDeleted := bson.M{"$set": bson.M{"delete": true}}
+		c.JSON(http.StatusOK, gin.H{"status": Mgo.UpdateById("block_info", id, markDeleted)})
+	})
 }
 
 //克隆版本通用属性

+ 2 - 2
src/jy/extract/exportask.go

@@ -72,9 +72,9 @@ func extractAndExport(v string, t map[string]interface{}) {
 		var j, jf *ju.Job
 		if e.IsFileField && v["projectinfo"] != nil {
 			v["isextFile"] = true
-			j, jf = PreInfo(v)
+			j, jf = e.PreInfo(v)
 		} else {
-			j, _ = PreInfo(v)
+			j, _ = e.PreInfo(v)
 		}
 		e.TaskInfo.ProcessPool <- true
 		go e.ExtractProcess(j, jf)

+ 12 - 5
src/jy/extract/extract.go

@@ -83,9 +83,9 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			var j, jf *ju.Job
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf = PreInfo(v)
+				j, jf = ext.PreInfo(v)
 			} else {
-				j, _ = PreInfo(v)
+				j, _ = ext.PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
 			go ext.ExtractProcess(j, jf)
@@ -184,9 +184,9 @@ func RunExtractTask(taskId string) {
 			var j, jf *ju.Job
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf = PreInfo(v)
+				j, jf = ext.PreInfo(v)
 			} else {
-				j, _ = PreInfo(v)
+				j, _ = ext.PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
 			go ext.ExtractProcess(j, jf)
@@ -201,8 +201,13 @@ func RunExtractTask(taskId string) {
 	time.AfterFunc(1*time.Minute, func() { RunExtractTask(taskId) })
 }
 
-//信息预处理
+// PreInfo preprocesses doc without binding it to a specific extraction version.
+// It delegates to (*ExtractTask).PreInfo on a throw-away task that carries an
+// empty, non-nil RuleBlock: a zero-value task would copy a nil RuleBlock into
+// the jobs, and the block splitter reads ruleBlock.BlockRegs without a nil check.
+// With no rules configured, no serial-number style can match.
+// It returns the main job j and the attachment job jf produced by the method.
 func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
+	return (&ExtractTask{RuleBlock: &ju.RuleBlock{}}).PreInfo(doc)
+}
+
+//信息预处理-和版本关联
+func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 	defer qu.Catch()
 	//判断是否有附件这个字段
 	var isextFile bool
@@ -244,6 +249,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 		Province:  qu.ObjToString(doc["area"]),
 		Result:    map[string][]*ju.ExtField{},
 		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
+		RuleBlock: e.RuleBlock,
 	}
 	if isextFile {
 		jf = &ju.Job{
@@ -257,6 +263,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 			Province:   qu.ObjToString(doc["area"]),
 			Result:     map[string][]*ju.ExtField{},
 			BuyerAddr:  qu.ObjToString(doc["buyeraddr"]),
+			RuleBlock:  e.RuleBlock,
 			IsFile:     isextFile,
 		}
 	}

+ 48 - 17
src/jy/extract/extractInit.go

@@ -53,21 +53,22 @@ type TaskInfo struct {
 	TestLua                             bool      //检查测试用
 }
 type ExtractTask struct {
-	Id            string              //任务id
-	IsRun         bool                //是否启动
-	Content       string              //信息内容
-	TaskInfo      *TaskInfo           //任务信息
-	RulePres      []*RegLuaInfo       //通用前置规则
-	RuleBacks     []*RegLuaInfo       //通用后置规则
-	RuleCores     []*RuleCore         //抽取规则
-	PkgRuleCores  []*RuleCore         //分包抽取规则
+	Id            string        //任务id
+	IsRun         bool          //是否启动
+	Content       string        //信息内容
+	TaskInfo      *TaskInfo     //任务信息
+	RulePres      []*RegLuaInfo //通用前置规则
+	RuleBacks     []*RegLuaInfo //通用后置规则
+	RuleCores     []*RuleCore   //抽取规则
+	PkgRuleCores  []*RuleCore   //分包抽取规则
+	RuleBlock     *ju.RuleBlock
 	Tag           map[string][]*Tag   //标签库
 	ClearFn       map[string][]string //清理函数
 	IsExtractCity bool                //是否开启城市抽取
 	Fields        map[string]int      //抽取属性组
 
-	IsFileField       bool      //是否开启附件抽取
-	FileFields        map[string]int      //抽取附件属性组
+	IsFileField bool           //是否开启附件抽取
+	FileFields  map[string]int //抽取附件属性组
 
 	ResultChanel chan bool                  //抽取结果详情
 	ResultArr    [][]map[string]interface{} //抽取结果详情
@@ -914,16 +915,16 @@ func (e *ExtractTask) InitFile() {
 	//query:=bson.M{"version":e.TaskInfo.Version,"delete":false}
 	ve, _ := db.Mgo.FindOne("version", `{"version":"`+e.TaskInfo.Version+`","delete":false}`)
 	//ve, _ := db.Mgo.FindOne("version", query)
-	if ve == nil{
+	if ve == nil {
 		return
 	}
-	if (*ve)["isfiles"]!=nil && (*ve)["isfiles"].(bool){
-		e.IsFileField =true
+	if (*ve)["isfiles"] != nil && (*ve)["isfiles"].(bool) {
+		e.IsFileField = true
 	}
-	efiled := make(map[string]int,0)
-	if (*ve)["s_filefileds"] != nil{
-		for _,vff :=range (*ve)["s_filefileds"].([]interface{}) {
-			efiled[vff.(string)]=1
+	efiled := make(map[string]int, 0)
+	if (*ve)["s_filefileds"] != nil {
+		for _, vff := range (*ve)["s_filefileds"].([]interface{}) {
+			efiled[vff.(string)] = 1
 		}
 	}
 	e.FileFields = efiled
@@ -977,3 +978,33 @@ func (c *ClearTask) InitClearLuas() {
 		}
 	}
 }
+
+// InitBlockRule loads the non-deleted block rules of the task's version from the
+// "block_info" collection, compiles each block_reg/title_reg pair and stores the
+// result in e.RuleBlock. BlockRegs[i] and TitleRegs[i] always come from the same
+// rule. e.RuleBlock is set to a non-nil value even when nothing could be loaded.
+//
+// The stored patterns are run through strconv.Unquote (wrapped in double quotes)
+// before compiling. A rule is skipped, and the reason logged, when unquoting or
+// compiling either pattern fails; a rule with an empty pattern is skipped silently.
+func (e *ExtractTask) InitBlockRule() {
+	brs, trs := []*regexp.Regexp{}, []*regexp.Regexp{}
+	datas, _ := db.Mgo.Find("block_info", map[string]interface{}{
+		"vid":    e.TaskInfo.VersionId,
+		"delete": false,
+	}, `{"index":-1}`, `{"block_reg":1,"title_reg":1}`, false, -1, -1)
+	// Guard the dereference: a missing result must not panic during task setup.
+	if datas != nil {
+		for _, v := range *datas {
+			blockReg, _ := v["block_reg"].(string)
+			titleReg, _ := v["title_reg"].(string)
+			blockReg, bErr := strconv.Unquote(`"` + blockReg + `"`)
+			titleReg, tErr := strconv.Unquote(`"` + titleReg + `"`)
+			if bErr != nil || tErr != nil {
+				log.Println("InitBlockRule: skip rule, cannot unquote:", v["block_reg"], v["title_reg"], bErr, tErr)
+				continue
+			}
+			if blockReg == "" || titleReg == "" {
+				continue
+			}
+			bReg, bErr := regexp.Compile(blockReg)
+			tReg, tErr := regexp.Compile(titleReg)
+			if bErr != nil || tErr != nil {
+				log.Println("InitBlockRule: skip rule, cannot compile:", blockReg, titleReg, bErr, tErr)
+				continue
+			}
+			// Append both halves together so the two slices stay index-aligned.
+			brs = append(brs, bReg)
+			trs = append(trs, tReg)
+		}
+	}
+	e.RuleBlock = &ju.RuleBlock{
+		BlockRegs: brs,
+		TitleRegs: trs,
+	}
+}

+ 7 - 6
src/jy/extract/extractudp.go

@@ -94,6 +94,7 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 	ext.InitRuleCore()
 	ext.InitTag()
 	ext.InitClearFn()
+	ext.InitBlockRule()
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		ext.InitDFA()
@@ -150,9 +151,9 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 					var j, jf *ju.Job
 					if ext.IsFileField && v["projectinfo"] != nil {
 						v["isextFile"] = true
-						j, jf = PreInfo(v)
+						j, jf = ext.PreInfo(v)
 					} else {
-						j, _ = PreInfo(v)
+						j, _ = ext.PreInfo(v)
 					}
 					ext.TaskInfo.ProcessPool <- true
 					go ext.ExtractProcess(j, jf)
@@ -176,9 +177,9 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 					var j, jf *ju.Job
 					if ext.IsFileField && v["projectinfo"] != nil {
 						v["isextFile"] = true
-						j, jf = PreInfo(v)
+						j, jf = ext.PreInfo(v)
 					} else {
-						j, _ = PreInfo(v)
+						j, _ = ext.PreInfo(v)
 					}
 					ext.TaskInfo.ProcessPool <- true
 					go ext.ExtractProcess(j, jf)
@@ -216,9 +217,9 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 				var j, jf *ju.Job
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf = PreInfo(v)
+					j, jf = ext.PreInfo(v)
 				} else {
-					j, _ = PreInfo(v)
+					j, _ = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
 				go ext.ExtractProcess(j, jf)

+ 7 - 7
src/jy/pretreated/analystep.go

@@ -21,20 +21,20 @@ func AnalyStart(job *util.Job) {
 	//
 	tabs, ration := ComputeConRatio(con, 1)
 	if len(tabs) > 0 {
-		newcon, newtabs, newration := findBigText(con, ration, tabs)
+		newcon, newtabs, newration := FindBigText(con, ration, tabs)
 		if newcon != "" && newration == 0 {
 			con = newcon
 			tabs = newtabs
 			ration = newration
 		}
 	}
-	blockArrays, _ := DivideBlock(con, 1)
+	blockArrays, _ := DivideBlock(con, 1, job.RuleBlock)
 	if len(blockArrays) > 0 { //有分块
 		//从块里面找分包
 		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title)
 		for _, bl := range blockArrays {
 			if len([]rune(bl.Text)) > 80 {
-				ba1, _ := DivideBlock(bl.Text, 1)
+				ba1, _ := DivideBlock(bl.Text, 1, job.RuleBlock)
 				if len(ba1) > 0 {
 					t := ""
 					for _, t1 := range ba1 {
@@ -49,7 +49,7 @@ func AnalyStart(job *util.Job) {
 			t1, _ := ComputeConRatio(bl.Text, 2)
 			if len(t1) > 0 {
 				job.HasTable = 1 //添加标识:文本中有table
-				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid)
+				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid, job.RuleBlock)
 				processTableResult(tabres, bl, job)
 				if bl.Title == "" && tabres.BlockTag != "" {
 					bl.Title = tabres.BlockTag
@@ -67,7 +67,7 @@ func AnalyStart(job *util.Job) {
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
 			job.BlockPackage = FindPackageFromText(job.Title, newCon)
-			tabres := AnalyTableV2(tabs, job.Category, "", con, 1, job.SourceMid)
+			tabres := AnalyTableV2(tabs, job.Category, "", con, 1, job.SourceMid, job.RuleBlock)
 			processTableResult(tabres, bl, job)
 			//			for k, v := range bl.TableKV.Kv {
 			//				log.Println("bl.TableKV.Kv", k, v)
@@ -224,7 +224,7 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 //ration==1 遍历所有tabs,ration!=1 tabs只有一个
 func tableDivideBlock(con string, ration float32, tabs []*goquery.Selection) string {
 	if len(tabs) != 1 {
-		return ""
+		//return ""
 	}
 	for _, tab := range tabs {
 		content := ""
@@ -275,7 +275,7 @@ func tableDivideBlock(con string, ration float32, tabs []*goquery.Selection) str
 }
 
 //查找大文本,5次
-func findBigText(con string, r float32, t []*goquery.Selection) (content string, tabs []*goquery.Selection, ration float32) {
+func FindBigText(con string, r float32, t []*goquery.Selection) (content string, tabs []*goquery.Selection, ration float32) {
 	content = tableDivideBlock(con, r, t)
 	if content == "" {
 		return

+ 4 - 4
src/jy/pretreated/analytable.go

@@ -524,7 +524,7 @@ func (table *Table) MergerToTableresult() {
 解析表格入口
 返回:汇总表格对象
 **/
-func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}) (tabres *TableResult) {
+func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock) (tabres *TableResult) {
 	defer qutil.Catch()
 	//u.Debug(con)
 	if itype == 1 {
@@ -532,7 +532,7 @@ func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, ityp
 		con = RepairCon(con)
 	}
 	//生成tableresult对象
-	tabres = NewTableResult(_id, toptype, blockTag, con, itype)
+	tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
 	//可以有多个table
 	for _, table := range tabs {
 		//隐藏表格跳过
@@ -1866,7 +1866,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 				L:
 					for in2, v1 := range vs {
 						if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
-							for _, serial := range regSerialTitles_2 {
+							for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
 								if serial.MatchString(v1) {
 									break L
 								}
@@ -2394,7 +2394,7 @@ L:
 				jumpNextTd = false
 			}
 			///////////////////////////////////////
-			thisTdKvs := kvAfterDivideBlock(td.Text, 3)
+			thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock)
 			if len(thisTdKvs) == 0 {
 				thisTdKvs = colonkvEntity.GetKvs(td.Text, "", 2)
 			}

+ 11 - 11
src/jy/pretreated/division.go

@@ -11,15 +11,15 @@ import (
 
 //分块、分段功能
 var (
-	regSerialTitles = []string{
+	/*regSerialTitles = []string{
 		"([一二三四五六七八九十]+)[\u3000\u2003\u00a0\\s]*[、..::,](.*)",
 		"[((]([一二三四五六七八九十]+)[))][\u3000\u2003\u00a0\\s]*[、..::]?(.*)",
 		"(\\d+)[\u3000\u2003\u00a0\\s]*、(.*)",
 		"(\\d+)[\u3000\u2003\u00a0\\s]*[..]([^\\d][^\r\n]+)",
 		"(\\d+)[\u3000\u2003\u00a0\\s]+([^\\d][^\r\n]+)",
 		"1[..](\\d+)[\u3000\u2003\u00a0\\s]+([^\\d..][^\r\n]+)",
-	}
-	regSerialTitles_1 = []*regexp.Regexp{
+	}*/
+	/*regSerialTitles_1 = []*regexp.Regexp{
 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)([一二三四五六七八九十]+)[\u3000\u2003\u00a0\\s]*[、..::,](.*)"),
 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)[((]([一二三四五六七八九十]+)[))][\u3000\u2003\u00a0\\s]*[、..::]?(.*)"),
 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)(\\d+)[\u3000\u2003\u00a0\\s]*、(.*)"),
@@ -36,7 +36,7 @@ var (
 		regexp.MustCompile("^(\\d+)[\u3000\u2003\u00a0\\s]+([^\\d][^\r\n]+)$"),
 		regexp.MustCompile("^1[..](\\d+)[\u3000\u2003\u00a0\\s]+([^\\d..][^\r\n]+)$"),
 		regexp.MustCompile("^[(](\\d+)[\u3000\u2003\u00a0\\s)]+([^\r\n]+)$"),
-	}
+	}*/
 	regReplAllTd       = regexp.MustCompile("(?smi)<td.*?>.+?</td>")
 	regIsNumber        = regexp.MustCompile("^\\d+$")
 	regIsChineseNumber = regexp.MustCompile("^[一二三四五六七八九十]+$")
@@ -64,7 +64,7 @@ var (
 )
 
 //分块
-func DivideBlock(content string, from int) ([]*util.Block, int) {
+func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.Block, int) {
 	defer qutil.Catch()
 	returnValue := 0
 	var blocks []*util.Block
@@ -75,7 +75,7 @@ func DivideBlock(content string, from int) ([]*util.Block, int) {
 	//contentTemp := regReplAllTd.ReplaceAllString(content, "")
 	contentTemp := TextAfterRemoveTable(content)
 	tdIndexs := regReplAllTd.FindAllStringSubmatchIndex(content, -1)
-	regContenSerialTitle, regSerialTitleIndex := getSerialType(contentTemp)
+	regContenSerialTitle, regSerialTitleIndex := getSerialType(contentTemp, ruleBlock.BlockRegs)
 	//没有分块
 	if regSerialTitleIndex == -1 {
 		if len(contentTemp) == len(content) {
@@ -86,7 +86,7 @@ func DivideBlock(content string, from int) ([]*util.Block, int) {
 		}
 	}
 	//匹配序号和标题
-	regSerialTitle := regSerialTitles_2[regSerialTitleIndex]
+	regSerialTitle := ruleBlock.TitleRegs[regSerialTitleIndex]
 	indexs := regContenSerialTitle.FindAllStringIndex(content, -1)
 	indexs = filterSerial(content, indexs, tdIndexs)
 	//头块
@@ -330,11 +330,11 @@ func filterSerial(content string, indexs, tdIndexs [][]int) [][]int {
 }
 
 //获取正文所用的序号类型
-func getSerialType(content string) (*regexp.Regexp, int) {
+func getSerialType(content string, blockRegs []*regexp.Regexp) (*regexp.Regexp, int) {
 	var regContenSerialTitle *regexp.Regexp
 	//先判断文章最外层使用的是哪种序号
 	contentStartIndex, regSerialTitleIndex := -1, -1
-	for k, v := range regSerialTitles_1 {
+	for k, v := range blockRegs {
 		indexs := v.FindStringIndex(content)
 		//只用最外层的序号,里面的过滤掉
 		if len(indexs) == 2 && !regSpliteSegment.MatchString(strings.TrimSpace(content[indexs[0]:indexs[1]])) && (contentStartIndex == -1 || indexs[0] < contentStartIndex) {
@@ -753,8 +753,8 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 }
 
 //分块之后的kv
-func kvAfterDivideBlock(text string, from int) []*util.Kv {
-	blocks, _ := DivideBlock(text, from)
+func kvAfterDivideBlock(text string, from int, ruleBlock *util.RuleBlock) []*util.Kv {
+	blocks, _ := DivideBlock(text, from, ruleBlock)
 	kvs := []*util.Kv{}
 	for _, v := range blocks {
 		//util.Debug(v.Text)

+ 5 - 3
src/jy/pretreated/tablev2.go

@@ -33,10 +33,11 @@ type TableResult struct {
 	HasKey         int                   //有key
 	HasBrand       int                   //有品牌
 	HasGoods       int                   //有商品
+	RuleBlock      *u.RuleBlock
 }
 
 //快速创建TableResult对象
-func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int) *TableResult {
+func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int, ruleBlock *u.RuleBlock) *TableResult {
 	return &TableResult{
 		Id:           Id,
 		Toptype:      Toptype,
@@ -48,6 +49,7 @@ func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int) *T
 		PackageMap:   NewSortMap(),
 		SortKV:       NewSortMap(),
 		SortKVWeight: map[string]int{},
+		RuleBlock:    ruleBlock,
 	}
 }
 
@@ -144,7 +146,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 					stag = str
 				}
 			}
-			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id)
+			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock)
 			td.BH = false
 
 			td.SonTableResult = sonts
@@ -212,7 +214,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 	ub := []*u.Block{}
 	if lentxt > 50 { //看是否划块
 		//u.Debug(txt)
-		ub, _ = DivideBlock(txt, 2)
+		ub, _ = DivideBlock(txt, 2, nil)
 		if len(ub) > 0 {
 			colonKvWeight := map[string]int{}
 			spaceKvWeight := map[string]int{}

+ 17 - 7
src/jy/util/article.go

@@ -1,5 +1,9 @@
 package util
 
+import (
+	"regexp"
+)
+
 //
 type Job struct {
 	SourceMid    string                            //数据源的MongoId
@@ -18,13 +22,13 @@ type Job struct {
 	BlockPackage map[string]*BlockPackage          //块中的分包
 	Winnerorder  []map[string]interface{}          //中标候选人排序
 	PackageInfo  map[string]map[string]interface{} //分包信息
-
-	BrandData [][]map[string]string //
-	HasTable  int                   //有table
-	HasKey    int                   //是否匹配到table中的标题
-	HasBrand  int                   //有品牌
-	HasGoods  int                   //有商品
-	IsFile    bool                  //有附件
+	RuleBlock    *RuleBlock                        //分块规则
+	BrandData    [][]map[string]string             //
+	HasTable     int                               //有table
+	HasKey       int                               //是否匹配到table中的标题
+	HasBrand     int                               //有品牌
+	HasGoods     int                               //有商品
+	IsFile       bool                              //有附件
 }
 
 type ExtField struct {
@@ -38,6 +42,12 @@ type ExtField struct {
 	Score     int         //得分
 }
 
+// RuleBlock holds the compiled block-splitting rules; the two slices are index-aligned, one entry per rule.
+type RuleBlock struct {
+	BlockRegs []*regexp.Regexp // tried against the text to decide which serial-number style splits it into blocks
+	TitleRegs []*regexp.Regexp // looked up by the index of the chosen BlockRegs entry to match a block's serial number and title
+}
+
 //块
 type Block struct {
 	Tags     []Tags          //对块做的标签,可以作为数据抽取的依据

+ 190 - 0
src/web/templates/admin/blockinfo.html

@@ -0,0 +1,190 @@
+{{template "inc"}}
+<!-- Main Header -->
+{{template "header"}}
+<!-- Left side column. 权限菜单 -->
+{{template "memu"}}
+
+<!-- Content Wrapper. Contains page content -->
+<div class="content-wrapper">
+	<section class="content-header">
+		<h1>
+			<small><a class="btn btn-primary opr" opr="new">新增规则</a></small>
+		</h1>
+		<ol class="breadcrumb">
+		  <li><a href="/admin/version"><i class="fa fa-dashboard"></i>抽取版本</a></li>
+		  <li class="active"><a href="/admin/version/blockinfo?vid={{.vid}}">分块配置</a></li>
+		</ol>
+    </section>
+  <!-- Main content -->
+  <section class="content">
+      <div class="row">
+	      <div class="col-xs-12">
+	        <div class="box">
+		        <div class="box-body">
+		            <table id="dataTable" class="table table-striped table-bordered table-hover">
+		              <thead>
+		              <tr>
+						<th>优先级</th>
+						<th>分块正则</th>
+						<th>块标题正则</th>
+						<th>操作</th>
+		              </tr>
+		              </thead>
+		            </table>
+					<p class="text-danger text-right">注:可拖拽调整优先级顺序</p>
+		        </div>
+	          <!-- /.box-body -->
+	        </div>
+        <!-- /.box -->
+		</div>
+	</div>
+  </section>
+</div>
+	
+<!-- footer -->
+{{template "dialog"}}
+{{template "footer"}}
+<link rel="stylesheet" href="https://cdn.datatables.net/rowreorder/1.2.5/css/rowReorder.bootstrap.min.css">
+<script src="https://cdn.datatables.net/rowreorder/1.2.5/js/dataTables.rowReorder.min.js"></script>
+<script>
+menuActive("version")
+$(function () {
+	ttable=$('#dataTable').DataTable({
+		"columnDefs": [
+			{
+				"targets": 0,
+				visible:true
+			},
+	        {
+	            "orderable": false,
+	            "targets": "_all"
+	        }
+		],
+		rowReorder: {
+			dataSrc: 'index',
+            selector: 'tr'
+        },
+		"order": [[ 0, 'desc' ]],
+		"paging"      : false,
+		"lengthChange": false,
+		"searching"   : false,
+		"info"        : true,
+		"autoWidth"   : false,
+		"language": {
+            "url": "/res/dist/js/dataTables.chinese.lang"
+        },
+		"ajax": {
+			"url": "/admin/version/blockinfo_list",
+			"type": "post",
+			"data":{"vid":{{ .vid}}}
+		},
+		"columns": [
+			{"data": "index","orderable": false},
+			{"data": "block_reg","width":"50%"},
+			{"data": "title_reg","width":"30%"},
+			{"data":"_id","width":"12%",render:function(val,a,row){
+				return '<a class="btn btn-sm btn-primary opr" opr="edit">编辑</a>&nbsp;<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\')">删除</a>';
+			}}
+       	]
+	});
+	ttable.on('init.dt', function () {
+		$(".opr").click(function(){
+			var n=$(this).attr("opr")
+			var _tit="",htmlObj={},obj,tag=[]
+			switch(n){
+			case "edit":	
+                obj=ttable.row($(this).closest("tr")).data();
+			case "new":
+                tag=[
+						{label:"分块正则",s_label:"block_reg",type:"tpl_input",placeholder:"分块正则",must:true},
+						{label:"块标题正则",s_label:"title_reg",type:"tpl_input",placeholder:"块标题正则",must:true},
+                        {s_label:"_id",type:"tpl_hidden"},
+					]
+				
+				if(n=="new"){
+					_tit="新增规则"
+					obj={}
+				}else{
+					_tit="编辑规则"
+				}
+				htmlObj={
+					title:_tit,
+					tag:tag,
+					bts:[
+						{label:"保存",class:"btn-primary",
+							fun:function(){
+								var block_reg = $.trim($("#block_reg").val());
+								var title_reg = $.trim($("#title_reg").val());
+								var bcon=true;
+								if(block_reg==""||title_reg==""){
+									bcon=false;
+								}
+								if (bcon){
+									var obj={
+										_id:$("#_id").val(),
+										block_reg:block_reg,
+										title_reg:title_reg,
+										vid:{{.vid}}
+									}
+                                    //console.log(obj)							
+									$.post("/admin/version/blockinfo_save",obj,function(data){
+										if(data.status){
+											window.location.href="/admin/version/blockinfo?vid={{.vid}}";	
+										}else{
+											showTip("保存失败!",1000)
+										}
+									},'json')
+								}else{
+									alert("红色标签的表单不能为空!")
+								}
+							}
+						}
+					]
+				}
+			OpenDialog(htmlObj,obj)
+			break;
+			}
+		});
+	});
+	ttable.on( 'order.dt search.dt', function () {
+        ttable.column(0, {search:'applied', order:'applied'}).nodes().each( function (cell, i) {
+            cell.innerHTML = i+1;
+        } );
+    } ).draw();
+	ttable.on( 'row-reordered', function ( e, diff, edit ) {
+		var _ids = [],indexs=[];
+		for(var i=0;i<diff.length;i++){
+			var rowData = ttable.row( diff[i].node ).data();
+			_ids.push(rowData._id);
+			indexs.push(rowData.index);
+		}
+		ttable.rowReorder.disable();
+		$.ajax({
+			type: "POST",
+			url: "/admin/version/blockinfo_updateindex",
+			data: {_ids:_ids,indexs:indexs},
+			dataType: "json",
+			traditional: true,
+			success: function(r){
+				ttable.rowReorder.enable();
+			}
+		});
+    });
+})
+function del(_id){
+	showConfirm("确定删除?", function() {
+		$.ajax({
+			url:"/admin/version/blockinfo_delete",
+			type:"post",
+			data:{"_id":_id},
+			success:function(r){
+				if(r.status){				
+					ttable.ajax.reload();
+				}else{
+					showTip("删除失败", 1000, function() {});
+				}
+			}
+		})
+	});
+}
+</script>

+ 3 - 2
src/web/templates/admin/version.html

@@ -174,8 +174,9 @@ $(function () {
 			}},
 			{ "data":"_id","width":"25%",render:function(val,a,row){
 				return '<div class="btn-group">'+
-						'<a class="btn btn-sm btn-success" href="/admin/version/info?vid='+val+'" >属性配置</a>'+
-						'<a class="btn btn-sm btn-info" href="/admin/version/pkginfo?vid='+val+'" >分包配置</a>'+
+						'<a class="btn btn-sm btn-success" href="/admin/version/info?vid='+val+'" >属性</a>'+
+						'<a class="btn btn-sm btn-warning" href="/admin/version/blockinfo?vid='+val+'" >分块</a>'+
+						'<a class="btn btn-sm btn-info" href="/admin/version/pkginfo?vid='+val+'" >分包</a>'+
 						/*'<a class="btn btn-sm btn-primary opr" opr="edit">编&nbsp;&nbsp;辑1</a>'+*/
 						"<a class=\"btn btn-sm btn-primary opr\" href='#' onclick=\"edit('"+val+"')\">编&nbsp;&nbsp;辑</a> &nbsp;"+
 						'<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\',\''+row["version"]+'\')">删&nbsp;&nbsp;除</a>'