6 жил өмнө · 1300787264
--- a/src/jy/admin/rulecheck.go
+++ b/src/jy/admin/rulecheck.go
@@ -286,7 +286,7 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 
															 //lua脚本前置过滤验证
														
 
															 func checkPreScript(code, name, infoid, script string) map[string]interface{} {
														
 
															 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
														
 
															-	j,_ := extract.PreInfo(*doc)
														
 
															+	j, _ := extract.PreInfo(*doc)
														
 
															 	delete(*j.Data, "contenthtml")
														
 
															 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
														
 
															 	lua.Block = j.Block
														
@@ -306,7 +306,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 
															 	e.InitRuleCore()
														
 
															 	e.InitTag()
														
 
															 	tmp, _ := Mgo.FindById("bidding", infoid, extract.Fields)
														
 
															-	j,_ := extract.PreInfo(*tmp)
														
 
															+	j, _ := extract.PreInfo(*tmp)
														
 
															 	doc := *j.Data
														
 
															 	//全局前置规则,结果覆盖doc属性
														
 
															 	for _, v := range e.RulePres {
														
@@ -350,7 +350,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 
															 //lua脚本抽取验证
														
 
															 func checkCoreScript(code, name, infoid, script string) interface{} {
														
 
															 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
														
 
															-	j ,_ := extract.PreInfo(*doc)
														
 
															+	j, _ := extract.PreInfo(*doc)
														
 
															 	delete(*j.Data, "contenthtml")
														
 
															 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
														
 
															 	lua.Block = j.Block
														
--- a/src/jy/admin/version.go
+++ b/src/jy/admin/version.go
@@ -2,15 +2,17 @@
 
															 package admin
														
 
															 import (
														
 
															-	"github.com/gin-contrib/sessions"
														
 
															-	"github.com/gin-gonic/gin"
														
 
															-	"gopkg.in/mgo.v2/bson"
														
 
															 	. "jy/mongodbutil"
														
 
															 	"jy/util"
														
 
															+	"log"
														
 
															 	"net/http"
														
 
															 	qu "qfw/util"
														
 
															 	"strings"
														
 
															 	"time"
														
 
															+
														
 
															+	"github.com/gin-contrib/sessions"
														
 
															+	"github.com/gin-gonic/gin"
														
 
															+	"gopkg.in/mgo.v2/bson"
														
 
															 )
														
 
															 func init() {
														
@@ -27,20 +29,20 @@ func init() {
 
															 	})
														
 
															 	//根据_id查询版本详细信息
														
 
															 	Admin.GET("/version/dataById", func(c *gin.Context) {
														
 
															-		gid ,b :=c.GetQuery("_id")
														
 
															-		if !b || !bson.IsObjectIdHex(gid){
														
 
															-			c.JSON(400,gin.H{"req":false})
														
 
															+		gid, b := c.GetQuery("_id")
														
 
															+		if !b || !bson.IsObjectIdHex(gid) {
														
 
															+			c.JSON(400, gin.H{"req": false})
														
 
															 			return
														
 
															 		}
														
 
															 		data, _ := Mgo.FindOne("version", `{"_id":"`+gid+`","delete":false}`)
														
 
															-		c.JSON(200, gin.H{"req":true,"data": data})
														
 
															+		c.JSON(200, gin.H{"req": true, "data": data})
														
 
															 	})
														
 
															 	Admin.POST("/version/save", func(c *gin.Context) {
														
 
															 		_id, _ := c.GetPostForm("_id")
														
 
															 		data := GetPostForm(c)
														
 
															-		if data["s_filefileds"]!=nil{
														
 
															-			data["s_filefileds"] = strings.Split(data["s_filefileds"].(string),",")
														
 
															+		if data["s_filefileds"] != nil {
														
 
															+			data["s_filefileds"] = strings.Split(data["s_filefileds"].(string), ",")
														
 
															 		}
														
 
															 		if _id != "" {
														
 
															 			Mgo.UpdateById("version", _id, map[string]interface{}{"$set": data})
														
@@ -249,6 +251,70 @@ func init() {
 
															 		//b := Mgo.Del("versioninfo", `{"_id":"`+_id+`"}`)
														
 
															 		c.JSON(200, gin.H{"rep": b})
														
 
															 	})
														
 
															+	// Block configuration page: renders blockinfo.html, handing the template
															+	// the version id taken from the "vid" query parameter.
															+	Admin.GET("/version/blockinfo", func(c *gin.Context) {
															+		c.HTML(http.StatusOK, "blockinfo.html", gin.H{"vid": c.Query("vid")})
															+	})
														
 
															+	// Lists the block rules of a version: queries "block_info" for records
															+	// whose "vid" matches the posted value and whose "delete" flag is false,
															+	// and adds an "id" entry (string form of "_id") to every record.
															+	Admin.POST("/version/blockinfo_list", func(c *gin.Context) {
															+		vid, _ := c.GetPostForm("vid")
															+		data, _ := Mgo.Find("block_info", bson.M{"vid": vid, "delete": false}, `{"index":-1}`, `{"block_reg":1,"title_reg":1,"index":1}`, false, -1, -1)
															+		// NOTE(review): the save handler checks Find's success flag before
															+		// dereferencing its result, so the result is presumably nil on
															+		// failure — guard here instead of panicking. Confirm against Mgo.Find.
															+		if data != nil {
															+			for _, v := range *data {
															+				v["id"] = qu.BsonIdToSId(v["_id"])
															+			}
															+		}
															+		c.JSON(http.StatusOK, gin.H{"data": data})
															+	})
														
 
															+	// Saves a block rule. A non-empty "_id" updates that record's patterns;
															+	// an empty "_id" inserts a new record for the posted "vid". The response
															+	// is always {"status": <bool>}.
															+	Admin.POST("/version/blockinfo_save", func(c *gin.Context) {
															+		_id, _ := c.GetPostForm("_id")
															+		blockReg, _ := c.GetPostForm("block_reg")
															+		titleReg, _ := c.GetPostForm("title_reg")
															+		if _id != "" {
															+			// Existing rule: overwrite both patterns and stamp the update time.
															+			updated := Mgo.UpdateById("block_info", _id, bson.M{
															+				"$set": bson.M{
															+					"l_updatetime": time.Now().Unix(),
															+					"block_reg":    blockReg,
															+					"title_reg":    titleReg,
															+				},
															+			})
															+			c.JSON(http.StatusOK, gin.H{"status": updated})
															+			return
															+		}
															+		// New rule: its index is one less than the "index" of the single
															+		// record returned by the lookup below, or -1 when nothing comes back.
															+		vid, _ := c.GetPostForm("vid")
															+		index := -1
															+		if list, ok := Mgo.Find("block_info", bson.M{"vid": vid}, `{"index": 1}`, `{"index":1}`, false, 0, 1); ok && len(*list) == 1 {
															+			index = qu.IntAllDef((*list)[0]["index"], 1) - 1
															+		}
															+		savedID := Mgo.Save("block_info", bson.M{
															+			"delete":       false,
															+			"index":        index,
															+			"block_reg":    blockReg,
															+			"title_reg":    titleReg,
															+			"vid":          vid,
															+			"l_createtime": time.Now().Unix(),
															+			"s_username":   sessions.Default(c).Get("username"),
															+		})
															+		// Save reports failure with an empty id.
															+		c.JSON(http.StatusOK, gin.H{"status": savedID != ""})
															+	})
														
 
															+	// Bulk-updates the "index" of block rules. "_ids" and "indexs" are
															+	// parallel form arrays: the k-th id receives the k-th index.
															+	Admin.POST("/version/blockinfo_updateindex", func(c *gin.Context) {
															+		_ids := c.PostFormArray("_ids")
															+		indexs := c.PostFormArray("indexs")
															+		log.Println(_ids, indexs)
															+		// Reject the request before touching the database when an id has no
															+		// matching index; indexing indexs[k] below would otherwise panic,
															+		// possibly after some records were already updated.
															+		if len(indexs) < len(_ids) {
															+			c.JSON(http.StatusBadRequest, gin.H{"status": false})
															+			return
															+		}
															+		for k, _id := range _ids {
															+			Mgo.UpdateById("block_info", _id, bson.M{
															+				"$set": bson.M{
															+					"index": qu.IntAll(indexs[k]),
															+				},
															+			})
															+		}
															+		c.JSON(http.StatusOK, gin.H{})
															+	})
														
 
															+	// Soft-deletes a block rule: sets "delete" to true on the record named
															+	// by the posted "_id" and reports the update result as "status".
															+	Admin.POST("/version/blockinfo_delete", func(c *gin.Context) {
															+		_id := c.PostForm("_id")
															+		markDeleted := bson.M{"$set": bson.M{"delete": true}}
															+		ok := Mgo.UpdateById("block_info", _id, markDeleted)
															+		c.JSON(http.StatusOK, gin.H{"status": ok})
															+	})
														
 
															 }
														
 
															 //克隆版本通用属性
														
--- a/src/jy/extract/exportask.go
+++ b/src/jy/extract/exportask.go
@@ -72,9 +72,9 @@ func extractAndExport(v string, t map[string]interface{}) {
 
															 		var j, jf *ju.Job
														
 
															 		if e.IsFileField && v["projectinfo"] != nil {
														
 
															 			v["isextFile"] = true
														
 
															-			j, jf = PreInfo(v)
														
 
															+			j, jf = e.PreInfo(v)
														
 
															 		} else {
														
 
															-			j, _ = PreInfo(v)
														
 
															+			j, _ = e.PreInfo(v)
														
 
															 		}
														
 
															 		e.TaskInfo.ProcessPool <- true
														
 
															 		go e.ExtractProcess(j, jf)
														
--- a/src/jy/extract/extract.go
+++ b/src/jy/extract/extract.go
@@ -83,9 +83,9 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 
															 			var j, jf *ju.Job
														
 
															 			if ext.IsFileField && v["projectinfo"] != nil {
														
 
															 				v["isextFile"] = true
														
 
															-				j, jf = PreInfo(v)
														
 
															+				j, jf = ext.PreInfo(v)
														
 
															 			} else {
														
 
															-				j, _ = PreInfo(v)
														
 
															+				j, _ = ext.PreInfo(v)
														
 
															 			}
														
 
															 			ext.TaskInfo.ProcessPool <- true
														
 
															 			go ext.ExtractProcess(j, jf)
														
@@ -184,9 +184,9 @@ func RunExtractTask(taskId string) {
 
															 			var j, jf *ju.Job
														
 
															 			if ext.IsFileField && v["projectinfo"] != nil {
														
 
															 				v["isextFile"] = true
														
 
															-				j, jf = PreInfo(v)
														
 
															+				j, jf = ext.PreInfo(v)
														
 
															 			} else {
														
 
															-				j, _ = PreInfo(v)
														
 
															+				j, _ = ext.PreInfo(v)
														
 
															 			}
														
 
															 			ext.TaskInfo.ProcessPool <- true
														
 
															 			go ext.ExtractProcess(j, jf)
														
@@ -201,8 +201,13 @@ func RunExtractTask(taskId string) {
 
															 	time.AfterFunc(1*time.Minute, func() { RunExtractTask(taskId) })
														
 
															 }
														
 
															-//信息预处理
														
 
															+// PreInfo preprocesses a document without binding it to an extraction
															+// version. It delegates to (*ExtractTask).PreInfo on a throwaway task whose
															+// RuleBlock is an empty, non-nil rule set: the job built from it carries
															+// that RuleBlock, and the block-splitting code dereferences it (for example
															+// DivideBlock reads ruleBlock.BlockRegs), so a nil value would panic there.
															+//
															+// NOTE(review): no version-specific block rules are loaded here, so jobs
															+// produced by this function are split with an empty rule set. If callers
															+// need the latest version's rules, load them (see InitBlockRule) before
															+// calling the method form instead.
															 func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
															+	return (&ExtractTask{RuleBlock: &ju.RuleBlock{}}).PreInfo(doc)
															+}
														
 
															+
														
 
															+//信息预处理-和版本关联
														
 
															+func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
														
 
															 	defer qu.Catch()
														
 
															 	//判断是否有附件这个字段
														
 
															 	var isextFile bool
														
@@ -244,6 +249,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 
															 		Province:  qu.ObjToString(doc["area"]),
														
 
															 		Result:    map[string][]*ju.ExtField{},
														
 
															 		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
														
 
															+		RuleBlock: e.RuleBlock,
														
 
															 	}
														
 
															 	if isextFile {
														
 
															 		jf = &ju.Job{
														
@@ -257,6 +263,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 
															 			Province:   qu.ObjToString(doc["area"]),
														
 
															 			Result:     map[string][]*ju.ExtField{},
														
 
															 			BuyerAddr:  qu.ObjToString(doc["buyeraddr"]),
														
 
															+			RuleBlock:  e.RuleBlock,
														
 
															 			IsFile:     isextFile,
														
 
															 		}
														
 
															 	}
														
--- a/src/jy/extract/extractInit.go
+++ b/src/jy/extract/extractInit.go
@@ -53,21 +53,22 @@ type TaskInfo struct {
 
															 	TestLua                             bool      //检查测试用
														
 
															 }
														
 
															 type ExtractTask struct {
														
 
															-	Id            string              //任务id
														
 
															-	IsRun         bool                //是否启动
														
 
															-	Content       string              //信息内容
														
 
															-	TaskInfo      *TaskInfo           //任务信息
														
 
															-	RulePres      []*RegLuaInfo       //通用前置规则
														
 
															-	RuleBacks     []*RegLuaInfo       //通用后置规则
														
 
															-	RuleCores     []*RuleCore         //抽取规则
														
 
															-	PkgRuleCores  []*RuleCore         //分包抽取规则
														
 
															+	Id            string        //任务id
														
 
															+	IsRun         bool          //是否启动
														
 
															+	Content       string        //信息内容
														
 
															+	TaskInfo      *TaskInfo     //任务信息
														
 
															+	RulePres      []*RegLuaInfo //通用前置规则
														
 
															+	RuleBacks     []*RegLuaInfo //通用后置规则
														
 
															+	RuleCores     []*RuleCore   //抽取规则
														
 
															+	PkgRuleCores  []*RuleCore   //分包抽取规则
														
 
															+	RuleBlock     *ju.RuleBlock
														
 
															 	Tag           map[string][]*Tag   //标签库
														
 
															 	ClearFn       map[string][]string //清理函数
														
 
															 	IsExtractCity bool                //是否开启城市抽取
														
 
															 	Fields        map[string]int      //抽取属性组
														
 
															-	IsFileField       bool      //是否开启附件抽取
														
 
															-	FileFields        map[string]int      //抽取附件属性组
														
 
															+	IsFileField bool           //是否开启附件抽取
														
 
															+	FileFields  map[string]int //抽取附件属性组
														
 
															 	ResultChanel chan bool                  //抽取结果详情
														
 
															 	ResultArr    [][]map[string]interface{} //抽取结果详情
														
@@ -914,16 +915,16 @@ func (e *ExtractTask) InitFile() {
 
															 	//query:=bson.M{"version":e.TaskInfo.Version,"delete":false}
														
 
															 	ve, _ := db.Mgo.FindOne("version", `{"version":"`+e.TaskInfo.Version+`","delete":false}`)
														
 
															 	//ve, _ := db.Mgo.FindOne("version", query)
														
 
															-	if ve == nil{
														
 
															+	if ve == nil {
														
 
															 		return
														
 
															 	}
														
 
															-	if (*ve)["isfiles"]!=nil && (*ve)["isfiles"].(bool){
														
 
															-		e.IsFileField =true
														
 
															+	if (*ve)["isfiles"] != nil && (*ve)["isfiles"].(bool) {
														
 
															+		e.IsFileField = true
														
 
															 	}
														
 
															-	efiled := make(map[string]int,0)
														
 
															-	if (*ve)["s_filefileds"] != nil{
														
 
															-		for _,vff :=range (*ve)["s_filefileds"].([]interface{}) {
														
 
															-			efiled[vff.(string)]=1
														
 
															+	efiled := make(map[string]int, 0)
														
 
															+	if (*ve)["s_filefileds"] != nil {
														
 
															+		for _, vff := range (*ve)["s_filefileds"].([]interface{}) {
														
 
															+			efiled[vff.(string)] = 1
														
 
															 		}
														
 
															 	}
														
 
															 	e.FileFields = efiled
														
@@ -977,3 +978,33 @@ func (c *ClearTask) InitClearLuas() {
 
															 		}
														
 
															 	}
														
 
															 }
														
 
															+
														
 
															+// InitBlockRule loads the block-splitting rules stored in the "block_info"
															+// collection for the task's version id and keeps the compiled patterns in
															+// e.RuleBlock.
															+//
															+// Every record contributes a (block_reg, title_reg) pair. Each string is
															+// unescaped as the body of a Go string literal with strconv.Unquote and
															+// then compiled. A record is skipped, with a log line, when either pattern
															+// is empty, cannot be unquoted, or does not compile, so BlockRegs and
															+// TitleRegs always have the same length and matching positions.
															+// e.RuleBlock is always left non-nil, even when nothing could be loaded.
															+func (e *ExtractTask) InitBlockRule() {
															+	datas, _ := db.Mgo.Find("block_info", map[string]interface{}{
															+		"vid":    e.TaskInfo.VersionId,
															+		"delete": false,
															+	}, `{"index":-1}`, `{"block_reg":1,"title_reg":1}`, false, -1, -1)
															+	brs, trs := []*regexp.Regexp{}, []*regexp.Regexp{}
															+	// NOTE(review): Find's second result is discarded, so guard against a
															+	// nil result instead of dereferencing it blindly — confirm against Mgo.Find.
															+	if datas != nil {
															+		for _, v := range *datas {
															+			rawBlock, _ := v["block_reg"].(string)
															+			rawTitle, _ := v["title_reg"].(string)
															+			if rawBlock == "" || rawTitle == "" {
															+				continue
															+			}
															+			blockSrc, err := strconv.Unquote(`"` + rawBlock + `"`)
															+			if err != nil {
															+				log.Println("InitBlockRule: skipping rule, cannot unquote block_reg", rawBlock, err)
															+				continue
															+			}
															+			titleSrc, err := strconv.Unquote(`"` + rawTitle + `"`)
															+			if err != nil {
															+				log.Println("InitBlockRule: skipping rule, cannot unquote title_reg", rawTitle, err)
															+				continue
															+			}
															+			blockReg, err := regexp.Compile(blockSrc)
															+			if err != nil {
															+				log.Println("InitBlockRule: skipping rule, cannot compile block_reg", blockSrc, err)
															+				continue
															+			}
															+			titleReg, err := regexp.Compile(titleSrc)
															+			if err != nil {
															+				log.Println("InitBlockRule: skipping rule, cannot compile title_reg", titleSrc, err)
															+				continue
															+			}
															+			// Append both together so the two slices stay index-aligned.
															+			brs = append(brs, blockReg)
															+			trs = append(trs, titleReg)
															+		}
															+	}
															+	e.RuleBlock = &ju.RuleBlock{
															+		BlockRegs: brs,
															+		TitleRegs: trs,
															+	}
															+}
														
--- a/src/jy/extract/extractudp.go
+++ b/src/jy/extract/extractudp.go
@@ -94,6 +94,7 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 
															 	ext.InitRuleCore()
														
 
															 	ext.InitTag()
														
 
															 	ext.InitClearFn()
														
 
															+	ext.InitBlockRule()
														
 
															 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
														
 
															 		//初始化城市DFA信息
														
 
															 		ext.InitDFA()
														
@@ -150,9 +151,9 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 
															 					var j, jf *ju.Job
														
 
															 					if ext.IsFileField && v["projectinfo"] != nil {
														
 
															 						v["isextFile"] = true
														
 
															-						j, jf = PreInfo(v)
														
 
															+						j, jf = ext.PreInfo(v)
														
 
															 					} else {
														
 
															-						j, _ = PreInfo(v)
														
 
															+						j, _ = ext.PreInfo(v)
														
 
															 					}
														
 
															 					ext.TaskInfo.ProcessPool <- true
														
 
															 					go ext.ExtractProcess(j, jf)
														
@@ -176,9 +177,9 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 
															 					var j, jf *ju.Job
														
 
															 					if ext.IsFileField && v["projectinfo"] != nil {
														
 
															 						v["isextFile"] = true
														
 
															-						j, jf = PreInfo(v)
														
 
															+						j, jf = ext.PreInfo(v)
														
 
															 					} else {
														
 
															-						j, _ = PreInfo(v)
														
 
															+						j, _ = ext.PreInfo(v)
														
 
															 					}
														
 
															 					ext.TaskInfo.ProcessPool <- true
														
 
															 					go ext.ExtractProcess(j, jf)
														
@@ -216,9 +217,9 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 
															 				var j, jf *ju.Job
														
 
															 				if ext.IsFileField && v["projectinfo"] != nil {
														
 
															 					v["isextFile"] = true
														
 
															-					j, jf = PreInfo(v)
														
 
															+					j, jf = ext.PreInfo(v)
														
 
															 				} else {
														
 
															-					j, _ = PreInfo(v)
														
 
															+					j, _ = ext.PreInfo(v)
														
 
															 				}
														
 
															 				ext.TaskInfo.ProcessPool <- true
														
 
															 				go ext.ExtractProcess(j, jf)
														
--- a/src/jy/pretreated/analystep.go
+++ b/src/jy/pretreated/analystep.go
@@ -21,20 +21,20 @@ func AnalyStart(job *util.Job) {
 
															 	//
														
 
															 	tabs, ration := ComputeConRatio(con, 1)
														
 
															 	if len(tabs) > 0 {
														
 
															-		newcon, newtabs, newration := findBigText(con, ration, tabs)
														
 
															+		newcon, newtabs, newration := FindBigText(con, ration, tabs)
														
 
															 		if newcon != "" && newration == 0 {
														
 
															 			con = newcon
														
 
															 			tabs = newtabs
														
 
															 			ration = newration
														
 
															 		}
														
 
															 	}
														
 
															-	blockArrays, _ := DivideBlock(con, 1)
														
 
															+	blockArrays, _ := DivideBlock(con, 1, job.RuleBlock)
														
 
															 	if len(blockArrays) > 0 { //有分块
														
 
															 		//从块里面找分包
														
 
															 		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title)
														
 
															 		for _, bl := range blockArrays {
														
 
															 			if len([]rune(bl.Text)) > 80 {
														
 
															-				ba1, _ := DivideBlock(bl.Text, 1)
														
 
															+				ba1, _ := DivideBlock(bl.Text, 1, job.RuleBlock)
														
 
															 				if len(ba1) > 0 {
														
 
															 					t := ""
														
 
															 					for _, t1 := range ba1 {
														
@@ -49,7 +49,7 @@ func AnalyStart(job *util.Job) {
 
															 			t1, _ := ComputeConRatio(bl.Text, 2)
														
 
															 			if len(t1) > 0 {
														
 
															 				job.HasTable = 1 //添加标识:文本中有table
														
 
															-				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid)
														
 
															+				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid, job.RuleBlock)
														
 
															 				processTableResult(tabres, bl, job)
														
 
															 				if bl.Title == "" && tabres.BlockTag != "" {
														
 
															 					bl.Title = tabres.BlockTag
														
@@ -67,7 +67,7 @@ func AnalyStart(job *util.Job) {
 
															 			job.HasTable = 1 //添加标识:文本中有table
														
 
															 			newCon = TextAfterRemoveTable(con)
														
 
															 			job.BlockPackage = FindPackageFromText(job.Title, newCon)
														
 
															-			tabres := AnalyTableV2(tabs, job.Category, "", con, 1, job.SourceMid)
														
 
															+			tabres := AnalyTableV2(tabs, job.Category, "", con, 1, job.SourceMid, job.RuleBlock)
														
 
															 			processTableResult(tabres, bl, job)
														
 
															 			//			for k, v := range bl.TableKV.Kv {
														
 
															 			//				log.Println("bl.TableKV.Kv", k, v)
														
@@ -224,7 +224,7 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 
															 //ration==1 遍历所有tabs，ration!=1 tabs只有一个
														
 
															 func tableDivideBlock(con string, ration float32, tabs []*goquery.Selection) string {
														
 
															 	if len(tabs) != 1 {
														
 
															-		return ""
														
 
															+		//return ""
														
 
															 	}
														
 
															 	for _, tab := range tabs {
														
 
															 		content := ""
														
@@ -275,7 +275,7 @@ func tableDivideBlock(con string, ration float32, tabs []*goquery.Selection) str
 
															 }
														
 
															 //查找大文本，5次
														
 
															-func findBigText(con string, r float32, t []*goquery.Selection) (content string, tabs []*goquery.Selection, ration float32) {
														
 
															+func FindBigText(con string, r float32, t []*goquery.Selection) (content string, tabs []*goquery.Selection, ration float32) {
														
 
															 	content = tableDivideBlock(con, r, t)
														
 
															 	if content == "" {
														
 
															 		return
														
--- a/src/jy/pretreated/analytable.go
+++ b/src/jy/pretreated/analytable.go
@@ -524,7 +524,7 @@ func (table *Table) MergerToTableresult() {
 
															 解析表格入口
														
 
															 返回：汇总表格对象
														
 
															 **/
														
 
															-func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}) (tabres *TableResult) {
														
 
															+func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock) (tabres *TableResult) {
														
 
															 	defer qutil.Catch()
														
 
															 	//u.Debug(con)
														
 
															 	if itype == 1 {
														
@@ -532,7 +532,7 @@ func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, ityp
 
															 		con = RepairCon(con)
														
 
															 	}
														
 
															 	//生成tableresult对象
														
 
															-	tabres = NewTableResult(_id, toptype, blockTag, con, itype)
														
 
															+	tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
														
 
															 	//可以有多个table
														
 
															 	for _, table := range tabs {
														
 
															 		//隐藏表格跳过
														
@@ -1866,7 +1866,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 
															 				L:
														
 
															 					for in2, v1 := range vs {
														
 
															 						if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
														
 
															-							for _, serial := range regSerialTitles_2 {
														
 
															+							for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
														
 
															 								if serial.MatchString(v1) {
														
 
															 									break L
														
 
															 								}
														
@@ -2394,7 +2394,7 @@ L:
 
															 				jumpNextTd = false
														
 
															 			}
														
 
															 			///////////////////////////////////////
														
 
															-			thisTdKvs := kvAfterDivideBlock(td.Text, 3)
														
 
															+			thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock)
														
 
															 			if len(thisTdKvs) == 0 {
														
 
															 				thisTdKvs = colonkvEntity.GetKvs(td.Text, "", 2)
														
 
															 			}
														
--- a/src/jy/pretreated/division.go
+++ b/src/jy/pretreated/division.go
@@ -11,15 +11,15 @@ import (
 
															 //分块、分段功能
														
 
															 var (
														
 
															-	regSerialTitles = []string{
														
 
															+	/*regSerialTitles = []string{
														
 
															 		"([一二三四五六七八九十]+)[\u3000\u2003\u00a0\\s]*[、．.:：，](.*)",
														
 
															 		"[（(]([一二三四五六七八九十]+)[)）][\u3000\u2003\u00a0\\s]*[、．.:：]?(.*)",
														
 
															 		"(\\d+)[\u3000\u2003\u00a0\\s]*、(.*)",
														
 
															 		"(\\d+)[\u3000\u2003\u00a0\\s]*[.．]([^\\d][^\r\n]+)",
														
 
															 		"(\\d+)[\u3000\u2003\u00a0\\s]+([^\\d][^\r\n]+)",
														
 
															 		"1[.．](\\d+)[\u3000\u2003\u00a0\\s]+([^\\d.．][^\r\n]+)",
														
 
															-	}
														
 
															-	regSerialTitles_1 = []*regexp.Regexp{
														
 
															+	}*/
														
 
															+	/*regSerialTitles_1 = []*regexp.Regexp{
														
 
															 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)([一二三四五六七八九十]+)[\u3000\u2003\u00a0\\s]*[、．.:：，](.*)"),
														
 
															 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)[（(]([一二三四五六七八九十]+)[)）][\u3000\u2003\u00a0\\s]*[、．.:：]?(.*)"),
														
 
															 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)(\\d+)[\u3000\u2003\u00a0\\s]*、(.*)"),
														
@@ -36,7 +36,7 @@ var (
 
															 		regexp.MustCompile("^(\\d+)[\u3000\u2003\u00a0\\s]+([^\\d][^\r\n]+)$"),
														
 
															 		regexp.MustCompile("^1[.．](\\d+)[\u3000\u2003\u00a0\\s]+([^\\d.．][^\r\n]+)$"),
														
 
															 		regexp.MustCompile("^[（](\\d+)[\u3000\u2003\u00a0\\s）]+([^\r\n]+)$"),
														
 
															-	}
														
 
															+	}*/
														
 
															 	regReplAllTd       = regexp.MustCompile("(?smi)<td.*?>.+?</td>")
														
 
															 	regIsNumber        = regexp.MustCompile("^\\d+$")
														
 
															 	regIsChineseNumber = regexp.MustCompile("^[一二三四五六七八九十]+$")
														
@@ -64,7 +64,7 @@ var (
 
															 )
														
 
															 //分块
														
 
															-func DivideBlock(content string, from int) ([]*util.Block, int) {
														
 
															+func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.Block, int) {
														
 
															 	defer qutil.Catch()
														
 
															 	returnValue := 0
														
 
															 	var blocks []*util.Block
														
@@ -75,7 +75,7 @@ func DivideBlock(content string, from int) ([]*util.Block, int) {
 
															 	//contentTemp := regReplAllTd.ReplaceAllString(content, "")
														
 
															 	contentTemp := TextAfterRemoveTable(content)
														
 
															 	tdIndexs := regReplAllTd.FindAllStringSubmatchIndex(content, -1)
														
 
															-	regContenSerialTitle, regSerialTitleIndex := getSerialType(contentTemp)
														
 
															+	regContenSerialTitle, regSerialTitleIndex := getSerialType(contentTemp, ruleBlock.BlockRegs)
														
 
															 	//没有分块
														
 
															 	if regSerialTitleIndex == -1 {
														
 
															 		if len(contentTemp) == len(content) {
														
@@ -86,7 +86,7 @@ func DivideBlock(content string, from int) ([]*util.Block, int) {
 
															 		}
														
 
															 	}
														
 
															 	//匹配序号和标题
														
 
															-	regSerialTitle := regSerialTitles_2[regSerialTitleIndex]
														
 
															+	regSerialTitle := ruleBlock.TitleRegs[regSerialTitleIndex]
														
 
															 	indexs := regContenSerialTitle.FindAllStringIndex(content, -1)
														
 
															 	indexs = filterSerial(content, indexs, tdIndexs)
														
 
															 	//头块
														
@@ -330,11 +330,11 @@ func filterSerial(content string, indexs, tdIndexs [][]int) [][]int {
 
															 }
														
 
															 //获取正文所用的序号类型
														
 
															-func getSerialType(content string) (*regexp.Regexp, int) {
														
 
															+func getSerialType(content string, blockRegs []*regexp.Regexp) (*regexp.Regexp, int) {
														
 
															 	var regContenSerialTitle *regexp.Regexp
														
 
															 	//先判断文章最外层使用的是哪种序号
														
 
															 	contentStartIndex, regSerialTitleIndex := -1, -1
														
 
															-	for k, v := range regSerialTitles_1 {
														
 
															+	for k, v := range blockRegs {
														
 
															 		indexs := v.FindStringIndex(content)
														
 
															 		//只用最外层的序号，里面的过滤掉
														
 
															 		if len(indexs) == 2 && !regSpliteSegment.MatchString(strings.TrimSpace(content[indexs[0]:indexs[1]])) && (contentStartIndex == -1 || indexs[0] < contentStartIndex) {
														
@@ -753,8 +753,8 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 
															 }
														
 
															 //分块之后的kv
														
 
															-func kvAfterDivideBlock(text string, from int) []*util.Kv {
														
 
															-	blocks, _ := DivideBlock(text, from)
														
 
															+func kvAfterDivideBlock(text string, from int, ruleBlock *util.RuleBlock) []*util.Kv {
														
 
															+	blocks, _ := DivideBlock(text, from, ruleBlock)
														
 
															 	kvs := []*util.Kv{}
														
 
															 	for _, v := range blocks {
														
 
															 		//util.Debug(v.Text)
														
--- a/src/jy/pretreated/tablev2.go
+++ b/src/jy/pretreated/tablev2.go
@@ -33,10 +33,11 @@ type TableResult struct {
 
															 	HasKey         int                   //有key
														
 
															 	HasBrand       int                   //有品牌
														
 
															 	HasGoods       int                   //有商品
														
 
															+	RuleBlock      *u.RuleBlock
														
 
															 }
														
 
															 //快速创建TableResult对象
														
 
															-func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int) *TableResult {
														
 
															+func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int, ruleBlock *u.RuleBlock) *TableResult {
														
 
															 	return &TableResult{
														
 
															 		Id:           Id,
														
 
															 		Toptype:      Toptype,
														
@@ -48,6 +49,7 @@ func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int) *T
 
															 		PackageMap:   NewSortMap(),
														
 
															 		SortKV:       NewSortMap(),
														
 
															 		SortKVWeight: map[string]int{},
														
 
															+		RuleBlock:    ruleBlock,
														
 
															 	}
														
 
															 }
														
@@ -144,7 +146,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
															 					stag = str
														
 
															 				}
														
 
															 			}
														
 
															-			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id)
														
 
															+			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock)
														
 
															 			td.BH = false
														
 
															 			td.SonTableResult = sonts
														
@@ -212,7 +214,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
															 	ub := []*u.Block{}
														
 
															 	if lentxt > 50 { //看是否划块
														
 
															 		//u.Debug(txt)
														
 
															-		ub, _ = DivideBlock(txt, 2)
														
 
															+		// Hand DivideBlock the table's own rule set: it reads ruleBlock.BlockRegs
															+		// unconditionally, so a nil *RuleBlock panics and no cell text is ever split.
															+		ub, _ = DivideBlock(txt, 2, table.TableResult.RuleBlock)
														
 
															 		if len(ub) > 0 {
														
 
															 			colonKvWeight := map[string]int{}
														
 
															 			spaceKvWeight := map[string]int{}
														
--- a/src/jy/util/article.go
+++ b/src/jy/util/article.go
@@ -1,5 +1,9 @@
 
															 package util
														
 
															+import (
														
 
															+	"regexp"
														
 
															+)
														
 
															+
														
 
															 //
														
 
															 type Job struct {
														
 
															 	SourceMid    string                            //数据源的MongoId
														
@@ -18,13 +22,13 @@ type Job struct {
 
															 	BlockPackage map[string]*BlockPackage          //块中的分包
														
 
															 	Winnerorder  []map[string]interface{}          //中标候选人排序
														
 
															 	PackageInfo  map[string]map[string]interface{} //分包信息
														
 
															-
														
 
															-	BrandData [][]map[string]string //
														
 
															-	HasTable  int                   //有table
														
 
															-	HasKey    int                   //是否匹配到table中的标题
														
 
															-	HasBrand  int                   //有品牌
														
 
															-	HasGoods  int                   //有商品
														
 
															-	IsFile    bool                  //有附件
														
 
															+	RuleBlock    *RuleBlock                        //分块规则
														
 
															+	BrandData    [][]map[string]string             //
														
 
															+	HasTable     int                               //有table
														
 
															+	HasKey       int                               //是否匹配到table中的标题
														
 
															+	HasBrand     int                               //有品牌
														
 
															+	HasGoods     int                               //有商品
														
 
															+	IsFile       bool                              //有附件
														
 
															 }
														
 
															 type ExtField struct {
														
@@ -38,6 +42,12 @@ type ExtField struct {
 
															 	Score     int         //得分
														
 
															 }
														
 
															+// RuleBlock is the block-splitting rule set: two regex lists paired by index.
														
 
															+type RuleBlock struct {
														
 
															+	BlockRegs []*regexp.Regexp // candidates getSerialType tries against the text to pick the serial-number style
														
 
															+	TitleRegs []*regexp.Regexp // DivideBlock indexes this with the position getSerialType returned, to match serial number and title
														
 
															+}
														
 
															+
														
 
															 //块
														
 
															 type Block struct {
														
 
															 	Tags     []Tags          //对块做的标签，可以作为数据抽取的依据
														
--- a/src/web/templates/admin/blockinfo.html
+++ b/src/web/templates/admin/blockinfo.html
@@ -0,0 +1,190 @@
 
															+{{template "inc"}}
														
 
															+<!-- Main Header -->
														
 
															+{{template "header"}}
														
 
															+<!-- Left side column. 权限菜单 -->
														
 
															+{{template "memu"}}
														
 
															+
														
 
															+<!-- Content Wrapper. Contains page content -->
														
 
															+<div class="content-wrapper">
														
 
															+	<section class="content-header">
														
 
															+		<h1>
														
 
															+			<small><a class="btn btn-primary opr" opr="new">新增规则</a></small>
														
 
															+		</h1>
														
 
															+		<ol class="breadcrumb">
														
 
															+		  <li><a href="/admin/version"><i class="fa fa-dashboard"></i>抽取版本</a></li>
														
 
															+		  <li class="active"><a href="/admin/version/blockinfo?vid={{.vid}}">分块配置</a></li>
														
 
															+		</ol>
														
 
															+    </section>
														
 
															+  <!-- Main content -->
														
 
															+  <section class="content">
														
 
															+      <div class="row">
														
 
															+	      <div class="col-xs-12">
														
 
															+	        <div class="box">
														
 
															+		        <div class="box-body">
														
 
															+		            <table id="dataTable" class="table table-striped table-bordered table-hover">
														
 
															+		              <thead>
														
 
															+		              <tr>
														
 
															+						<th>优先级</th>
														
 
															+						<th>分块正则</th>
														
 
															+						<th>块标题正则</th>
														
 
															+						<th>操作</th>
														
 
															+		              </tr>
														
 
															+		              </thead>
														
 
															+		            </table>
														
 
															+					<p class="text-danger text-right">注：可拖拽调整优先级顺序</p>
														
 
															+		        </div>
														
 
															+	          <!-- /.box-body -->
														
 
															+	        </div>
														
 
															+        <!-- /.box -->
														
 
															+		</div>
														
 
															+	</div>
														
 
															+  </section>
														
 
															+</div>
														
 
															+	
														
 
															+<!-- footer -->
														
 
															+{{template "dialog"}}
														
 
															+{{template "footer"}}
														
 
															+<link rel="stylesheet" href="https://cdn.datatables.net/rowreorder/1.2.5/css/rowReorder.bootstrap.min.css">
														
 
															+<script src="https://cdn.datatables.net/rowreorder/1.2.5/js/dataTables.rowReorder.min.js"></script>
														
 
															+<script>
														
 
															+menuActive("version")
														
 
															+// Page bootstrap for the block-rule list: builds the DataTable, wires the
															+// new/edit dialog, keeps the priority column numbered, and persists drag reordering.
															+$(function () {
															+	// ttable is deliberately an implicit global: del() reloads the table through it.
															+	ttable=$('#dataTable').DataTable({
															+		"columnDefs": [
															+			{
															+				"targets": 0,
															+				visible:true
															+			},
															+			{
															+				"orderable": false,
															+				"targets": "_all"
															+			}
															+		],
															+		rowReorder: {
															+			dataSrc: 'index',
															+			selector: 'tr'
															+		},
															+		"order": [[ 0, 'desc' ]],
															+		"paging"      : false,
															+		"lengthChange": false,
															+		"searching"   : false,
															+		"info"        : true,
															+		"autoWidth"   : false,
															+		"language": {
															+			"url": "/res/dist/js/dataTables.chinese.lang"
															+		},
															+		"ajax": {
															+			"url": "/admin/version/blockinfo_list",
															+			"type": "post",
															+			"data":{"vid":{{ .vid}}}
															+		},
															+		"columns": [
															+			{"data": "index","orderable": false},
															+			{"data": "block_reg","width":"50%"},
															+			{"data": "title_reg","width":"30%"},
															+			{"data":"_id","width":"12%",render:function(val,a,row){
															+				return '<a class="btn btn-sm btn-primary opr" opr="edit">编辑</a>&nbsp;<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\')">删除</a>';
															+			}}
															+		]
															+	});
															+	// Delegated binding: row buttons are re-created on every redraw (for example
															+	// after ttable.ajax.reload()), so handlers attached directly to the elements
															+	// that existed at init time would be lost on the new rows.
															+	$(document).on("click", ".opr", function(){
															+		var n=$(this).attr("opr")
															+		var _tit="",htmlObj={},obj,tag=[]
															+		switch(n){
															+		case "edit":
															+			obj=ttable.row($(this).closest("tr")).data();
															+			// intentional fall-through: edit shares the dialog definition with new
															+		case "new":
															+			tag=[
															+				{label:"分块正则",s_label:"block_reg",type:"tpl_input",placeholder:"分块正则",must:true},
															+				{label:"块标题正则",s_label:"title_reg",type:"tpl_input",placeholder:"块标题正则",must:true},
															+				{s_label:"_id",type:"tpl_hidden"},
															+			]
															+			if(n=="new"){
															+				_tit="新增规则"
															+				obj={}
															+			}else{
															+				_tit="编辑规则"
															+			}
															+			htmlObj={
															+				title:_tit,
															+				tag:tag,
															+				bts:[
															+					{label:"保存",class:"btn-primary",
															+						fun:function(){
															+							var block_reg = $.trim($("#block_reg").val());
															+							var title_reg = $.trim($("#title_reg").val());
															+							// both patterns are mandatory
															+							if(block_reg==""||title_reg==""){
															+								alert("红色标签的表单不能为空！")
															+								return
															+							}
															+							var obj={
															+								_id:$("#_id").val(),
															+								block_reg:block_reg,
															+								title_reg:title_reg,
															+								vid:{{.vid}}
															+							}
															+							$.post("/admin/version/blockinfo_save",obj,function(data){
															+								if(data.status){
															+									window.location.href="/admin/version/blockinfo?vid={{.vid}}";
															+								}else{
															+									showTip("保存失败！",1000)
															+								}
															+							},'json')
															+						}
															+					}
															+				]
															+			}
															+			OpenDialog(htmlObj,obj)
															+			break;
															+		}
															+	});
															+	// Renumber the first column 1..n after every ordering or search pass.
															+	ttable.on( 'order.dt search.dt', function () {
															+		ttable.column(0, {search:'applied', order:'applied'}).nodes().each( function (cell, i) {
															+			cell.innerHTML = i+1;
															+		} );
															+	} ).draw();
															+	// Persist the new priority of every row moved by a drag.
															+	ttable.on( 'row-reordered', function ( e, diff, edit ) {
															+		// nothing moved: no request needed
															+		if(!diff || diff.length==0){
															+			return;
															+		}
															+		var _ids = [],indexs=[];
															+		for(var i=0;i<diff.length;i++){
															+			var rowData = ttable.row( diff[i].node ).data();
															+			_ids.push(rowData._id);
															+			indexs.push(rowData.index);
															+		}
															+		// block further drags while the update is in flight
															+		ttable.rowReorder.disable();
															+		$.ajax({
															+			type: "POST",
															+			url: "/admin/version/blockinfo_updateindex",
															+			data: {_ids:_ids,indexs:indexs},
															+			dataType: "json",
															+			traditional: true,
															+			// complete runs on success and on failure, so a failed request
															+			// cannot leave reordering disabled
															+			complete: function(){
															+				ttable.rowReorder.enable();
															+			}
															+		});
															+	});
															+})
														
 
															+// del asks for confirmation, then deletes the rule with the given _id on the
															+// server and reloads the table; any failure is reported with a tip.
															+function del(_id){
															+	showConfirm("确定删除?", function() {
															+		$.ajax({
															+			url:"/admin/version/blockinfo_delete",
															+			type:"post",
															+			data:{"_id":_id},
															+			// parse the reply as JSON like the other requests on this page,
															+			// so r.status does not depend on the response content type
															+			dataType:"json",
															+			success:function(r){
															+				if(r.status){
															+					ttable.ajax.reload();
															+				}else{
															+					showTip("删除失败", 1000, function() {});
															+				}
															+			},
															+			// transport or parse failure: report it instead of failing silently
															+			error:function(){
															+				showTip("删除失败", 1000, function() {});
															+			}
															+		})
															+	});
															+}
														
 
															+</script>
														
--- a/src/web/templates/admin/version.html
+++ b/src/web/templates/admin/version.html
@@ -174,8 +174,9 @@ $(function () {
 
															 			}},
														
 
															 			{ "data":"_id","width":"25%",render:function(val,a,row){
														
 
															 				return '<div class="btn-group">'+
														
 
															-						'<a class="btn btn-sm btn-success" href="/admin/version/info?vid='+val+'" >属性配置</a>'+
														
 
															-						'<a class="btn btn-sm btn-info" href="/admin/version/pkginfo?vid='+val+'" >分包配置</a>'+
														
 
															+						'<a class="btn btn-sm btn-success" href="/admin/version/info?vid='+val+'" >属性</a>'+
														
 
															+						'<a class="btn btn-sm btn-warning" href="/admin/version/blockinfo?vid='+val+'" >分块</a>'+
														
 
															+						'<a class="btn btn-sm btn-info" href="/admin/version/pkginfo?vid='+val+'" >分包</a>'+
														
 
															 						/*'<a class="btn btn-sm btn-primary opr" opr="edit">编&nbsp;&nbsp;辑1</a>'+*/
														
 
															 						"<a class=\"btn btn-sm btn-primary opr\" href='#' onclick=\"edit('"+val+"')\">编&nbsp;&nbsp;辑</a> &nbsp;"+
														
 
															 						'<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\',\''+row["version"]+'\')">删&nbsp;&nbsp;除</a>'