6 years ago · d3d00520e9
--- a/src/config.json
+++ b/src/config.json
@@ -8,11 +8,11 @@
 
				     "elasticPoolSize": 30,
			
 
				     "mergetable": "projectset",
			
 
				     "mergetablealias": "projectset_v1",
			
 
				-    "saveresult": true,
			
 
				+    "saveresult": false,
			
 
				     "fieldscore": true,
			
 
				-    "qualityaudit": false,
			
 
				-    "iscltlog": true,
			
 
				-    "brandgoods": false,
			
 
				+    "qualityaudit": true,
			
 
				+    "iscltlog": false,
			
 
				+    "brandgoods": true,
			
 
				     "udptaskid": "5be107e600746bf92debf080",
			
 
				     "udpip": "127.0.0.1",
			
 
				     "udpport": "1484",
			
--- a/src/jy/admin/rulecheck.go
+++ b/src/jy/admin/rulecheck.go
@@ -198,6 +198,8 @@ func checkPreReg(content, ruleText string) string {
 
				 		tmp := strings.Split(ruleText, "__")
			
 
				 		var pattern string
			
 
				 		if strings.Contains(tmp[0], "\\u") {
			
 
				+			tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+			tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 			pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 		} else {
			
 
				 			pattern = tmp[0]
			
@@ -222,6 +224,8 @@ func checkBackReg(content, ruleText string) string {
 
				 		tmp := strings.Split(ruleText, "__")
			
 
				 		var pattern string
			
 
				 		if strings.Contains(tmp[0], "\\u") {
			
 
				+			tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+			tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 			pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 		} else {
			
 
				 			pattern = tmp[0]
			
@@ -258,10 +262,13 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 
				 			}
			
 
				 			var pattern string
			
 
				 			if strings.Contains(tmp[0], "\\u") {
			
 
				+				tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+				tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 				pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 			} else {
			
 
				 				pattern = tmp[0]
			
 
				 			}
			
 
				+			log.Println("pattern", pattern)
			
 
				 			reg := regexp.MustCompile(pattern)
			
 
				 			apos := reg.FindAllStringSubmatchIndex(content, -1)
			
 
				 			if len(apos) > 0 {
			
@@ -313,7 +320,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 
				 		doc = extract.ExtRegPre(doc, j, v, e.TaskInfo)
			
 
				 	}
			
 
				 	//抽取规则
			
 
				-	if j.CategorySecond==""{
			
 
				+	if j.CategorySecond == "" {
			
 
				 		for _, vc1 := range e.RuleCores[j.Category] {
			
 
				 			for _, vc := range vc1 {
			
 
				 				tmp := ju.DeepCopy(doc).(map[string]interface{})
			
@@ -331,7 +338,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 
				 				}
			
 
				 			}
			
 
				 		}
			
 
				-	}else{
			
 
				+	} else {
			
 
				 		for _, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
			
 
				 			for _, vc := range vc1 {
			
 
				 				tmp := ju.DeepCopy(doc).(map[string]interface{})
			
--- a/src/jy/clear/totimestamp.go
+++ b/src/jy/clear/totimestamp.go
@@ -4,11 +4,12 @@ package clear
 
				 import (
			
 
				 	"fmt"
			
 
				 	"regexp"
			
 
				+	"strconv"
			
 
				 	"strings"
			
 
				 	"time"
			
 
				 )
			
 
				 
			
 
				-var reg, regA, regB, regC, regAfter *regexp.Regexp
			
 
				+var reg, regA, regB, regC, regD, regAfter *regexp.Regexp
			
 
				 
			
 
				 const (
			
 
				 	T = 365 * 86400
			
@@ -22,10 +23,12 @@ var item = map[string]string{
 
				 }
			
 
				 
			
 
				 func init() {
			
 
				+	//二〇一五年十一月四日十五时
			
 
				 	reg, _ = regexp.Compile(`\d+`)
			
 
				 	regA, _ = regexp.Compile(`[一|二|三|四|五|六|七|八|九|十|零|〇|１|２|３|４|５|６|７|８|９|０]`)
			
 
				 	regB, _ = regexp.Compile(`\d+年\d+月\d+日((上|下)午)?\s*\d+[:：时]\d+分?[-—]\d+[:：时]\d+时?分?`)
			
 
				 	regC, _ = regexp.Compile(`\s*\d+[:：时]\d+分?[-—]`)
			
 
				+	regD, _ = regexp.Compile(`([一|二|三|四|五|六|七|八|九|十|零|〇]{4})年([一|二|三|四|五|六|七|八|九|十]{1,2})月([一|二|三|四|五|六|七|八|九|十]{1,3})日([一|二|三|四|五|六|七|八|九|十]{1,3})时`)
			
 
				 	regAfter, _ = regexp.Compile(`(下午D?\d{1,2}[时|:|：|h|H])`)
			
 
				 }
			
 
				 
			
@@ -41,6 +44,27 @@ func init() {
 
				 */
			
 
				 func ObjToTimestamp(data []interface{}) []interface{} {
			
 
				 	tmp := fmt.Sprint(data[0])
			
 
				+	//处理类似：二〇一五年十一月四日十五时
			
 
				+	cht := regD.FindStringSubmatch(tmp)
			
 
				+	if len(cht) == 5 {
			
 
				+		y := chineseToNumber(cht[1])
			
 
				+		m := 0
			
 
				+		for _, v := range []rune(cht[2]) {
			
 
				+			it, _ := strconv.Atoi(item[string(v)])
			
 
				+			m += it
			
 
				+		}
			
 
				+		d := 0
			
 
				+		for _, v := range []rune(cht[3]) {
			
 
				+			it, _ := strconv.Atoi(item[string(v)])
			
 
				+			d += it
			
 
				+		}
			
 
				+		M := 0
			
 
				+		for _, v := range []rune(cht[4]) {
			
 
				+			it, _ := strconv.Atoi(item[string(v)])
			
 
				+			M += it
			
 
				+		}
			
 
				+		tmp = fmt.Sprintf("%s年%d月%d日%d时", y, m, d, M)
			
 
				+	}
			
 
				 	//2016年12月7日上午9:00-11：30时 时间范围处理 取后面的时间
			
 
				 	if regB.MatchString(tmp) {
			
 
				 		tmp = regC.ReplaceAllString(tmp, "")
			
@@ -97,8 +121,8 @@ func ObjToTimestamp(data []interface{}) []interface{} {
 
				 		t, _ := time.ParseInLocation("2006-01-02 15:04", timestr, time.Local)
			
 
				 		timestamp = t.Unix()
			
 
				 	}
			
 
				-	if timestamp < 0 || timestamp > (time.Now().Unix()+T) {
			
 
				-		data[0] = 0
			
 
				+	if timestamp <= 0 || timestamp > (time.Now().Unix()+T) {
			
 
				+		data[0] = ""
			
 
				 	} else {
			
 
				 		if addreptime > 0 {
			
 
				 			timestamp += addreptime
			
--- a/src/jy/extract/exportask.go
+++ b/src/jy/extract/exportask.go
@@ -57,6 +57,7 @@ func extractAndExport(v string, t map[string]interface{}) {
 
				 	e.InitTag()
			
 
				 	e.InitClearFn()
			
 
				 	e.InfoTypeList()
			
 
				+	e.InitBlockRule()
			
 
				 	//品牌抽取是否开启
			
 
				 	ju.IsBrandGoods = ju.Config["brandgoods"].(bool)
			
 
				 
			
--- a/src/jy/extract/extract.go
+++ b/src/jy/extract/extract.go
@@ -46,6 +46,7 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 
				 	ext.InitRuleCore()
			
 
				 	ext.InitPkgCore()
			
 
				 	ext.InitBlockRule()
			
 
				+	ext.InfoTypeList()
			
 
				 	ext.InitTag()
			
 
				 	ext.InitClearFn()
			
 
				 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
			
@@ -118,6 +119,7 @@ func StartExtractTaskId(taskId string) bool {
 
				 	ext.InitRuleCore()
			
 
				 	ext.InitPkgCore()
			
 
				 	ext.InitBlockRule()
			
 
				+	ext.InfoTypeList()
			
 
				 	ext.InitTag()
			
 
				 	ext.InitClearFn()
			
 
				 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
			
--- a/src/jy/extract/extractInit.go
+++ b/src/jy/extract/extractInit.go
@@ -214,6 +214,8 @@ func (e *ExtractTask) InitRulePres() {
 
				 				tmp := strings.Split(rinfo.RuleText, "__")
			
 
				 				var pattern string
			
 
				 				if strings.Contains(tmp[0], "\\u") {
			
 
				+					tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+					tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 					pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 				} else {
			
 
				 					pattern = tmp[0]
			
@@ -251,6 +253,8 @@ func (e *ExtractTask) InitRuleBacks() {
 
				 				tmp := strings.Split(rinfo.RuleText, "__")
			
 
				 				var pattern string
			
 
				 				if strings.Contains(tmp[0], "\\u") {
			
 
				+					tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+					tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 					pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 				} else {
			
 
				 					pattern = tmp[0]
			
@@ -335,6 +339,8 @@ func (e *ExtractTask) InfoRole(vinfo map[string]interface{}) []*RuleCore {
 
				 					tmp := strings.Split(rinfo.RuleText, "__")
			
 
				 					var pattern string
			
 
				 					if strings.Contains(tmp[0], "\\u") {
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 					} else {
			
 
				 						pattern = tmp[0]
			
@@ -371,6 +377,8 @@ func (e *ExtractTask) InfoRole(vinfo map[string]interface{}) []*RuleCore {
 
				 					tmp := strings.Split(rinfo.RuleText, "__")
			
 
				 					var pattern string
			
 
				 					if strings.Contains(tmp[0], "\\u") {
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 					} else {
			
 
				 						pattern = tmp[0]
			
@@ -414,6 +422,8 @@ func (e *ExtractTask) InfoRole(vinfo map[string]interface{}) []*RuleCore {
 
				 					tmp := strings.Split(rinfo.RuleText, "__")
			
 
				 					var pattern string
			
 
				 					if strings.Contains(tmp[0], "\\u") {
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 					} else {
			
 
				 						pattern = tmp[0]
			
@@ -485,6 +495,8 @@ func (e *ExtractTask) InitPkgCore() {
 
				 						tmp := strings.Split(rinfo.RuleText, "__")
			
 
				 						var pattern string
			
 
				 						if strings.Contains(tmp[0], "\\u") {
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
			
 
				+							tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
			
 
				 							pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
			
 
				 						} else {
			
 
				 							pattern = tmp[0]
			
--- a/src/jy/extract/score.go
+++ b/src/jy/extract/score.go
@@ -17,7 +17,7 @@ func init() {
 
				 	qu.ReadConfig("./res/fieldscore.json", &SoreConfig)
			
 
				 	//实例化正则
			
 
				 	for _, tmp := range SoreConfig {
			
 
				-		log.Println(tmp)
			
 
				+		//log.Println(tmp)
			
 
				 		if tmp["type"] == "string" {
			
 
				 			if positions, ok := tmp["position"].([]interface{}); ok {
			
 
				 				for _, position := range positions {
			
@@ -25,8 +25,10 @@ func init() {
 
				 						qu.Try(func() {
			
 
				 							strReq, _ := p["regstr"].(string)
			
 
				 							if strings.Contains(strReq, "\\u") {
			
 
				-								pattern, _ := strconv.Unquote(`"` + strReq + `"`)
			
 
				-								p["regexp"] = regexp.MustCompile(pattern)
			
 
				+								strReq = strings.Replace(strReq, "\\", "\\\\", -1)
			
 
				+								strReq = strings.Replace(strReq, "\\\\u", "\\u", -1)
			
 
				+								strReq, _ = strconv.Unquote(`"` + strReq + `"`)
			
 
				+								p["regexp"] = regexp.MustCompile(strReq)
			
 
				 							} else {
			
 
				 								p["regexp"] = regexp.MustCompile(strReq)
			
 
				 							}
			
@@ -42,8 +44,10 @@ func init() {
 
				 						qu.Try(func() {
			
 
				 							strReq, _ := p["regstr"].(string)
			
 
				 							if strings.Contains(strReq, "\\u") {
			
 
				-								pattern, _ := strconv.Unquote(`"` + strReq + `"`)
			
 
				-								p["regexp"] = regexp.MustCompile(pattern)
			
 
				+								strReq = strings.Replace(strReq, "\\", "\\\\", -1)
			
 
				+								strReq = strings.Replace(strReq, "\\\\u", "\\u", -1)
			
 
				+								strReq, _ = strconv.Unquote(`"` + strReq + `"`)
			
 
				+								p["regexp"] = regexp.MustCompile(strReq)
			
 
				 							} else {
			
 
				 								p["regexp"] = regexp.MustCompile(strReq)
			
 
				 							}
			
--- a/src/jy/pretreated/analystep.go
+++ b/src/jy/pretreated/analystep.go
@@ -18,39 +18,30 @@ func AnalyStart(job *util.Job) {
 
				 	//格式化正文
			
 
				 	con = formatText(con, "all")
			
 
				 	job.Content = con
			
 
				-	//
			
 
				+	//计算表格占比,返回表格数组、占比
			
 
				 	tabs, ration := ComputeConRatio(con, 1)
			
 
				 	if len(tabs) > 0 {
			
 
				 		newcon, newtabs, newration := FindBigText(con, ration, tabs)
			
 
				-		if newcon != "" && newration == 0 {
			
 
				+		if newcon != "" {
			
 
				 			con = newcon
			
 
				 			tabs = newtabs
			
 
				 			ration = newration
			
 
				 		}
			
 
				 	}
			
 
				-	blockArrays, _ := DivideBlock(con, 1, job.RuleBlock)
			
 
				-	if len(blockArrays) > 0 { //有分块
			
 
				+	blockArrays, _ := DivideBlock(con, 1, job.RuleBlock) //分块
			
 
				+	if len(blockArrays) > 0 {                            //有分块
			
 
				 		//从块里面找分包
			
 
				-		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title)
			
 
				+		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title) //从块里面找分包
			
 
				 		for _, bl := range blockArrays {
			
 
				 			if len([]rune(bl.Text)) > 80 {
			
 
				-				ba1, _ := DivideBlock(bl.Text, 1, job.RuleBlock)
			
 
				-				if len(ba1) > 0 {
			
 
				-					t := ""
			
 
				-					for _, t1 := range ba1 {
			
 
				-						t += t1.Text
			
 
				-					}
			
 
				-					bl.Text = t
			
 
				-					bl.ColonKV = GetKVAll(t, bl.Title, 1)
			
 
				-					bl.SpaceKV = SspacekvEntity.Entrance(t, bl.Title)
			
 
				-				}
			
 
				+				bl.Block, _ = DivideBlock(bl.Text, 1, job.RuleBlock)
			
 
				 			}
			
 
				 			//块中再查找表格(块,处理完把值赋到块)
			
 
				 			t1, _ := ComputeConRatio(bl.Text, 2)
			
 
				 			if len(t1) > 0 {
			
 
				-				job.HasTable = 1 //添加标识:文本中有table
			
 
				-				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid, job.RuleBlock)
			
 
				-				processTableResult(tabres, bl, job)
			
 
				+				job.HasTable = 1                                                                             //添加标识:文本中有table
			
 
				+				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid, job.RuleBlock) //解析表格入口 返回：汇总表格对象
			
 
				+				processTableResult(tabres, bl, job)                                                          //分析table解析结果
			
 
				 				if bl.Title == "" && tabres.BlockTag != "" {
			
 
				 					bl.Title = tabres.BlockTag
			
 
				 				}
			
@@ -77,7 +68,6 @@ func AnalyStart(job *util.Job) {
 
				 			//			for k, v := range bl.TableKV.Kv {
			
 
				 			//				log.Println("bl.TableKV.Kv", k, v)
			
 
				 			//			}
			
 
				-
			
 
				 		} else {
			
 
				 			//从正文里面找分包
			
 
				 			job.BlockPackage = FindPackageFromText(job.Title, newCon)
			
@@ -88,8 +78,8 @@ func AnalyStart(job *util.Job) {
 
				 		}
			
 
				 
			
 
				 		//调用kv解析
			
 
				-		bl.ColonKV = GetKVAll(newCon, "", 1)
			
 
				-		bl.SpaceKV = SspacekvEntity.Entrance(newCon, "")
			
 
				+		bl.ColonKV = GetKVAll(newCon, "", nil, 1)
			
 
				+		bl.SpaceKV = SspacekvEntity.Entrance(newCon, "", nil)
			
 
				 		job.Block = append(job.Block, bl)
			
 
				 	}
			
 
				 }
			
@@ -279,7 +269,7 @@ func tableDivideBlock(con string, ration float32, tabs []*goquery.Selection) str
 
				 			content = regEndWrap.ReplaceAllString(content, "")
			
 
				 			doc, _ := goquery.NewDocumentFromReader(strings.NewReader(con))
			
 
				 			doc.Find("table").Eq(0).ReplaceWithHtml(content)
			
 
				-			con, _ = doc.Html()
			
 
				+			con, _ = doc.Find("body").Html()
			
 
				 		}
			
 
				 	}
			
 
				 	return con
			
@@ -295,9 +285,11 @@ func FindBigText(con string, r float32, t []*goquery.Selection) (content string,
 
				 		if content != "" {
			
 
				 			tabs, ration = ComputeConRatio(content, 1)
			
 
				 			if len(tabs) > 0 {
			
 
				-				content = tableDivideBlock(content, ration, tabs)
			
 
				-				if content == "" {
			
 
				+				con := tableDivideBlock(content, ration, tabs)
			
 
				+				if con == "" {
			
 
				 					return
			
 
				+				} else {
			
 
				+					content = con
			
 
				 				}
			
 
				 			} else {
			
 
				 				doc, _ := goquery.NewDocumentFromReader(strings.NewReader(con))
			
--- a/src/jy/pretreated/analytable.go
+++ b/src/jy/pretreated/analytable.go
--- a/src/jy/pretreated/colonkv.go
+++ b/src/jy/pretreated/colonkv.go
@@ -63,8 +63,11 @@ func (ce *ColonkvEntity) divisionMoreKV(con string) string {
 
				 }
			
 
				 
			
 
				 //获取冒号kv入口
			
 
				-func (ce *ColonkvEntity) entrance(con, title string, from int) ([]*Kv, map[string]string) {
			
 
				+func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int) ([]*Kv, map[string]string) {
			
 
				 	kvs := ce.GetKvs(con, title, from)
			
 
				+	if from == 1 {
			
 
				+		FormatContactKv(&kvs, title, nil, contactFormat)
			
 
				+	}
			
 
				 	kv := map[string]string{}
			
 
				 	for _, v := range kvs {
			
 
				 		if strings.TrimSpace(v.Value) == "" {
			
@@ -84,7 +87,7 @@ func (ce *ColonkvEntity) GetKvs(con, title string, from int) []*Kv {
 
				 
			
 
				 //处理正文
			
 
				 func (ce *ColonkvEntity) processText(con string) string {
			
 
				-	con = ce.divisionMoreKV(con)
			
 
				+	con = ce.divisionMoreKV(con)//一行多个冒号kv处理
			
 
				 	for {
			
 
				 		tmp := con
			
 
				 		con = ce.divisionMoreKV(con)
			
@@ -238,8 +241,7 @@ func IsContactKvHandle(value string, m map[string]bool) bool {
 
				 
			
 
				 //kv关于联系人信息的处理
			
 
				 //采购人>集中采购机构
			
 
				-/*
			
 
				-func FormatContactKv(kvs *[]*Kv, title string, buyers []string) {
			
 
				+func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat) {
			
 
				 	////////////////////////////
			
 
				 	//处理联系人信息
			
 
				 	var indexMap map[int]string
			
@@ -565,7 +567,6 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string) {
 
				 	//	}
			
 
				 	//Debug("totalIndexMap", len(totalIndexMap))
			
 
				 }
			
 
				-*/
			
 
				 func ContactTypeTitleMatch(title string) string {
			
 
				 	matchType := ""
			
 
				 	if title != "" && len([]rune(title)) < 15 {
			
@@ -614,9 +615,9 @@ func HasOrderContactType(text string) []string {
 
				 
			
 
				 //两种冒号kv结合到一起
			
 
				 //from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
			
 
				-func GetKVAll(content, title string, from int) *JobKv {
			
 
				+func GetKVAll(content, title string, contactFormat *ContactFormat, from int) *JobKv {
			
 
				 	content = formatText(content, "kv")
			
 
				-	m1Kvs, _ := colonkvEntity.entrance(content, title, from)
			
 
				+	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from)
			
 
				 	m1, m1Weight := KvTagsToKV(m1Kvs, title, nil, from)
			
 
				 	if m1 == nil {
			
 
				 		m1 = map[string]string{}
			
--- a/src/jy/pretreated/division.go
+++ b/src/jy/pretreated/division.go
@@ -19,7 +19,7 @@ var (
 
				 		"(\\d+)[\u3000\u2003\u00a0\\s]+([^\\d][^\r\n]+)",
			
 
				 		"1[.．](\\d+)[\u3000\u2003\u00a0\\s]+([^\\d.．][^\r\n]+)",
			
 
				 	}*/
			
 
				-	/*regSerialTitles_1 = []*regexp.Regexp{
			
 
				+	regSerialTitles_1 = []*regexp.Regexp{
			
 
				 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)([一二三四五六七八九十]+)[\u3000\u2003\u00a0\\s]*[、．.:：，](.*)"),
			
 
				 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)[（(]([一二三四五六七八九十]+)[)）][\u3000\u2003\u00a0\\s]*[、．.:：]?(.*)"),
			
 
				 		regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*|^[\u3000\u2003\u00a0\\s]*)(\\d+)[\u3000\u2003\u00a0\\s]*、(.*)"),
			
@@ -36,7 +36,7 @@ var (
 
				 		regexp.MustCompile("^(\\d+)[\u3000\u2003\u00a0\\s]+([^\\d][^\r\n]+)$"),
			
 
				 		regexp.MustCompile("^1[.．](\\d+)[\u3000\u2003\u00a0\\s]+([^\\d.．][^\r\n]+)$"),
			
 
				 		regexp.MustCompile("^[（](\\d+)[\u3000\u2003\u00a0\\s）]+([^\r\n]+)$"),
			
 
				-	}*/
			
 
				+	}
			
 
				 	regReplAllTd       = regexp.MustCompile("(?smi)<td.*?>.+?</td>")
			
 
				 	regIsNumber        = regexp.MustCompile("^\\d+$")
			
 
				 	regIsChineseNumber = regexp.MustCompile("^[一二三四五六七八九十]+$")
			
@@ -48,12 +48,16 @@ var (
 
				 	regDivision        = regexp.MustCompile("[:：]")
			
 
				 	regSpliteSegment   = regexp.MustCompile("[\r\n]")
			
 
				 	regFilterNumber    = regexp.MustCompile("^[\\d一二三四五六七八九十]+")
			
 
				-	regSplit           = regexp.MustCompile("和|以?及|与|、")
			
 
				+	regSplit           = regexp.MustCompile("或|和|以?及|与|、|或")
			
 
				 	regStartWrap       = regexp.MustCompile("^[\r\n]")
			
 
				 	regEndWrap         = regexp.MustCompile("[\r\n]$")
			
 
				 	regMoreWrap        = regexp.MustCompile("[\r\n]{2,}")
			
 
				 	replSerial         = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、．.:：，])+\\d")
			
 
				 	moreColonReg       = regexp.MustCompile("[:：]+")
			
 
				+	regFilter          = regexp.MustCompile("等$")
			
 
				+	confusion          = map[string]string{
			
 
				+		"参与": "canyu",
			
 
				+	}
			
 
				 	//查找分包之前，先对内容进行预处理
			
 
				 	/*
			
 
				 		第一包：采购设备清单
			
@@ -75,7 +79,13 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 	//contentTemp := regReplAllTd.ReplaceAllString(content, "")
			
 
				 	contentTemp := TextAfterRemoveTable(content)
			
 
				 	tdIndexs := regReplAllTd.FindAllStringSubmatchIndex(content, -1)
			
 
				-	regContenSerialTitle, regSerialTitleIndex := getSerialType(contentTemp, ruleBlock.BlockRegs)
			
 
				+	var regContenSerialTitle *regexp.Regexp
			
 
				+	var regSerialTitleIndex int
			
 
				+	if ruleBlock!=nil && len(ruleBlock.BlockRegs)>0{
			
 
				+		regContenSerialTitle, regSerialTitleIndex = getSerialType(contentTemp, ruleBlock.BlockRegs)
			
 
				+	}else {
			
 
				+		regContenSerialTitle, regSerialTitleIndex = getSerialType(contentTemp,  regSerialTitles_1)
			
 
				+	}
			
 
				 	//没有分块
			
 
				 	if regSerialTitleIndex == -1 {
			
 
				 		if len(contentTemp) == len(content) {
			
@@ -86,7 +96,12 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 		}
			
 
				 	}
			
 
				 	//匹配序号和标题
			
 
				-	regSerialTitle := ruleBlock.TitleRegs[regSerialTitleIndex]
			
 
				+	var regSerialTitle *regexp.Regexp
			
 
				+	if ruleBlock != nil && len(ruleBlock.TitleRegs)>0{
			
 
				+		regSerialTitle = ruleBlock.TitleRegs[regSerialTitleIndex]
			
 
				+	}else {
			
 
				+		regSerialTitle = regSerialTitles_2[regSerialTitleIndex]
			
 
				+	}
			
 
				 	indexs := regContenSerialTitle.FindAllStringIndex(content, -1)
			
 
				 	indexs = filterSerial(content, indexs, tdIndexs)
			
 
				 	//头块
			
@@ -154,7 +169,6 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 		}
			
 
				 		//获取块中除了序号和标题的内容
			
 
				 		blockText := regTrimSpace.ReplaceAllString(content[end:nextStart], "")
			
 
				-		var titles = []string{}
			
 
				 		if title != "" {
			
 
				 			blockTextTemp := regReplAllSpace.ReplaceAllString(blockText, "")
			
 
				 			//特殊情况处理
			
@@ -173,6 +187,7 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 						十二、开标时间：2017年3月20日9时30分
			
 
				 					*/
			
 
				 					blockText = title
			
 
				+					title = ""
			
 
				 				}
			
 
				 			} else if blockTextTemp != "" && regDivision.MatchString(title) {
			
 
				 				/*
			
@@ -185,34 +200,16 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 				divisionIndexs := regDivision.FindStringIndex(title)
			
 
				 				titleBefore := regReplAllSpace.ReplaceAllString(title[:divisionIndexs[0]], "")
			
 
				 				titleAfter := regReplAllSpace.ReplaceAllString(title[divisionIndexs[1]:], "")
			
 
				+				blockText = title + "\n" + blockText
			
 
				 				if titleAfter != "" {
			
 
				-					titles = append(titles, titleBefore)
			
 
				-					//分段 去每一个冒号前面的key
			
 
				-					segments := regSpliteSegment.Split(blockText, -1)
			
 
				-					for _, sv := range segments {
			
 
				-						divisionIndexs = regDivision.FindStringIndex(sv)
			
 
				-						if len(divisionIndexs) == 0 {
			
 
				-							continue
			
 
				-						}
			
 
				-						titleTemp := regReplAllSpace.ReplaceAllString(sv[:divisionIndexs[0]], "")
			
 
				-						if titleTemp == "" {
			
 
				-							continue
			
 
				-						}
			
 
				-						titles = append(titles, titleTemp)
			
 
				-					}
			
 
				-					blockText = title + "\n" + blockText
			
 
				 					title = ""
			
 
				 				} else {
			
 
				-					blockText = title + "\n" + blockText
			
 
				 					title = titleBefore
			
 
				 				}
			
 
				 			} else {
			
 
				 				blockText = title + "\n" + blockText
			
 
				 			}
			
 
				 		}
			
 
				-		if len(titles) == 0 {
			
 
				-			titles = append(titles, title)
			
 
				-		}
			
 
				 		//没有内容的块，不打标签，不分段
			
 
				 		if blockText == "" {
			
 
				 			continue
			
@@ -222,29 +219,29 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 			continue
			
 
				 		}
			
 
				 		blockText = hasMergeKV(title, blockText)
			
 
				-		block := &util.Block{
			
 
				-			Index: index,     //序号
			
 
				-			Text:  blockText, //内容
			
 
				-			Title: title,     //标题
			
 
				-			Start: start,
			
 
				-			End:   nextStart,
			
 
				-		}
			
 
				 		//
			
 
				 		titleIsExists := map[string]bool{} //去重
			
 
				-		for _, tv := range titles {
			
 
				-			tv = filterTitle(tv)
			
 
				-			//分割标题 [和及]。。。
			
 
				-			splitTitles := regSplit.Split(tv, -1)
			
 
				-			for _, sv := range splitTitles {
			
 
				-				if sv == "" || titleIsExists[sv] {
			
 
				-					continue
			
 
				-				}
			
 
				-				titleIsExists[sv] = true
			
 
				-				//标题过短过长不打标签
			
 
				-				if len([]rune(sv)) >= 2 && len([]rune(sv)) <= 10 {
			
 
				-					//打标签
			
 
				-					block.Tags = append(block.Tags, util.GetBlockTags(sv))
			
 
				-				}
			
 
				+		title = filterTitle(title)
			
 
				+		//分割标题 [和及]。。。 参与
			
 
				+		splitTitles := ProcTitle(title)
			
 
				+		block := &util.Block{
			
 
				+			Index:  index,     //序号
			
 
				+			Text:   blockText, //内容
			
 
				+			Title:  title,     //标题
			
 
				+			Titles: splitTitles,
			
 
				+			Start:  start,
			
 
				+			End:    nextStart,
			
 
				+		}
			
 
				+
			
 
				+		for _, sv := range splitTitles {
			
 
				+			if sv == "" || titleIsExists[sv] {
			
 
				+				continue
			
 
				+			}
			
 
				+			titleIsExists[sv] = true
			
 
				+			//标题过短过长不打标签
			
 
				+			if len([]rune(sv)) >= 2 && len([]rune(sv)) <= 10 {
			
 
				+				//打标签
			
 
				+				block.Tags = append(block.Tags, util.GetBlockTags(sv))
			
 
				 			}
			
 
				 		}
			
 
				 		tagsToBlocks(blocks, block)
			
@@ -268,18 +265,62 @@ func DivideBlock(content string, from int, ruleBlock *util.RuleBlock) ([]*util.B
 
				 			returnValue = 1
			
 
				 		}
			
 
				 	}
			
 
				-
			
 
				+	contactFormat := &util.ContactFormat{
			
 
				+		IndexMap: map[int]string{},
			
 
				+		MatchMap: map[string]map[string]bool{},
			
 
				+	}
			
 
				 	for _, bl := range returnBlocks {
			
 
				 		//解析kv
			
 
				 		newText := TextAfterRemoveTable(bl.Text)
			
 
				-		bl.ColonKV = GetKVAll(newText, bl.Title, from)
			
 
				-		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title)
			
 
				+		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from)
			
 
				+		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat)
			
 
				 		//正则抽取的时候有时需要匹配换行或者句号，这里在解析完kv之后，在块结尾添加换行和句号
			
 
				 		bl.Text = appendWarpStop(bl.Text)
			
 
				 	}
			
 
				 	return returnBlocks, returnValue
			
 
				 }
			
 
				 
			
 
				+//块标题处理
			
 
				+func ProcTitle(title string) []string {
			
 
				+	if title == "" {
			
 
				+		return []string{}
			
 
				+	}
			
 
				+	for k, v := range confusion {
			
 
				+		title = strings.Replace(title, k, v, -1)
			
 
				+	}
			
 
				+	direct := 1
			
 
				+	prev := ""
			
 
				+	ara := regSplit.Split(title, -1)
			
 
				+	for kk, vv := range ara {
			
 
				+		for kkk, vvv := range confusion {
			
 
				+			vv = strings.Replace(vv, vvv, kkk, -1)
			
 
				+		}
			
 
				+		ara[kk] = vv
			
 
				+		if len([]rune(vv)) == 2 {
			
 
				+			if kk == 0 {
			
 
				+				direct = -1
			
 
				+			} else {
			
 
				+				start := ""
			
 
				+				if len([]rune(prev)) > 3 {
			
 
				+					start = string([]rune(prev)[:len([]rune(prev))-2])
			
 
				+				}
			
 
				+				ara[kk] = start + vv
			
 
				+			}
			
 
				+		}
			
 
				+		if len([]rune(vv)) > 3 {
			
 
				+			if direct == -1 {
			
 
				+				end := string([]rune(vv)[len([]rune(vv))-2:])
			
 
				+				for i := 0; i < kk; i++ {
			
 
				+					ara[i] = ara[i] + end
			
 
				+				}
			
 
				+				break
			
 
				+			}
			
 
				+			prev = vv
			
 
				+		}
			
 
				+	}
			
 
				+	return ara
			
 
				+}
			
 
				+
			
 
				 //有合并kv的 例如项目名称及编号
			
 
				 func hasMergeKV(title, text string) string {
			
 
				 	title = regDivision.ReplaceAllString(title, "")
			
@@ -413,6 +454,12 @@ func tagsToBlocks(blocks []*util.Block, block *util.Block) {
 
				 }
			
 
				 
			
 
				 func filterTitle(title string) string {
			
 
				+	if strings.Contains(title, "，") && strings.Contains(title, "。") {
			
 
				+		return ""
			
 
				+	}
			
 
				+	if len([]rune(title)) > 30 {
			
 
				+		return ""
			
 
				+	}
			
 
				 	//清理空格
			
 
				 	title = regReplAllSpace.ReplaceAllString(title, "")
			
 
				 	//清理成对出现的符号中的内容
			
@@ -421,6 +468,7 @@ func filterTitle(title string) string {
 
				 	title = regReplAllSymbol.ReplaceAllString(title, "")
			
 
				 	//清理序号
			
 
				 	title = regFilterNumber.ReplaceAllString(title, "")
			
 
				+	title = regFilter.ReplaceAllString(title, "")
			
 
				 	return title
			
 
				 }
			
 
				 
			
@@ -438,8 +486,8 @@ func FindPackageFromBlocks(blocks *[]*util.Block, title string) (blockPackage ma
 
				 		//把分包内容摘除掉有问题 有的项目名称中包含二标段
			
 
				 		if ok && false {
			
 
				 			v.Text = surplusText
			
 
				-			v.ColonKV = GetKVAll(surplusText, v.Title, 1)
			
 
				-			v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title)
			
 
				+			v.ColonKV = GetKVAll(surplusText, v.Title, nil, 1)
			
 
				+			v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
			
 
				 		}
			
 
				 	}
			
 
				 	return
			
@@ -588,9 +636,9 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 
				 				//合并文本
			
 
				 				(*blockPackage)[index].Text += "\n" + text
			
 
				 				//合并冒号kv
			
 
				-				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", 1)
			
 
				+				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
			
 
				 				if headKey != "" {
			
 
				-					kvAgain := GetKVAll(text, "", 4)
			
 
				+					kvAgain := GetKVAll(text, "", nil, 4)
			
 
				 					for kv_k, kv_v := range kvAgain.Kv {
			
 
				 						if colonJobKv.Kv[kv_k] == "" {
			
 
				 							colonJobKv.Kv[kv_k] = kv_v
			
@@ -608,7 +656,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 
				 					(*blockPackage)[index].ColonKV.Kv[kv_k] = kv_v
			
 
				 				}
			
 
				 				//合并空格kv
			
 
				-				spaceJobKv := SspacekvEntity.Entrance(text, "")
			
 
				+				spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
			
 
				 				for kv_k, kv_v := range spaceJobKv.Kv {
			
 
				 					if kv_v == "" {
			
 
				 						continue
			
@@ -626,9 +674,9 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 
				 					Type:     bv[1],
			
 
				 					Accuracy: accuracy,
			
 
				 				}
			
 
				-				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", 4)
			
 
				+				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
			
 
				 				if headKey != "" {
			
 
				-					kvAgain := GetKVAll(text, "", 4)
			
 
				+					kvAgain := GetKVAll(text, "", nil, 4)
			
 
				 					for kv_k, kv_v := range kvAgain.Kv {
			
 
				 						if finalKv.Kv[kv_k] == "" {
			
 
				 							finalKv.Kv[kv_k] = kv_v
			
@@ -637,7 +685,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 
				 					}
			
 
				 				}
			
 
				 				newBpkg.ColonKV = finalKv
			
 
				-				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "")
			
 
				+				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)
			
 
				 				(*blockPackage)[index] = newBpkg
			
 
				 			}
			
 
				 		}
			
--- a/src/jy/pretreated/spacekv.go
+++ b/src/jy/pretreated/spacekv.go
@@ -16,7 +16,7 @@ var (
 
				 	excludeSpaceKey = regexp.MustCompile("[.、�\\[【{｛〔<《\\]】}｝〕>》]")
			
 
				 )
			
 
				 
			
 
				-func (se *SpacekvEntity) Entrance(text, title string) *util.JobKv {
			
 
				+func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat) *util.JobKv {
			
 
				 	lines := se.getLines(text)
			
 
				 	kvMaps := []*util.Kv{}
			
 
				 	for _, line := range lines {
			
@@ -26,7 +26,7 @@ func (se *SpacekvEntity) Entrance(text, title string) *util.JobKv {
 
				 		}
			
 
				 		kvMaps = append(kvMaps, kvMap...)
			
 
				 	}
			
 
				-	//FormatContactKv(&kvMaps, title, nil, contactFormat)
			
 
				+	FormatContactKv(&kvMaps, title, nil, contactFormat)
			
 
				 	kv, tagKv := KvTagsToKV(kvMaps, title, nil, 1)
			
 
				 	return &util.JobKv{
			
 
				 		Kvs:   kvMaps,
			
--- a/src/jy/pretreated/tablev2.go
+++ b/src/jy/pretreated/tablev2.go
@@ -64,24 +64,24 @@ type TD struct {
 
				 	Val            string             //值
			
 
				 	Text           string             //原始串
			
 
				 	SortKV         *SortMap           //存放kv值
			
 
				-	Html           string
			
 
				-	BH             bool         //是否是表头
			
 
				-	MustBH         bool         //不能修改的表头
			
 
				-	StandardKey    string       //标准表头
			
 
				-	Colspan        int          //合并列
			
 
				-	Rowspan        int          //合并行
			
 
				-	StartCol       int          //起始列
			
 
				-	EndCol         int          //终止列
			
 
				-	StartRow       int          //起始行
			
 
				-	EndRow         int          //终止行
			
 
				-	ColPos         int          //当前在TR中的位置
			
 
				-	HeadTd         *TD          //(是val元素)k节点
			
 
				-	KVDirect       int          //键-值方向,0未知,1横 2纵//指值和k的方向
			
 
				-	KeyDirect      int          //k方向，k纵值横，k横值纵 1横 2纵
			
 
				-	SonTds         []*TD        //(是key元素)值节点数组
			
 
				-	SonTableResult *TableResult //子值表格集
			
 
				-	ArrVal         []string     //数组值，当是左临元素是合并行的元素时！
			
 
				-	Valtype        string       //"BO=中标人顺序"
			
 
				+	Html           string             //html值
			
 
				+	BH             bool               //是否是表头
			
 
				+	MustBH         bool               //不能修改的表头
			
 
				+	StandardKey    string             //标准表头
			
 
				+	Colspan        int                //合并列
			
 
				+	Rowspan        int                //合并行
			
 
				+	StartCol       int                //起始列
			
 
				+	EndCol         int                //终止列
			
 
				+	StartRow       int                //起始行
			
 
				+	EndRow         int                //终止行
			
 
				+	ColPos         int                //当前在TR中的位置
			
 
				+	HeadTd         *TD                //(是val元素)k节点
			
 
				+	KVDirect       int                //键-值方向,0未知,1横 2纵//指值和k的方向
			
 
				+	KeyDirect      int                //k方向，k纵值横，k横值纵 1横 2纵
			
 
				+	SonTds         []*TD              //(是key元素)值节点数组
			
 
				+	SonTableResult *TableResult       //子值表格集
			
 
				+	ArrVal         []string           //数组值，当是左临元素是合并行的元素时！
			
 
				+	Valtype        string             //"BO=中标人顺序"
			
 
				 }
			
 
				 
			
 
				 var submatchreg = regexp.MustCompile(`((?:[一二三四五六七八九十0-10]+[、])([\\S]{4,12})|([\\S]{2,12}))[:：]([\\S]{5,60})([一二三四五六七八九]+[、])?`)
			
@@ -112,44 +112,68 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 	if rowspan == 0 {
			
 
				 		rowspan = 1
			
 
				 	}
			
 
				-	td.Colspan, td.Rowspan = colspan, rowspan
			
 
				-	td.Html, _ = td.Goquery.Html()
			
 
				-	ht := td.Goquery.ChildrenFiltered("table")
			
 
				-	bsontable := false
			
 
				+	td.Colspan, td.Rowspan = colspan, rowspan  //合并列,合并行
			
 
				+	td.Html, _ = td.Goquery.Html()             //html值
			
 
				+	ht := td.Goquery.ChildrenFiltered("table") //获取td的table
			
 
				+	bsontable := false                         //默认td中没有table
			
 
				 	txt := ""
			
 
				 	//子table处理合并
			
 
				 	if ht.Size() > 0 {
			
 
				 		//qutil.Debug("有子表格")
			
 
				 		txt = TextAfterRemoveTable(td.Html)
			
 
				-		ts := td.TR.Table.TableResult
			
 
				-		tabs, _ := ComputeConRatio(td.Html, 2)
			
 
				-		if len(tabs) > 0 {
			
 
				-			bsontable = true
			
 
				-			stag := ts.BlockTag
			
 
				-			if stag == "" {
			
 
				-				var tdleft *TD
			
 
				-				if len(tr.TDs) > 0 {
			
 
				-					tdleft = tr.TDs[len(tr.TDs)-1]
			
 
				-					if tdleft.BH {
			
 
				-						//u.Debug(tdleft.Val),如果不存在就是上一行的
			
 
				-						stag = tdleft.Val
			
 
				-					}
			
 
				-				} else if len(tr.Table.TRs) > 0 {
			
 
				-					lasttr := tr.Table.TRs[len(tr.Table.TRs)-1]
			
 
				-					str := ""
			
 
				-					for _, td3 := range lasttr.TDs {
			
 
				-						str += td3.Val
			
 
				-						if len([]rune(str)) > 14 {
			
 
				-							str = ""
			
 
				-							break
			
 
				-						}
			
 
				+		td.tdHasTable(&bsontable, tr, table) //处理td中的table，块标签处理，子表解析集处理
			
 
				+	} else {
			
 
				+		txt = strings.TrimSpace(td.Goquery.Text())
			
 
				+	}
			
 
				+	text := dwReg.ReplaceAllString(u.TrimLRAll(txt, ""), "$1")
			
 
				+	td.Val = text //值
			
 
				+	td.Text = txt //原始串
			
 
				+	//对td单元格值判断是否是表头和根据td内容长度进行分块处理
			
 
				+	td.tdIsHb(tr, table, bsontable)
			
 
				+	bhead := false
			
 
				+	if td.TR.RowPos == 0 { //第一行
			
 
				+		if td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
			
 
				+			bhead = true
			
 
				+		}
			
 
				+	}
			
 
				+	if bhead && !bsontable {
			
 
				+		td.BH = true
			
 
				+		td.KeyDirect = 1 //k方向，k纵值横，k横值纵 1横 2纵
			
 
				+		td.KVDirect = 2  //键-值方向,0未知,1横 2纵//指值和k的方向
			
 
				+	}
			
 
				+	//u.Debug(td.BH, td.Val)
			
 
				+	return td
			
 
				+}
			
 
				+
			
 
				+//处理td中的table，块标签处理，子表解析集处理
			
 
				+func (td *TD) tdHasTable(bsontable *bool, tr *TR, table *Table) {
			
 
				+	ts := td.TR.Table.TableResult
			
 
				+	tabs, _ := ComputeConRatio(td.Html, 2) //计算表格占比
			
 
				+	if len(tabs) > 0 {
			
 
				+		(*bsontable) = true
			
 
				+		stag := ts.BlockTag //块标签
			
 
				+		if stag == "" {
			
 
				+			var tdleft *TD
			
 
				+			if len(tr.TDs) > 0 {
			
 
				+				tdleft = tr.TDs[len(tr.TDs)-1]
			
 
				+				if tdleft.BH {
			
 
				+					//u.Debug(tdleft.Val),如果不存在就是上一行的
			
 
				+					stag = tdleft.Val
			
 
				+				}
			
 
				+			} else if len(tr.Table.TRs) > 0 {
			
 
				+				lasttr := tr.Table.TRs[len(tr.Table.TRs)-1]
			
 
				+				str := ""
			
 
				+				for _, td3 := range lasttr.TDs {
			
 
				+					str += td3.Val
			
 
				+					if len([]rune(str)) > 14 {
			
 
				+						str = ""
			
 
				+						break
			
 
				 					}
			
 
				-					stag = str
			
 
				 				}
			
 
				+				stag = str
			
 
				 			}
			
 
				-			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock)
			
 
				+			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
			
 
				 			td.BH = false
			
 
				-
			
 
				 			td.SonTableResult = sonts
			
 
				 			//for _, k := range sonts.SortKV.Keys {
			
 
				 			//u.Debug(k, sonts.SortKV.Map[k])
			
@@ -201,21 +225,20 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 				//u.Debug(fmt.Sprintf("%v", td.TR.Table.BlockPackage.Map["1"]))
			
 
				 			}
			
 
				 		}
			
 
				-	} else {
			
 
				-		txt = td.Goquery.Text()
			
 
				 	}
			
 
				-	text := dwReg.ReplaceAllString(u.TrimLRAll(txt, ""), "$1")
			
 
				-	//u.Debug(txt, text)
			
 
				-	td.Val = text
			
 
				-	td.Text = txt
			
 
				-	//对td单元格值判断是否是key
			
 
				-	lentxt := len([]rune(text))
			
 
				+}
			
 
				+
			
 
				+//对td单元格值判断是否是表头和根据td内容长度进行分块处理
			
 
				+func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
			
 
				+	lenval := len([]rune(td.Val)) //经过处理的td内容长度
			
 
				 	//if lentxt > 9 {
			
 
				 	//td.KV = GetKVAll(txt, "")
			
 
				 	ub := []*u.Block{}
			
 
				-	if lentxt > 50 { //看是否划块
			
 
				+	//经过处理的td内容长度大于50，划块，分包
			
 
				+	if lenval > 50 { //看是否划块
			
 
				 		//u.Debug(txt)
			
 
				-		ub, _ = DivideBlock(txt, 2, nil)
			
 
				+		ub, _ = DivideBlock(td.Text, 2, table.TableResult.RuleBlock) //对td的原始值
			
 
				+		//看是否划块
			
 
				 		if len(ub) > 0 {
			
 
				 			colonKvWeight := map[string]int{}
			
 
				 			spaceKvWeight := map[string]int{}
			
@@ -249,15 +272,15 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 		}*/
			
 
				 		if len(tr.TDs) > 0 {
			
 
				 			tdleft := tr.TDs[len(tr.TDs)-1]
			
 
				-			if tdleft.BH && excludeKey.MatchString(tdleft.Text) {
			
 
				+			if tdleft.BH && excludeKey.MatchString(tdleft.Text) { //(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)
			
 
				 				isFindPkg = false
			
 
				 			}
			
 
				 		}
			
 
				 		if isFindPkg {
			
 
				 			if len(ub) > 0 {
			
 
				-				blockPackage = FindPackageFromBlocks(&ub, "")
			
 
				+				blockPackage = FindPackageFromBlocks(&ub, "") //从块里面找分包
			
 
				 			} else {
			
 
				-				blockPackage = FindPackageFromText("", text)
			
 
				+				blockPackage = FindPackageFromText("", td.Val) //从正文里面找分包
			
 
				 			}
			
 
				 		}
			
 
				 		if len(blockPackage) > 0 {
			
@@ -287,16 +310,28 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-	//
			
 
				-	if lentxt < 50 {
			
 
				+	//经过处理的td内容长度小于50，冒号kv，td表头
			
 
				+	if lenval < 50 {
			
 
				 		//		td.SortKV = FindKv(text, "")
			
 
				 		kvTitle := ""
			
 
				 		if len(td.TR.TDs) > 0 {
			
 
				 			kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
			
 
				 		}
			
 
				-		_, resm := colonkvEntity.entrance(text, kvTitle, 2)
			
 
				+		/*
			
 
				+					预算总价
			
 
				+			(人民币：元）
			
 
				+		*/
			
 
				+		if td.Text != "" && strings.Contains(td.Text, "预算总价") && (strings.Contains(td.Text, "(") || strings.Contains(td.Text, "（")) {
			
 
				+			tagindex := 0
			
 
				+			if tagindex = strings.Index(td.Text, "（"); tagindex <= 0 {
			
 
				+				tagindex = strings.Index(td.Text, "(")
			
 
				+			}
			
 
				+			td.SortKV.AddKey(strings.TrimSpace(td.Text[:tagindex]), strings.TrimSpace(td.Text[tagindex:])) //存放kv值
			
 
				+			td.BH = true
			
 
				+		}
			
 
				+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 3) //td冒号kv
			
 
				 		for k, v := range resm {
			
 
				-			td.SortKV.AddKey(k, v)
			
 
				+			td.SortKV.AddKey(k, v) //存放kv值
			
 
				 		}
			
 
				 		//u.Debug(td.SortKV.Keys, "-------2--------------------------------")
			
 
				 		//		td.SortKV = FindKv(text, "") //GetKvFromtxt(text, "")
			
@@ -308,13 +343,21 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 				td.BH = true
			
 
				 			}
			
 
				 		} else if !bsontable {
			
 
				-			txt := repSpace.ReplaceAllString(text, "")
			
 
				+			txt := repSpace.ReplaceAllString(td.Val, "")
			
 
				 			btw, must, _, _, repl := CheckHeader(txt)
			
 
				+			if lenval > 15 {
			
 
				+				btw = false
			
 
				+			}
			
 
				+			if strings.Contains(td.Val, "个项目") {
			
 
				+				must = false
			
 
				+				btw = false
			
 
				+			}
			
 
				 			td.Valtype = repl
			
 
				 			td.MustBH = must
			
 
				 			td.BH = btw
			
 
				 		}
			
 
				-	} else if len(ub) == 0 { //之前这里没加判断，现在加上判断，造成分块之后的kv被覆盖掉
			
 
				+	} else if len(ub) == 0 {
			
 
				+		//之前这里没加判断，现在加上判断，造成分块之后的kv被覆盖掉
			
 
				 		//u.Debug("----\n\n\n", txt, "\n\n\n----")
			
 
				 		//u.Debug(GetKVAll(txt, ""))
			
 
				 		/*
			
@@ -329,7 +372,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 			}
			
 
				 		*/
			
 
				 
			
 
				-		td.SortKV = FindKv(text, "", 2)
			
 
				+		td.SortKV = FindKv(td.Val, "", 2)
			
 
				 
			
 
				 		//		td.LeftNode.Val
			
 
				 		//		for _, vvv := range *td.TR {
			
@@ -339,24 +382,11 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 
				 		if len(td.TR.TDs) > 0 {
			
 
				 			kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
			
 
				 		}
			
 
				-		_, resm := colonkvEntity.entrance(text, kvTitle, 2)
			
 
				+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 2) //获取冒号kv入口
			
 
				 		for k, v := range resm {
			
 
				 			td.SortKV.AddKey(k, v)
			
 
				 		}
			
 
				 	}
			
 
				-	bhead := false
			
 
				-	if td.TR.RowPos == 0 { //第一行
			
 
				-		if td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
			
 
				-			bhead = true
			
 
				-		}
			
 
				-	}
			
 
				-	if bhead && !bsontable {
			
 
				-		td.BH = true
			
 
				-		td.KeyDirect = 1
			
 
				-		td.KVDirect = 2
			
 
				-	}
			
 
				-	//u.Debug(td.BH, td.Val)
			
 
				-	return td
			
 
				 }
			
 
				 func (t *Table) Print() {
			
 
				 	for row, trs := range t.TRs {
			
--- a/src/jy/util/article.go
+++ b/src/jy/util/article.go
@@ -53,6 +53,7 @@ type RuleBlock struct {
 
				 type Block struct {
			
 
				 	Tags        []Tags                   //对块做的标签，可以作为数据抽取的依据
			
 
				 	Title       string                   //块标题
			
 
				+	Titles      []string                 //拆分以后多个块标题
			
 
				 	Index       int                      //块索引
			
 
				 	Text        string                   //块内容
			
 
				 	Start       int                      //开始索引
			
@@ -63,6 +64,7 @@ type Block struct {
 
				 	BPackage    *BlockPackage            //分包信息
			
 
				 	Tag         map[string]bool          //块标签
			
 
				 	Block       []*Block                 //子块
			
 
				+	Category    string                   //块分类
			
 
				 	Winnerorder []map[string]interface{} //块中，中标候选人排序
			
 
				 }
			
 
				 
			
--- a/src/jy/util/config.go
+++ b/src/jy/util/config.go
@@ -12,9 +12,9 @@ import (
 
				 var FormatTextMap map[string][]map[string]interface{}
			
 
				 
			
 
				 func init() {
			
 
				-	//loadFormatText()
			
 
				-	//LoadTagDb("./res/tagdb")
			
 
				-	//LoadTagDb("./res/blocktagdb")
			
 
				+	loadFormatText()
			
 
				+	LoadTagDb("./res/tagdb")
			
 
				+	LoadTagDb("./res/blocktagdb")
			
 
				 }
			
 
				 
			
 
				 //加载格式化正文配置
			
--- a/src/res/fieldscore.json
+++ b/src/res/fieldscore.json
@@ -172,7 +172,7 @@
 
				         "position": [
			
 
				             {
			
 
				                 "describe": "全为中文汉字或符号",
			
 
				-                "regstr": "^[\\u4e00-\\u9fa5（）()【】\\\\[\\\\],，。、：:《》]+$",
			
 
				+                "regstr": "^[\\u4e00-\\u9fa5（）()【】\\[\\],，。、：:《》]+$",
			
 
				                 "score": -20
			
 
				             },
			
 
				             {
			
--- a/src/res/formattext.json
+++ b/src/res/formattext.json
@@ -20,11 +20,6 @@
 
				             "separator": " ",
			
 
				             "desc": "替换掉无效的kv"
			
 
				         },
			
 
				-        {
			
 
				-            "reg": "[^\\n:：]{2,18}[:：]\\s*详见[^,。，.：:\\s]{2,18}",
			
 
				-            "separator": "",
			
 
				-            "desc": "替换掉无效的kv"
			
 
				-        },
			
 
				         {
			
 
				             "reg": "(\\d+[，,.]+)+\\d+((百|千)?元|(百|千)?(万|亿)元?)",
			
 
				             "separator": "[，,]__",
			
@@ -182,6 +177,11 @@
 
				             "reg": "\n[\\d.\u3000\u2003\u00a0\\s]*(联系人)及(电话)[:：](.+?)[\u3000\u2003\u00a0\\s]+(.+)",
			
 
				             "separator": "\n$1：$3\n$2：$4",
			
 
				             "desc": ""
			
 
				+        },
			
 
				+        {
			
 
				+            "reg": "[^\\n:：]{2,18}[:：]\\s*详见[^,。，.：:\\s]{2,18}",
			
 
				+            "separator": "",
			
 
				+            "desc": "替换掉无效的kv"
			
 
				         }
			
 
				     ]
			
 
				 }
			
--- a/src/udpfileserver/main.go
+++ b/src/udpfileserver/main.go
@@ -2,6 +2,8 @@ package main
 
				 
			
 
				 import (
			
 
				 	"encoding/json"
			
 
				+	"fmt"
			
 
				+	"github.com/go-gomail/gomail"
			
 
				 	"gopkg.in/mgo.v2/bson"
			
 
				 	"jy/mongodbutil"
			
 
				 	"log"
			
@@ -9,8 +11,11 @@ import (
 
				 	"net"
			
 
				 	"net/rpc"
			
 
				 	"path"
			
 
				+	"qfw/common/src/qfw/util"
			
 
				 	qu "qfw/util"
			
 
				+	"strconv"
			
 
				 	"strings"
			
 
				+	"sync"
			
 
				 	"time"
			
 
				 )
			
 
				 
			
@@ -61,7 +66,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
				 		lid := strings.TrimSpace(mapInfo["lteid"].(string))
			
 
				 		if bson.IsObjectIdHex(gid) && bson.IsObjectIdHex(lid) {
			
 
				 			var jsq int64
			
 
				-			query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(gid),"$lte": bson.ObjectIdHex(lid),}}
			
 
				+			query := bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(gid),"$lte": bson.ObjectIdHex(lid),}}
			
 
				 			log.Println("query---:", query)
			
 
				 			sum :=mongodbutil.Mgo.Count(MgoC,query)
			
 
				 			log.Println("sum:", sum)
			
@@ -71,7 +76,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
				 				limit = sum
			
 
				 			}
			
 
				 			for i := 0; i < pageNum; i++ {
			
 
				-				query = bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(gid), "$lte": bson.ObjectIdHex(lid)}}
			
 
				+				query = bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(gid), "$lte": bson.ObjectIdHex(lid)}}
			
 
				 				log.Println("page=", i+1,"query=", query,limit)
			
 
				 				list, b := mongodbutil.Mgo.Find(MgoC,query,nil,bson.M{"_id": 1,MgoFileFiled:1},false,0, limit)
			
 
				 				if !b{
			
@@ -103,6 +108,11 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
				 										log.Println(mid, "mgo ", MgoFileFiled,"没有fid ")
			
 
				 										continue
			
 
				 									}
			
 
				+									//if qu.ObjToString(fileinfo["update"]) ==""{
			
 
				+									//	<-ChanB
			
 
				+									//	log.Println(mid, "mgo ", MgoFileFiled,"没有update ")
			
 
				+									//	continue
			
 
				+									//}
			
 
				 									save(mid,attk, qmap, &fileinfo,&updateNum)
			
 
				 									<-ChanB
			
 
				 								}
			
@@ -111,7 +121,10 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
				 					}
			
 
				 				}
			
 
				 			}
			
 
				+			//识别完以后再次查询数据库，进行下一轮识别
			
 
				 			log.Println("处理查询数据结束...",jsq,time.Now().Sub(stime))
			
 
				+			//进行下一轮识别
			
 
				+			forfunc(lid)
			
 
				 		} else {
			
 
				 			log.Println("开始id或结束id参数错误:", string(data))
			
 
				 		}
			
@@ -185,6 +198,7 @@ func save(mid interface{},attk string, qmap, fileinfo *map[string]interface{},up
 
				 			(*fileinfo)["content"] = rdata["context"]
			
 
				 		}
			
 
				 		(*fileinfo)["expend"] = rdata["expend"]
			
 
				+		delete(*fileinfo,"update")
			
 
				 		//log.Println((*fileinfo))
			
 
				 
			
 
				 		(*qmap)[MgoFileFiled].(map[string]interface{})["attachments"].(map[string]interface{})[attk]=*fileinfo
			
@@ -208,8 +222,81 @@ func save(mid interface{},attk string, qmap, fileinfo *map[string]interface{},up
 
				 		}else {
			
 
				 			log.Println(mid, "mongo更新数据失败",qu.ObjToString((*fileinfo)["fid"]))
			
 
				 		}
			
 
				+		nowHour := time.Now().Hour()
			
 
				+		rdlock.Lock()
			
 
				+		if nowHour != hourNum{
			
 
				+			log.Println("send email:",SendMail(fmt.Sprint(updateBool,mid)))
			
 
				+			hourNum = nowHour
			
 
				+		}
			
 
				+		rdlock.Unlock()
			
 
				 	} else {
			
 
				 		log.Println(mid, "调用rpc服务解析异常：",qu.ObjToString((*fileinfo)["fid"]), rdata["err"])
			
 
				 	}
			
 
				 
			
 
				 }
			
 
				+var hourNum int
			
 
				+var rdlock sync.RWMutex
			
 
				+// SendMail sends body as a "text/html" e-mail with the subject "MongoId".
				+// The message is sent through the SMTP account configured in mailConn and
				+// is addressed to that same mailbox.
				+//
				+// It returns an error when the configured port is not a valid integer or
				+// when dialing the SMTP server / sending the message fails; nil otherwise.
				+func SendMail(body string) error {
				+	// SMTP connection settings. "pass" is the account password, or the
				+	// authorization code for providers (such as QQ Mail) that issue one.
				+	// NOTE(review): "pass" is empty and the account is hard-coded here;
				+	// presumably these should come from configuration - confirm.
				+	mailConn := map[string]string{
				+		"user": "550838476@qq.com",
				+		"pass": "",
				+		"host": "smtp.qq.com",
				+		"port": "465",
				+	}
				+
				+	// The dialer takes the port as an int. Report a malformed value instead
				+	// of silently dialing port 0.
				+	port, err := strconv.Atoi(mailConn["port"])
				+	if err != nil {
				+		return err
				+	}
				+
				+	m := gomail.NewMessage()
				+	// "Get to" is the display name shown in front of the sender address.
				+	m.SetHeader("From", "Get to"+"<"+mailConn["user"]+">")
				+	// The recipient list is a slice so that more addresses can be appended.
				+	m.SetHeader("To", []string{"550838476@qq.com"}...)
				+	m.SetHeader("Subject", "MongoId")
				+	m.SetBody("text/html", body)
				+
				+	d := gomail.NewDialer(mailConn["host"], port, mailConn["user"], mailConn["pass"])
				+	return d.DialAndSend(m)
				+}
			
 
				+
			
 
				+// forfunc waits until the MgoC collection contains a document whose _id
				+// differs from lid, then sends a UDP message carrying {"gtid": lid,
				+// "lteid": <newest id>} to the address configured under "udpip"/"udpport"
				+// so that the next processing round starts where the previous one ended.
				+//
				+// The call blocks until the UDP message has been written successfully:
				+//   - if the newest id cannot be read, it retries after one minute;
				+//   - if the newest id still equals lid, it sends a notification mail and
				+//     retries after one hour;
				+//   - if the UDP write fails, it retries after one minute.
				+func forfunc(lid string) {
				+	for {
				+		// Look up the newest document (presumably "-_id" sorts by _id
				+		// descending - confirm against mongodbutil), fetching only _id.
				+		lastObjectId, _ := mongodbutil.Mgo.Find(MgoC, nil, "-_id", bson.M{"_id": 1}, true, -1, -1)
				+		// Guard before indexing: a failed query or an empty collection would
				+		// otherwise panic on (*lastObjectId)[0].
				+		if lastObjectId == nil || len(*lastObjectId) == 0 {
				+			log.Println("查询异常", lastObjectId)
				+			time.Sleep(time.Minute)
				+			continue
				+		}
				+		lastId, ok := (*lastObjectId)[0]["_id"].(bson.ObjectId)
				+		log.Println("lastID:", lastId)
				+		// The _id was not a bson.ObjectId: treat it as a failed query and retry.
				+		if !ok {
				+			log.Println("查询异常", *lastObjectId)
				+			time.Sleep(time.Minute)
				+			continue
				+		}
				+		// Nothing newer than the previous round: notify and check again later.
				+		if lastId.Hex() == lid {
				+			log.Println("没有新数据", lastId.Hex())
				+			// Best-effort notification; a mail failure must not stop polling.
				+			SendMail(time.Now().String() + "没有最新数据,当前最后一条数据id:" + lastId.Hex())
				+			time.Sleep(time.Hour)
				+			continue
				+		}
				+		// New data exists: describe the next id range to process.
				+		m := map[string]string{
				+			"gtid":  lid,          // last id handled by the previous round
				+			"lteid": lastId.Hex(), // newest id found by this query
				+		}
				+		// Marshalling a map[string]string cannot fail, so the error is ignored.
				+		bytes, _ := json.Marshal(m)
				+		// Hand the range to the UDP listener.
				+		err := udpclient.WriteUdp(bytes, mu.OP_TYPE_DATA, &net.UDPAddr{
				+			IP:   net.ParseIP(util.ObjToString(Sysconfig["udpip"])),
				+			Port: util.IntAll(Sysconfig["udpport"]),
				+		})
				+		if err != nil {
				+			log.Println("发送udp失败", err, string(bytes))
				+			time.Sleep(time.Minute)
				+			continue
				+		}
				+		// Best-effort notification; the UDP message has already been sent.
				+		SendMail(time.Now().String() + fmt.Sprint("发送udp成功,gtid：", lid, "，lteid:", lastId.Hex()))
				+		log.Println("发送udp成功,gtid：", lid, "，lteid:", lastId.Hex())
				+		break // the message was sent, stop polling
				+	}
				+}
			
--- a/udpprojectset/src/cleareids.go
+++ b/udpprojectset/src/cleareids.go
@@ -35,10 +35,10 @@ func clearPKey() {
 
				 	wg := sync.WaitGroup{}
			
 
				 	for _, pncb := range []*KeyMap{PNKey, PCKey, PBKey} {
			
 
				 		wg.Add(1)
			
 
				-		go func() {
			
 
				+		go func(pncb *KeyMap) {
			
 
				 			defer wg.Done()
			
 
				 			clearPNCBKey(pncb, nowtime)
			
 
				-		}()
			
 
				+		}(pncb)
			
 
				 	}
			
 
				 	wg.Wait()
			
 
				 	log.Println("清理结束")
			
@@ -60,6 +60,9 @@ func clearIdsKeys(pKey *KeyMap, nowtime int64) []string {
 
				 	for k, ma := range pKey.Map {
			
 
				 		ids := ma.Arr
			
 
				 		delids := []interface{}{}
			
 
				+		if ids == nil {
			
 
				+			continue
			
 
				+		}
			
 
				 		res := redis.Mget(REDISIDS, *ids)
			
 
				 		for _, b1 := range res {
			
 
				 			if b1 != nil {
			
@@ -100,7 +103,7 @@ func clearIdsKeys(pKey *KeyMap, nowtime int64) []string {
 
				 }
			
 
				 
			
 
				 func deleteSliceId(a []string, id string) *[]string {
			
 
				-	ret := make([]string, 0, len(a))
			
 
				+	ret := make([]string, 0)
			
 
				 	for _, val := range a {
			
 
				 		if val != id {
			
 
				 			ret = append(ret, val)
			
--- a/udpprojectset/src/config.json
+++ b/udpprojectset/src/config.json
@@ -23,6 +23,8 @@
 
				     },
			
 
				     "taskstock": {
			
 
				         "open": true,
			
 
				+		"startTime":1325347200,
			
 
				+        "startdate": "2015-11-01",
			
 
				         "endate": "2019-06-30"
			
 
				     },
			
 
				     "udpport": ":1482",
			
--- a/udpprojectset/src/fulldata.go
+++ b/udpprojectset/src/fulldata.go
@@ -2,7 +2,6 @@ package main
 
				 
			
 
				 import (
			
 
				 	"log"
			
 
				-	"strings"
			
 
				 
			
 
				 	"qfw/util"
			
 
				 	"qfw/util/mongodb"
			
@@ -14,10 +13,13 @@ import (
 
				 
			
 
				 var FullCount = 0
			
 
				 
			
 
				-func RunFullData() {
			
 
				+func RunFullData(startTime int64) {
			
 
				+	if startTime < 1325347200 {
			
 
				+		log.Println("时间错误", startTime)
			
 
				+	}
			
 
				 	defer util.Catch()
			
 
				 	var wg = sync.WaitGroup{}
			
 
				-	startTime := int64(1325347200) //2012-01-01
			
 
				+	//startTime := int64(1325347200) //2012-01-01
			
 
				 	ps := 3
			
 
				 	pool := make(chan *task, ps)
			
 
				 	day := 0
			
@@ -27,7 +29,7 @@ func RunFullData() {
 
				 		bComplete := false
			
 
				 		for {
			
 
				 			if startTime > now || bComplete {
			
 
				-				log.Println("任务结束")
			
 
				+				log.Println("任务结束", startTime)
			
 
				 				endChan <- true
			
 
				 				break
			
 
				 			}
			
@@ -113,6 +115,7 @@ func (t *task) query() {
 
				 			info := PreThisInfo(tmp)
			
 
				 			if info != nil {
			
 
				 				lockPNCBMap(info)
			
 
				+				storeLock(info)
			
 
				 				startProjectMerge(info, tmp)
			
 
				 				redis.Put(INFOID, thisid, 1, INFOTIMEOUT)
			
 
				 				currentMegerTime = info.Publishtime
			
@@ -126,79 +129,3 @@ func (t *task) query() {
 
				 	log.Println("currentFull", FullCount)
			
 
				 
			
 
				 }
			
 
				-
			
 
				-//获取对比项目数组
			
 
				-func getComeperProjects2(p PCBV, thisinfo *Info) (res []interface{}, pncb []*CompareInfo) {
			
 
				-	newarr := []string{}
			
 
				-	repeatId := map[string]bool{}
			
 
				-	if p.PnameLen > 0 {
			
 
				-		pn := NewCompareInfo("pn", thisinfo.PNKey, PNKey)
			
 
				-		pncb = append(pncb, pn)
			
 
				-		thisinfo.AllRelatePNKeyMap = map[string]*Key{}
			
 
				-		pn.KeyMap.Lock.Lock()
			
 
				-		for k, v := range pn.KeyMap.Map {
			
 
				-			if strings.Contains(k, pn.Key) || strings.Contains(pn.Key, k) {
			
 
				-				thisinfo.AllRelatePNKeyMap[k] = v
			
 
				-				for _, id := range *v.Arr {
			
 
				-					if !repeatId[id] {
			
 
				-						newarr = append(newarr, id)
			
 
				-						repeatId[id] = true
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-		if thisinfo.AllRelatePNKeyMap[pn.Key] == nil {
			
 
				-			K := &Key{&[]string{}, &sync.Mutex{}}
			
 
				-			thisinfo.AllRelatePNKeyMap[pn.Key] = K
			
 
				-			pn.KeyMap.Map[pn.Key] = K
			
 
				-		}
			
 
				-		pn.KeyMap.Lock.Unlock()
			
 
				-	}
			
 
				-	if p.PcodeLen > 0 {
			
 
				-		pc := NewCompareInfo("pc", thisinfo.PCKey, PCKey)
			
 
				-		pncb = append(pncb, pc)
			
 
				-		thisinfo.AllRelatePCKeyMap = map[string]*Key{}
			
 
				-		pc.KeyMap.Lock.Lock()
			
 
				-		for k, v := range pc.KeyMap.Map {
			
 
				-			if strings.Contains(k, pc.Key) || strings.Contains(pc.Key, k) {
			
 
				-				thisinfo.AllRelatePCKeyMap[k] = v
			
 
				-				for _, id := range *v.Arr {
			
 
				-					if !repeatId[id] {
			
 
				-						newarr = append(newarr, id)
			
 
				-						repeatId[id] = true
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-		if thisinfo.AllRelatePCKeyMap[pc.Key] == nil {
			
 
				-			K := &Key{&[]string{}, &sync.Mutex{}}
			
 
				-			thisinfo.AllRelatePCKeyMap[pc.Key] = K
			
 
				-			pc.KeyMap.Map[pc.Key] = K
			
 
				-		}
			
 
				-		pc.KeyMap.Lock.Unlock()
			
 
				-	}
			
 
				-
			
 
				-	if p.BuyerLen > 0 {
			
 
				-		pb := NewCompareInfo("pb", thisinfo.PBKey, PBKey)
			
 
				-		pncb = append(pncb, pb)
			
 
				-		pb.KeyMap.Lock.Lock()
			
 
				-		K := pb.KeyMap.Map[pb.Key]
			
 
				-		if K == nil {
			
 
				-			K = &Key{&[]string{}, &sync.Mutex{}}
			
 
				-			pb.KeyMap.Map[pb.Key] = K
			
 
				-		} else {
			
 
				-			for _, id := range *K.Arr {
			
 
				-				if !repeatId[id] {
			
 
				-					newarr = append(newarr, id)
			
 
				-					repeatId[id] = true
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-		pb.KeyMap.Lock.Unlock()
			
 
				-	}
			
 
				-
			
 
				-	if len(newarr) > 0 {
			
 
				-		res = redis.Mget(REDISIDS, newarr)
			
 
				-	}
			
 
				-	return
			
 
				-}
			
--- a/udpprojectset/src/main.go
+++ b/udpprojectset/src/main.go
@@ -146,11 +146,15 @@ func main() {
 
				 	log.Println("load data from redis finished.", n)
			
 
				 	//清理redis
			
 
				 	//clearedis()
			
 
				+
			
 
				 	if taskstock, ok := Sysconfig["taskstock"].(map[string]interface{}); ok { //跑存量数据
			
 
				 		if b, _ := taskstock["open"].(bool); b {
			
 
				-			endate, _ := taskstock["endate"].(string)
			
 
				-			taskStock(endate)
			
 
				+			RunFullData(util.Int64All(taskstock["startTime"]))
			
 
				+			//			startdate, _ := taskstock["startdate"].(string)
			
 
				+			//			endate, _ := taskstock["endate"].(string)
			
 
				+			//			taskStock(startdate, endate)
			
 
				 		}
			
 
				+
			
 
				 	}
			
 
				 	updport := Sysconfig["udpport"].(string)
			
 
				 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
			
@@ -245,6 +249,7 @@ func taskInc(mapInfo map[string]interface{}) {
 
				 				info := PreThisInfo(tmp)
			
 
				 				if info != nil {
			
 
				 					lockPNCBMap(info)
			
 
				+					storeLock(info)
			
 
				 					startProjectMerge(info, tmp)
			
 
				 					redis.Put(INFOID, thisid, 1, INFOTIMEOUT)
			
 
				 					currentMegerTime = info.Publishtime
			
@@ -288,13 +293,13 @@ func taskInc(mapInfo map[string]interface{}) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-func taskStock(endDate string) {
			
 
				+func taskStock(startDate, endDate string) {
			
 
				 	defer func() {
			
 
				 		<-SingleThread
			
 
				 	}()
			
 
				 	defer util.Catch()
			
 
				 	publishtimes := []map[string]interface{}{}
			
 
				-	start, _ := time.ParseInLocation(util.Date_Short_Layout, "2015-11-01", time.Local)
			
 
				+	start, _ := time.ParseInLocation(util.Date_Short_Layout, startDate, time.Local)
			
 
				 	end, _ := time.ParseInLocation(util.Date_Short_Layout, endDate, time.Local)
			
 
				 	for {
			
 
				 		publishtime := map[string]interface{}{
			
@@ -364,6 +369,7 @@ func taskStock(endDate string) {
 
				 					info := PreThisInfo(tmp)
			
 
				 					if info != nil {
			
 
				 						lockPNCBMap(info)
			
 
				+						storeLock(info)
			
 
				 						startProjectMerge(info, tmp)
			
 
				 						redis.Put(INFOID, thisid, 1, INFOTIMEOUT)
			
 
				 						currentMegerTime = info.Publishtime
			
--- a/udpprojectset/src/projectmeger.go
+++ b/udpprojectset/src/projectmeger.go
@@ -73,9 +73,6 @@ func startProjectMerge(thisinfo *Info, tmp map[string]interface{}) {
 
				 	}
			
 
				 	//合并流程
			
 
				 	if bNormalScore {
			
 
				-		PNKeyMap.Store(thisinfo.PNKey, true)
			
 
				-		PBKeyMap.Store(thisinfo.PBKey, true)
			
 
				-		PCKeyMap.Store(thisinfo.PCKey, true)
			
 
				 		if pcbv.Buyer { //有采购单位
			
 
				 			hasBuyer(pcbv, thisinfo, tmp)
			
 
				 		} else { //无采购单位
			
@@ -189,7 +186,7 @@ func noBuyer(p PCBV, thisinfo *Info, tmp map[string]interface{}) {
 
				 			sflag = "invalid"
			
 
				 		}
			
 
				 	}
			
 
				-	//extInfoTag(sflag, thisinfo.Id)
			
 
				+	extInfoTag(sflag, thisinfo.Id)
			
 
				 	//go IS.Add(sflag) //数据统计使用
			
 
				 }
			
 
				 
			
@@ -375,30 +372,28 @@ func getComeperProjects(p PCBV, thisinfo *Info) (res []interface{}, pncb []*Comp
 
				 		pncb = append(pncb, pb)
			
 
				 	}
			
 
				 	repeatId := map[string]bool{}
			
 
				-	IdLock.Lock() //此处加id锁，会引进多线程的死锁，对比三个大map数组，找到key相同的项目id数组,并去重
			
 
				+	//IdLock.Lock() //此处加id锁，会引进多线程的死锁，对比三个大map数组，找到key相同的项目id数组,并去重
			
 
				 	for _, pv := range pncb {
			
 
				-		if pv != nil {
			
 
				-			pv.KeyMap.Lock.Lock()
			
 
				-			K := pv.KeyMap.Map[pv.Key]
			
 
				-			if K == nil {
			
 
				-				K = &Key{&[]string{}, &sync.Mutex{}}
			
 
				-				pv.KeyMap.Map[pv.Key] = K
			
 
				-			}
			
 
				-			pv.K = K
			
 
				-			pv.K.Lock.Lock()
			
 
				-			pv.KeyMap.Lock.Unlock()
			
 
				-			defer pv.K.Lock.Unlock()
			
 
				-			newarr := []string{}
			
 
				-			for _, id := range *K.Arr {
			
 
				-				if !repeatId[id] {
			
 
				-					newarr = append(newarr, id)
			
 
				-					repeatId[id] = true
			
 
				-				}
			
 
				+		pv.KeyMap.Lock.Lock()
			
 
				+		K := pv.KeyMap.Map[pv.Key]
			
 
				+		if K == nil {
			
 
				+			K = &Key{&[]string{}, &sync.Mutex{}}
			
 
				+			pv.KeyMap.Map[pv.Key] = K
			
 
				+		}
			
 
				+		pv.K = K
			
 
				+		pv.K.Lock.Lock()
			
 
				+		pv.KeyMap.Lock.Unlock()
			
 
				+		defer pv.K.Lock.Unlock()
			
 
				+		newarr := []string{}
			
 
				+		for _, id := range *K.Arr {
			
 
				+			if !repeatId[id] {
			
 
				+				newarr = append(newarr, id)
			
 
				+				repeatId[id] = true
			
 
				 			}
			
 
				-			pv.IdArr = newarr
			
 
				 		}
			
 
				+		pv.IdArr = newarr
			
 
				 	}
			
 
				-	IdLock.Unlock()
			
 
				+	//IdLock.Unlock()
			
 
				 	for _, pv := range pncb {
			
 
				 		if len(pv.IdArr) > 0 {
			
 
				 			res = append(res, redis.Mget(REDISIDS, pv.IdArr))
			
@@ -581,6 +576,7 @@ func lockPNCBMap(thisinfo *Info) {
 
				 		if ok {
			
 
				 			break
			
 
				 		} else {
			
 
				+			//log.Println("has key store")
			
 
				 			time.Sleep(100 * time.Millisecond)
			
 
				 		}
			
 
				 	}
			
@@ -588,6 +584,7 @@ func lockPNCBMap(thisinfo *Info) {
 
				 
			
 
				 //pncbMap解锁
			
 
				 func unlockPNCBMap(thisinfo *Info) {
			
 
				+	//log.Println("del key store", thisinfo.PNKey)
			
 
				 	//if len(thisinfo.PNKey) > 3 {
			
 
				 	PNKeyMap.Delete(thisinfo.PNKey)
			
 
				 	//}
			
@@ -598,3 +595,12 @@ func unlockPNCBMap(thisinfo *Info) {
 
				 	PBKeyMap.Delete(thisinfo.PBKey)
			
 
				 	//}
			
 
				 }
			
 
				+
			
 
				+// storeLock stores the PN, PB and PC keys of thisinfo (each with the value
				+// true) in PNKeyMap, PBKeyMap and PCKeyMap. All three stores are performed
				+// while PncbMayLock is held.
				+func storeLock(thisinfo *Info) {
				+	PncbMayLock.Lock()
				+	defer PncbMayLock.Unlock()
				+
				+	PNKeyMap.Store(thisinfo.PNKey, true)
				+	PBKeyMap.Store(thisinfo.PBKey, true)
				+	PCKeyMap.Store(thisinfo.PCKey, true)
				+}