Prechádzať zdrojové kódy

kv存储结构调整

wcj 6 rokov pred
rodič
commit
5bbf266ad6

+ 53 - 198
src/jy/extract/extract.go

@@ -24,12 +24,12 @@ import (
 var (
 	lock, lockrule, lockclear sync.RWMutex
 
-	cut     = ju.NewCut()                          //获取正文并清理
-	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask          //任务列表
-	ClearTaskList map[string]*ClearTask            //清理任务列表
-	saveLimit     = 200                            //抽取日志批量保存
-	PageSize      = 5000                           //查询分页
+	cut           = ju.NewCut()                          //获取正文并清理
+	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask                //任务列表
+	ClearTaskList map[string]*ClearTask                  //清理任务列表
+	saveLimit     = 200                                  //抽取日志批量保存
+	PageSize      = 5000                                 //查询分页
 	Fields        = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -402,6 +402,10 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 				// log.Debug("抽取-后置规则", tmp)
 			}
 		}
+
+		//for _, vvc := range j.Result["budget"] {
+		//log.Debug("-----", fmt.Sprintf("%+v", vvc))
+		//}
 		//全局后置规则
 		for _, v := range e.RuleBacks {
 			ExtRegBack(j, v, e.TaskInfo)
@@ -677,176 +681,36 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
 
 //lua脚本根据属性设置提取kv值
 func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} {
-	defer qu.Catch()
 	kvmap := map[string][]map[string]interface{}{}
 	for fieldname, field := range in.LFields {
-		lock.Lock()
-		tags := t[field] //获取对应标签库
-		lock.Unlock()
-		if tags == nil {
-			continue
-		}
 		for _, bl := range j.Block {
-			//冒号kv
-			if bl.ColonKV != nil {
-				kvs := bl.ColonKV.Kvs
-				kvs2 := bl.ColonKV.Kvs_2
-				// log.Debug("ColonKV1", kvs)
-				// log.Debug("ColonKV2", kvs2)
-				for _, tag := range tags {
-					for _, kv := range kvs {
-						if tag.Type == "string" {
-							if kv.Key == tag.Key {
-								text := ju.TrimLRSpace(kv.Value, "")
-								if text != "" {
-									kvmap[field] = append(kvmap[field], map[string]interface{}{
-										"field":       field,
-										"code":        in.Code,
-										"ruletext":    tag.Key,
-										"extfrom":     extfrom,
-										"sourcevalue": text,
-										"value":       text,
-										"type":        "colon1",
-										"matchtype":   "tag_string",
-										"blocktag":    bl.Tag,
-									})
-								}
-								break
-							}
-						} else if tag.Type == "regexp" {
-							if tag.Reg.MatchString(kv.Key) {
-								text := ju.TrimLRSpace(kv.Value, "")
-								if text != "" {
-									kvmap[field] = append(kvmap[field], map[string]interface{}{
-										"field":       field,
-										"code":        in.Code,
-										"ruletext":    tag.Key,
-										"extfrom":     extfrom,
-										"sourcevalue": text,
-										"value":       text,
-										"type":        "colon1",
-										"matchtype":   "tag_regexp",
-										"blocktag":    bl.Tag,
-									})
-								}
-								break
-							}
-						}
-					}
-					for _, kv := range kvs2 {
-						if tag.Type == "string" {
-							if kv.Key == tag.Key {
-								text := ju.TrimLRSpace(kv.Value, "")
-								if text != "" {
-									kvmap[field] = append(kvmap[field], map[string]interface{}{
-										"field":       field,
-										"code":        in.Code,
-										"ruletext":    tag.Key,
-										"extfrom":     extfrom,
-										"sourcevalue": text,
-										"value":       text,
-										"type":        "colon2",
-										"matchtype":   "tag_string",
-										"blocktag":    bl.Tag,
-									})
-								}
-								break
-							}
-						} else if tag.Type == "regexp" {
-							if tag.Reg.MatchString(kv.Key) {
-								text := ju.TrimLRSpace(kv.Value, "")
-								if text != "" {
-									kvmap[field] = append(kvmap[field], map[string]interface{}{
-										"field":       field,
-										"code":        in.Code,
-										"ruletext":    tag.Key,
-										"extfrom":     extfrom,
-										"sourcevalue": text,
-										"value":       text,
-										"type":        "colon2",
-										"matchtype":   "tag_regexp",
-										"blocktag":    bl.Tag,
-									})
-								}
-								break
-							}
-						}
-					}
+			tp := ""
+			for k, v := range []*ju.JobKv{bl.ColonKV, bl.SpaceKV, bl.TableKV} {
+				if k == 0 {
+					tp = "colon"
+				} else if k == 1 {
+					tp = "space"
+				} else if k == 2 {
+					tp = "table"
 				}
-			}
-			//空格kv
-			if bl.SpaceKV != nil {
-				kvs := bl.SpaceKV.Kvs
-				// log.Debug("SpaceKV", kvs)
-				for _, tag := range tags {
-					for _, kv := range kvs {
-						if tag.Type == "string" {
-							if kv.Key == tag.Key {
-								text := ju.TrimLRSpace(kv.Value, "")
-								if text != "" {
-									kvmap[field] = append(kvmap[field], map[string]interface{}{
-										"field":       field,
-										"code":        in.Code,
-										"ruletext":    tag.Key,
-										"extfrom":     extfrom,
-										"sourcevalue": text,
-										"value":       text,
-										"type":        "space",
-										"matchtype":   "tag_string",
-										"blocktag":    bl.Tag,
-									})
-								}
-								break
-							}
-						} else if tag.Type == "regexp" {
-							if tag.Reg.MatchString(kv.Key) {
-								text := ju.TrimLRSpace(kv.Value, "")
-								if text != "" {
-									kvmap[field] = append(kvmap[field], map[string]interface{}{
-										"field":       field,
-										"code":        in.Code,
-										"ruletext":    tag.Key,
-										"extfrom":     extfrom,
-										"sourcevalue": text,
-										"value":       text,
-										"type":        "space",
-										"matchtype":   "tag_regexp",
-										"blocktag":    bl.Tag,
-									})
-								}
-								break
-							}
-						}
-					}
+				if v == nil || v.KvTags == nil {
+					continue
 				}
-			}
-			//表格kv
-			if bl.TableKV != nil {
-				tkv := bl.TableKV
-				// log.Debug("tkv", tkv)
-				for k, v := range tkv.Kv {
-					if k == fieldname {
-						if len(tags) > -tkv.KvIndex[fieldname] {
-							ruletext := ""
-							if fieldname == "项目名称" && -tkv.KvIndex[fieldname] == -100 {
-								ruletext = "项目名称"
-							} else {
-								ruletext = tags[-tkv.KvIndex[fieldname]].Key
-							}
-							kvmap[field] = append(kvmap[field], map[string]interface{}{
-								"field":       field,
-								"code":        in.Code,
-								"ruletext":    ruletext,
-								"extfrom":     "table",
-								"sourcevalue": v,
-								"value":       v,
-								"type":        "table",
-								"matchtype":   "tag_string",
-								"blocktag":    bl.Tag,
-							})
-						} else { //涉及其他待处理
-							// log.Debug(tags)
-						}
+				for _, vv := range v.KvTags[fieldname] {
+					text := ju.TrimLRSpace(vv.Value, "")
+					if text != "" {
+						kvmap[field] = append(kvmap[field], map[string]interface{}{
+							"field":       field,
+							"code":        in.Code,
+							"ruletext":    vv.Key,
+							"extfrom":     extfrom,
+							"sourcevalue": text,
+							"value":       text,
+							"type":        tp,
+							"matchtype":   "tag_string",
+							"blocktag":    bl.Tag,
+							"weight":      vv.Weight,
+						})
 					}
 				}
 			}
@@ -1305,28 +1169,17 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				blocks = append(blocks, ju.BlockAndTag{v.Tag, tmpblock})
 			}
 			//把所有kv组装成一个字符串,存库
-			if v.ColonKV != nil {
-				for ck, cv := range v.ColonKV.Kv {
-					kvtext.WriteString(ck)
-					kvtext.WriteString(":")
-					kvtext.WriteString(cv)
-					kvtext.WriteString(" ")
-				}
-			}
-			if v.SpaceKV != nil {
-				for sk, sv := range v.SpaceKV.Kv {
-					kvtext.WriteString(sk)
-					kvtext.WriteString(":")
-					kvtext.WriteString(sv)
-					kvtext.WriteString(" ")
+			for _, jv := range []*ju.JobKv{v.ColonKV, v.SpaceKV, v.TableKV} {
+				if jv == nil {
+					continue
 				}
-			}
-			if v.TableKV != nil {
-				for tk, tv := range v.TableKV.Kv {
-					kvtext.WriteString(tk)
-					kvtext.WriteString(":")
-					kvtext.WriteString(tv)
-					kvtext.WriteString(" ")
+				for jv_k, jv_v := range jv.KvTags {
+					for _, jv_vv := range jv_v {
+						kvtext.WriteString(jv_k)
+						kvtext.WriteString(":")
+						kvtext.WriteString(jv_vv.Value)
+						kvtext.WriteString(" ")
+					}
 				}
 			}
 		}
@@ -1336,7 +1189,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if len(blocks) > 0 {
 			tmp["blocks"] = blocks
 		}
-		tmp["extract_content"] = j.Content
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
 				for field, _ := range e.Fields {
@@ -1401,12 +1253,15 @@ func otherNeedSave(j *ju.Job, result map[string][]*ju.ExtField, e *ExtractTask)
 	for _, v := range j.Block {
 		//
 		for _, vv := range []*ju.JobKv{v.ColonKV, v.TableKV, v.SpaceKV} {
-			if vv == nil || vv.KvTag == nil {
+			if vv == nil || vv.KvTags == nil {
 				continue
 			}
-			for kkk, vvv := range vv.KvTag {
-				if vvv.Weight == ju.RetainKvWeight {
-					kv[kkk] = kv[kkk] + 1
+			for kkk, vvv := range vv.KvTags {
+				for _, vvvv := range vvv {
+					if vvvv.IsInvalid {
+						kv[kkk] = kv[kkk] + 1
+						break
+					}
 				}
 			}
 		}
@@ -1554,7 +1409,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 2 - 2
src/jy/extract/extractInit.go

@@ -732,7 +732,7 @@ func (e *ExtractTask) InitTag() {
 			for k, key := range tmp {
 				tag := &Tag{Type: "string", Key: key.(string)}
 				e.Tag[field] = append(e.Tag[field], tag)
-				tab.Items[k] = &ju.Tag{key.(string), 0 - k, nil}
+				tab.Items[k] = &ju.Tag{"", key.(string), 0 - k, nil, false}
 			}
 			sort.Sort(tab.Items)
 			ju.TagdbTable[fname] = &tab
@@ -749,7 +749,7 @@ func (e *ExtractTask) InitTag() {
 			for k, key := range tmp {
 				tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))}
 				e.Tag[field] = append(e.Tag[field], tag)
-				tab.Items[k] = &ju.Tag{key.(string), 0 - k, regexp.MustCompile(key.(string))}
+				tab.Items[k] = &ju.Tag{"", key.(string), 0 - k, regexp.MustCompile(key.(string)), false}
 			}
 			sort.Sort(tab.Items)
 			ju.TagdbTable[fname+"_reg"] = &tab

+ 3 - 3
src/jy/pretreated/analykv.go

@@ -23,7 +23,7 @@ var matchkh = map[string]string{
 	"〖": "〗",
 }
 
-func GetKvFromtxt(con, tag string, from int) ([]*u.Kv, map[string]string, map[string]*u.Tag) {
+func GetKvFromtxt(con, tag string, from int) ([]*u.Kv, map[string][]*u.Tag) {
 	res := FindKv(TextAfterRemoveTable(con), tag, from)
 	kvs := []*u.Kv{}
 	for _, k := range res.Keys {
@@ -35,8 +35,8 @@ func GetKvFromtxt(con, tag string, from int) ([]*u.Kv, map[string]string, map[st
 			})
 		}
 	}
-	kv, tagKv := KvTagsToKV(kvs, tag, nil, from)
-	return kvs, kv, tagKv
+	kvTags := GetKvTags(kvs, tag, nil)
+	return kvs, kvTags
 }
 
 type Line struct {

+ 39 - 85
src/jy/pretreated/analystep.go

@@ -6,7 +6,6 @@ package pretreated
 import (
 	"encoding/json"
 	"jy/util"
-	qutil "qfw/util"
 	"strings"
 
 	"github.com/PuerkitoBio/goquery"
@@ -30,37 +29,20 @@ func AnalyStart(job *util.Job) {
 		}
 	}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock) //分块
-	if len(blockArrays) > 0 { //有分块
+	if len(blockArrays) > 0 {                                                //有分块
 		//从块里面找分包
 		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title) //从块里面找分包
 		for _, bl := range blockArrays {
 			if len([]rune(bl.Text)) > 80 {
 				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock)
-			}
-			//块中再查找表格(块,处理完把值赋到块)
-			t1, _ := ComputeConRatio(bl.Text, 2)
-			if len(t1) > 0 {
-				job.HasTable = 1
-				for i := 0; i < len(tabs); i++ {
-					bl := &util.Block{}
-					//添加标识:文本中有table
-					tabres := AnalyTableV2(t1[0], job.Category, bl.Title, bl.Text, 2, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
-					processTableResult(tabres, bl, job)                                                             //分析table解析结果
-					if bl.Title == "" && tabres.BlockTag != "" {
-						bl.Title = tabres.BlockTag
-					}
-					if len(bl.TableKV.Kv) > 0 {
-						bl.Text = tabs[i].Text()
-						job.Block = append(job.Block, bl)
-					}
+				for _, bl_bl := range bl.Block {
+					processTableInBlock(bl_bl, job)
 				}
-				//				for k, v := range bl.TableKV.Kv {
-				//					log.Println("bl.TableKV.Kv", k, v)
-				//				}
 			}
+			processTableInBlock(bl, job)
+			//新加 未分块table中未能解析到中标候选人,从正文中解析
 			if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
-				//新加table未找到winnerorder, 从分块文本中找中标候选人
-				job.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
+				bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
 			}
 			job.Block = append(job.Block, bl)
 		}
@@ -72,17 +54,9 @@ func AnalyStart(job *util.Job) {
 			newCon = TextAfterRemoveTable(con)
 			job.BlockPackage = FindPackageFromText(job.Title, newCon)
 			for i := 0; i < len(tabs); i++ {
-				bl := &util.Block{}
 				//添加标识:文本中有table
 				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
 				processTableResult(tabres, bl, job)                                                     //分析table解析结果
-				if bl.Title == "" && tabres.BlockTag != "" {
-					bl.Title = tabres.BlockTag
-				}
-				if len(bl.TableKV.Kv) > 0 {
-					bl.Text = tabs[i].Text()
-					job.Block = append(job.Block, bl)
-				}
 			}
 			//			for k, v := range bl.TableKV.Kv {
 			//				log.Println("bl.TableKV.Kv", k, v)
@@ -91,25 +65,30 @@ func AnalyStart(job *util.Job) {
 			//从正文里面找分包
 			job.BlockPackage = FindPackageFromText(job.Title, newCon)
 		}
-		//新加 未分块table中未能解析到中标候选人,从正文中解析
-		if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
-			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
-		}
 		FindProjectCode(newCon, job) //匹配项目编号
 		bl.Text = newCon
 		//调用kv解析
 		bl.ColonKV = GetKVAll(newCon, "", nil, 1)
 		bl.SpaceKV = SspacekvEntity.Entrance(newCon, "", nil)
+		//新加 未分块table中未能解析到中标候选人,从正文中解析
+		if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
+			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
+		}
 		job.Block = append(job.Block, bl)
 	}
-	for _, v := range job.BlockPackage {
-		block := &util.Block{}
-		block.ColonKV = v.ColonKV
-		block.TableKV = v.TableKV
-		block.SpaceKV = v.SpaceKV
-		block.Text = v.Text
-		block.Winnerorder = v.WinnerOrder
-		job.Block = append(job.Block, block)
+}
+
+func processTableInBlock(bl *util.Block, job *util.Job) {
+	//processTableInBlock: look for tables inside the block's text and feed each parsed table result into processTableResult for this block
+	tabs, _ := ComputeConRatio(bl.Text, 2) // second return value is discarded; presumably tabs are the table selections found in bl.Text - TODO confirm
+	for _, tab := range tabs {
+		job.HasTable = 1
+		//marker: the text contains a table (job.HasTable is set on the line above)
+		tabres := AnalyTableV2(tab, job.Category, bl.Title, bl.Text, 2, job.SourceMid, job.RuleBlock) //table-parsing entry point; returns the aggregated table result object
+		processTableResult(tabres, bl, job)                                                           //analyse the table parsing result; NOTE(review): processTableResult assigns block.TableKV on every call, so with several tables the last one appears to win - confirm
+		if bl.Title == "" && tabres.BlockTag != "" {
+			bl.Title = tabres.BlockTag // use the table's block tag as the title only when the block has no title yet
+		}
 	}
 }
 
@@ -142,14 +121,16 @@ func FindProjectCode(newCon string, job *util.Job) {
 		jsonMap := make(map[string]string)
 		json.Unmarshal([]byte(proCode), &jsonMap)
 		jobKv := util.NewJobKv()
+		kvTags := map[string][]*util.Tag{}
 		for k, v := range jsonMap {
+			kvTags[k] = append(kvTags[k], &util.Tag{Key: k, Value: v})
 			tmpkv := new(util.Kv)
 			tmpkv.Line = k + v
 			tmpkv.Key = k
 			tmpkv.Value = v
 			jobKv.Kvs = append(jobKv.Kvs, tmpkv)
 		}
-		jobKv.Kv = jsonMap
+		jobKv.KvTags = kvTags
 		blCode.ColonKV = jobKv
 		job.Block = append(job.Block, blCode)
 	}
@@ -158,20 +139,7 @@ func FindProjectCode(newCon string, job *util.Job) {
 //分析table解析结果
 func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 	//解析结果中的kv
-	kv := map[string]string{}
-	for k, v := range tabres.SortKV.Map {
-		kv[k] = qutil.ObjToString(v)
-	}
-	kvIndex := map[string]int{}
-	for k, v := range tabres.SortKVWeight {
-		kvIndex[k] = v
-	}
-	KvTag := map[string]*util.Tag{}
-	for k, _ := range tabres.SortKV.NotTagKey {
-		KvTag[k] = &util.Tag{Weight: util.RetainKvWeight}
-	}
-	block.TableKV = &util.JobKv{Kv: kv, KvIndex: kvIndex, KvTag: KvTag}
-
+	block.TableKV = &util.JobKv{KvTags: tabres.KvTags}
 	//分包
 	tablePackage := map[string]*util.BlockPackage{}
 	if tabres.IsMultiPackage {
@@ -184,23 +152,19 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 			//解析kv
 			//找到key是“包1中标单位”这种的key,过滤掉包1,再次到标签库中匹配
 			labelKVs := []*util.Kv{}
-			if blockPackage.TableKV != nil && blockPackage.TableKV.Kv != nil {
-				for tk, tv := range blockPackage.TableKV.Kv {
-					if regReplKey.MatchString(tk) || regSplit.MatchString(tk) {
-						labelKVs = append(labelKVs, &util.Kv{
-							Key:   tk,
-							Value: tv,
-						})
+			if blockPackage.TableKV != nil && len(blockPackage.TableKV.KvTags) > 0 {
+				for tk, tv := range blockPackage.TableKV.KvTags {
+					for _, tvv := range tv {
+						if regReplKey.MatchString(tk) || regSplit.MatchString(tk) {
+							labelKVs = append(labelKVs, &util.Kv{
+								Key:   tk,
+								Value: tvv.Value,
+							})
+						}
 					}
 				}
 			}
-			labelKV, _ := KvTagsToKV(labelKVs, "", nil, 2)
-			for lk, lv := range labelKV {
-				if blockPackage.TableKV.Kv[lk] != "" {
-					continue
-				}
-				blockPackage.TableKV.Kv[lk] = lv
-			}
+			blockPackage.TableKV.KvTags = GetKvTags(labelKVs, "", nil)
 			tablePackage[k] = blockPackage
 		}
 	}
@@ -234,24 +198,14 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 				if bv.ColonKV == nil {
 					bv.ColonKV = util.NewJobKv()
 				}
-				for k, v := range tv.ColonKV.Kv {
-					if bv.ColonKV.Kv[k] != "" {
-						continue
-					}
-					bv.ColonKV.Kv[k] = v
-				}
+				MergeKvTags(bv.ColonKV.KvTags, tv.ColonKV.KvTags)
 			}
 			//
 			if tv.TableKV != nil {
 				if bv.TableKV == nil {
 					bv.TableKV = util.NewJobKv()
 				}
-				for k, v := range tv.TableKV.Kv {
-					if bv.TableKV.Kv[k] != "" {
-						continue
-					}
-					bv.TableKV.Kv[k] = v
-				}
+				MergeKvTags(bv.TableKV.KvTags, tv.TableKV.KvTags)
 			}
 			//
 			if tv.Origin != "" {

+ 162 - 209
src/jy/pretreated/analytable.go

@@ -3,7 +3,6 @@ package pretreated
 import (
 	"fmt"
 	u "jy/util"
-	"log"
 	qutil "qfw/util"
 	"regexp"
 	"strings"
@@ -126,27 +125,27 @@ func IsHide(g *goquery.Selection) (b bool) {
 
 //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
 //待扩展,暂不支持正则标签库
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []string, weight []int, v1, returntag string, b bool) {
-	k1, k2 = []string{}, []string{}
-	weight = []int{}
-	tk := k
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[string][]*u.Tag, returntag string) {
+	kvTags = map[string][]*u.Tag{}
+	v1 := ""
 	if sv, sok := v.(string); sok { //取KV
 		v1 = sv
 	} else if sv, sok := v.([]string); sok { //是数组先默认取第一个
 		v1 = sv[0]
 	}
 	//对值单位的处理   (预算|费|价|额|规模|投资)
-	if moneyreg.MatchString(tk) {
-		v1 += GetMoneyUnit(tk, v1)
+	if moneyreg.MatchString(k) {
+		v1 += GetMoneyUnit(k, v1)
 	}
 	//先清理key
 	//u.Debug(1, k, v1)
-	k = ClearKey(k, 2)
+	k1 := ClearKey(k, 2)
 	//u.Debug(2, k)
 	//取标准key
-	res := u.GetTags(k)
-	if len(res) == 0 && tk != k {
-		res = u.GetTags(tk)
+	res := u.GetTags(k1)
+	if len(res) == 0 && k1 != k {
+		res = u.GetTags(k)
+		k1 = k
 	}
 	//log.Println(k, res)
 	//	if len(res) == 0 {
@@ -154,50 +153,44 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1, k2 []str
 	//	}
 	//当取到标准化值时,放入数组
 	if len(res) > 0 {
-		b = true
 		for _, t1 := range res {
-			k1 = append(k1, t1.Value)
-			weight = append(weight, t1.Weight)
+			//降低冒号值的权重
+			if MhSpilt.MatchString(v1) {
+				t1.Weight -= 50
+			}
+			kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
 		}
 		//k1 = res[0].Value
-	}
-	//没有取到标准化key时,对中标金额和中标单位的逻辑处理
-	if !b {
+	} else {
+		kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
+		//没有取到标准化key时,对中标金额和中标单位的逻辑处理
 		if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
 			if tabletag == "" {
 				returntag = "中标情况"
 			}
-			k1 = append(k1, "中标金额")
-			weight = append(weight, -100)
-			b = true
+			kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: "中标金额", Value: v1, Weight: -100})
 		} else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
 			filter_zbdw_v.MatchString(v1) {
-			k1 = append(k1, "中标单位")
-			weight = append(weight, -100)
+			kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: "中标单位", Value: v1, Weight: -100})
 			if tabletag == "" {
 				returntag = "中标情况"
 			}
-			b = true
 		} else {
-			k2 = append(k2, k)
-		}
-	}
-	//对上一步没有取到标准化key的进一步处理
-	if !b {
-		if tabletag == "" {
+			//对上一步没有取到标准化key的进一步处理
+			if tabletag == "" {
 
-		}
-		if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
-			//u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
-			if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
-				k1 = append(k1, "中标金额")
-				weight = append(weight, -100)
-				b = true
-			} /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
-				k1 = append(k1, "中标单位")
-				weight = append(weight, -100)
-				b = true
-			}*/
+			}
+			if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
+				//u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
+				if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
+					kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: "中标金额", Value: v1, Weight: -100})
+
+				} /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
+					k1 = append(k1, "中标单位")
+					weight = append(weight, -100)
+					b = true
+				}*/
+			}
 		}
 	}
 	return
@@ -230,38 +223,19 @@ func (table *Table) KVFilter() {
 		v := table.SortKV.Map[k]
 		if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
 			k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
-			k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
+			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
 			//qutil.Debug(k, v, k1, w1, v1, tag, b)
-			if b {
-				//降低冒号值的权重
-				if MhSpilt.MatchString(v1) {
-					for pos, _ := range k1 {
-						w1[pos] -= 50
-					}
-				}
-				if tag != "" && table.Tag == "" {
-					table.Tag = tag
-				}
-				for pos, k2 := range k1 { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
-					if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
-						table.StandKV[k2] = v1 //本节点
-						table.StandKVWeight[k2] = w1[pos]
-					}
-					//					 else if k2 == "中标金额" {
-					//						//						u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2]))
-					//						if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
-					//							table.StandKV[k2] = v1
-					//						}
-					//					}
-				}
-			} else {
-				if table.StandKV[k] == "" && qutil.ObjToString(v) != "" {
-					table.StandKV[k] = qutil.ObjToString(v)
-					table.StandKVWeight[k] = 0
-				}
-			}
-			for _, n_k2 := range n_k1 {
-				table.SortKV.NotTagKey[n_k2] = true
+			if tag != "" && table.Tag == "" {
+				table.Tag = tag
+			}
+			for kk, vv := range kvTags { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
+				table.StandKV[kk] = append(table.StandKV[kk], vv...)
+				//					 else if k2 == "中标金额" {
+				//						//						u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2]))
+				//						if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
+				//							table.StandKV[k2] = v1
+				//						}
+				//					}
 			}
 		} else {
 			//u.Debug(k, v, "---------")
@@ -315,11 +289,10 @@ func (table *Table) KVFilter() {
 		if len(table.WinnerOrder) > 0 {
 			//中标候选人合并
 			winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
-			if table.StandKV["中标单位"] == "" {
+			if len(table.StandKV["中标单位"]) == 0 {
 				ent := table.WinnerOrder[0]["entname"]
 				if ent != nil {
-					table.StandKV["中标单位"], _ = ent.(string)
-					table.StandKVWeight["中标单位"] = -25
+					table.StandKV["中标单位"] = append(table.StandKV["中标单位"], &u.Tag{Key: "中标单位", Value: qutil.ObjToString(ent), Weight: -25})
 				}
 			}
 		} else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
@@ -479,26 +452,17 @@ func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
 					}
 				}
 			}
-			k1, n_k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
-			if b {
-				if tag != "" && table.Tag == "" {
-					table.Tag = tag
-				}
-				for pos, k2 := range k1 {
-					if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
-						table.StandKV[k2] = v1 //本节点
-						table.StandKVWeight[k2] = w1[pos]
-					}
-					//				else if k2 == "中标金额" {
-					//					if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
-					//						table.StandKV[k2] = v1
-					//					}
-					//				}
-				}
-			} else {
-				for _, n_k2 := range n_k1 {
-					table.SortKV.NotTagKey[n_k2] = true
-				}
+			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v)
+			if tag != "" && table.Tag == "" {
+				table.Tag = tag
+			}
+			for kk, vv := range kvTags {
+				table.StandKV[kk] = append(table.StandKV[kk], vv...)
+				//				else if k2 == "中标金额" {
+				//					if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
+				//						table.StandKV[k2] = v1
+				//					}
+				//				}
 			}
 		}
 	}
@@ -538,11 +502,8 @@ func (table *Table) analyTdKV() {
 			//u.Debug(td.BH, td.Val, td.SonTableResult)
 			if td.SonTableResult != nil {
 				//u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
-				for _, k3 := range td.SonTableResult.SortKV.Keys {
-					if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] {
-						table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3])
-						table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3]
-					}
+				for k3, v3 := range td.SonTableResult.KvTags {
+					table.StandKV[k3] = append(table.StandKV[k3], v3...)
 				}
 				//中标候选人排序
 				if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
@@ -570,15 +531,23 @@ func (table *Table) MergerToTableresult() {
 					bp.TableKV = u.NewJobKv()
 				}
 				v1 := v.(*u.BlockPackage)
-				if v1.TableKV != nil && v1.TableKV.Kv != nil {
-					for k2, v2 := range v1.TableKV.Kv {
+				if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 {
+					for k2, v2 := range v1.TableKV.KvTags {
 						if bp.TableKV == nil {
 							bp.TableKV = u.NewJobKv()
 						}
-						if bp.TableKV.Kv[k2] == "" || (v1.TableKV.KvTag[k2] != nil && bp.TableKV.KvTag[k2] != nil && v1.TableKV.KvTag[k2].Weight > bp.TableKV.KvTag[k2].Weight) {
-							//可能会报错 assignment to entry in nil map
-							bp.TableKV.Kv[k2] = v2
-							bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+						isExists := false
+						for _, v2v := range v2 {
+							for _, v2vv := range bp.TableKV.KvTags[k2] {
+								if v2v.Value == v2vv.Value {
+									isExists = true
+									break
+								}
+							}
+							if !isExists {
+								bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
+								bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+							}
 						}
 					}
 				}
@@ -594,25 +563,12 @@ func (table *Table) MergerToTableresult() {
 		//		u.Debug(table, table.TableResult, str)
 	}
 	//遍历标准key到tableresult.sortkv中
-	for k, v := range table.StandKV {
-		if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] || strings.Contains(table.Tag, "变更") {
-			v = strings.Replace(v, "__", "", -1)
-			if table.TableResult.SortKV.Map[k] == nil {
-				table.TableResult.SortKV.AddKey(k, v) //父集
-			} else {
-				if k == "项目编号" { //项目编号存在,又匹配到全为中文跳过
-
-					if regHz.MatchString(v) {
-						continue
-					}
-				}
-				table.TableResult.SortKV.ReplaceKey(k, v, k)
-			}
-			table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
-		} else if table.TableResult.SortKV.Map[k] != nil {
-			//u.Debug(k, v, table.TableResult.SortKV.Map[k], "..............")
+	for _, v := range table.StandKV {
+		for _, vv := range v {
+			vv.Value = strings.Replace(vv.Value, "__", "", -1)
 		}
 	}
+	MergeKvTags(table.TableResult.KvTags, table.StandKV)
 	//表格的块标签
 	if table.TableResult.BlockTag == "" && table.Tag != "" {
 		table.TableResult.BlockTag = table.Tag
@@ -646,11 +602,11 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
 	tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
 	//可以有多个table
 	//for _, table := range tabs {
-		//隐藏表格跳过
-		if IsHide(tabs) {
-			return
-		}
-		tabres.GoqueryTabs = tabs
+	//隐藏表格跳过
+	if IsHide(tabs) {
+		return
+	}
+	tabres.GoqueryTabs = tabs
 	//}
 	//解析表格集
 	tabres.Analy()
@@ -665,26 +621,29 @@ func (ts *TableResult) Analy() {
 		MatchMap: map[string]map[string]bool{},
 	}
 	//for _, table := range ts.GoqueryTabs {
-		tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
-		//核心模块
-		tsw := tn.Analy(contactFormat)
-		for _, tab := range tsw {
-			if len(tab.TRs) > 0 {
-				tabs = append(tabs, tab)
-			}
-			//fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
+	tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
+	//核心模块
+	tsw := tn.Analy(contactFormat)
+	for _, tab := range tsw {
+		if len(tab.TRs) > 0 {
+			tabs = append(tabs, tab)
 		}
-		//tn.SonTables = append(tn.SonTables, tn)
+		//fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
+	}
+	//tn.SonTables = append(tn.SonTables, tn)
 	//}
 	//统一合并,考虑统一多表格是多包的情况---新增
 	if len(tabs) > 1 {
 		pns := map[string]string{}
 		pnarr := []string{}
 		for _, table := range tabs {
-			pn := table.StandKV["项目名称"]
-			if pn != "" && TitleReg.MatchString(pn) {
-				pnarr = append(pnarr, pn)
-				matchres := TitleReg.FindAllStringSubmatch(pn, -1)
+			if len(table.StandKV["项目名称"]) == 0 {
+				continue
+			}
+			pn := table.StandKV["项目名称"][0]
+			if pn != nil && pn.Value != "" && TitleReg.MatchString(pn.Value) {
+				pnarr = append(pnarr, pn.Value)
+				matchres := TitleReg.FindAllStringSubmatch(pn.Value, -1)
 				if len(matchres) == 1 && len(matchres[0]) > 0 {
 					v1 := u.PackageNumberConvert(matchres[0][0])
 					pns[v1] = matchres[0][0]
@@ -693,7 +652,9 @@ func (ts *TableResult) Analy() {
 					bp.Origin = matchres[0][0]
 					bp.TableKV = u.NewJobKv()
 					for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
-						bp.TableKV.Kv[k] = table.StandKV[k]
+						if len(table.StandKV[k]) > 0 {
+							bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
+						}
 					}
 					bp.WinnerOrder = table.WinnerOrder
 					if table.BlockPackage.Map[v1] == nil {
@@ -718,8 +679,7 @@ func (ts *TableResult) Analy() {
 				}
 			}
 			if btrue {
-				ts.SortKV.AddKey("项目名称", pname)
-				ts.SortKVWeight["项目名称"] = 100
+				ts.KvTags["项目名称"] = append(ts.KvTags["项目名称"], &u.Tag{Key: "项目名称", Value: pname, Weight: 100})
 				for _, table := range tabs {
 					table.BPackage = true
 					//预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态
@@ -733,7 +693,9 @@ func (ts *TableResult) Analy() {
 								bp.TableKV = u.NewJobKv()
 							}
 							for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
-								bp.TableKV.Kv[k] = table.StandKV[k]
+								if len(table.StandKV[k]) > 0 {
+									bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
+								}
 								if nk < 4 {
 									delete(table.StandKV, k)
 								}
@@ -789,7 +751,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
 			td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
 			//num++
 			TR.AddTD(td)
-			if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0{ //删除一个tr,tr中所有td是空值的
+			if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
 				empty++
 				if tds.Size() == empty {
 					tdTextIsNull = true
@@ -823,8 +785,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			if table.Tag != "" {
 				_, _, b := CheckMultiPackage(table.Tag, "")
 				if b {
-					table.StandKV["项目名称"] = table.Tag
-					table.StandKVWeight["项目名称"] = -100
+					table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -100})
 				}
 			}
 			table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
@@ -836,29 +797,12 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			}
 			//判断是否是多包,并处理分包的//遍历td分块
 			table.CheckMultiPackageByTable()
-			//str := "\n"
-			//for k, v := range table.StandKV {
-			//	str += fmt.Sprintf("_==___%s:%v\n", k, v)
-			//	if table.TableResult.SortKV.Map[k] == nil {
-			//		table.TableResult.SortKV.AddKey(k, v)
-			//		table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
-			//	}
-			//}
 			res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
 			if !res {
 				//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
 				table.KVFilter()
 			}
-			for k, v := range table.StandKV { //过滤后的标准化kv
-				if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] {
-					table.TableResult.SortKV.AddKey(k, v)
-					table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
-				}
-			}
-			for k, v := range table.SortKV.NotTagKey {
-				table.TableResult.SortKV.NotTagKey[k] = v
-			}
-			//u.Debug(str)
+			//MergeKvTags(table.TableResult.KvTags, table.StandKV)
 		}
 	}
 	return ts
@@ -902,8 +846,7 @@ func (table *Table) tableSubDemolitionTable() []*Table {
 				tab1 = NewTable("", table.TableResult, table.Goquery)
 				tab1.BSplit = true
 				if tmn[rownum] != nil {
-					tab1.StandKV["项目名称"] = tmn[rownum]["tag"].(string)
-					tab1.StandKVWeight["项目名称"] = -100
+					tab1.StandKV["项目名称"] = append(tab1.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: tmn[rownum]["tag"].(string), Weight: -100})
 				}
 				ts = append(ts, tab1)
 			}
@@ -2092,11 +2035,20 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 			}
 		} else if val, bvs := v1.(string); bvs && len(index) == 1 {
 			//删除子包的kv
-			k1tags, _, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
-			if len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0]) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) {
-				//log.Println("remove", k1, val)
-				tn.SortKV.RemoveKey(k1)
-				tn.assemblePackage(k1, val, index[0])
+			kvTags, _ := CommonDataAnaly(k1, "", "", val)
+			for kvTag_k, kvTag_v := range kvTags {
+				hasValid := false
+				for _, kvTag_vv := range kvTag_v {
+					if kvTag_vv.IsInvalid {
+						continue
+					}
+					hasValid = true
+				}
+				if hasValid && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k) {
+					tn.SortKV.RemoveKey(k1)
+					tn.assemblePackage(k1, val, index[0])
+					//log.Println("remove", k1, val)
+				}
 			}
 			//u.Debug("----==2==-------", k1)
 		}
@@ -2131,18 +2083,34 @@ func (tn *Table) isGoonNext() {
 					if bp.TableKV == nil {
 						bp.TableKV = u.NewJobKv()
 					}
-					for k2, v2 := range mv.ColonKV.Kv {
-						if bp.TableKV.Kv[k2] == "" {
-							bp.TableKV.Kv[k2] = v2
-							bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2]
-							bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+					for k2, v2 := range mv.ColonKV.KvTags { // merge the colon-kv tags into the package's TableKV
+						for _, v2v := range v2 {
+							isExists := false // reset for every candidate tag
+							for _, v2vv := range bp.TableKV.KvTags[k2] {
+								if v2v.Value == v2vv.Value {
+									isExists = true
+									break
+								}
+							}
+							if !isExists {
+								bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
+								bp.Text += fmt.Sprintf("%v:%v\n", k2, v2v.Value) // print the value itself, not the []*Tag slice
+							}
 						}
 					}
-					for k2, v2 := range mv.SpaceKV.Kv {
-						if bp.TableKV.Kv[k2] == "" {
-							bp.TableKV.Kv[k2] = v2
-							bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2]
-							bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+					for k2, v2 := range mv.SpaceKV.KvTags { // merge the space-kv tags into TableKV as the removed code did
+						for _, v2v := range v2 {
+							isExists := false // reset for every candidate tag
+							for _, v2vv := range bp.TableKV.KvTags[k2] { // only bp.TableKV is nil-guarded above; bp.SpaceKV may be nil
+								if v2v.Value == v2vv.Value {
+									isExists = true
+									break
+								}
+							}
+							if !isExists {
+								bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
+								bp.Text += fmt.Sprintf("%v:%v\n", k2, v2v.Value) // print the value itself, not the []*Tag slice
+							}
 						}
 					}
 				}
@@ -2314,22 +2282,9 @@ func (tn *Table) assemblePackage(k1, v1, key string) {
 		bp.TableKV = u.NewJobKv()
 	}
 	if v1 != "" {
-		k2, _, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
-		if bf {
-			for pos, k3 := range k2 {
-				if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
-					bp.TableKV.Kv[k3] = v2
-					bp.TableKV.KvTag[k3] = &u.Tag{Value: v2, Weight: w1[pos]}
-				} else {
-					bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
-					//if tn.SortKV.Map[k3] == nil {
-					//	tn.SortKV.AddKey(k3, v2) //添加匹配到抽取关键词的key,value
-					//	tn.StandKVWeight[k3]=w1[pos]
-					//}
-				}
-			}
-		} else {
-			bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
+		kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
+		for k3, v3 := range kvTags {
+			bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
 		}
 	}
 	k1 = regReplAllSpace.ReplaceAllString(k1, "")
@@ -2696,9 +2651,9 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 	} else {
 		//
 		if !strings.HasSuffix(td_k, "方式") {
-			_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
-			if len(kTag) == 1 {
-				tagVal, _ := u.FirstKeyValueInMap(kTag)
+			kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts)
+			if len(kvTags) == 1 {
+				tagVal, _ := u.FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
 					return
 				}
@@ -2719,8 +2674,6 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 			}
 		}
 		td.SortKV.AddKey(myContactType+td_k, td_v)
-		log.Println(myContactType, td_k, td_v)
-		delete(td.SortKV.NotTagKey, td_k)
 	}
 }
 
@@ -2738,8 +2691,8 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
 		if ContactInfoVagueReg.MatchString(td_k) {
 			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
 		} else if k == "采购单位" { //打标签,权重高的重新覆盖
-			_, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
-			tagVal, weightVal := u.FirstKeyValueInMap(kTag)
+			kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"})
+			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
 			if tagVal == k {
 				if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
 					weightMap[k][k] = weightVal.(int)
@@ -2792,9 +2745,9 @@ func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string
 	}
 	isAddToMatchMap := true
 	if !strings.HasSuffix(td_k, "方式") {
-		_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
-		if len(kTag) == 1 {
-			tagVal, weightVal := u.FirstKeyValueInMap(kTag)
+		kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts)
+		if len(kvTags) == 1 {
+			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
 			if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
 				isAddToMatchMap = false
 			}

+ 52 - 57
src/jy/pretreated/colonkv.go

@@ -4,7 +4,7 @@ package pretreated
 import (
 	"jy/clear"
 	. "jy/util"
-	jutil "jy/util"
+	"log"
 	qutil "qfw/util"
 	"regexp"
 	"sort"
@@ -398,9 +398,9 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 						matchMap[ct_k] = map[string]bool{}
 					}
 					if !strings.HasSuffix(k, "方式") {
-						_, kTag := KvTagsToKV([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts, 1)
-						if len(kTag) == 1 {
-							tagVal, weightVal := FirstKeyValueInMap(kTag)
+						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts)
+						if len(kvTags) == 1 {
+							tagVal, weightVal := FirstKeyValueInMap(kvTags)
 							if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
 								isAddToMatchMap = false
 							}
@@ -430,8 +430,8 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 					}
 				}
 				if ct_k == "采购单位" { //打标签,权重高的重新覆盖
-					_, kTag := KvTagsToKV([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"}, 1)
-					tagVal, weightVal := FirstKeyValueInMap(kTag)
+					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"})
+					tagVal, weightVal := FirstKeyValueInMap(kvTags)
 					if tagVal == ct_k {
 						if weightMap[ct_k][ct_k] == nil || (weightVal != nil && weightVal.(int) > weightMap[ct_k][ct_k].(int)) {
 							weightMap[ct_k][ct_k] = weightVal.(int)
@@ -538,9 +538,9 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 		allMatchCount++
 		delete(totalIndexMap, myContactType)
 		if !strings.HasSuffix(k, "方式") {
-			_, kTag := KvTagsToKV([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts, 1)
-			if len(kTag) == 1 {
-				tagVal, _ := FirstKeyValueInMap(kTag)
+			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts)
+			if len(kvTags) == 1 {
+				tagVal, _ := FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
 					continue
 				}
@@ -633,33 +633,53 @@ func HasOrderContactType(text string) []string {
 func GetKVAll(content, title string, contactFormat *ContactFormat, from int) *JobKv { // extracts kvs from content and returns them with their tags
 	content = formatText(content, "kv")
 	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from)
-	m1, m1Weight := KvTagsToKV(m1Kvs, title, nil, from)
-	if m1 == nil {
-		m1 = map[string]string{}
+	kvTags := GetKvTags(m1Kvs, title, nil) // tag the kvs returned by colonkvEntity.entrance
+	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from)
+	MergeKvTags(kvTags, m2KvTags) // fold the GetKvFromtxt tags into kvTags
+	return &JobKv{
+		Kvs:    m1Kvs,
+		Kvs_2:  m2Kvs,
+		KvTags: kvTags,
 	}
-	m2Kvs, m2, m2Weight := GetKvFromtxt(content, title, from)
-	for k, v := range m2 {
-		if m1[k] == "" {
-			m1[k] = v
-			m1Weight[k] = m2Weight[k]
+}
+
+// MergeKvTags merges kvTags_2 into kvTags_1 in place. A tag is appended unless its Value is blank or kvTags_1 already holds a tag with the same Value and Weight under the same key.
+func MergeKvTags(kvTags_1, kvTags_2 map[string][]*Tag) {
+	for k, v := range kvTags_2 {
+		for _, vv := range v {
+			if strings.TrimSpace(vv.Value) == "" { // skip tags whose value is empty or whitespace only
+				continue
+			}
+			isExists := false
+			for _, vvv := range kvTags_1[k] {
+				if vvv.Value == vv.Value && vvv.Weight == vv.Weight { // duplicates are matched on Value and Weight only
+					isExists = true
+					break
+				}
+			}
+			if !isExists {
+				kvTags_1[k] = append(kvTags_1[k], vv) // shares the *Tag pointer (no copy); kvTags_1 must be a non-nil map
+			}
 		}
 	}
-	return &JobKv{
-		Kvs:   m1Kvs,
-		Kvs_2: m2Kvs,
-		Kv:    m1,
-		KvTag: m1Weight,
+}
+
+// PrintKvTags writes every tag in kvTags to the standard logger: the map key, then the tag's Key, Value, Weight and IsInvalid.
+func PrintKvTags(kvTags map[string][]*Tag) {
+	for k, v := range kvTags {
+		for _, vv := range v {
+			log.Println("kvTags===", k, "---", vv.Key, vv.Value, vv.Weight, vv.IsInvalid)
+		}
 	}
 }
 
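 // GetKvTags tags each kv in findkvs and returns the tags keyed by tag name; a kv with no matching tag is kept under its own key with IsInvalid set.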
-func KvTagsToKV(findkvs []*Kv, title string, tagdbs []string, from int) (map[string]string, map[string]*Tag) {
-	kvTags := map[string]*Tag{}
+func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
+	kvTags := map[string][]*Tag{}
 	if title != "" && BlockTagMap[title] {
-		kvTags[title] = &Tag{title, 0, nil}
+		kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
 	}
 	for _, findkv := range findkvs {
-		kvMap := map[string]string{}
 		k, val := findkv.Key, findkv.Value
 		//val是空的话,不打标签
 		if filterValue.MatchString(val) {
@@ -688,43 +708,18 @@ func KvTagsToKV(findkvs []*Kv, title string, tagdbs []string, from int) (map[str
 				}
 			}
 		}
-		if len(tags) == 0 {
-			//go AddtoNoMatchMap(key)
-			//Debug(key)
-			//continue
-			//由跳过修改为保留
-			tags = []*Tag{&Tag{k, jutil.RetainKvWeight, nil}}
-		}
-		for _, tk := range tags {
-			//分包过来给kv打标签的时候,只取第一个,后面的不覆盖
-			if kvTags[tk.Value] == nil || (kvTags[tk.Value].Weight < tk.Weight && from != 4) {
-				//				fc := StandardNameMap[tk.Value]
-				//				if (fc != nil && fc.CheckNum) || (moneyreg.MatchString(tk.Value)) {
-				//					val += GetMoneyUnit(k, val)
-				//				}
+		if len(tags) > 0 {
+			for _, tk := range tags {
 				if moneyreg.MatchString(tk.Value) {
 					val += GetMoneyUnit(k, val)
 				}
-				//Debug("KV-key", tk, val)
-				kvTags[tk.Value] = &Tag{val, tk.Weight, nil}
-				kvMap[tk.Value] = val
-				//Debug("KV-key", tk.Value, val, key, tk.Weight)
-			}
-		}
-	}
-	//
-	kv := map[string]string{}
-	kvWeight := map[string]*Tag{}
-	if len(kvTags) > 0 {
-		for k, v := range kvTags {
-			if kv[k] != "" {
-				continue
+				kvTags[tk.Value] = append(kvTags[tk.Value], &Tag{Key: k, Value: val, Weight: tk.Weight})
 			}
-			kv[k] = v.Value
-			kvWeight[k] = v
+		} else {
+			kvTags[key] = append(kvTags[key], &Tag{Key: k, Value: val, IsInvalid: true})
 		}
 	}
-	return kv, kvWeight
+	return kvTags
 }
 
 func FilterContactKey(key string) string {

+ 4 - 30
src/jy/pretreated/division.go

@@ -645,33 +645,12 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
 				if headKey != "" {
 					kvAgain := GetKVAll(text, "", nil, 4)
-					for kv_k, kv_v := range kvAgain.Kv {
-						if colonJobKv.Kv[kv_k] == "" {
-							colonJobKv.Kv[kv_k] = kv_v
-							colonJobKv.KvTag[kv_k] = kvAgain.KvTag[kv_k]
-						}
-					}
-				}
-				for kv_k, kv_v := range colonJobKv.Kv {
-					if kv_v == "" {
-						continue
-					}
-					if (*blockPackage)[index].ColonKV.Kv[kv_k] != "" {
-						continue
-					}
-					(*blockPackage)[index].ColonKV.Kv[kv_k] = kv_v
+					MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
 				}
+				MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
 				//合并空格kv
 				spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
-				for kv_k, kv_v := range spaceJobKv.Kv {
-					if kv_v == "" {
-						continue
-					}
-					if (*blockPackage)[index].SpaceKV.Kv[kv_k] != "" {
-						continue
-					}
-					(*blockPackage)[index].SpaceKV.Kv[kv_k] = kv_v
-				}
+				MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
 			} else {
 				newBpkg := &util.BlockPackage{
 					Origin:   bk,
@@ -683,12 +662,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
 				if headKey != "" {
 					kvAgain := GetKVAll(text, "", nil, 4)
-					for kv_k, kv_v := range kvAgain.Kv {
-						if finalKv.Kv[kv_k] == "" {
-							finalKv.Kv[kv_k] = kv_v
-							finalKv.KvTag[kv_k] = kvAgain.KvTag[kv_k]
-						}
-					}
+					MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
 				}
 				newBpkg.ColonKV = finalKv
 				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)

+ 3 - 4
src/jy/pretreated/spacekv.go

@@ -27,11 +27,10 @@ func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.Contac
 		kvMaps = append(kvMaps, kvMap...)
 	}
 	FormatContactKv(&kvMaps, title, nil, contactFormat)
-	kv, tagKv := KvTagsToKV(kvMaps, title, nil, 1)
+	kvTags := GetKvTags(kvMaps, title, nil)
 	return &util.JobKv{
-		Kvs:   kvMaps,
-		Kv:    kv,
-		KvTag: tagKv,
+		Kvs:    kvMaps,
+		KvTags: kvTags,
 	}
 }
 

+ 56 - 75
src/jy/pretreated/tablev2.go

@@ -22,13 +22,12 @@ type TableResult struct {
 	Itype          int         //1全文 2是块
 	BlockTag       string      //块标签
 	Html           string
-	Tabs           []*Table           //子表集合,子表中包含标准化kv或原始kv
-	GoqueryTabs    *goquery.Selection //goquery对象
-	TableSize      int                //子表的个数0,1,n
-	IsMultiPackage bool               //是否有子包
-	PackageMap     *SortMap           //子包对象的sortmap,含标准化过的
-	SortKV         *SortMap           //全局KVmap值,标准化处理过的
-	SortKVWeight   map[string]int     //全局KVmap值,标准化处理过的
+	Tabs           []*Table            //子表集合,子表中包含标准化kv或原始kv
+	GoqueryTabs    *goquery.Selection  //goquery对象
+	TableSize      int                 //子表的个数0,1,n
+	IsMultiPackage bool                //是否有子包
+	PackageMap     *SortMap            //子包对象的sortmap,含标准化过的
+	KvTags         map[string][]*u.Tag //全局KVmap值,标准化处理过的
 	WinnerOrder    []map[string]interface{}
 	BrandData      [][]map[string]string //品牌抽取结果
 	HasKey         int                   //有key
@@ -40,17 +39,16 @@ type TableResult struct {
 // NewTableResult builds a TableResult from the given arguments, with empty Tabs, GoqueryTabs, PackageMap and KvTags.
 func NewTableResult(Id interface{}, Toptype, BlockTag, con string, Itype int, ruleBlock *u.RuleBlock) *TableResult {
 	return &TableResult{
-		Id:           Id,
-		Toptype:      Toptype,
-		Html:         con,
-		Itype:        Itype,
-		BlockTag:     BlockTag,
-		Tabs:         []*Table{},
-		GoqueryTabs:  &goquery.Selection{},
-		PackageMap:   NewSortMap(),
-		SortKV:       NewSortMap(),
-		SortKVWeight: map[string]int{},
-		RuleBlock:    ruleBlock,
+		Id:          Id,
+		Toptype:     Toptype,
+		Html:        con,
+		Itype:       Itype,
+		BlockTag:    BlockTag,
+		Tabs:        []*Table{},
+		GoqueryTabs: &goquery.Selection{},
+		PackageMap:  NewSortMap(),
+		KvTags:      map[string][]*u.Tag{},
+		RuleBlock:   ruleBlock,
 	}
 }
 
@@ -135,22 +133,14 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 	ub, _ = DivideBlock("", txt, 2, table.TableResult.RuleBlock)
 	//看是否划块
 	if len(ub) > 0 {
-		colonKvWeight := map[string]int{}
-		spaceKvWeight := map[string]int{}
 		for _, bl := range ub {
 			//冒号kv
-			for bl_ck, bl_cv := range bl.ColonKV.Kv {
-				if td.SortKV.Map[bl_ck] == nil || bl.ColonKV.KvTag[bl_ck].Weight >= colonKvWeight[bl_ck] {
-					colonKvWeight[bl_ck] = bl.ColonKV.KvTag[bl_ck].Weight
-					td.SortKV.AddKey(bl_ck, bl_cv)
-				}
+			for bl_ck, bl_cv := range bl.ColonKV.KvTags {
+				td.SortKV.AddKey(bl_ck, bl_cv)
 			}
 			//空格kv
-			for bl_sk, bl_sv := range bl.SpaceKV.Kv {
-				if td.SortKV.Map[bl_sk] == nil || bl.SpaceKV.KvTag[bl_sk].Weight >= spaceKvWeight[bl_sk] {
-					spaceKvWeight[bl_sk] = bl.SpaceKV.KvTag[bl_sk].Weight
-					td.SortKV.AddKey(bl_sk, bl_sv)
-				}
+			for bl_sk, bl_sv := range bl.SpaceKV.KvTags {
+				td.SortKV.AddKey(bl_sk, bl_sv)
 			}
 		}
 	}
@@ -158,12 +148,12 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 	proCode := projectcodeReg.FindString(text)
 	if proCode != "" {
 		ckv := GetKVAll(proCode, "", nil, 1)
-		for k, v := range ckv.Kv {
+		for k, v := range ckv.KvTags {
 			td.SortKV.AddKey(k, v)
 		}
 	} else if proCode = projectcodeReg2.FindString(text); proCode != "" {
 		ckv := GetKVAll(proCode, "", nil, 1)
-		for k, v := range ckv.Kv {
+		for k, v := range ckv.KvTags {
 			td.SortKV.AddKey(k, v)
 		}
 	}
@@ -228,12 +218,11 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
 
 				//sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
 				td.BH = false
-				for k, v := range sonts.SortKV.Map {
+				for k, v := range sonts.KvTags {
 					if td.TR.Table.TableResult == nil {
 						td.TR.Table.TableResult = NewTableResult(sonts.Id, sonts.Toptype, sonts.BlockTag, sonts.Html, sonts.Itype, sonts.RuleBlock)
 					}
-					td.TR.Table.TableResult.SortKV.AddKey(k, v)
-					td.TR.Table.TableResult.SortKVWeight[k] = sonts.SortKVWeight[k]
+					td.TR.Table.TableResult.KvTags[k] = append(td.TR.Table.TableResult.KvTags[k], v...)
 				}
 				td.SonTableResult = sonts
 				//for _, k := range sonts.SortKV.Keys {
@@ -273,11 +262,23 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
 							tb1.AddKey(k, v)
 						} else {
 							bp := tb1.Map[k].(*u.BlockPackage)
-							if bp != nil && v1.TableKV != nil && v1.TableKV.Kv != nil {
-								for k2, v2 := range v1.TableKV.Kv {
-									if bp.TableKV.Kv != nil && bp.TableKV.Kv[k2] == "" {
-										bp.TableKV.Kv[k2] = v2
-										bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+							if bp != nil && v1.TableKV != nil { // merge the son table's package tags into the existing package
+								for k2, v2 := range v1.TableKV.KvTags {
+									if bp.TableKV == nil {
+										bp.TableKV = u.NewJobKv()
+									}
+									for _, v2v := range v2 {
+										isExists := false // reset per value so one duplicate does not suppress the remaining values
+										for _, v2vv := range bp.TableKV.KvTags[k2] {
+											if v2v.Value == v2vv.Value {
+												isExists = true
+												break
+											}
+										}
+										if !isExists {
+											bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
+											bp.Text += fmt.Sprintf("%v:%v\n", k2, v2v.Value) // print the value itself, not the []*Tag slice
+										}
 									}
 								}
 							}
@@ -303,22 +304,14 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		ub, _ = DivideBlock("", td.Text, 2, table.TableResult.RuleBlock) //对td的原始值
 		//看是否划块
 		if len(ub) > 0 {
-			colonKvWeight := map[string]int{}
-			spaceKvWeight := map[string]int{}
 			for _, bl := range ub {
 				//冒号kv
-				for bl_ck, bl_cv := range bl.ColonKV.Kv {
-					if td.SortKV.Map[bl_ck] == nil || bl.ColonKV.KvTag[bl_ck].Weight >= colonKvWeight[bl_ck] {
-						colonKvWeight[bl_ck] = bl.ColonKV.KvTag[bl_ck].Weight
-						td.SortKV.AddKey(bl_ck, bl_cv)
-					}
+				for bl_ck, bl_cv := range bl.ColonKV.KvTags {
+					td.SortKV.AddKey(bl_ck, bl_cv)
 				}
 				//空格kv
-				for bl_sk, bl_sv := range bl.SpaceKV.Kv {
-					if td.SortKV.Map[bl_sk] == nil || bl.SpaceKV.KvTag[bl_sk].Weight >= spaceKvWeight[bl_sk] {
-						spaceKvWeight[bl_sk] = bl.SpaceKV.KvTag[bl_sk].Weight
-						td.SortKV.AddKey(bl_sk, bl_sv)
-					}
+				for bl_sk, bl_sv := range bl.SpaceKV.KvTags {
+					td.SortKV.AddKey(bl_sk, bl_sv)
 				}
 			}
 		}
@@ -359,16 +352,8 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 				if bp.TableKV == nil {
 					bp.TableKV = u.NewJobKv()
 				}
-				for k2, v2 := range bp_v.ColonKV.Kv {
-					if bp.TableKV.Kv[k2] == "" {
-						bp.TableKV.Kv[k2] = v2
-					}
-				}
-				for k2, v2 := range bp_v.SpaceKV.Kv {
-					if bp.TableKV.Kv[k2] == "" {
-						bp.TableKV.Kv[k2] = v2
-					}
-				}
+				MergeKvTags(bp.TableKV.KvTags, bp_v.ColonKV.KvTags)
+				MergeKvTags(bp.TableKV.KvTags, bp_v.SpaceKV.KvTags)
 				table.TableResult.PackageMap.Map[bp_k] = bp
 			}
 		}
@@ -574,8 +559,7 @@ type Table struct {
 	TDNum                  int                       //td个数
 	BPackage               bool                      //是否有包
 	SortKV                 *SortMap                  //带排序的KV值
-	StandKV                map[string]string         //过滤后的标准化kv
-	StandKVWeight          map[string]int            //过滤后的标准化kv
+	StandKV                map[string][]*u.Tag       //过滤后的标准化kv
 	StandRuleKV            map[string]string         //过滤后的规则kv
 	kvscope                map[int]map[int][]*TD     //sortkey第几个元素的的第几个值的结束位置
 	kTD                    map[int]*TD               //根据索引找到key的TD元素
@@ -601,8 +585,7 @@ func NewTable(Html string, TableResult *TableResult, tab *goquery.Selection) *Ta
 	return &Table{
 		Html:                   Html,
 		SortKV:                 NewSortMap(),
-		StandKV:                map[string]string{},
-		StandKVWeight:          map[string]int{},
+		StandKV:                map[string][]*u.Tag{},
 		kvscope:                map[int]map[int][]*TD{},
 		kTD:                    map[int]*TD{},
 		SonTables:              []*Table{},
@@ -641,20 +624,18 @@ func (t *Table) InsertTR(tr *TR) {
 
 // SortMap is a map that supports ordering.
 type SortMap struct {
-	Index     map[string]int
-	Keys      []string
-	Map       map[string]interface{}
-	Lock      sync.Mutex
-	NotTagKey map[string]bool
+	Index map[string]int
+	Keys  []string
+	Map   map[string]interface{}
+	Lock  sync.Mutex
 }
 
 // NewSortMap returns a SortMap whose Index, Keys and Map are empty and non-nil.
 func NewSortMap() *SortMap {
 	return &SortMap{
-		Index:     map[string]int{},
-		Keys:      []string{},
-		Map:       map[string]interface{}{},
-		NotTagKey: map[string]bool{},
+		Index: map[string]int{},
+		Keys:  []string{},
+		Map:   map[string]interface{}{},
 	}
 }
 

+ 3 - 3
src/jy/pretreated/winnerorder.go

@@ -224,10 +224,10 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				object["type"] = 1
 			}
 		} else { //中标金额
-			_, standardKvTag := KvTagsToKV([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"}, 1)
+			kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"})
 			//找到了中标金额
-			if standardKvTag["中标金额"] != nil && object["entname"] != nil {
-				val := wo.clear("中标金额", standardKvTag["中标金额"].Value)
+			if len(kvTags["中标金额"]) > 0 && object["entname"] != nil {
+				val := wo.clear("中标金额", kvTags["中标金额"][0].Value)
 				if val != nil {
 					object["price"] = val
 				}

+ 7 - 7
src/jy/util/article.go

@@ -155,19 +155,19 @@ type Kv struct {
 
 // JobKv is the kv set that is finally attached to a job.
 type JobKv struct {
-	Kvs     []*Kv             //有序的冒号kv
-	Kvs_2   []*Kv             //有序的冒号kv
-	Kv      map[string]string //table kv (分出的对应的KV值)
-	KvIndex map[string]int    //kv_index(流程)
-	KvTag   map[string]*Tag   //带权重的kv
+	Kvs   []*Kv // ordered colon kvs
+	Kvs_2 []*Kv // ordered colon kvs (second set)
+	//Kv      map[string]*Tag   // table kv (the corresponding KV values that were split out)
+	//KvIndex map[string]int    // kv_index (flow)
+	KvTags map[string][]*Tag // weighted kvs; each key may hold several tags
 }
 
 func NewJobKv() *JobKv { // returns a JobKv whose Kvs, Kvs_2 and KvTags are empty and non-nil
 	return &JobKv{
 		Kvs:   []*Kv{},
 		Kvs_2: []*Kv{},
-		Kv:    map[string]string{},
-		KvTag: map[string]*Tag{},
+		//Kv:     map[string]*Tag{},
+		KvTags: map[string][]*Tag{},
 	}
 }
 

+ 8 - 14
src/jy/util/tagmatch.go

@@ -14,9 +14,11 @@ var lock sync.Mutex
 
 // Tag is a single tag.
 type Tag struct {
-	Value  string //
-	Weight int    //权重
-	TagReg *regexp.Regexp
+	Key       string // original key
+	Value     string // tag text for a tag-db entry; the kv value when the Tag is stored in a KvTags map
+	Weight    int    // weight
+	TagReg    *regexp.Regexp
+	IsInvalid bool // true when no tag matched the key (GetKvTags sets it for untagged kvs)
 }
 type Tags []*Tag //
 //tag文件
@@ -53,7 +55,7 @@ func (t *TagFile) Load(path string) {
 	//排序
 	t.Items = make([]*Tag, len(lines))
 	for k, v := range lines {
-		t.Items[k] = &Tag{v, 0 - k, nil}
+		t.Items[k] = &Tag{"", v, 0 - k, nil, false}
 	}
 	sort.Sort(t.Items)
 	t.Name = name
@@ -142,11 +144,7 @@ func GetAppointTags(src string, array []string) Tags {
 			continue
 		}
 		if ok, tag := v.Match(src); ok {
-			ret = append(ret, &Tag{
-				v.Name,
-				tag.Weight,
-				tag.TagReg,
-			})
+			ret = append(ret, &Tag{src, v.Name, tag.Weight, tag.TagReg, false})
 		}
 	}
 	lock.Unlock()
@@ -160,11 +158,7 @@ func GetBlockTags(src string) Tags {
 	ret := make(Tags, 0)
 	for _, v := range blocktagdb {
 		if ok, tag := v.Match(src); ok {
-			ret = append(ret, &Tag{
-				v.Name,
-				tag.Weight,
-				nil,
-			})
+			ret = append(ret, &Tag{src, v.Name, tag.Weight, nil, false})
 		}
 	}
 	//sort.Sort(ret)

+ 3 - 4
src/jy/util/util.go

@@ -21,10 +21,9 @@ var BrandRules map[string]map[string]string
 var GoodsConfig []string
 var BrandConfig []string
 
-var GoodsGet *DFA           //商品
-var BrandGet *DFA           //品牌
-var IsBrandGoods bool       //是否开启品牌抽取
-var RetainKvWeight = -99999 //没有标准化的kv的权重
+var GoodsGet *DFA     //商品
+var BrandGet *DFA     //品牌
+var IsBrandGoods bool //是否开启品牌抽取
 
 func init() {
 	syncint = make(chan bool, 1)

+ 7 - 0
src/jy/util/util2.go

@@ -245,6 +245,13 @@ func FirstKeyValueInMap(m interface{}) (string, interface{}) {
 		for k, v := range mv {
 			return k, v.Weight
 		}
+	} else if mv, ok := m.(map[string][]*Tag); ok {
+		for k, v := range mv {
+			if len(v) > 0 {
+				return k, v[0].Weight
+			}
+			return k, 0
+		}
 	} else if mv, ok := m.(map[string]string); ok {
 		for k, v := range mv {
 			return k, v

+ 1 - 1
src/web/templates/admin/blockinfo.html

@@ -94,7 +94,7 @@ $(function () {
 			var _tit="",htmlObj={},obj,tag=[]
 			switch(n){
 			case "classify":
-				window.location.href = "/admin/version/blockclassify?vid=5b88f682752d50045860188a";
+				window.location.href = "/admin/version/blockclassify?vid={{.vid}}";
 				break
 			case "edit":	
                 obj=ttable.row($(this).closest("tr")).data();