fengweiqiang 5 年之前
父節點
當前提交
7776f3daed

+ 117 - 5
src/jy/extract/extpackage.go

@@ -7,22 +7,134 @@ import (
 	"log"
 	qu "qfw/util"
 	"reflect"
+	"sort"
 )
 
+// pkvdata runs kvparse over the package's colon, table and space KV sets, in
+// that order, writing what it finds into *sonJobResult; nil sets are skipped.
+func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask) {
+	kvSets := []*ju.JobKv{pkg.ColonKV, pkg.TableKV, pkg.SpaceKV}
+	for _, kvSet := range kvSets {
+		if kvSet == nil {
+			// Nothing to parse for this KV form.
+			continue
+		}
+		kvparse(kvSet, e, sonJobResult)
+	}
+}
+
+func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{}) {
+	if p != nil {
+		for pk, pv2 := range p.KvTags {
+			if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
+				tmp := []*ju.Tag{}
+				var tmpindex, tmpweight int = -9999, -9999
+				for ii, vv := range pv2 {
+					if tmpweight < vv.Weight {
+						tmpindex = ii
+						tmpweight = vv.Weight
+					}
+				}
+				tmp = append(tmp, pv2[tmpindex])
+				p.KvTags[pk] = tmp
+			}
+		}
+		for pk, pv := range p.KvTags {
+			if len(pv) == 0 {
+				continue
+			}
+			tags := ju.GetTags(pk)
+			if tags.Len() > 0 {
+				if ((*sonJobResult)["name"]  == nil || (*sonJobResult)["name"] == "")&& tags[0].Key == "项目名称"{
+					(*sonJobResult)["name"] = pv[0].Value
+				}
+				if qu.Float64All((*sonJobResult)["budget"]) == 0 && tags[0].Key == "预算" {
+					lock.Lock()
+					cfn := e.ClearFn["budget"]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
+					(*sonJobResult)["budget"] = data[0]
+					continue
+				}
+				if qu.Float64All((*sonJobResult)["bidamount"]) == 0 && tags[0].Key == "中标金额" {
+					lock.Lock()
+					cfn := e.ClearFn["budget"]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
+					(*sonJobResult)["bidamount"] = data[0]
+					continue
+				}
+				if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "" )&& tags[0].Key == "中标单位"{
+					(*sonJobResult)["winner"] = pv[0].Value
+				}
+
+			}
+			if (*sonJobResult)["name"] == nil && pk == "名称" {
+				(*sonJobResult)["name"] = pv[0].Value
+			}
+		}
+	}
+}
+
 //处理分包信息
 func PackageDetail(j *ju.Job, e *ExtractTask) {
 	qu.Try(func() {
 		if len(j.BlockPackage) > 0 {
+			tmpkeys := []string{}
+			for k, _ := range j.BlockPackage {
+				tmpkeys = append(tmpkeys, k)
+			}
+			sort.Strings(tmpkeys)
 			packageResult := map[string]map[string]interface{}{}
 			//packagenum := len(j.BlockPackage)
-			for pkName, pkg := range j.BlockPackage {
+			for i, pkName := range tmpkeys {
+				pkg, ok := j.BlockPackage[pkName]
+				if !ok {
+					continue
+				}
 				//是否清理标记
 				clearmap := map[string]bool{}
 				sonJobResult := map[string]interface{}{}
-				sonJobResult["text"] = pkg.Text
-				sonJobResult["origin"] = pkg.Origin
-				sonJobResult["type"] = pkg.Type
-				sonJobResult["winnerorder"] = pkg.WinnerOrder
+				if pkg != nil {
+					sonJobResult["origin"] = pkg.Origin
+					sonJobResult["text"] = pkg.Text
+					sonJobResult["budget"] = pkg.Budget
+					sonJobResult["bidamount"] = pkg.Bidamount
+					wins := make([]map[string]interface{}, 0)
+					if pkg.Winner == "" && len(j.Winnerorder) > 0 {
+						if sonJobResult["winnerorder"] == nil {
+							for _, tv := range j.Winnerorder {
+									if tv["type"].(int) == i{
+										wins = append(wins, tv)
+									}
+							}
+							sonJobResult["winnerorder"] = wins
+						}
+						sonJobResult["bidamount"] = j.Winnerorder[i]["price"]
+						sonJobResult["winner"] = wins[0]["entname"]
+					} else {
+						if len(j.Winnerorder) > 0 {
+							sonJobResult["bidamount"] = j.Winnerorder[0]["price"]
+							sonJobResult["winner"] = wins[0]["entname"]
+						}
+						sonJobResult["winnerorder"] = pkg.WinnerOrder
+					}
+					pkvdata(pkg, &sonJobResult, e)
+
+					sonJobResult["type"] = pkg.Type
+					if len(tmpkeys) == 1{
+						if qu.Float64All(sonJobResult["budget"])==0{
+							for _,bv := range j.Block{
+								kvparse(bv.ColonKV,e,&sonJobResult)
+								kvparse(bv.TableKV,e,&sonJobResult)
+								kvparse(bv.SpaceKV,e,&sonJobResult)
+							}
+						}
+					}
+					if sonJobResult["name"] == nil {
+						sonJobResult["name"] = j.Title
+					}
+				}
 				//分包暂不参与选举
 				/*
 					for k, tags := range e.Tag {

+ 3 - 4
src/jy/pretreated/analystep.go

@@ -6,7 +6,6 @@ package pretreated
 import (
 	"encoding/json"
 	"jy/util"
-
 	//"log"
 	"strings"
 
@@ -31,9 +30,9 @@ func AnalyStart(job *util.Job) {
 		}
 	}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock) //分块
-	if len(blockArrays) > 0 {                                                //有分块
+	if len(blockArrays) > 0 { //有分块
 		//从块里面找分包
-		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title) //从块里面找分包
+		job.BlockPackage = FindPackageFromBlocks(&blockArrays) //从块里面找分包
 		for _, bl := range blockArrays {
 			//log.Println(bl.Text)
 			if len([]rune(bl.Text)) > 80 {
@@ -90,7 +89,7 @@ func processTableInBlock(bl *util.Block, job *util.Job, packageFlag bool) {
 	for _, tab := range tabs {
 		job.HasTable = 1
 		//添加标识:文本中有table
-		tabres := AnalyTableV2(tab, job.Category, bl.Title, tab.Text(), 2, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
+		tabres := AnalyTableV2(tab, job.Category, strings.TrimSpace(tab.Nodes[0].PrevSibling.Data), tab.Text(), 2, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
 		if packageFlag {
 			tabres.PackageMap = nil
 			tabres.IsMultiPackage = false

+ 50 - 41
src/jy/pretreated/analytable.go

@@ -159,9 +159,9 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[s
 			}
 			if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
 				kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
-			} else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位"{
-				kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight-100})
-			}else{
+			} else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" {
+				kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 100})
+			} else {
 				kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
 			}
 		}
@@ -224,6 +224,9 @@ func (table *Table) KVFilter() {
 		v := table.SortKV.Map[k]
 		if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
 			k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
+			if k == "2、建设规模" {
+				k = "预算"
+			}
 			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
 			//qutil.Debug(k, v, k1, w1, v1, tag, b)
 			if tag != "" && table.Tag == "" {
@@ -665,7 +668,9 @@ func (ts *TableResult) Analy() {
 					bp := &u.BlockPackage{}
 					bp.Index = v1
 					bp.Origin = matchres[0][0]
-					bp.TableKV = u.NewJobKv()
+					if bp.TableKV == nil {
+						bp.TableKV = u.NewJobKv()
+					}
 					for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
 						if len(table.StandKV[k]) > 0 {
 							bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
@@ -675,6 +680,9 @@ func (ts *TableResult) Analy() {
 					if table.BlockPackage.Map[v1] == nil {
 						table.BPackage = true
 						table.BlockPackage.AddKey(v1, bp)
+					}else {
+						table.BlockPackage.RemoveKey(v1)
+						table.BlockPackage.AddKey(v1, bp)
 					}
 				}
 			}
@@ -797,13 +805,6 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			//log.Println(table.TableResult.Id, table.Html)
 			//分割表格
 			table.bSplit(n, ts)
-			//对没有表头表格的处理
-			if table.Tag != "" {
-				_, _, b := CheckMultiPackage(table.Tag, "")
-				if b {
-					table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -100})
-				}
-			}
 			table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
 			//开始查找kv,核心模块,table.SortKV
 			table.FindKV()
@@ -811,13 +812,40 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			if u.IsBrandGoods {
 				table.analyBrand()
 			}
-			//判断是否是多包,并处理分包的//遍历td分块
-			table.CheckMultiPackageByTable()
 			res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
 			if !res {
 				//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
 				table.KVFilter()
 			}
+			//对没有表头表格的处理
+			if table.Tag != "" {
+				co, m, b := CheckMultiPackage(table.Tag, "")
+				if b {
+					table.BPackage = b
+					if len(table.BlockPackage.Map) == 0 {
+						for _,av := range m{
+							kv := u.NewJobKv()
+							kv.KvTags= table.StandKV
+							bd:=u.PackageNumberConvert(av[0])
+							blockPackage := &u.BlockPackage{
+								Origin:av[0],
+								Name:av[0],
+								Text:co,
+								TableKV:kv,
+								Index:bd,
+							}
+							if bd !=""{
+								table.BlockPackage.AddKey(bd, blockPackage)
+							}else {
+								table.BlockPackage.AddKey(av[0], blockPackage)
+							}
+						}
+					}
+					table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -300})
+				}
+			}
+			//判断是否是多包,并处理分包的//遍历td分块
+			table.CheckMultiPackageByTable()
 			//MergeKvTags(table.TableResult.KvTags, table.StandKV)
 		}
 	}
@@ -1062,27 +1090,6 @@ func (table *Table) FindTag() {
 	if table.Tag != "" {
 		return
 	}
-	t1, _ := goquery.OuterHtml(table.Goquery)
-	//t1, _ := table.Goquery.OuterHtml()
-	html := table.Html
-	pos := strings.Index(html, t1)
-	if pos <= 0 {
-		doc, _ := goquery.NewDocumentFromReader(strings.NewReader(table.Html))
-		html, _ = doc.Html()
-		pos = strings.Index(html, t1)
-	}
-	//u.Debug("--------", t1, "====\n\n\n\n=====", html)
-	if pos > 0 {
-		tcon := html[:pos]
-		tcon = cut.ClearHtml(tcon)
-		tcon = ClearTagReg.ReplaceAllString(tcon, "")
-		//u.Debug(pos, "-----------", tcon)
-		strs := ttagreg.FindStringSubmatch(tcon)
-		if len(strs) > 0 {
-			table.Tag = strs[0]
-			//u.Debug(table.Tag)
-		}
-	}
 	if table.Tag == "" {
 		table.Tag = table.TableResult.BlockTag
 	}
@@ -1693,7 +1700,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 		near.KVDirect = direct
 		near.KeyDirect = vdirect
 		td.KVDirect = direct
-		key := near.Val
+		key := repSpace.ReplaceAllString(near.Val, "")
 		if near.Val == "" {
 			key = fmtkey("k", near.TR.RowPos, near.ColPos)
 		}
@@ -1953,10 +1960,12 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 			//根据数组index分包长度添加table.BlockPackage子包数组
 			for nk, v := range index {
 				if tn.BlockPackage.Map[v] == nil {
+					kv := u.NewJobKv()
+					kv.KvTags = tn.StandKV
 					bp := &u.BlockPackage{}
 					bp.Index = v                  //序号 (转换后编号,只有数字或字母)
 					bp.Origin = oldIndex[nk]      //包的原始值
-					bp.TableKV = u.NewJobKv()     //table kv (分出的对应的KV值)
+					bp.TableKV = kv               //table kv (分出的对应的KV值)
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
 			}
@@ -1971,8 +1980,8 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 	//查找分包中的中标人排序
 	if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
 		for _, v := range tn.BlockPackage.Keys {
-			vv := tn.BlockPackage.Map[v].(*u.BlockPackage)
-			if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
+			vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)
+			if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
 				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
 			}
 		}
@@ -2084,7 +2093,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 func (tn *Table) isGoonNext() {
 	blockPackage := map[string]*u.BlockPackage{}
 	for _, k := range tn.SortKV.Keys {
-		if excludeKey.MatchString(k) {
+		if excludeKey.MatchString(k) || strings.Contains(k, "批复") {
 			continue
 		}
 		str := "" //拼装为冒号kv
@@ -2233,7 +2242,7 @@ func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyEx
 func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
 	for in, k := range tn.SortKV.Keys {
 		//涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)就跳过
-		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) {
+		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) || strings.Contains(k, "批复") {
 			continue
 		}
 		v := tn.SortKV.Map[k]
@@ -3168,7 +3177,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 1 - 1
src/jy/pretreated/colonkv.go

@@ -746,7 +746,7 @@ func MergeKvTags(kvTags_1, kvTags_2 map[string][]*Tag) {
 	for k, v := range kvTags_2 {
 		for _, vv := range v {
 			value_vv := strings.TrimSpace(vv.Value)
-			if value_vv == "" {
+			if value_vv == "" || vv.Key == vv.Value {
 				continue
 			}
 			isExists := false

+ 190 - 149
src/jy/pretreated/division.go

@@ -543,7 +543,7 @@ func filterTitle(title string) string {
 }
 
 //从块里面找分包
-func FindPackageFromBlocks(blocks *[]*util.Block, title string) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromBlocks(blocks *[]*util.Block) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	//块分包
 	for _, v := range *blocks {
@@ -552,13 +552,15 @@ func FindPackageFromBlocks(blocks *[]*util.Block, title string) (blockPackage ma
 		if text == "" {
 			continue
 		}
-		ok, surplusText := divisionPackageChild(&blockPackage, text, title, true, v.Tag["中标单位"])
-		//把分包内容摘除掉有问题 有的项目名称中包含二标段
-		if ok && false {
-			v.Text = surplusText
-			v.ColonKV = GetKVAll(surplusText, v.Title, nil, 1)
-			v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
-		}
+		//var ok bool
+		//var surplusText string
+			divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"])
+		////把分包内容摘除掉有问题 有的项目名称中包含二标段
+		//if ok && false {
+		//	v.Text = surplusText
+		//	v.ColonKV = GetKVAll(surplusText, v.Title, nil, 1)
+		//	v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
+		//}
 	}
 	return
 }
@@ -583,164 +585,198 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	//	util.Debug(con)
 	//	util.Debug(pkg)
 	//分包前面添加换行
-	appendWarpIndex := []int{}
-	for _, v := range pkg {
+	//log.Println(con)
+	bools := make(map[string]bool)
+	for k, v := range pkg {
 		//如果文本内容以识别出来的分包标识结尾,不是分包
 		if len(pkg) == 1 && strings.HasSuffix(con, v[0]) {
 			return false, ""
 		}
 		//
-		is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
-		for _, sv := range is {
-			appendWarpIndex = append(appendWarpIndex, sv[0])
-		}
-	}
-	appendWarpIndex = getPkgIndex(appendWarpIndex)
-	conTemp := ""
-	for k, v := range appendWarpIndex {
-		if k == 0 {
-			conTemp += con[:v] + "\n"
-		} else {
-			conTemp += "\n" + con[appendWarpIndex[k-1]:v]
-		}
-		if k == len(appendWarpIndex)-1 {
-			conTemp += "\n" + con[v:]
-		}
-	}
-	con = conTemp
-	con = replSerial.ReplaceAllString(con, "\n")
-	con = regMoreWrap.ReplaceAllString(con, "\n")
-	//util.Debug(con)
-	//根据分包,找索引位置
-	indexMap := map[int]int{}
-	indexKeyStringMap := map[int]string{}
-	indexKeyIntMap := map[int]int{}
-	indexs := []int{}
-	startEndMap := map[int]int{}
-	pkgIndexMap := map[string][]int{}
-	indexPkgMap := map[int]string{}
-	//遍历分包,把kv在包前面的移动到包后面
-	for _, v := range pkg {
-		pgflag := v[0] + "[::]*"
-		is := regexp.MustCompile(pgflag).FindAllStringIndex(con, -1)
+		is := regexp.MustCompile(v[0] + "[::]*").FindAllString(con, -1)
 		for _, sv := range is {
-			indexMap[sv[0]] = sv[1]
-			indexs = append(indexs, sv[0])
-			pkgIndexMap[v[0]] = append(pkgIndexMap[v[0]], sv[0])
-			indexPkgMap[sv[0]] = v[0]
-		}
-		//key在包前面,并且在一行的开头
-		keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
-		if len(keys) == 0 {
-			//key在包前面,并且key以冒号结尾
-			keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
-		}
-		if len(keys) == 0 {
-			keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
-		}
-		for _, key := range keys {
-			startEndMap[key[5]] = key[4]
-			//
-			headkey := con[key[4]:key[5]]
-			headkey = regReplAllSpace.ReplaceAllString(headkey, "")
-			if !regDivision.MatchString(headkey) {
-				headkey += ":"
+			newBpkg := &util.BlockPackage{
+				Origin:   sv,
+				Text:     con,
+				Index:    k,
+				Accuracy: false,
 			}
-			headkey = moreColonReg.ReplaceAllString(headkey, ":")
-			colonIndexs := regDivision.FindAllStringIndex(headkey, -1)
-			if len(colonIndexs) > 1 {
-				headkey = headkey[colonIndexs[len(colonIndexs)-2][1]:colonIndexs[len(colonIndexs)-1][1]]
+			if (*blockPackage)[k] == nil && !bools[sv] {
+				bools[sv] = true
+				con = strings.ReplaceAll(con, sv, "\n")
+				//log.Println(k, con)
+				kvAll := GetKVAll(con, title, nil, 4)
+				newBpkg.ColonKV = kvAll
+				newBpkg.SpaceKV = SspacekvEntity.Entrance(con, "", nil)
+				(*blockPackage)[k] = newBpkg
+			} else if (*blockPackage)[k].ColonKV != nil {
+				kvAll := GetKVAll(con, title, nil, 4)
+				MergeKvTags((*blockPackage)[k].ColonKV.KvTags, kvAll.KvTags)
 			}
-			indexKeyStringMap[key[5]] = headkey
-			indexKeyIntMap[key[5]] = key[1]
-		}
-	}
-	indexs = getPkgIndex(indexs)
-	for ik, iv := range indexs {
-		if indexKeyStringMap[iv] != "" {
-			continue
-		}
-		if indexKeyIntMap[iv] == indexMap[iv] {
-			continue
-		}
-		if ik > 0 {
-			indexKeyStringMap[iv] = indexKeyStringMap[indexs[ik-1]]
-		}
-	}
-	//
-	//获取截取标识
-	surplusText, maxWarpCount, indexTextMap, indexWarpMap := interceptText(indexs, indexPkgMap, pkgIndexMap, startEndMap, con)
-	//查找分包内容,分kv
-	for _, iv := range indexs {
-		text := indexTextMap[iv]
-		//
-		warpIndex := regSpliteSegment.FindAllStringIndex(text, -1)
-		if len(indexWarpMap) > 0 {
-			maxWarpCount = indexWarpMap[iv]
-		}
-		if maxWarpCount > 0 && len(warpIndex) >= 5 && len(warpIndex) > maxWarpCount {
-			textTemp := text
-			text = textTemp[:warpIndex[maxWarpCount-1][1]]
-			surplusText += textTemp[warpIndex[maxWarpCount-1][0]:]
-		}
-		for bk, bv := range pkg {
-			//判断分包如果在这段文字里面,该段文字就属于该包的
-			if !strings.HasPrefix(text, bv[0]) {
-				continue
-			}
-			index := util.PackageNumberConvert(bk)
-			//去掉前缀,空格必须要加,分kv的时候要用
-			text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
-			headKey := ""
-			if indexKeyStringMap[iv] != "" {
-				//if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
-				headKey = indexKeyStringMap[iv]
-				text = indexKeyStringMap[iv] + "  " + text
-				//}
-				for _, pkgIndexMap_v := range pkgIndexMap[bv[0]] {
-					delete(indexKeyStringMap, pkgIndexMap_v)
-				}
-			}
-			//如果一块中有多个相同的包,合并到一个
-			if (*blockPackage)[index] != nil {
-				//合并文本
-				(*blockPackage)[index].Text += "\n" + text
-				//合并冒号kv
-				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
-				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4)
-					MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
-				}
-				MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
-				//合并空格kv
-				spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
-				MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
-			} else {
-				newBpkg := &util.BlockPackage{
-					Origin:   bk,
-					Text:     text,
-					Index:    index,
-					Type:     bv[1],
-					Accuracy: accuracy,
-				}
-				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
-				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4)
-					MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
+			for kk, vv := range (*blockPackage)[k].ColonKV.KvTags {
+				for _, vvvv := range vv {
+					//log.Println(kk,vv,kkk,vvvv.Value,vvvv.Key)
+					//if kk == "项目编号" && (*blockPackage)[k].Origin == "" {
+					//	(*blockPackage)[k].Origin = strings.TrimRightFunc(vvvv.Value, func(r rune) bool {
+					//		return r == 65289 || r == 41 || r == 46 || r == 12290
+					//	})
+					//	break
+					//} else
+					if kk == "项目名称" && (*blockPackage)[k].Name == "" {
+						(*blockPackage)[k].Name = vvvv.Value
+						break
+					} else if kk == "预算" && (*blockPackage)[k].Budget == 0 {
+						(*blockPackage)[k].Budget = qutil.Float64All(vvvv.Value)
+						break
+					} else if kk == "中标单位" && (*blockPackage)[k].Winner == "" {
+						(*blockPackage)[k].Winner = vvvv.Value
+						break
+					} else if kk == "标段类型" && (*blockPackage)[k].Type == "" {
+						(*blockPackage)[k].Type = vvvv.Value
+						break
+					} else if kk == "中标金额" && (*blockPackage)[k].Bidamount == 0 {
+						(*blockPackage)[k].Bidamount = qutil.Float64All(vvvv.Value)
+						break
+					}
 				}
-				newBpkg.ColonKV = finalKv
-				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)
-				(*blockPackage)[index] = newBpkg
+
 			}
 		}
 	}
+	//log.Println(con)
+	//con = replSerial.ReplaceAllString(con, "\n")
+	//con = regMoreWrap.ReplaceAllString(con, "\n")
+	////util.Debug(con)
+	////根据分包,找索引位置
+	//indexMap := map[int]int{}
+	//indexKeyStringMap := map[int]string{}
+	//indexKeyIntMap := map[int]int{}
+	//indexs := []int{}
+	//startEndMap := map[int]int{}
+	//pkgIndexMap := map[string][]int{}
+	//indexPkgMap := map[int]string{}
+	////遍历分包,把kv在包前面的移动到包后面
+	//for _, v := range pkg {
+	//	pgflag := v[0] + "[::]*"
+	//	is := regexp.MustCompile(pgflag).FindAllStringIndex(con, -1)
+	//	for _, sv := range is {
+	//		indexMap[sv[0]] = sv[1]
+	//		indexs = append(indexs, sv[0])
+	//		pkgIndexMap[v[0]] = append(pkgIndexMap[v[0]], sv[0])
+	//		indexPkgMap[sv[0]] = v[0]
+	//	}
+	//	//key在包前面,并且在一行的开头
+	//	keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
+	//	if len(keys) == 0 {
+	//		//key在包前面,并且key以冒号结尾
+	//		keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
+	//	}
+	//	if len(keys) == 0 {
+	//		keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
+	//	}
+	//	for _, key := range keys {
+	//		startEndMap[key[5]] = key[4]
+	//		//
+	//		headkey := con[key[4]:key[5]]
+	//		headkey = regReplAllSpace.ReplaceAllString(headkey, "")
+	//		if !regDivision.MatchString(headkey) {
+	//			headkey += ":"
+	//		}
+	//		headkey = moreColonReg.ReplaceAllString(headkey, ":")
+	//		colonIndexs := regDivision.FindAllStringIndex(headkey, -1)
+	//		if len(colonIndexs) > 1 {
+	//			headkey = headkey[colonIndexs[len(colonIndexs)-2][1]:colonIndexs[len(colonIndexs)-1][1]]
+	//		}
+	//		indexKeyStringMap[key[5]] = headkey
+	//		indexKeyIntMap[key[5]] = key[1]
+	//	}
+	//}
+	//indexs = getPkgIndex(indexs)
+	//for ik, iv := range indexs {
+	//	if indexKeyStringMap[iv] != "" {
+	//		continue
+	//	}
+	//	if indexKeyIntMap[iv] == indexMap[iv] {
+	//		continue
+	//	}
+	//	if ik > 0 {
+	//		indexKeyStringMap[iv] = indexKeyStringMap[indexs[ik-1]]
+	//	}
+	//}
+	////
+	////获取截取标识
+	//surplusText, maxWarpCount, indexTextMap, indexWarpMap := interceptText(indexs, indexPkgMap, pkgIndexMap, startEndMap, con)
+	////查找分包内容,分kv
+	//for _, iv := range indexs {
+	//	text := indexTextMap[iv]
+	//	//
+	//	warpIndex := regSpliteSegment.FindAllStringIndex(text, -1)
+	//	if len(indexWarpMap) > 0 {
+	//		maxWarpCount = indexWarpMap[iv]
+	//	}
+	//	if maxWarpCount > 0 && len(warpIndex) >= 5 && len(warpIndex) > maxWarpCount {
+	//		textTemp := text
+	//		text = textTemp[:warpIndex[maxWarpCount-1][1]]
+	//		surplusText += textTemp[warpIndex[maxWarpCount-1][0]:]
+	//	}
+	//	for bk, bv := range pkg {
+	//		//判断分包如果在这段文字里面,该段文字就属于该包的
+	//		if !strings.HasPrefix(text, bv[0]) {
+	//			continue
+	//		}
+	//		index := util.PackageNumberConvert(bk)
+	//		//去掉前缀,空格必须要加,分kv的时候要用
+	//		text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
+	//		headKey := ""
+	//		if indexKeyStringMap[iv] != "" {
+	//			//if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
+	//			headKey = indexKeyStringMap[iv]
+	//			text = indexKeyStringMap[iv] + "  " + text
+	//			//}
+	//			for _, pkgIndexMap_v := range pkgIndexMap[bv[0]] {
+	//				delete(indexKeyStringMap, pkgIndexMap_v)
+	//			}
+	//		}
+	//		//如果一块中有多个相同的包,合并到一个
+	//		if (*blockPackage)[index] != nil {
+	//			//合并文本
+	//			(*blockPackage)[index].Text += "\n" + text
+	//			//合并冒号kv
+	//			colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
+	//			if headKey != "" {
+	//				kvAgain := GetKVAll(text, "", nil, 4)
+	//				MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
+	//			}
+	//			MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
+	//			//合并空格kv
+	//			spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
+	//			MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
+	//		} else {
+	//			newBpkg := &util.BlockPackage{
+	//				Origin:   bk,
+	//				Text:     text,
+	//				Index:    index,
+	//				Type:     bv[1],
+	//				Accuracy: accuracy,
+	//			}
+	//			finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
+	//			if headKey != "" {
+	//				kvAgain := GetKVAll(text, "", nil, 4)
+	//				MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
+	//			}
+	//			newBpkg.ColonKV = finalKv
+	//			newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)
+	//			(*blockPackage)[index] = newBpkg
+	//		}
+	//	}
+	//}
 	//中标人排序
-	if isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
+	if !strings.Contains(title, "招标") && isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
 		for _, v := range *blockPackage {
 			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2)
 		}
 	}
-	return true, surplusText
+	return true, con
 }
 func getPkgIndex(indexs []int) []int {
 	sort.Ints(indexs)
@@ -777,6 +813,11 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 		} else {
 			text = con[iv:]
 		}
+		if strings.Contains(text, "、") {
+			text = strings.Split(text, "、")[0]
+		} else if strings.Contains(text, "\n") {
+			text = strings.Split(text, "\n")[0]
+		}
 		indexTextMap[iv] = text
 		warpCount := len(regSpliteSegment.FindAllStringIndex(text, -1))
 		if warpCount > maxWarpCount {

+ 6 - 1
src/jy/pretreated/multipackage.go

@@ -3,6 +3,7 @@ package pretreated
 import (
 	"regexp"
 	"sort"
+	"strings"
 )
 
 var (
@@ -10,7 +11,7 @@ var (
 	监理 施工没有处理
 	**/
 	//替换容易混淆的词
-	PreReg  = regexp.MustCompile("(同|每|对|[^其]中|仅|分|任意)[一二三四五六七八九十\\d]个?(子|合同|分|施工|监理)?(标段?|包)|项目标号|文件A包|涉及包号|包件号?|标段(名称|编号)|0\\s?个标段|1\\-[\\d]标段|子包(\\d、)+\\d|\\d\\.\\d(标段|包)[^一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]|[1-9]标。")
+	PreReg  = regexp.MustCompile("(同|每|对|[^其]中|仅|任意)[一二三四五六七八九十\\d]个?(子|合同|分|施工|监理)?(标段?|包)|项目标号|文件A包|涉及包号|包件号?|0\\s?个标段|1\\-[\\d]标段|子包(\\d、)+\\d|\\d\\.\\d(标段|包)[^一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]|[1-9]标。")
 	PreReg1 = regexp.MustCompile("[^\n]([A-Z]?([一二三四五六七八九十]|\\d)、)+[A-Z]?([一二三四五六七八九十]|\\d)(标段?|包)")
 	//有分包划分情况的直接对比是1的肯定不是分包
 	PreCheckMulti = regexp.MustCompile("[^第]([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)[  \u3000\u2003\u00a0]*个?((子|合同|分|施工|监理)?(标段?|包|合同段|标包))进行|(划分|分[设为成]?|共[分设有计]?)[::]?[  \u3000\u2003\u00a0]*([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)[  \u3000\u2003\u00a0]*个?((子|合同|分|施工|监理)?(标段?|包|合同段|标包|项目))")
@@ -48,6 +49,10 @@ func CheckMultiPackage(con, title string) (content string, m map[string][]string
 	//if TitleReg.MatchString(title) {
 	//log.Println(title+"\n------------------", TitleReg.FindAllStringSubmatch(title, -1))
 	//}
+	if strings.Trim(con,"") == "标包划分:共划分1个标包。" {
+		m["1"]=[]string{"包1","包"}
+		return con, m, true
+	}
 	con = PreReg.ReplaceAllString(con, "")
 	con = PreReg1.ReplaceAllString(con, "")
 	pres := PreCheckMulti.FindStringSubmatch(con)

+ 5 - 1
src/jy/pretreated/tablev2.go

@@ -341,7 +341,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		}
 		if isFindPkg {
 			if len(ub) > 0 {
-				blockPackage = FindPackageFromBlocks(&ub, "") //从块里面找分包
+				blockPackage = FindPackageFromBlocks(&ub) //从块里面找分包
 			} else {
 				blockPackage = FindPackageFromText("", td.Val) //从正文里面找分包
 			}
@@ -412,6 +412,10 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 			td.Valtype = repl
 			td.MustBH = must
 			td.BH = btw
+			if strings.Contains(txt,"年估算额年(万元)"){
+				td.MustBH = true
+				td.BH = true
+			}
 		}
 	} else if len(ub) == 0 {
 		//之前这里没加判断,现在加上判断,造成分块之后的kv被覆盖掉

+ 119 - 104
src/jy/pretreated/winnerorder.go

@@ -109,11 +109,13 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int) []map[string
 }
 
 //获取中标人排序文本
-func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp.Regexp, from int) string {
+func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp.Regexp, from int) []string {
 	isWinnerReg1 := reg_2 == winnerReg1
+	rdata := []string{}
 	//确定中标候选人排序在哪个块里面
 	rightIndex, prevMax := -1, -1
-	var winnerFlag *WinnerFlag
+	rightIndexs := []int{}
+	var winnerFlag []*WinnerFlag
 	for b_k, b_v := range blocks {
 		indexs := []*WinnerFlag{}
 		array := reg_2.FindAllStringSubmatchIndex(b_v, -1)
@@ -138,130 +140,143 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 		if wf != nil && wf.max >= prevMax {
 			prevMax = wf.max
 			rightIndex = b_k
-			winnerFlag = wf
+			rightIndexs = append(rightIndexs, b_k) // NOTE(review): keeps every block that tied or beat the running max when visited, not only the final best
+			winnerFlag = append(winnerFlag, wf)
 		}
 	}
-	// within this block, cut out the text
+	// nothing selected: return the empty result; otherwise cut the text out of each selected block
 	if rightIndex == -1 {
-		return ""
+		return rdata
 	}
-	text = blocks[rightIndex]
-	warpCount := wo.interceptText(winnerFlag.indexs, text)
-	if warpCount == 0 {
-		warpCount = 1
-	}
-	textEnd := text[winnerFlag.textEnd:]
-	text = text[winnerFlag.textStart:winnerFlag.textEnd]
-	warpIndex := regSpliteSegment.FindAllStringIndex(textEnd, -1)
-	if len(warpIndex) >= warpCount {
-		textEnd = textEnd[:warpIndex[warpCount-1][1]]
-	}
-	text = text + textEnd
-	if isWinnerReg1 {
-		text = reg_2.ReplaceAllString(text, "$1\n$2$15")
-	} else {
-		text = reg_2.ReplaceAllString(text, "\n$1")
-	}
-	text = regReplWrapSpace.ReplaceAllString(text, "")
-	lines := SspacekvEntity.getLines(text)
-	text = ""
-	for k, v := range lines {
-		v = strings.TrimSpace(v)
-		v = colonSpaceReg.ReplaceAllString(v, ":")
-		if reg_2.MatchString(v) && !regDivision.MatchString(v) {
-			if isWinnerReg1 {
-				v = reg_2.ReplaceAllString(v, "$1$2:$15")
-			} else {
-				v = reg_2.ReplaceAllString(v, "$1:")
-			}
+	for i, rightIndex := range rightIndexs { // rightIndex here shadows the outer variable of the same name
+		text = blocks[rightIndex]
+		warpCount := wo.interceptText(winnerFlag[i].indexs, text)
+		if warpCount == 0 { // fall back to 1 when interceptText reports 0
+			warpCount = 1
+		}
+		textEnd := text[winnerFlag[i].textEnd:] // everything after the matched span
+		text = text[winnerFlag[i].textStart:winnerFlag[i].textEnd] // the matched span itself
+		warpIndex := regSpliteSegment.FindAllStringIndex(textEnd, -1)
+		if len(warpIndex) >= warpCount { // keep the trailing text only up to the end of the warpCount-th regSpliteSegment match
+			textEnd = textEnd[:warpIndex[warpCount-1][1]]
+		}
+		text = text + textEnd
+		if isWinnerReg1 {
+			text = reg_2.ReplaceAllString(text, "$1\n$2$15")
+		} else {
+			text = reg_2.ReplaceAllString(text, "\n$1")
 		}
-		// split on comma-like separators and look for the bid amount right after the winning candidate
-		// if only a bare amount follows with no label, put the bid-amount label in front of it
-		if reg_2.MatchString(v) {
-			// two key-value pairs run together on one line
-			if len(regDivision.FindAllString(v, -1)) > 1 && !findamountReg.MatchString(v) {
-				v = companyWarpReg.ReplaceAllString(v, "$1\n$2")
+		text = regReplWrapSpace.ReplaceAllString(text, "")
+		lines := SspacekvEntity.getLines(text)
+		text = "" // text is rebuilt line by line below
+		for k, v := range lines {
+			v = strings.TrimSpace(v)
+			v = colonSpaceReg.ReplaceAllString(v, ":")
+			if reg_2.MatchString(v) && !regDivision.MatchString(v) {
+				if isWinnerReg1 {
+					v = reg_2.ReplaceAllString(v, "$1$2:$15")
+				} else {
+					v = reg_2.ReplaceAllString(v, "$1:")
+				}
 			}
-			vs := findamountReg.Split(v, -1)
-			if len(vs) > 1 {
-				vs_1 := strings.TrimSpace(vs[1])
-				if amountReg.MatchString(vs_1) {
-					v = strings.Replace(v, vs[1], "中标金额:"+vs_1, 1)
+			// split on comma-like separators and look for the bid amount right after the winning candidate
+			// if only a bare amount follows with no label, put the bid-amount label in front of it
+			if reg_2.MatchString(v) {
+				// two key-value pairs run together on one line
+				if len(regDivision.FindAllString(v, -1)) > 1 && !findamountReg.MatchString(v) {
+					v = companyWarpReg.ReplaceAllString(v, "$1\n$2")
+				}
+				vs := findamountReg.Split(v, -1)
+				if len(vs) > 1 {
+					vs_1 := strings.TrimSpace(vs[1])
+					if amountReg.MatchString(vs_1) {
+						v = strings.Replace(v, vs[1], "中标金额:"+vs_1, 1)
+					}
 				}
 			}
+			v = toWarpReg.ReplaceAllString(v, "\n")
+			text += v
+			if (!reg_2.MatchString(v) || !colonEndReg.MatchString(v)) && k < len(lines)-1 { // add a newline unless this is the last line or the line matches both reg_2 and colonEndReg
+				text += "\n"
+			}
 		}
-		v = toWarpReg.ReplaceAllString(v, "\n")
-		text += v
-		if (!reg_2.MatchString(v) || !colonEndReg.MatchString(v)) && k < len(lines)-1 {
-			text += "\n"
-		}
+		rdata = append(rdata, text) // one rebuilt text per selected block
 	}
-	return text
+	return rdata
 }
 
 // findByReg builds the ranked winner entries (entname/sort/sortstr/type, plus price when found) from every text returned by getText.
 func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int) []map[string]interface{} {
 	text := wo.getText(content, blocks, reg_2, from)
 	winners := []map[string]interface{}{}
-	object := map[string]interface{}{}
-	count := 0
-	kvs := colonkvEntity.getColonSpaceKV(text)
-	for _, kv := range kvs {
-		k, v := kv.Key, kv.Value
-		if regDivision.MatchString(v) {
-			v_k := regDivision.Split(v, -1)[0]
-			if reg_2.MatchString(v_k) {
-				k = v_k
-			}
-		}
-		if reg_2.MatchString(k) { // winning bidder
-			if len(object) > 0 {
-				winners = append(winners, object)
-				object = map[string]interface{}{}
-			}
-			val := wo.clear("中标单位", v)
-			if val != nil {
-				count++
-				object["entname"] = val
-				object["sort"] = wo.toNumber(k, count)
-				object["sortstr"] = thisNumberReg.FindString(k)
-				object["type"] = 1
-			}
-		} else { // bid amount
-			findOfferFlag := false
-			if offerReg.MatchString(k) {
-				findOfferFlag = true
-			} else {
-				kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"})
-				if len(kvTags["中标金额"]) > 0 {
-					findOfferFlag = true
+	if len(text) < 1 { // getText selected no block
+		return winners
+	}
+	for i, v := range text { // one pass per extracted text; winners accumulates across passes
+		object := map[string]interface{}{}
+		count := 0 // per-text counter handed to toNumber
+		kvs := colonkvEntity.getColonSpaceKV(v)
+		for _, kv := range kvs {
+			k, v := kv.Key, kv.Value // v shadows the outer loop's text variable
+			if regDivision.MatchString(v) {
+				v_k := regDivision.Split(v, -1)[0]
+				if reg_2.MatchString(v_k) { // the first segment of the value is itself a ranking key, so use it as the key
+					k = v_k
 				}
 			}
-			// bid amount found
-			if findOfferFlag && object["entname"] != nil {
-				val := wo.clear("中标金额", v)
+			if reg_2.MatchString(k) { // winning bidder
+				if len(object) > 0 { // flush the previous entry before starting a new one
+					winners = append(winners, object)
+					object = map[string]interface{}{}
+				}
+				val := wo.clear("中标单位", v)
 				if val != nil {
-					object["price"] = val
+					count++
+					object["entname"] = val
+					object["sort"] = wo.toNumber(k, count)
+					object["sortstr"] = thisNumberReg.FindString(k)
+					object["type"] = i // index of the source text in getText's result (the removed code stored the constant 1)
+				}
+			} else { // bid amount
+				findOfferFlag := false
+				if offerReg.MatchString(k) {
+					findOfferFlag = true
+				} else {
+					kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"})
+					if len(kvTags["中标金额"]) > 0 {
+						findOfferFlag = true
+					}
+				}
+				// bid amount found: attach it to the current entry and flush that entry
+				if findOfferFlag && object["entname"] != nil {
+					val := wo.clear("中标金额", v)
+					if val != nil {
+						object["price"] = val
+					}
+					winners = append(winners, object)
+					object = map[string]interface{}{}
 				}
-				winners = append(winners, object)
-				object = map[string]interface{}{}
 			}
 		}
-	}
-	if len(object) > 0 {
-		winners = append(winners, object)
-	}
-	indexs := []*WinnerFlag{}
-	for _, winner := range winners {
-		indexs = append(indexs, &WinnerFlag{
-			index: winner["sort"].(int),
-		})
-	}
-	winnerFlag := wo.getMax(indexs, from)
-	if winnerFlag != nil {
-		winners = winners[winnerFlag.start : winnerFlag.end+1]
-	} else {
-		winners = []map[string]interface{}{}
+		if len(object) > 0 { // flush the trailing entry of this text
+			winners = append(winners, object)
+		}
+		indexs := []*WinnerFlag{} // NOTE(review): built but never read now that the getMax call below is commented out
+		//tym := make(map[int]bool, 0)
+		for _, winner := range winners { // walks every winner accumulated so far, including those from earlier texts
+			indexs = append(indexs, &WinnerFlag{
+				index: winner["sort"].(int), // NOTE(review): unchecked type assertion; presumably toNumber returns int — confirm
+				//ttype: winner["type"].(int),
+			})
+			//tym[winner["type"].(int)] = true
+		}
+
+		//winnerFlag := wo.getMax(indexs, from)
+		//if winnerFlag != nil {
+		//	winners = winners[winnerFlag.start : winnerFlag.end+1]
+		//} else {
+		//	winners = []map[string]interface{}{}
+		//}
 	}
 	return winners
 }

+ 6 - 3
src/jy/util/article.go

@@ -38,7 +38,6 @@ type Job struct {
 	SimAreaScore      map[string]float64                //简称province得分
 	SimCityScore      map[string]float64                //简称city得分
 	SimDistrictScore  map[string]float64                //简称district得分
-
 }
 
 type ExtField struct {
@@ -127,10 +126,14 @@ type Segment struct {
 
 //包
 type BlockPackage struct {
+	Origin   string                   //包的原始值
+	Name        string                   //标段(包)名称
+	Text        string                   //包文 (包对应的正文)
+	Budget      float64                  //标段(包)预算
+	Winner      string                   //标段(包)中标单位
+	Bidamount   float64                  //标段(包)中标价
 	Index       string                   //序号 (转换后编号,只有数字或字母)
-	Origin      string                   //包的原始值
 	Type        string                   //类型 (匹配后面的标段、包之类的词)
-	Text        string                   //包文 (包对应的正文)
 	ColonKV     *JobKv                   //冒号kv (分出的对应的KV值)
 	TableKV     *JobKv                   //table kv (分出的对应的KV值)
 	SpaceKV     *JobKv                   //空格 kv (分出的对应的KV值)