Browse Source

品牌抽取

unknown cách đây 6 năm
mục cha
commit
f1fc8d559e

+ 3 - 0
src/jy/extract/extract.go

@@ -118,6 +118,9 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitAuditClass()
 	ext.InitAuditRecogField()
 
+	//品牌抽取是否开启
+	ju.IsBrandGoods = ju.Config["brandgoods"].(bool)
+
 	ext.IsRun = true
 	go ext.ResultSave()
 	go ext.BidSave()

+ 545 - 95
src/jy/pretreated/analytable.go

@@ -4,7 +4,6 @@ import (
 	"fmt"
 	u "jy/util"
 	qutil "qfw/util"
-	"reflect"
 	"regexp"
 	"strings"
 
@@ -15,12 +14,14 @@ import (
 全局变量,主要是一堆判断正则
 **/
 var (
+	//清理品目中数字
+	itemclear = regexp.MustCompile("^[\\d一二三四五六七八九十]+")
 	//清理表格title中的不需要的内容
 	tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/((人民币万元件个公斤))]")
 	//清理表格中是key中包含的空格或数字等
 	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
 	//清理表格td中的符号
-	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、..。、_??;;~\\-#\\\\]*")
+	// Strips whitespace and separator symbols from a td value, plus the whole
+	// words "附件" / "附图". The words must live outside the bracket expression:
+	// inside [...] the sequence 附(件|图) is six independent literal characters,
+	// which would also delete every 件, 图, 附, '(', ')' and '|' from real values
+	// (e.g. "软件" would become "软").
+	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、,。、_??;;~\\-#\\\\]|附[件图]")
 	//判断key是金额,对万元的处理
 	moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
 	//根据表格的内容判断是不是表头,如果含有金额则不是表头
@@ -202,7 +203,7 @@ func (table *Table) KVFilter() {
 	//遍历每一行
 	for _, tr := range table.TRs {
 		for _, td := range tr.TDs {
-			//u.Debug(td.BH, td.MustBH, td.Val, td.SortKV.Map)
+			//fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map)
 			bc := false
 			if !td.BH {
 				//表头是否是无用内容
@@ -657,6 +658,8 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 		TR := NewTR(table)
 		tdTextIsNull := true
 		tds.Each(func(m int, selm *goquery.Selection) {
+			//			t, _ := selm.Html()
+			//			fmt.Println("t---------", t)
 			//对隐藏列不处理!!!
 			if IsHide(selm) {
 				return
@@ -664,7 +667,7 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 			//进入每一个单元格
 			td := NewTD(selm, TR, table)
 			//num++
-			//log.Println(td.SortKV.Keys, td.SortKV.Map)
+			//fmt.Println("------", td.SortKV.Keys, td.SortKV.Map)
 			TR.AddTD(td)
 			if td.Val != "" { //删除一个tr,tr中所有td是空值的
 				tdTextIsNull = false
@@ -781,7 +784,7 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 			table.FindKV()
 			//table中抽取品牌
 			if u.IsBrandGoods {
-				table.analyBrand()
+				table.analyBrand1()
 			}
 			//判断是否是多包,并处理分包的
 			table.CheckMultiPackageByTable()
@@ -859,6 +862,7 @@ func (table *Table) Adjust() {
 			}
 		}
 	}
+
 	if float32(count)/float32(table.TDNum) < 0.85 {
 		//精确计算起止行列是表头的概率
 		table.ComputeRowColIsKeyRation()
@@ -1329,6 +1333,9 @@ func (table *Table) FindKV() {
 					u.Debug(td.BH, td.Val, r, t)
 				}
 				**/
+				//				if td.Val == "电视" || td.Val == "电话机" || td.Val == "传真机" || td.Val == "音响" {
+				//	qutil.Debug("----", "td.BH:", td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect)
+				//				}
 				if !td.BH && td.KVDirect < 3 {
 					if !table.FindTdVal(td, direct, vdirect) {
 						if !table.FindTdVal(td, vdirect, direct) {
@@ -1340,10 +1347,11 @@ func (table *Table) FindKV() {
 							}
 						}
 					}
-					//log.Println("td", td.Val, td.BH, td.HeadTd, td.KVDirect)
+					//fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect)
 				}
 			}
 		}
+		//qutil.Debug("FindKV", table.SortKV.Map)
 	} else if len(table.TRs) > 0 { //没有表头的表格处理,默认纵向吧
 		res := make([][]string, len(table.TRs[0].TDs))
 		for n, _ := range res {
@@ -1436,7 +1444,7 @@ func (table *Table) FindKV() {
 			}
 		}
 	}
-	//log.Println("FindKV", table.SortKV.Map)
+	//qutil.Debug("FindKV", table.SortKV.Map)
 }
 
 //获取中标人顺序
@@ -1553,7 +1561,7 @@ func GetBidSort(str string, n int) int {
 func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 	near := table.FindNear(td, direct)
 	//	if near != nil {
-	//		log.Println("td", near.Val, td.Val)
+	//		fmt.Println("near----", near.Val, td.Val)
 	//	}
 	if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
 		near.KVDirect = direct
@@ -1564,6 +1572,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 			key = fmtkey("k", near.TR.RowPos, near.ColPos)
 		}
 		val := table.SortKV.Map[key]
+		//qutil.Debug("====================", "key:", key, "val:", val)
 		bthiskey := false
 		if val != nil {
 			curpos := table.SortKV.Index[key]
@@ -1653,9 +1662,10 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 			tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
 			table.SortKV.ReplaceKey(key, val, tkey)
 		} else {
-			//log.Println("AddKey", near.Val, td.Val, val)
 			table.SortKV.AddKey(key, val)
+			//if table.SortKV.Map[key] != nil {
 			pos := table.SortKV.Index[key]
+			//qutil.Debug("=========", "key:", key, "val:", val, "pos:", pos)
 			if barr {
 				mval := table.kvscope[pos]
 				if mval != nil {
@@ -1676,10 +1686,11 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 				}
 				table.kTD[pos] = near
 			}
+			//}
 		}
 		b = true
 	}
-	//log.Println("map", b, table.SortKV.Map)
+	//qutil.Debug("map", b, table.SortKV.Map)
 	return
 }
 
@@ -2657,30 +2668,269 @@ L:
 	//	}
 }
 
+// analyBrand1 extracts brand/goods records from the table's key-value pairs
+// (table.SortKV) and appends them to table.BrandData.
+//
+// Keys are visited in table.SortKV.Keys order rather than by ranging over
+// table.SortKV.Map, so the order of the table header is preserved. Values are
+// split by dynamic type: []string values go to lineMapArr1 and string values
+// to lineMap1, each grouped under the part of the key matched by the
+// `underline` pattern. Every group is then matched against
+// u.BrandRules["must"] and u.BrandRules["replace"]; a group only produces
+// output when at least one "must" rule matched.
+//
+// Side effects visible here: filterval/filteritem rewrite the value slices in
+// place, hasGoods/hasBrand/hasKey update flags on the table, and
+// table.BrandData grows.
+func (table *Table) analyBrand1() {
+	// Sample document id: 5c2d8c05a5cb26b9b782572b
+	// Typical header row: product name, brand, specification, unit price, unit, quantity, subtotal, warranty period
+	lineMapArr1 := make(map[string]*SortMap) // groups whose values are []string
+	lineMap1 := make(map[string]*SortMap)    // groups whose values are string
+	brandRule := u.BrandRules
+	// Split the values into slice values and string values.
+	for _, key := range table.SortKV.Keys {
+		val := table.SortKV.Map[key]
+		key = regReplAllSpace.ReplaceAllString(key, "")
+		// Original intent: remove a special character seen in a "采购量" header that
+		// survives the whitespace cleanup above.
+		// NOTE(review): the search string renders as empty here; it may hold an
+		// invisible character in the real file — confirm before touching this line.
+		key = strings.Replace(key, "", "", -1)
+		if realTypeVal, ok := val.([]string); ok { // slice value, e.g. {"数量":["1","2","3"]}
+			/*
+				{
+					"商品":["",""],
+					"商品_"["",""],
+				}
+			*/
+			valArr, f := filterval(realTypeVal...) // clean every cell
+			if f {
+				continue
+			}
+			realTypeVal = valArr
+			// Drop trailing empty cells. Counting down from the end is safe for an
+			// empty slice, which the previous index-based loop was not.
+			end := len(realTypeVal)
+			for end > 0 && realTypeVal[end-1] == "" {
+				end--
+			}
+			if end == 0 {
+				// Nothing usable in this column; skipping also keeps AddKey from
+				// ever being called on a nil group.
+				continue
+			}
+			dislodgeNull := realTypeVal[:end]
+			line := underline.FindString(key)
+			lineValMap1 := lineMapArr1[line]
+			if lineValMap1 == nil {
+				lineValMap1 = NewSortMap()
+				lineMapArr1[line] = lineValMap1
+			}
+			lineValMap1.AddKey(key, dislodgeNull)
+		} else if realTypeVal, b := val.(string); b { // string value, e.g. {"数量":"1"}
+			/*
+				{
+					"商品:"",名称:"",
+					"商品_:"",名称_:"",
+					"商品__:"",名称__:"",
+				}
+			*/
+			valArr, f := filterval(realTypeVal) // clean the cell
+			if f {
+				continue
+			}
+			realTypeVal = valArr[0]
+			line := underline.FindString(key)
+			lineValMap1 := lineMap1[line]
+			if lineValMap1 == nil {
+				lineValMap1 = NewSortMap()
+				lineMap1[line] = lineValMap1
+			}
+			lineValMap1.AddKey(key, realTypeVal)
+		}
+		// Any other value type is ignored, e.g. a list of maps such as the ranked
+		// winning bidders seen in documents 5c2c3802a5cb26b9b78646c4,
+		// 5c2b0551a5cb26b9b7cb05db, 5c2a42e6a5cb26b9b763ba5a, 5c2b06f5a5cb26b9b7cc4409.
+	}
+	// Slice groups: match the required titles and rename the optional ones.
+	for _, aMap := range lineMapArr1 {
+		maxNum := 0
+		arrcount1 := 0 // number of "must" title matches
+		arrcount2 := 0 // number of "replace" title matches
+		ka := make(map[string][]string) // field name -> column values
+		for _, k0 := range aMap.Keys {
+			v0, ok := aMap.Map[k0].([]string)
+			if !ok {
+				// Defensive: skip instead of panicking on an unexpected value type.
+				continue
+			}
+			// Match the required titles.
+			// NOTE(review): if brandRule["must"] is a map, iteration order is
+			// unspecified, so which rule sees k0 first — and what the break below
+			// skips — may differ between runs; confirm.
+			for nameM, r := range brandRule["must"] {
+				if convert(k0, r) {
+					// Do not let a "描述" (description) column compete with a field
+					// that is already filled, e.g. 物料名称 together with 物料描述.
+					if len(ka[nameM]) != 0 && strings.Contains(k0, "描述") {
+						continue
+					}
+					if nameM == "itemname" || nameM == "modal" {
+						if nameM == "itemname" {
+							varr, f := filteritem(v0...) // reject bad item names
+							if f {
+								break
+							}
+							v0 = varr
+						}
+						hasGoods(table, v0...) // flag the table if itemname/modal names a known product
+					}
+					if nameM == "brandname" || nameM == "modal" {
+						if len(ka["brandname"]) == 0 {
+							brand, allNull := hasBrand(table, v0...)
+							if !allNull {
+								ka["brandname"] = brand
+							}
+						}
+					}
+					if nameM != "brandname" && len(ka[nameM]) == 0 {
+						ka[nameM] = v0
+					}
+					arrcount1++
+				}
+			}
+			// Rename the other fields that should be stored.
+			for nameR, r := range brandRule["replace"] {
+				if convert(k0, r) {
+					ka[nameR] = v0
+					arrcount2++
+				}
+			}
+		}
+		// Find the longest column, then pad the shorter ones with "".
+		for _, vf1 := range ka {
+			if len(vf1) > maxNum {
+				maxNum = len(vf1)
+			}
+		}
+		finishKa := make(map[string][]string)
+		for vf2K, vf2 := range ka {
+			for len(vf2) < maxNum {
+				vf2 = append(vf2, "")
+			}
+			finishKa[vf2K] = vf2
+		}
+		hasKey(table, arrcount1) // record whether any table title matched
+		if arrcount1 >= 1 {
+			if arrcount1+arrcount2 == 1 { // a lone price match is a total price: drop it
+				delete(finishKa, "unitprice")
+			}
+			finishData := dealArrData(maxNum, finishKa)
+			table.BrandData = append(table.BrandData, finishData)
+		}
+	}
+	// String groups: match the required titles and rename the optional ones.
+	for _, sMap := range lineMap1 {
+		strcount1 := 0 // number of "must" title matches
+		strcount2 := 0 // number of "replace" title matches
+		endStrMap := make(map[string]string)
+		for _, k1 := range sMap.Keys {
+			v1 := qutil.ObjToString(sMap.Map[k1])
+			if v1 == "" {
+				continue
+			}
+			// Match the required titles.
+			for nameM, r := range brandRule["must"] {
+				if convert(k1, r) {
+					if nameM == "itemname" || nameM == "modal" {
+						if nameM == "itemname" {
+							varr, f := filteritem(v1) // reject bad item names
+							if f {
+								break
+							}
+							v1 = varr[0]
+						}
+						hasGoods(table, v1)
+					}
+					if nameM == "brandname" || nameM == "modal" {
+						if len(endStrMap["brandname"]) == 0 {
+							brand, allNull := hasBrand(table, v1)
+							if !allNull {
+								endStrMap["brandname"] = brand[0]
+							}
+						}
+					}
+					if nameM != "brandname" && len(endStrMap[nameM]) == 0 {
+						endStrMap[nameM] = v1
+					}
+					strcount1++
+				}
+			}
+			// Rename the other fields that should be stored.
+			for nameR, r := range brandRule["replace"] {
+				if convert(k1, r) {
+					endStrMap[nameR] = v1
+					strcount2++
+				}
+			}
+		}
+		hasKey(table, strcount1) // record whether any table title matched
+		if strcount1 >= 1 {
+			if strcount1+strcount2 == 1 { // a lone price match is a total price: drop it
+				delete(endStrMap, "unitprice")
+			}
+			finishData := dealStrData(endStrMap)
+			if len(finishData) > 0 {
+				table.BrandData = append(table.BrandData, finishData)
+			}
+		}
+	}
+}
+
 func (table *Table) analyBrand() {
+	//5c2d8c05a5cb26b9b782572b
 	//产品名称 品牌 规格 单价 单位 数量  小计 质保期
 	lineMap := make(map[string]map[string]string)
 	lineMapArr := make(map[string]map[string][]string)
 	brandRule := u.BrandRules
 	//将val为数组和string的分开
+	//qutil.Debug("table.SortKV.Map====", table.SortKV.Map)
 	for key, val := range table.SortKV.Map {
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
-		kind := reflect.TypeOf(val).String()
-		//处理多个key相同的数据
-		if kind == "[]string" { //val为数组 {"数量":["1","2","3"]}
+		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{
-					"商品":["","",],
+					"商品":["",""],
 					"商品_"["",""],
 				}
 
 			*/
-			realTypeVal := val.([]string)
-			hasGoods(table, realTypeVal) //判断val中是否含产品
-			hasBrand(table, realTypeVal) //判断val中是否含品牌
+			//			valArr, f := filterval(realTypeVal...)
+			//			if f {
+			//				qutil.Debug("----", key, valArr)
+			//				continue
+			//			}
+			//			realTypeVal = valArr
+			//hasGoods1(table, realTypeVal) //判断val中是否含产品
+			//hasBrand1(table, realTypeVal) //判断val中是否含品牌
 			line := underline.FindString(key)
 			lineValMap := lineMapArr[line]
+			//qutil.Debug("----", key, line, lineValMap)
 			i := 1
 		L:
 			for { //去除数组空数据
@@ -2703,7 +2953,8 @@ func (table *Table) analyBrand() {
 					lineValMap[key] = dislodgeNull
 				}
 			}
-		} else if kind == "string" { //val为字符串 {"数量":"1"}
+			//qutil.Debug("lineMapArr---", lineMapArr)
+		} else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"}
 			/*
 				{
 					"商品:"",名称:"",
@@ -2711,21 +2962,24 @@ func (table *Table) analyBrand() {
 					"商品__:"",名称__:"",
 				}
 			*/
-
-			realTypeVal := val.(string)
-			afterFilter := tabletdclear.ReplaceAllString(realTypeVal, "")
-			if afterFilter == "" { //空val值舍弃
-				continue
-			}
-			hasGoods(table, realTypeVal) //判断val中是否含产品
-			hasBrand(table, realTypeVal) //判断val中是否含品牌
+			//			valArr, f := filterval(realTypeVal)
+			//			if f {
+			//				continue
+			//			}
+			//			realTypeVal = valArr[0]
+			//hasGoods1(table, realTypeVal) //判断val中是否含产品
+			//hasBrand1(table, realTypeVal) //判断val中是否含品牌
 			line := underline.FindString(key)
 			lineValMap := lineMap[line]
 			if len(lineValMap) == 0 { //没有数据
 				lineMap[line] = map[string]string{key: realTypeVal}
-			} else { //新增数据
+			} else {
 				lineValMap[key] = realTypeVal
 			}
+		} else {
+			// "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")
+			//成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
+			//fmt.Println("err data:", key, val)
 		}
 	}
 
@@ -2733,23 +2987,50 @@ func (table *Table) analyBrand() {
 	if len(lineMapArr) > 0 {
 		for _, aMap := range lineMapArr {
 			maxNum := 0
-			arrcount := 0                   //记录key是否存在必须title(数组数据)
+			arrcount1 := 0 //记录key是否存在必须title(数组数据)
+			arrcount2 := 0
 			ka := make(map[string][]string) //最终存储数据
+			//fmt.Println("++++++++++++", aMap)
 			for k0, v0 := range aMap {
+				//qutil.Debug("k0:", k0, "v0:", v0, len(v0))
 				//匹配必须title
 				for nameM, r := range brandRule["must"] {
 					if convert(k0, r) { //匹配成功
 						if len(ka[nameM]) != 0 && strings.Contains(k0, "描述") { //防止k0匹配到多次 和特殊情况 物料名称 物料描述同时出现
 							continue
 						}
-						ka[nameM] = v0
-						arrcount++
+						if nameM == "itemname" || nameM == "modal" {
+							if nameM == "itemname" {
+								varr, f := filteritem(v0...) //过滤品目
+								if f {
+									break
+								}
+								v0 = varr
+							}
+							hasGoods(table, v0...) //判断itemname和modal中有没有商品
+						}
+						if nameM == "brandname" || nameM == "modal" {
+							//qutil.Debug(nameM, "++++++", len(ka["brandname"]), len(v0))
+							if len(ka["brandname"]) == 0 {
+								brand, allNull := hasBrand(table, v0...)
+								//qutil.Debug(len(brand), "-------", nameM, brand)
+								if !allNull {
+									ka["brandname"] = brand
+								}
+								//qutil.Debug("brandname====", len(ka["brandname"]), ka["brandname"])
+							}
+						}
+						if nameM != "brandname" {
+							ka[nameM] = v0
+						}
+						arrcount1++
 					}
 				}
 				//替换其它要保存字段
 				for nameR, r := range brandRule["replace"] {
 					if convert(k0, r) { //匹配成功
 						ka[nameR] = v0
+						arrcount2++
 					}
 				}
 			}
@@ -2762,6 +3043,7 @@ func (table *Table) analyBrand() {
 			//				}
 			//			}
 			//找最终存储数据的最大len(arr),小的补空
+			//fmt.Println("ka==============", ka)
 			for _, vf1 := range ka {
 				lenVal := len(vf1)
 				if lenVal > maxNum {
@@ -2778,8 +3060,11 @@ func (table *Table) analyBrand() {
 				}
 				finishKa[vf2K] = vf2
 			}
-			hasKey(table, arrcount) //是否匹配到table中的标题
-			if arrcount >= 1 {
+			hasKey(table, arrcount1) //是否匹配到table中的标题
+			if arrcount1 >= 1 {
+				if arrcount1+arrcount2 == 1 { //删除只匹配到一个价钱(总价)
+					delete(finishKa, "unitprice")
+				}
 				finishData := dealArrData(maxNum, finishKa)
 				table.BrandData = append(table.BrandData, finishData)
 			}
@@ -2788,26 +3073,54 @@ func (table *Table) analyBrand() {
 	//处理string数据后,匹配必须title和替换要保存的title
 	if len(lineMap) > 0 {
 		for _, sMap := range lineMap {
-			strcount := 0 //记录key是否存在必须title(字符串数据)
+			strcount1 := 0 //记录key是否存在必须title(字符串数据)
+			strcount2 := 0
 			endStrMap := make(map[string]string)
 			for k1, v1 := range sMap {
+				//qutil.Debug(k1, "++++++++++", v1)
 				//匹配必须title
 				for nameM, r := range brandRule["must"] {
 					if convert(k1, r) { //匹配成功
-						endStrMap[nameM] = v1
-						strcount++
+						if nameM == "itemname" || nameM == "modal" { //特殊处理itemname
+							if nameM == "itemname" {
+								varr, f := filteritem(v1) //过滤品目
+								if f {
+									break
+								}
+								v1 = varr[0]
+							}
+							hasGoods(table, v1)
+						}
+						if nameM == "brandname" || nameM == "modal" { //特殊处理brandname
+							if len(endStrMap["brandname"]) == 0 {
+								brand, allNull := hasBrand(table, v1)
+								qutil.Debug("----", nameM, brand, v1)
+								if !allNull {
+									endStrMap["brandname"] = brand[0]
+								}
+								qutil.Debug("endStrMap----", endStrMap)
+							}
+						}
+						if nameM != "brandname" {
+							endStrMap[nameM] = v1
+						}
+						strcount1++
 					}
 				}
 				//替换其它要保存字段
 				for nameR, r := range brandRule["replace"] {
 					if convert(k1, r) { //匹配成功
 						endStrMap[nameR] = v1
+						strcount2++
 					}
 				}
 			}
 			//原始字符串数据处理
-			hasKey(table, strcount) //是否匹配到table中的标题
-			if strcount >= 1 {
+			hasKey(table, strcount1) //是否匹配到table中的标题
+			if strcount1 >= 1 {
+				if strcount1+strcount2 == 1 { //删除只匹配到一个价钱(总价)
+					delete(endStrMap, "unitprice")
+				}
 				finishData := dealStrData(endStrMap) //处理数据
 				if len(finishData) > 0 {
 					table.BrandData = append(table.BrandData, finishData)
@@ -2817,14 +3130,14 @@ func (table *Table) analyBrand() {
 	}
 }
 
-func dealArrData(minNum int, ka map[string][]string) []map[string]string {
+func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
 	for k2, v2 := range ka {
 		//处理数组长度不相等,使长度一致
-		if len(v2) > minNum {
-			ka[k2] = v2[:minNum]
+		if len(v2) > maxNum {
+			ka[k2] = v2[:maxNum]
 		}
 	}
-	finalData := assembleData(ka)
+	finalData := assembleData(ka, 1)
 	if len(finalData) > 0 {
 		return finalData
 	}
@@ -2833,13 +3146,15 @@ func dealArrData(minNum int, ka map[string][]string) []map[string]string {
 }
 func dealStrData(kv map[string]string) []map[string]string {
 	finalData := []map[string]string{}
-	finalData = assembleData(kv)
+	if len(kv) > 0 {
+		finalData = assembleData(kv, 2)
+	}
 	return finalData
 
 }
 
 //组装数据,每一行的数据为一数据集合
-func assembleData(m interface{}) []map[string]string {
+func assembleData(m interface{}, n int) []map[string]string {
 	defer qutil.Catch()
 	/*
 		{
@@ -2848,8 +3163,7 @@ func assembleData(m interface{}) []map[string]string {
 		}
 	*/
 	datas := []map[string]string{}
-	switch reflect.TypeOf(m).String() {
-	case "map[string][]string": //数组数据
+	if n == 1 { //数组数据
 		realTypeM := m.(map[string][]string)
 		//根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
 		/*
@@ -2878,11 +3192,11 @@ func assembleData(m interface{}) []map[string]string {
 						data[k4] = v4[i]
 					} else {
 						delete(data, k4)
-						continue
+						//continue
 					}
 				} else {
 					fmt.Println("err table")
-					continue
+					//continue
 				}
 			}
 			datas[i] = data
@@ -2895,14 +3209,78 @@ func assembleData(m interface{}) []map[string]string {
 				}
 			}
 		}
-	case "map[string]string": //字符串数据
+	} else { //字符串数据
 		realTypeM := m.(map[string]string)
 		datas = append(datas, realTypeM)
-	default:
 	}
 	return datas
 }
 
+////组装数据,每一行的数据为一数据集合
+//func assembleData(m interface{}, n int) []map[string]string {
+//	defer qutil.Catch()
+//	/*
+//		{
+//			"itemname":["计算机","打印机","机柜"],
+//			"number"  :["1","12","4"]
+//		}
+//	*/
+//	datas := []map[string]string{}
+//	switch reflect.TypeOf(m).String() {
+//	case "map[string][]string": //数组数据
+//		realTypeM := m.(map[string][]string)
+//		//根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
+//		/*
+//			arr1 ["a1","b1","c1"]
+//			arr2 ["a2","b2","c2"]
+
+//			[
+//				{"a1","a2"},
+//				{"b1","b2"},
+//				{"c1","c2"}
+//			]
+//		*/
+//		//start
+//		for k3, v3 := range realTypeM {
+//			for _, val := range v3 {
+//				data := make(map[string]string)
+//				data[k3] = val
+//				datas = append(datas, data)
+//			}
+//			break
+//		}
+//		for i, data := range datas {
+//			for k4, v4 := range realTypeM {
+//				if i < len(v4) { //数组数据长度不一致
+//					if v4[i] != " " {
+//						data[k4] = v4[i]
+//					} else {
+//						delete(data, k4)
+//						//continue
+//					}
+//				} else {
+//					fmt.Println("err table")
+//					//continue
+//				}
+//			}
+//			datas[i] = data
+//		}
+//		//end
+//		//		for _, fdv := range datas { //清除空数据和只含特殊符号的数据
+//		//			for fmk, fmv := range fdv {
+//		//				if tabletdclear.ReplaceAllString(fmv, "") == "" {
+//		//					delete(fdv, fmk)
+//		//				}
+//		//			}
+//		//		}
+//	case "map[string]string": //字符串数据
+//		realTypeM := m.(map[string]string)
+//		datas = append(datas, realTypeM)
+//	default:
+//	}
+//	return datas
+//}
+
 func convert(key, r string) bool {
 	flag := false
 	key = tabletitleclear.ReplaceAllString(key, "")
@@ -2924,62 +3302,134 @@ func hasKey(table *Table, n int) {
 	}
 }
 
-//是否有商品
-func hasGoods(table *Table, data interface{}) {
+////是否有商品
+//func hasGoods1(table *Table, data interface{}) {
+//	if table.TableResult.HasGoods == 1 {
+//		return
+//	}
+//	sData, ok := data.(string)
+//	proFlag := ""
+//	if ok { //string数据检查goods
+//		if sData != "" {
+//			proFlag = u.GoodsGet.CheckSensitiveWord(sData)
+//			if len(proFlag) > 0 {
+//				table.TableResult.HasGoods = 1
+//			}
+//		}
+//	} else { //arr数据检查goods
+//		arrData := data.([]string)
+//		if len(arrData) > 0 {
+//			for _, src := range arrData {
+//				if src != "" {
+//					proFlag = u.GoodsGet.CheckSensitiveWord(src)
+//					if len(proFlag) > 0 {
+//						table.TableResult.HasGoods = 1
+//						break
+//					}
+//				}
+//			}
+//		}
+//	}
+//}
+
+////是否有品牌
+//func hasBrand1(table *Table, data interface{}) {
+//	if table.TableResult.HasBrand == 1 {
+//		return
+//	}
+//	sData, ok := data.(string)
+//	if ok { //string数据检查brand
+//		if sData != "" {
+//			brand := u.BrandGet.CheckSensitiveWord(sData)
+//			if len(brand) > 0 {
+//				fmt.Println("brand:", brand, sData)
+//				table.TableResult.HasBrand = 1
+//			}
+//		}
+//	} else { //arr数据检查brand
+//		arrData := data.([]string)
+//		if len(arrData) > 0 {
+//			for _, src := range arrData {
+//				if src != "" {
+//					brand := u.BrandGet.CheckSensitiveWord(src)
+//					if len(brand) > 0 {
+//						table.TableResult.HasBrand = 1
+//						break
+//					}
+//				}
+//			}
+//		}
+//	}
+//}
+
+// hasGoods sets table.TableResult.HasGoods to 1 as soon as one of the given
+// values is recognised by u.GoodsGet.CheckSensitiveWord. It returns
+// immediately when the flag is already set, and empty values are not checked.
+func hasGoods(table *Table, data ...string) {
 	if table.TableResult.HasGoods == 1 {
 		return
 	}
-	sData, ok := data.(string)
-	proFlag := ""
-	if ok { //string数据检查goods
-		if sData != "" {
-			proFlag = u.GoodsGet.CheckSensitiveWord(sData)
-			if len(proFlag) > 0 {
+	for _, d := range data {
+		if d != "" {
+			if len(u.GoodsGet.CheckSensitiveWord(d)) > 0 {
 				table.TableResult.HasGoods = 1
+				break
 			}
 		}
-	} else { //arr数据检查goods
-		arrData := data.([]string)
-		if len(arrData) > 0 {
-			for _, src := range arrData {
-				if src != "" {
-					proFlag = u.GoodsGet.CheckSensitiveWord(src)
-					if len(proFlag) > 0 {
-						table.TableResult.HasGoods = 1
-						break
-					}
-				}
-			}
+	}
+}
+// hasBrand runs u.BrandGet.CheckSensitiveWord over every value and returns the
+// results in a slice parallel to data, together with a flag that is true when
+// every result was empty. table.TableResult.HasBrand is set to 1 when any
+// result is non-empty.
+func hasBrand(table *Table, data ...string) ([]string, bool) {
+	found := make([]string, len(data))
+	allNull := true
+	for idx, cell := range data {
+		found[idx] = u.BrandGet.CheckSensitiveWord(cell)
+		if found[idx] != "" {
+			allNull = false
+			table.TableResult.HasBrand = 1
 		}
 	}
+	return found, allNull
 }
 
-//是否有品牌
-func hasBrand(table *Table, data interface{}) {
-	if table.TableResult.HasBrand == 1 {
-		return
+// filterval cleans td values in place: each value has the tabletdclear matches
+// removed and is then passed through NullVal. It returns the same slice and a
+// flag that is true only when at least one value was given and every value
+// ended up empty. The flag used to mirror the last value alone, which dropped
+// whole columns whose final cell happened to be blank.
+func filterval(val ...string) ([]string, bool) {
+	allEmpty := len(val) > 0
+	for i, v := range val {
+		cleaned := tabletdclear.ReplaceAllString(v, "")
+		cleaned = NullVal.ReplaceAllString(cleaned, "")
+		if cleaned != "" {
+			allEmpty = false
+		}
+		val[i] = cleaned
 	}
-	sData, ok := data.(string)
-	brandFlag := ""
-	if ok { //string数据检查brand
-		if sData != "" {
-			brandFlag = u.BrandGet.CheckSensitiveWord(sData)
-			if len(brandFlag) > 0 {
-				table.TableResult.HasBrand = 1
-			}
-		}
-	} else { //arr数据检查brand
-		arrData := data.([]string)
-		if len(arrData) > 0 {
-			for _, src := range arrData {
-				if src != "" {
-					brandFlag = u.BrandGet.CheckSensitiveWord(src)
-					if len(brandFlag) > 0 {
-						table.TableResult.HasBrand = 1
-						break
-					}
-				}
-			}
+	return val, allEmpty
+}
+
+// filteritem cleans item-name values in place by removing whatever itemclear
+// matches in each value. It returns the same slice and a flag that is true
+// only when at least one value was given and every value ended up empty. The
+// flag used to mirror the last value alone.
+func filteritem(itemval ...string) ([]string, bool) {
+	allEmpty := len(itemval) > 0
+	for i, v := range itemval {
+		cleaned := itemclear.ReplaceAllString(v, "")
+		if cleaned != "" {
+			allEmpty = false
 		}
+		itemval[i] = cleaned
 	}
+	return itemval, allEmpty
 }

+ 1 - 1
src/jy/pretreated/multipackage.go

@@ -20,7 +20,7 @@ var (
 	//替换容易混淆的词
 	PreCon1 = regexp.MustCompile("(\\d+\\.?)+万?元")
 	//提取分包标识
-	MultiReg = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|分|合同|分|施工|监理)?(标|包件?)(段|号)?)[  \u3000\u2003\u00a0]*((\\d[.])+\\d|[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)[::]?")
+	MultiReg = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|分|合同|分|施工|监理)?(标|包件?)(段|号)?)[  \u3000\u2003\u00a0]*((\\d[.])+\\d|[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)[::]?|操作系统")
 	//匹配到的包格式分类统计
 	keyregs = []map[*regexp.Regexp]int{
 		map[*regexp.Regexp]int{

+ 4 - 4
src/jy/pretreated/tablev2.go

@@ -559,13 +559,13 @@ func NewSortMap() *SortMap {
 }
 
 //增加值
-var nullVal = regexp.MustCompile("^[/无,.。;、]+$|^详见.{2,8}$")
+var NullVal = regexp.MustCompile("^[/无,.。;、附]+$|^详见.{2,8}$")
 
 func (s *SortMap) AddKey(key string, val interface{}) {
 	//判断val
-	if v, ok := val.(string); ok && nullVal.ReplaceAllString(u.TrimLRSpace(v, ""), "") == "" {
-		return
-	}
+	//	if v, ok := val.(string); ok && NullVal.ReplaceAllString(u.TrimLRSpace(v, ""), "") == "" {
+	//		return
+	//	}
 	s.Lock.Lock()
 	defer s.Lock.Unlock()
 	//重复

+ 1 - 1
src/main_test.go

@@ -14,7 +14,7 @@ import (
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")
-	extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5caafa83a5cb26b9b7ec03b7", "5", "mxs_v5", "mxs_v3")
+	extract.StartExtractTestTask("5c528686698414055c47b115", "5c2a439aa5cb26b9b76405de", "1", "mxs_v2", "mxs_v2")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)
 }

+ 8 - 2
src/res/brand.json

@@ -99,7 +99,6 @@
     "步箭",
     "ARCTIC",
     "圣迪威",
-    "其它",
     "埃普",
     "精明",
     "KingWin",
@@ -1186,5 +1185,12 @@
     "金立",
     "国美手机",
     "索尼移动",
-    "一加"
+    "一加",
+	"得力",
+	"HP",
+	"西普教育",
+	"帝华电子",
+	"NEC",
+	"卓软",
+	"德浩"
 ]

+ 4 - 4
src/res/brandrule.json

@@ -1,13 +1,13 @@
 {
 	"must":{
-		"itemname":"((^(货物|品目|产品|商品|物资|印刷品|物料|材料|(分项)项目|设备|成交标(的)?)(名称|种类|名|描述|内容|服务)+|服务产品|采购(目录|设备)|^(品名|品目|项目)$)和?)+",
-		"brandname":"品牌(名称)?|^厂家",
-		"modal":"^(规格)?(型号|参数)|规格$|^(服务内容|采购需求)", 
+		"itemname":"((^(货物|品目|产品|商品|物资|印刷品|物料|材料|采购项目|设备|成交标(的)?)(名称|种类|内容|服务)+|服务产品|(采购|机械)(目录|设备)|^(品名|品目)$)和?)+",
+		"brandname":"^(品牌(名称)?|厂家)",
+		"modal":"^(规格)?(型号|参数)|规格$|技术规格", 
 		"unitprice":"单价|^价格|(预算|采购预算)(金额)?$|(单个商品|包件)最高限价|(中标成交|单次服务|控制)+金额|^金额$"
 	},
 	"replace":{	
 		"unitname":"(产品|计量|数量)单位|^单位",
-		"number":"(采购|需求)(数)?量|^数量|服务次数",
+		"number":"(采购|需求|预估)(数)?量|^数量|服务次数",
 		"totalprice":"小计|总价|预算总价|合计|报价总金额",
 		"guaranteetime":"(免费)?质保期|服务期(限)?|服务时间"
 	}	

+ 1 - 1
src/res/tablev1.json

@@ -1,7 +1,7 @@
 {
 	"normalhead":[
 		"^((.{2,6}(名称|编号|代码|时间|类型|性质|行政区域|原因|项目|意见|须知|程度))|标段(编号)?|招标金额|规模|统一社会信用代码|拟?中标供应商|质量|(质量)?承诺|地址|招标代理|序号|材料|结构|结构层数|评委|单位|数量|排名|标的|标项|开户银行|邮编|账号|电话|传真|网址|得分|名次|包件?号|职务|(建设|招标|采购|中标|成交|甲|乙)(单位|人|供应商|方|规模).{0,2}|.{0,5}(价格?|额|资金|[预概]算|投资|费用|报价|投标价)(万?元?([大小]写)?))$__M",
-		"^.{0,7}(((单位)?名称|总监|经理|负责人|信息|率|费|期|人|方|号|码|(价格?|额|资金)(万?元?([大小]写)?)|员|品目|标包|代表|区域|方式|因素|合价|合计|小计|地点|条件|(资质|类别和)等级|类别|状态)|得分|注册专业|方法|家数|全称|简称|邮件|执业或职业资格|证书|部门|事项|来源|划分|长度|规模|保证金|目标)$__",
+		"^.{0,7}(((单位)?名称|总监|经理|负责人|信息|率|费|期|人|号|码|(价格?|额|资金)(万?元?([大小]写)?)|员|品目|标包|代表|区域|方式|因素|合价|合计|小计|地点|条件|(资质|类别和)等级|类别|状态)|得分|注册专业|方法|家数|全称|简称|邮件|执业或职业资格|证书|部门|事项|来源|划分|长度|规模|保证金|目标)$__",
 		"(名单|证号|名称|要求|时间|日期|地点|单位|条款|机构|范围|情况|概况|品名|规格|参数|标准|指标|型号|限价|数量|方式|等级|依据|明细|概况|内容|次数|产品|性质|地区|地址|币种|主题|详情|说明|代理(公司|机构)|节支率|名单|结果|结果公示)$|^(职称|姓名|级别|职称专业|证书名称|证书编号)$__",
 		"^(联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
 		"(专家|评委|打分)$__",