Ver Fonte

table fenbao

fengweiqiang há 6 anos atrás
pai
commit
0d21167cea
1 ficheiro alterado com 158 adições e 144 exclusões
  1. 158 144
      src/jy/pretreated/analytable.go

+ 158 - 144
src/jy/pretreated/analytable.go

@@ -768,8 +768,8 @@ func (table *Table) createTabe(trs *goquery.Selection) {
 }
 
 //对table进行整体解析处理
-func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
-	ts := table.tableSubDemolitionTable() //分包,拆表
+func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
+	ts := tn.tableSubDemolitionTable() //分包,拆表
 	for n, table := range ts {
 		//处理每个table
 		if len(table.TRs) > 0 {
@@ -1871,10 +1871,10 @@ func (tn *Table) GetTdByRCNo(row, col int) *TD {
 
 //判断表格是否是分包
 func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
-	pac := 0
-	val := 0
-	index = []string{} //初始化返回值index
-	index_pos := []int{}
+	pac := 0             //包的数量
+	val := 0             //分值
+	index = []string{}   //存储分包,使用table.SortKV的key和value使用正则等处理对值进行判断
+	index_pos := []int{} //下标
 	//是数组且能找到标段之类的提示
 	//arr_count := 0 //计数table.SortKV的value是数组的数量,后面没用
 	key_index := -1
@@ -1896,13 +1896,13 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 			val += 4
 		}
 		//根据table.SortKV的key判断是否分包,如果没有再根据value判断
-		val, index, index_pos = foundPacBySortKV(tn, val, index, index_pos, pac, &keyExistsCount, &commonKeyVals, key_index, hasPkgTd)
+		val, index, index_pos = foundPacBySortKV(tn, val, index, index_pos, &keyExistsCount, &commonKeyVals, key_index, hasPkgTd)
 	}
 	//	u.Debug(index)
 	//过滤重复及标准化!
 	standIndex := []string{}
 	standIndex_pos := []int{}
-	oldIndex := []string{}
+	oldIndex := []string{} //存放包的原始值
 	brepeat := map[string]bool{}
 	for k, v := range index {
 		v = u.PackageNumberConvert(v)
@@ -1910,7 +1910,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 			brepeat[v] = true
 			standIndex = append(standIndex, v)
 			standIndex_pos = append(standIndex_pos, index_pos[k])
-			oldIndex = append(oldIndex, v)
+			oldIndex = append(oldIndex, index[k])
 		}
 	}
 	index = standIndex
@@ -1930,163 +1930,174 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 		//多包解析
 		if b {
 			tn.BPackage = true
+			//根据数组index分包长度添加table.BlockPackage子包数组
 			for nk, v := range index {
 				if tn.BlockPackage.Map[v] == nil {
 					bp := &u.BlockPackage{}
-					bp.Index = v
-					bp.Origin = oldIndex[nk]
-					bp.TableKV = u.NewJobKv()
+					bp.Index = v                  //序号 (转换后编号,只有数字或字母)
+					bp.Origin = oldIndex[nk]      //包的原始值
+					bp.TableKV = u.NewJobKv()     //table kv (分出的对应的KV值)
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
 			}
-			if len(index) == 1 { //是一个的情况
-				if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 {
-					beq := true
-					for _, v2 := range tn.SortKV.Map {
-						if _, ok := v2.(string); !ok {
-							beq = false
-							break
-						}
-					}
-					if beq { //统一处理为数组
-						td := tn.GetTdByRCNo(tn.RowNum-1, 0)
-						if !td.BH && FindVal2_1.MatchString(td.Val) {
-							for k2, v2 := range tn.SortKV.Map {
-								tn.SortKV.Map[k2] = []string{v2.(string)}
-							}
-						} else {
-							//没有处理成数组的情况下,继续调用正文查找分包的方法
-							isGoonNext = true
-						}
-					}
-				}
+			isGoonNext = tn.manyPackageProcessByIndex(index,standIndex_pos)
+		}
+	} else {
+		isGoonNext = true
+	}
+	if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
+		tn.isGoonNext()
+	}
+	//查找分包中的中标人排序
+	if tn.BlockPackage != nil && tn.BlockPackage.Map != nil && len(tn.BlockPackage.Map) > 0 {
+		for _, v := range tn.BlockPackage.Map {
+			vv := v.(*u.BlockPackage)
+			if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
+				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
 			}
-			for _, k1 := range tn.SortKV.Keys {
-				v1 := tn.SortKV.Map[k1]
-				if _, bvs := v1.(string); bvs && len(index) > 1 && !strings.HasSuffix(k1, "_") {
-					v1_array := []string{v1.(string)}
-					underline := ""
-					for {
-						underline += "_"
-						if tn.SortKV.Map[k1+underline] == nil {
-							break
-						} else if v3, v2_ok := tn.SortKV.Map[k1+underline].(string); v2_ok && v3 != "" {
-							v1_array = append(v1_array, v3)
-						}
-					}
-					v1 = v1_array
+		}
+	}
+	return
+}
+//多包处理
+func (tn *Table)manyPackageProcessByIndex(index []string ,standIndex_pos []int,)(isGoonNext bool){
+	if len(index) == 1 { //是一个的情况
+		if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 {//SortKV的key数量小于10,并且列数小于10、行数小于4
+			beq := true
+			for _, v2 := range tn.SortKV.Map {
+				if _, ok := v2.(string); !ok {
+					beq = false
+					break
 				}
-				if val, bvs := v1.([]string); bvs {
-					if len(val) <= len(index) {
-						for k, v := range val {
-							tn.assemblePackage(k1, v, index[k])
-						}
-					} else {
-						for sk1, sv2 := range index {
-							v := val[sk1]
-							//处理http://www.hljcg.gov.cn/xwzs!queryOneXwxxqx.action?xwbh=8145b599-a11e-45cb-a76a-12157a715570
-							if v == "" && strings.Index(k1, "供应商") > -1 {
-								if sk1 != len(index)-1 {
-									//u.Debug(val[sk1+1], val[sk1+2])
-									if standIndex_pos[sk1+1]-standIndex_pos[sk1] > 1 {
-										v = val[standIndex_pos[sk1]+1]
-									}
-								} else {
-									if standIndex_pos[sk1] < len(val)-1 {
-										v = val[standIndex_pos[sk1]+1]
-									}
-								}
-							}
-							tn.assemblePackage(k1, v, sv2)
-						}
-					}
-					//删除子包的kv
-					//u.Debug("----==1==-------", k1)
-					k1tags := u.GetTags(k1)
-					//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
-					//	tn.SortKV.RemoveKey(k1)
-					//}
-					for _, vcgdw := range k1tags {
-						if vcgdw.Value == "采购单位" {
-							tn.SortKV.RemoveKey(k1)
-						}
-					}
-				} else if val, bvs := v1.(string); bvs && len(index) == 1 {
-					//删除子包的kv
-					k1tags, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
-					if len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0]) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) {
-						//log.Println("remove", k1, val)
-						tn.assemblePackage(k1, val, index[0])
-						tn.SortKV.RemoveKey(k1)
+			}
+			if beq { //统一处理为数组
+				td := tn.GetTdByRCNo(tn.RowNum-1, 0)
+				if !td.BH && FindVal2_1.MatchString(td.Val) {
+					for k2, v2 := range tn.SortKV.Map {
+						tn.SortKV.Map[k2] = []string{v2.(string)}
 					}
-					//u.Debug("----==2==-------", k1)
+				} else {
+					//没有处理成数组的情况下,继续调用正文查找分包的方法
+					isGoonNext = true
 				}
-
 			}
 		}
-	} else {
-		isGoonNext = true
 	}
-	if isGoonNext {
-		blockPackage := map[string]*u.BlockPackage{}
-		for _, k := range tn.SortKV.Keys {
-			if excludeKey.MatchString(k) {
-				continue
+	for _, k1 := range tn.SortKV.Keys {
+		v1 := tn.SortKV.Map[k1]
+		if _, bvs := v1.(string); bvs && len(index) > 1 && !strings.HasSuffix(k1, "_") { //table.SortKV.Map.value为字符串并且index有分包而且table.SortKV.Map.key没有_
+			v1_array := []string{v1.(string)}
+			underline := ""
+			for {
+				underline += "_"
+				if tn.SortKV.Map[k1+underline] == nil {
+					break
+				} else if v3, v2_ok := tn.SortKV.Map[k1+underline].(string); v2_ok && v3 != "" {
+					v1_array = append(v1_array, v3)
+				}
 			}
-			str := ""
-			v := tn.SortKV.Map[k]
-			nk := regReplAllSpace.ReplaceAllString(k, "")
-			if vs, ok := v.([]string); ok {
-				str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " "))
+			v1 = v1_array
+		}
+		if val, bvs := v1.([]string); bvs {
+			if len(val) <= len(index) {//table.SortKV.Map.value数组小于等于分包index
+				for k, v := range val {
+					tn.assemblePackage(k1, v, index[k]) //组装解析到的分包
+				}
 			} else {
-				str += fmt.Sprintf("%s:%s\n", nk, v)
-			}
-			b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false)
-			if b && len(blockPackage) > 0 {
-				tn.BPackage = true
-				for mk, mv := range blockPackage {
-					if tn.BlockPackage.Map[mk] == nil {
-						tn.BlockPackage.AddKey(mk, mv)
-					} else {
-						bp := tn.BlockPackage.Map[mk].(*u.BlockPackage)
-						if bp.TableKV == nil {
-							bp.TableKV = u.NewJobKv()
-						}
-						for k2, v2 := range mv.ColonKV.Kv {
-							if bp.TableKV.Kv[k2] == "" {
-								bp.TableKV.Kv[k2] = v2
-								bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2]
-								bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+				for sk1, sv2 := range index {
+					v := val[sk1]
+					//处理http://www.hljcg.gov.cn/xwzs!queryOneXwxxqx.action?xwbh=8145b599-a11e-45cb-a76a-12157a715570
+					if v == "" && strings.Index(k1, "供应商") > -1 {
+						if sk1 != len(index)-1 {
+							//u.Debug(val[sk1+1], val[sk1+2])
+							if standIndex_pos[sk1+1]-standIndex_pos[sk1] > 1 {
+								v = val[standIndex_pos[sk1]+1]
 							}
-						}
-						for k2, v2 := range mv.SpaceKV.Kv {
-							if bp.TableKV.Kv[k2] == "" {
-								bp.TableKV.Kv[k2] = v2
-								bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2]
-								bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+						} else {
+							if standIndex_pos[sk1] < len(val)-1 {
+								v = val[standIndex_pos[sk1]+1]
 							}
 						}
 					}
+					tn.assemblePackage(k1, v, sv2)
 				}
-				tn.BPackage = true
-				tn.SortKV.RemoveKey(k)
 			}
+			//删除子包的kv
+			//u.Debug("----==1==-------", k1)
+			k1tags := u.GetTags(k1) //取得匹配
+			//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
+			//	tn.SortKV.RemoveKey(k1)
+			//}
+			for _, vcgdw := range k1tags {
+				if vcgdw.Value == "采购单位" {
+					tn.SortKV.RemoveKey(k1)
+				}
+			}
+		} else if val, bvs := v1.(string); bvs && len(index) == 1 {
+			//删除子包的kv
+			k1tags, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
+			if len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0]) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) {
+				//log.Println("remove", k1, val)
+				tn.SortKV.RemoveKey(k1)
+				tn.assemblePackage(k1, val, index[0])
+			}
+			//u.Debug("----==2==-------", k1)
 		}
+
 	}
-	//查找分包中的中标人排序
-	if tn.BlockPackage != nil && tn.BlockPackage.Map != nil && len(tn.BlockPackage.Map) > 0 {
-		for _, v := range tn.BlockPackage.Map {
-			vv := v.(*u.BlockPackage)
-			if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
-				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
+	return isGoonNext
+}
+
+//没有处理成数组的情况下,继续调用正文查找分包的方法
+func (tn *Table) isGoonNext() {
+	blockPackage := map[string]*u.BlockPackage{}
+	for _, k := range tn.SortKV.Keys {
+		if excludeKey.MatchString(k) {
+			continue
+		}
+		str := "" //拼装为冒号kv
+		v := tn.SortKV.Map[k]
+		nk := regReplAllSpace.ReplaceAllString(k, "")
+		if vs, ok := v.([]string); ok {
+			str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " "))
+		} else {
+			str += fmt.Sprintf("%s:%s\n", nk, v)
+		}
+		b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false) //分块之后分包
+		if b && len(blockPackage) > 0 {
+			tn.BPackage = true
+			for mk, mv := range blockPackage {
+				if tn.BlockPackage.Map[mk] == nil {
+					tn.BlockPackage.AddKey(mk, mv)
+				} else {
+					bp := tn.BlockPackage.Map[mk].(*u.BlockPackage)
+					if bp.TableKV == nil {
+						bp.TableKV = u.NewJobKv()
+					}
+					for k2, v2 := range mv.ColonKV.Kv {
+						if bp.TableKV.Kv[k2] == "" {
+							bp.TableKV.Kv[k2] = v2
+							bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2]
+							bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+						}
+					}
+					for k2, v2 := range mv.SpaceKV.Kv {
+						if bp.TableKV.Kv[k2] == "" {
+							bp.TableKV.Kv[k2] = v2
+							bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2]
+							bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+						}
+					}
+				}
 			}
+			tn.BPackage = true
+			tn.SortKV.RemoveKey(k)
 		}
 	}
-	return
 }
 
 //根据table.SortKV的key判断是否分包,如果没有再根据value判断
-func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, pac int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
+func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
 	keyIsPkg := false
 	for in, k := range tn.SortKV.Keys {
 		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) { //判断分包前排除
@@ -2103,7 +2114,7 @@ func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, pac i
 				index = append(index, pkgFlag)
 				index_pos = append(index_pos, len(index))
 				val += 1
-				pac++
+				//pac++
 			} else {
 				k = strings.TrimRight(k, "_")
 			}
@@ -2129,7 +2140,7 @@ func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, pac i
 						index = append(index, v1)
 						index_pos = append(index_pos, in2)
 						val += 1
-						pac++
+						//pac++
 					}
 				}
 			} else if v1, ok := v.(string); ok && !hasPkgTd[k] {
@@ -2141,7 +2152,7 @@ func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, pac i
 						index = append(index, v1)
 						index_pos = append(index_pos, 0)
 						val += 1
-						pac++
+						//pac++
 						underline := ""
 						for {
 							underline += "_"
@@ -2221,7 +2232,7 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
 				index_pos = append(index_pos, 0)
 				val += 1
 				pac++
-			} else if getTd := tn.GetTdByRCNo(0, tn.SortKV.Index[k]); getTd != nil && getTd.KVDirect == 2 {
+			} else if getTd := tn.GetTdByRCNo(0, tn.SortKV.Index[k]); getTd != nil && getTd.KVDirect == 2 { //纵向
 				/*处理这种情况:
 				<tr><td>包一:xxxxxxxxx</td></tr>
 				*/
@@ -2239,19 +2250,22 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
 	return key_index, index, index_pos, val, pac, hasPkgTd
 }
 
-//组装解析到的分包
+//组装解析到的分包;key如果匹配到抽取关键词就添加到table.SortKV
 func (tn *Table) assemblePackage(k1, v1, key string) {
 	bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
 	if bp.TableKV == nil {
 		bp.TableKV = u.NewJobKv()
 	}
 	if v1 != "" {
-		k2, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1)
+		k2, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1)//匹配抽取关键词
 		if bf {
 			for pos, k3 := range k2 {
 				if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
 					bp.TableKV.Kv[k3] = v2
 					bp.TableKV.KvTag[k3] = &u.Tag{Value: v2, Weight: w1[pos]}
+				}else {
+					bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
+					tn.SortKV.AddKey(k3,v2)
 				}
 			}
 		} else {
@@ -3109,7 +3123,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{