ソースを参照

Merge branch 'dev3.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.2

# Conflicts:
#	src/jy/pretreated/analytable.go
fengweiqiang 6 年前
コミット
4796741837
3 ファイル変更、26 行追加、30 行削除
  1. + 7 - 12
      src/jy/pretreated/analytable.go
  2. + 1 - 0
      src/jy/pretreated/colonkv.go
  3. + 18 - 18
      src/jy/pretreated/tablev2.go

+ 7 - 12
src/jy/pretreated/analytable.go

@@ -103,10 +103,9 @@ var (
 	MultipleValueSplitReg       = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
 	BuyerContacts               = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
 	FilterSerial                = regexp.MustCompile(".+[、..::,]")
-	filterTableWror             = regexp.MustCompile("班子成员")
 	underline                   = regexp.MustCompile("_+$")
 	iswinnertabletag            = regexp.MustCompile("(中标|候选人|成交|结果)")
-	nswinnertabletag            = regexp.MustCompile("[评得分估]+")
+	nswinnertabletag            = regexp.MustCompile("[评得分估]+|标的|班子成员")
 	jsonReg                     = regexp.MustCompile(`\{.+:[^}]*\} `) //  \{".*\":\".+\"}
 	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")
 	winnerOrderAndBidResult     = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
@@ -208,10 +207,6 @@ func (table *Table) KVFilter() {
 	//4.对KV的处理
 	//判断表格是否有用,调用abandontable正则数组进行判断
 	//遍历每一行
-	winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
-	if !winnertag {
-		winnertag = iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
-	}
 	table.analyTdKV() //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
 	as := NewSortMap()
 
@@ -239,11 +234,7 @@ func (table *Table) KVFilter() {
 		}
 	}
 	//处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
-	table.sortKVArr(as, winnertag)
-	//
-	if filterTableWror.MatchString(table.Tag) {
-		table.WinnerOrder = nil
-	}
+	table.sortKVArr(as)
 	//
 	if len(table.WinnerOrder) > 0 || !table.BPackage {
 		winnerOrder := []map[string]interface{}{}
@@ -312,7 +303,11 @@ func (table *Table) KVFilter() {
 }
 
 //处理table.SortKV.value为数组的情况
-func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
+func (table *Table) sortKVArr(as *SortMap) {
+	winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
+	if !winnertag {
+		winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
+	}
 	checkKey := map[int]bool{}
 	for kn, k := range as.Keys { //遍历table.SortKV.value为数组的key
 		v := as.Map[k]

+ 1 - 0
src/jy/pretreated/colonkv.go

@@ -371,6 +371,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 				if buyer == "" {
 					continue
 				}
+
 				prevLine := kv.PrevLine
 				prevLine = strings.TrimSpace(prevLine)
 				prevLine = strings.Split(prevLine, " ")[0]

+ 18 - 18
src/jy/pretreated/tablev2.go

@@ -150,24 +150,24 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 		//}
 		//}
 	}
-	//抽取不到走正则抽
-	proCode := projectcodeReg.FindString(text)
-	if proCode != "" {
-		ckv := GetKVAll(proCode, "", nil, 1)
-		for _, v := range ckv.KvTags {
-			for _, vv := range v {
-				td.SortKV.AddKey(vv.Key, vv.Value)
-			}
-		}
-	} else if proCode = projectcodeReg2.FindString(text); proCode != "" {
-		ckv := GetKVAll(proCode, "", nil, 1)
-		for _, v := range ckv.KvTags {
-			for _, vv := range v {
-				td.SortKV.AddKey(vv.Key, vv.Value)
-			}
-		}
-	}
-	if proCode = jsonReg.FindString(text); proCode != "" {
+	////抽取不到走正则抽
+	//proCode := projectcodeReg.FindString(text)
+	//if proCode != "" {
+	//	ckv := GetKVAll(proCode, "", nil, 1)
+	//	for _, v := range ckv.KvTags {
+	//		for _, vv := range v {
+	//			td.SortKV.AddKey(vv.Key, vv.Value)
+	//		}
+	//	}
+	//} else if proCode = projectcodeReg2.FindString(text); proCode != "" {
+	//	ckv := GetKVAll(proCode, "", nil, 1)
+	//	for _, v := range ckv.KvTags {
+	//		for _, vv := range v {
+	//			td.SortKV.AddKey(vv.Key, vv.Value)
+	//		}
+	//	}
+	//}
+	if proCode := jsonReg.FindString(text); proCode != "" {
 		jsonMap := make(map[string]string)
 		json.Unmarshal([]byte(proCode), &jsonMap)
 		for k, v := range jsonMap {