fengweiqiang 6 лет назад
Родитель
Commit
983ff35be6

+ 8 - 1
src/jy/pretreated/analystep.go

@@ -57,6 +57,7 @@ func AnalyStart(job *util.Job) {
 			newCon = TextAfterRemoveTable(con)
 			job.BlockPackage = FindPackageFromText(job.Title, newCon)
 			for i := 0; i < len(tabs); i++ {
+				//log.Println(tabs[i].Text())
 				//添加标识:文本中有table
 				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
 				processTableResult(tabres, bl, job)
@@ -103,8 +104,14 @@ func FindProjectCode(newCon string, job *util.Job) {
 		return
 	}
 	var proCode string
-	proCode = projectcodeReg.FindString(newCon)
 	blCode := &util.Block{}
+	if projectcodeRegAll.MatchString(newCon){//项目名称项目编号一起的5d424bdfa5cb26b9b7ac7a85
+		splitStr := strings.Split(newCon, " ")
+		if len(splitStr) >=2{
+			newCon = "项目编号:"+splitStr[len(splitStr)-1]
+		}
+	}
+	proCode = projectcodeReg.FindString(newCon)
 	if proCode != "" {
 		ckv := GetKVAll(proCode, job.Title, nil, 1)
 		blCode.ColonKV = ckv

+ 2 - 1
src/jy/pretreated/analytable.go

@@ -107,8 +107,9 @@ var (
 	underline                   = regexp.MustCompile("_+$")
 	iswinnertabletag            = regexp.MustCompile("(中标|候选人|成交|结果)")
 	nswinnertabletag            = regexp.MustCompile("[评得分估]+")
+	projectcodeRegAll           = regexp.MustCompile(`采购项目名称及项目编号[:|:]?`)
 	projectcodeReg              = regexp.MustCompile(`((|\(|\[){1}(编号|项目编号|标段编号|招标编号){1}(:|:)(.){4,30}()|\)|\])`)
-	projectcodeReg2             = regexp.MustCompile(`((?:^|\n)编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9a-zA-Z号]`)
+	projectcodeReg2             = regexp.MustCompile(`((?:^|\n)编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9a-zA-Z号]`)
 	projectcodeReg3             = regexp.MustCompile("(^询价单编号[A-Za-z0-9/-]*|公告编号[A-Za-z0-9/-]*)")
 	jsonReg                     = regexp.MustCompile(`\{.+:[^}]*\} `) //  \{".*\":\".+\"}
 	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")

+ 67 - 68
src/jy/pretreated/tablev2.go

@@ -218,84 +218,83 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
 				}
 				stag = str
 			}
-			for _, tv := range tabs {
-				if IsHide(tv) {
-					continue
-				}
-				sonts := NewTableResult(ts.Id, ts.Toptype, stag, td.Html, 2, td.TR.Table.TableResult.RuleBlock)
-				sonts.GoqueryTabs = tv
-				sonts.Analy()
-
-				//sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
-				td.BH = false
-				if td.TR.Table.TableResult == nil {
-					td.TR.Table.TableResult = NewTableResult(sonts.Id, sonts.Toptype, sonts.BlockTag, sonts.Html, sonts.Itype, sonts.RuleBlock)
-				}
-				MergeKvTags(td.TR.Table.TableResult.KvTags, sonts.KvTags)
-				td.SonTableResult = sonts
-				//for _, k := range sonts.SortKV.Keys {
-				//u.Debug(k, sonts.SortKV.Map[k])
-				//				td.TR.Table.StandKV[k] = sonts.SortKV.Map[k].(string)
-				//				td.TR.Table.StandKVWeight[k] = sonts.SortKVWeight[k]
-				//}
-				//增加brand (子表)
-				//fmt.Println("sonsHasKey=============", sonts.HasKey)
-				//fmt.Println("sonsHasGoods========", sonts.HasGoods)
-				//fmt.Println("sonsHasBrand========", sonts.HasBrand)
-				if sonts.HasKey != 0 {
-					td.TR.Table.TableResult.HasKey = sonts.HasKey
-				}
-				if sonts.HasGoods != 0 {
-					td.TR.Table.TableResult.HasGoods = sonts.HasGoods
-				}
-				if sonts.HasBrand != 0 {
-					td.TR.Table.TableResult.HasBrand = sonts.HasBrand
-				}
-				if sonts.BrandData != nil && len(sonts.BrandData) > 0 { //子table
-					for _, v := range sonts.BrandData {
-						if len(v) > 0 {
-							td.TR.Table.TableResult.BrandData = append(td.TR.Table.TableResult.BrandData, v)
-						}
+		}
+		for _, tv := range tabs {
+			if IsHide(tv) {
+				continue
+			}
+			sonts := NewTableResult(ts.Id, ts.Toptype, stag, td.Html, 2, td.TR.Table.TableResult.RuleBlock)
+			sonts.GoqueryTabs = tv
+			sonts.Analy()
+
+			//sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
+			td.BH = false
+			if td.TR.Table.TableResult == nil {
+				td.TR.Table.TableResult = NewTableResult(sonts.Id, sonts.Toptype, sonts.BlockTag, sonts.Html, sonts.Itype, sonts.RuleBlock)
+			}
+			MergeKvTags(td.TR.Table.TableResult.KvTags, sonts.KvTags)
+			td.SonTableResult = sonts
+			//for _, k := range sonts.SortKV.Keys {
+			//u.Debug(k, sonts.SortKV.Map[k])
+			//				td.TR.Table.StandKV[k] = sonts.SortKV.Map[k].(string)
+			//				td.TR.Table.StandKVWeight[k] = sonts.SortKVWeight[k]
+			//}
+			//增加brand (子表)
+			//fmt.Println("sonsHasKey=============", sonts.HasKey)
+			//fmt.Println("sonsHasGoods========", sonts.HasGoods)
+			//fmt.Println("sonsHasBrand========", sonts.HasBrand)
+			if sonts.HasKey != 0 {
+				td.TR.Table.TableResult.HasKey = sonts.HasKey
+			}
+			if sonts.HasGoods != 0 {
+				td.TR.Table.TableResult.HasGoods = sonts.HasGoods
+			}
+			if sonts.HasBrand != 0 {
+				td.TR.Table.TableResult.HasBrand = sonts.HasBrand
+			}
+			if sonts.BrandData != nil && len(sonts.BrandData) > 0 { //子table
+				for _, v := range sonts.BrandData {
+					if len(v) > 0 {
+						td.TR.Table.TableResult.BrandData = append(td.TR.Table.TableResult.BrandData, v)
 					}
 				}
-				if sonts.WinnerOrder != nil && len(sonts.WinnerOrder) > 0 {
-					td.TR.Table.TableResult.WinnerOrder = sonts.WinnerOrder
-				}
-				if sonts.IsMultiPackage {
-					td.TR.Table.BPackage = true
-					tb1 := td.TR.Table.BlockPackage
-					for k, v := range sonts.PackageMap.Map {
-						v1 := v.(*u.BlockPackage)
-						if tb1.Map[k] == nil {
-							tb1.AddKey(k, v)
-						} else {
-							bp := tb1.Map[k].(*u.BlockPackage)
-							if bp != nil && v1.TableKV != nil {
-								for k2, v2 := range v1.TableKV.KvTags {
-									if bp.TableKV == nil {
-										bp.TableKV = u.NewJobKv()
-									}
-									isExists := false
-									for _, v2v := range v2 {
-										for _, v2vv := range bp.TableKV.KvTags[k2] {
-											if v2v.Value == v2vv.Value {
-												isExists = true
-												break
-											}
-										}
-										if !isExists {
-											bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
-											bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+			}
+			if sonts.WinnerOrder != nil && len(sonts.WinnerOrder) > 0 {
+				td.TR.Table.TableResult.WinnerOrder = sonts.WinnerOrder
+			}
+			if sonts.IsMultiPackage {
+				td.TR.Table.BPackage = true
+				tb1 := td.TR.Table.BlockPackage
+				for k, v := range sonts.PackageMap.Map {
+					v1 := v.(*u.BlockPackage)
+					if tb1.Map[k] == nil {
+						tb1.AddKey(k, v)
+					} else {
+						bp := tb1.Map[k].(*u.BlockPackage)
+						if bp != nil && v1.TableKV != nil {
+							for k2, v2 := range v1.TableKV.KvTags {
+								if bp.TableKV == nil {
+									bp.TableKV = u.NewJobKv()
+								}
+								isExists := false
+								for _, v2v := range v2 {
+									for _, v2vv := range bp.TableKV.KvTags[k2] {
+										if v2v.Value == v2vv.Value {
+											isExists = true
+											break
 										}
 									}
+									if !isExists {
+										bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
+										bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
+									}
 								}
 							}
 						}
 					}
-					//u.Debug(fmt.Sprintf("%v", td.TR.Table.BlockPackage.Map["1"]))
 				}
+				//u.Debug(fmt.Sprintf("%v", td.TR.Table.BlockPackage.Map["1"]))
 			}
-
 		}
 	}
 }

+ 5 - 0
src/res/fieldscore.json

@@ -375,6 +375,11 @@
                 "describe": "标段编号匹配-2",
                 "regstr": "/.{2}",
                 "score": -2
+            },
+            {
+                "describe": "-结束没有抽取完",
+                "regstr": "-$",
+                "score": -1
             }
         ],
         "length": [