zhengkun 3 жил өмнө
parent
commit
a6183c16e6

+ 6 - 4
src/jy/extract/extract.go

@@ -2489,12 +2489,14 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 
 
 
-
+	//金额比例异常-
 	if _, ok := tmp["bidamount"].(string); ok {
 		delete(tmp, "bidamount")
-	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && (fb/5 > qu.Float64All(tmp["budget"]) || qu.Float64All(tmp["budget"])/1000 > fb) {
-		delete(tmp, "bidamount")
-
+	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && (fb/10 > qu.Float64All(tmp["budget"]) || qu.Float64All(tmp["budget"])/1000 > fb) {
+		if fb>1000.0 &&fb<100000000.0{
+		}else {
+			delete(tmp, "bidamount")
+		}
 	}
 	if _, ok := tmp["budget"].(string); ok {
 		delete(tmp, "budget")

+ 13 - 11
src/jy/pretreated/winnerorder.go

@@ -52,7 +52,7 @@ var (
     winnerReg16 = regexp.MustCompile("(中[标|选]候选人)排序[::]([1-9一二三四五六七])[\\s]+.{1,4}名称[::](.*公司)[\\s]+.{1,4}报价[::]([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
     winnerReg17 = regexp.MustCompile("(报价金额|投标金额|应答含税总价|预期中标价格)[::]?[\n]?([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
     winnerReg18 = regexp.MustCompile("([中|投]标候选人[弟|第])([1-9一二三四五])[\\s]?名[::]([\u4E00-\u9FA5]{4,20})([((].*公司[))])?[,,\\s]+投标报价[::]([0-9\\.\\s 万元]+)")
-    winnerReg19 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?)[::]?([\u4E00-\u9FA5]{4,20}公司)[((]?[,,]?(报价|投标报价)[::]?([0-9\\.\\s万元]+)")
+    winnerReg19 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五]中标候选人)[::]?([\u4E00-\u9FA5]{4,20}公司)[((]?[,,]?(报价|投标报价|投标含税总价)[::]?([0-9\\.\\s万元]+)")
     winnerReg19_1 = regexp.MustCompile("(第[一二三1-3]候选人)[::]([\u4E00-\u9FA5()()]{4,25}公司)[((]([0-9.,,万元]+)[))]")
 
 
@@ -84,6 +84,7 @@ var (
 
 	//清洗影响候选人-抽取的文本
 	cleanWinnerReg1 = regexp.MustCompile("第[一二三123]中标候选人项目业绩[::]")
+	cleanWinnerReg2 = regexp.MustCompile("(第[一二三123])中标单位名称[::]")
 
 
 
@@ -134,17 +135,18 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 	}
 
 	if codeSite=="sh_shszfhcxjsglwyh_jsgc_zhbhxrgs" {
-		text = winnerReg80.ReplaceAllString(text,"\n$2:$1$4 中标金额:$3 万元\n$6:$5$8 中标金额:$7 万元\n")
-		text = winnerReg81.ReplaceAllString(text,"\n$3:$1$2 中标金额:$4 万元\n$7:$5$6 中标金额:$8 万元\n")
-		text = winnerReg82.ReplaceAllString(text,"\n$2$5:$1$4 中标金额:$3 万元\n$7$10:$6$9 中标金额:$8 万元\n")
+		text = winnerReg80.ReplaceAllString(text,"\n${2}:${1}${4} 中标金额:${3} 万元\n${6}:${5}${8} 中标金额:${7} 万元\n")
+		text = winnerReg81.ReplaceAllString(text,"\n${3}:${1}${2} 中标金额:${4} 万元\n${7}:${5}${6} 中标金额:${8} 万元\n")
+		text = winnerReg82.ReplaceAllString(text,"\n${2}${5}:${1}${4} 中标金额:${3} 万元\n${7}${10}:${6}${9} 中标金额:${8} 万元\n")
 	}
 
 
-	//
+	//指定清理-替换-影响抽取候选人
+	text = cleanWinnerReg2.ReplaceAllString(text,"${1}中标候选人:")
 
 
 
-	text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
+	text = winnerReg5.ReplaceAllString(text, "\n${3}:${1}\n")
 	text = winnerReg20.ReplaceAllString(text,"\n${4}:${2}\n")
 	text = winnerReg21.ReplaceAllString(text,"\n${3}成交候选人:${1}\n")
 	text = strings.ReplaceAll(text,"有\n限公司","有限公司")
@@ -164,7 +166,7 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 	text = winnerReg17.ReplaceAllString(text,"\n中标金额:${2}\n")
 	text = winnerReg18.ReplaceAllString(text,"\n${1}${2}名:${3}\n中标金额:${5}\n")
 	text = winnerReg19.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
-	text = winnerReg19_1.ReplaceAllString(text,"\n$1:$2 中标金额:$3\n")
+	text = winnerReg19_1.ReplaceAllString(text,"\n${1}:${2} 中标金额:${3}\n")
 
 
 
@@ -184,11 +186,11 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 	}
 
 	text = winnerReg51.ReplaceAllString(text,"${1}${2}\n中标金额:${3}\n")
-	text = winnerReg52.ReplaceAllString(text,"$1$2")
+	text = winnerReg52.ReplaceAllString(text,"${1}${2}")
 
-	text = winnerReg53.ReplaceAllString(text,"\n$1:$4 中标金额:$8 万元\n$2:$5 中标金额:$10 万元\n")
-	text = winnerReg54.ReplaceAllString(text,"\n$1:$2 中标金额:$4\n")
-	text = winnerReg55.ReplaceAllString(text,"\n第$2$1\n中标单位:$3 中标金额:$4\n第$5$1\n中标单位:$6 中标金额:$7\n\n")
+	text = winnerReg53.ReplaceAllString(text,"\n${1}:${4} 中标金额:${8} 万元\n${2}:${5} 中标金额:${10} 万元\n")
+	text = winnerReg54.ReplaceAllString(text,"\n${1}:${2} 中标金额:${4}\n")
+	text = winnerReg55.ReplaceAllString(text,"\n第${2}${1}\n中标单位:${3} 中标金额:${4}\n第${5}${1}\n中标单位:${6} 中标金额:${7}\n\n")
 
 
 	//清洗一下影响候选人-抽取的文字

+ 7 - 4
src/main.go

@@ -76,11 +76,14 @@ func main() {
 
 //验证规则
 func testMain()  {
-	text :=`中标候选人:第一候选人:中国铁路通信信号上海工程局(集团)有限公司(738.5083万元);评标得分:91.3835;项目经理(项目负责人):江志聪;执业证书编号:沪13106085689;承诺质量目标:符合铁路建设标准;承诺工期:60日历天;第二候选人:通号工程局集团有限公司(738.4852万元);评标得分:90.9573;项目经理(项目负责人):张庆忠;执业证书编号:京111091015510;承诺质量目标:符合铁路建设标准;承诺工期:60日历天;第三候选人:中铁电气化局集团第三工程有限公司(738.417万元);评标得分:89.804;项目经理(项目负责人):丁行南;执业证书编号:豫141060700799;承诺质量目标:符合铁路建设标准;承诺工期:60日历天;
+	text :=`
+第一中标候选人:中国联合网络通信有限公司东莞市分公司,投标含税总价:111.36万元。
+第二中标候选人:中国移动通信集团广东有限公司东莞分公司,投标含税总价:108.576万元。
+
 `
-	var winnerReg19_1 = regexp.MustCompile("(第[一二三1-3]候选人)[::]([\u4E00-\u9FA5()()]{4,25}公司)[((]([0-9.,,万元]+)[))]")
-	if 	winnerReg19_1.MatchString(text) {
-		text = winnerReg19_1.ReplaceAllString(text,"\n$1:$2 中标金额:$3\n")
+	var winnerReg19 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五]中标候选人)[::]?([\u4E00-\u9FA5]{4,20}公司)[((]?[,,]?(报价|投标报价|投标含税总价)[::]?([0-9\\.\\s万元]+)")
+	if 	winnerReg19.MatchString(text) {
+		text = winnerReg19.ReplaceAllString(text,"\n${2}:${1}${4} 中标金额:${3} 万元\n${6}:${5}${8} 中标金额:${7} 万元\n")
 		log.Debug(text)
 	}else {
 		log.Debug("不匹配")