Browse Source

备份-最新抽取-0112

zhengkun 3 years ago
parent
commit
021cc9bb0b

+ 1 - 1
src/jy/extract/extractudp.go

@@ -199,7 +199,7 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				//	continue
 				//}
 
-				if spidercode[qu.ObjToString(v["spidercode"])] { //临时开标记录
+				if spidercode[qu.ObjToString(v["spidercode"])] {
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}

+ 1 - 1
src/jy/pretreated/analytable.go

@@ -2309,7 +2309,7 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 	if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
 		tn.isGoonNext(isSite, codeSite)
 	}
-	//查找分包中的中标人排序
+	//查找分包中的中标人排序-分包找候选人
 	if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
 		for _, v := range tn.BlockPackage.Keys {
 			vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)

+ 1 - 1
src/jy/pretreated/colonkv.go

@@ -20,7 +20,7 @@ var (
 	regReplKV2     = regexp.MustCompile("(.+?[\u4e00-\u9fa5))][\\s\u3000\u2003\u00a0]*[::].*[((]?[^\r\n\\s\u3000\u2003\u00a0标段包]+?[))]?)([一二三四五六七八九十]+[、..][^一二三四五六七八九十]+?)")
 	regKV          = regexp.MustCompile("([\\p{Han}][^,,。、.;;\r\n]{1,30}?)[::](.*)")
 	filterK        = regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
-	filterValue    = regexp.MustCompile("(^(无)$|^[\r\n\\s\u3000\u2003\u00a0]+$|^<.*>)")
+	filterValue    = regexp.MustCompile("(^(无)$|^[\r\n\\s\u3000\u2003\u00a0]+$|^<.*>|(完全响应)$)")
 	filterWinner   = regexp.MustCompile(".{2,40}(集团|公司|学校|中心|家具城|门诊|[大中小]学|部|院|局|厂|店|所|队|社|室|厅|会|场|行)")
 	regReplKey     = regexp.MustCompile("^(包(.+[A-Za-z\\d])?|本项目|推荐|的|本次)|([约为元万亿]+|[大小]写|人民币|[全]称|姓名)$")
 	buyerAndAgency = regexp.MustCompile("(代理(机构|人)|采购(人|单位))")

+ 15 - 4
src/jy/pretreated/winnerorder.go

@@ -65,7 +65,8 @@ var (
     winnerReg21     = regexp.MustCompile("(石城(.*公司|.*厂|.*有\n限公司))[0-9.]+([弟|第][一二三四五六七八九十0-9])成交[\n]?候选人")
 	winnerReg22     = regexp.MustCompile("投标人[::](.{4,20}公司)[\\s-]+标段[::][1-3][\\s-]+排名[::]([1-9])")
 	winnerReg23     = regexp.MustCompile("([\u4E00-\u9FA5]{4,20})\n(有限公司|公司)[\\s]+(第[一二三四五1-9]中[选|标]候选人)")
-    winnerReg24     = regexp.MustCompile("[\\s\\n]+([\u4E00-\u9FA5]{4,20}公司)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
+    winnerReg24 = regexp.MustCompile("[\\s\\n]+([\u4E00-\u9FA5,]{4,30}([((]集团[))])?(公司|有限公司)|)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
+	//winnerReg24     = regexp.MustCompile("[\\s\\n]+(.{4,20}公司)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
 
 
     winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
@@ -84,6 +85,10 @@ var (
 	nofferReg       = regexp.MustCompile("(费率|折扣率)")
 	nobidValReg       = regexp.MustCompile("^(\\d{2}%|[0-9]+\\.[0-9]+%)$")
 
+
+	//特殊 - 不分割
+ 	winnerNoSplitReg   = regexp.MustCompile("^(第[一二三四五六七八九十]中[选|标]?候选人)[::]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))[,,]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))$")
+
 )
 
 /*
@@ -105,7 +110,7 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 	text = strings.ReplaceAll(text,"有\n限公司","有限公司")
 	text = winnerReg22.ReplaceAllString(text,"\n中标候选人第${2}名:${1}\n")
 	text = winnerReg23.ReplaceAllString(text,"\n${3}:${1}${2}\n")
-	text = winnerReg24.ReplaceAllString(text,"\n${2}:${1}\n")
+	text = winnerReg24.ReplaceAllString(text,"\n${4}:${1}\n")
 
 
 
@@ -230,6 +235,7 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 	if rightIndex == -1 {
 		return rdata
 	}
+
 	for i, rightIndex := range rightIndexs {
 		text = blocks[rightIndex]
 		warpCount := wo.interceptText(winnerFlag[i].indexs, text)
@@ -276,8 +282,13 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 					}
 				}
 			}
-			v = toWarpReg.ReplaceAllString(v, "\n")
-			text += v
+			if !winnerNoSplitReg.MatchString(v) {
+				v = toWarpReg.ReplaceAllString(v, "\n")
+				text += v
+			}else {
+				text += v
+			}
+
 			if (!reg_2.MatchString(v) || !colonEndReg.MatchString(v)) && k < len(lines)-1 {
 				text += "\n"
 			}

+ 5 - 6
src/main.go

@@ -75,12 +75,11 @@ func main() {
 
 //验证规则
 func testMain()  {
-	text :=`拟定的唯一供货商名称及地址:
-供应商名称:宁波康达鼎新医疗器械有限公司(01包)、北京盛世源康科技发展有限公司(02包)
-`
-	var packageReg6  = regexp.MustCompile("供应商名称[::](.{4,20}公司)[((]([0]?1包)[))][、,,](.{4,20}公司)[((]([0]?2包)[))]")
-	if packageReg6.MatchString(text) {
-		text  = packageReg6.ReplaceAllString(text,"\n$2\n中标单位:$1\n$4\n中标单位:$3")
+	text :=`第一中标候选人:中国铁路设计集团有限公司,上海市城市建设设计研究总院(集团)有限公司`
+
+	var winnerNoSplitReg   = regexp.MustCompile("^(第[一二三四五六七八九十]中[选|标]?候选人)[::]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))[,,]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))$")
+	if winnerNoSplitReg.MatchString(text) {
+		text = winnerNoSplitReg.ReplaceAllString(text,"\n${4}:${1}\n")
 		log.Debug(text)
 	}else {
 		log.Debug("不匹配")