fengweiqiang 4 жил өмнө
parent
commit
0621ed1654

+ 30 - 11
src/jy/pretreated/analystep.go

@@ -14,25 +14,34 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|主要人员相关资料|投标文件格式|项目业绩|否决投标的?情况说明")
+var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|投标人业绩|主要人员相关资料|投标文件格式|唱标记录|项目业绩|否决投标的?情况说明")
+var hisReg = regexp.MustCompile("(<td>|<tr>|<table>).*(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
+var hisReg2 = regexp.MustCompile("(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
+var formattext = regexp.MustCompile("(投标总价)([0-9,.万元]*)")
+var formattext2 = regexp.MustCompile("中标单价.*(中标总价.*)")
 
 func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con := job.Content
 	//全文的需要修复表格
 	con = RepairCon(con)
 	//格式化正文
+	con = hisReg.ReplaceAllString(con, "")
+	con = hisReg2.ReplaceAllString(con, "${2}")
+	con = formattext.ReplaceAllString(con, "${1}:${2}")
+	con = formattext2.ReplaceAllString(con, "${1}")
 	con = formatText(con, "all")
 	job.Content = con
 	//计算表格占比,返回表格数组、占比
-	tabs, ration := ComputeConRatio(con, 1)
-	if len(tabs) > 0 {
+	tabs, _ := ComputeConRatio(con, 1)
+	/*if len(tabs) > 0 {
 		newcon, newtabs, newration := FindBigText(con, ration, tabs)
 		if newcon != "" {
 			con = newcon
+			con = formatText(con, "all")
 			tabs = newtabs
 			ration = newration
 		}
-	}
+	}*/
 	job.BlockPackage = map[string]*util.BlockPackage{}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock, isSite, codeSite) //分块
 	if len(blockArrays) > 0 {                                                                  //有分块
@@ -66,13 +75,18 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
 			//log.Println(newCon)
-			if newCon!=""{
+			if newCon != "" {
 				job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
 			}
 			for i := 0; i < len(tabs); i++ {
-				//fmt.Println(tabs[i].Html())
+				blockTag := ""
+				if len(tabs[i].Nodes) > 0 {
+					if tabs[i].Nodes[0].PrevSibling != nil {
+						blockTag = tabs[i].Nodes[0].PrevSibling.Data
+					}
+				}
 				//添加标识:文本中有table
-				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
+				tabres := AnalyTableV2(tabs[i], job.Category, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
 				processTableResult(tabres, bl, job, isSite, codeSite)
 			}
 		} else {
@@ -83,11 +97,12 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 		//log.Println(bl.Text)
 		FindProjectCode(bl.Text, job) //匹配项目编号
 		if yjReg.MatchString(bl.Text) {
-			if strings.Index(bl.Text, "业绩")>1{
+			if strings.Index(bl.Text, "业绩") > 1 {
 				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
 			}
 		}
 		//调用kv解析
+		bl.Text = formatText(bl.Text, "all")
 		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1, isSite, codeSite)
 		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil, isSite, codeSite)
 		//新加 未分块table中未能解析到中标候选人,从正文中解析
@@ -106,7 +121,9 @@ func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite st
 	for _, tab := range tabs {
 		job.HasTable = 1
 		tmptag := ""
-		if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
+		if bl.Title != "" && len(bl.Title) < 20 {
+			tmptag = bl.Title
+		} else if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
 			tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)
 		}
 		//添加标识:文本中有table
@@ -290,8 +307,10 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 						wror = append(wror, v)
 						continue
 					} else if tmpWins[v["entname"].(string)] > 0 && tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] != nil {
-						if tmpWins[v["entname"].(string)]-1 >= 0 {
-							job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
+						if tmpWins[v["entname"].(string)]-1 >= 0 && len(job.Winnerorder) > 0 {
+							if len(job.Winnerorder) > (tmpWins[v["entname"].(string)] - 1) {
+								job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
+							}
 							continue
 						}
 					}

+ 24 - 10
src/jy/pretreated/winnerorder.go

@@ -1,6 +1,8 @@
 package pretreated
 
 import (
+	"jy/clear"
+	//"jy/clear"
 	"jy/util"
 	qutil "qfw/util"
 	"regexp"
@@ -27,14 +29,16 @@ var (
 	numberReg2        = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+")
 	thisNumberReg     = regexp.MustCompile("第" + numberReg.String())
 	winnerReg0        = regexp.MustCompile("(中标候选人第\\d名)")
-	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?((候|侯)选)?(入围|备选|成交|中(标|选))人?([((]成交[))])?((候|侯)选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
-	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|第[一二三四五六七八九十1-9]+(候|侯)选人)")
+	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
+	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|第[一二三四五六七八九十1-9]+(中标)?[候|侯]选人|中标候选人排名[:]\\d)")
 	//winnerReg2     = regexp.MustCompile("(第[一二三四五六七八九十1-9]+(候|侯)选人)")
 	winnerReg3     = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+名)")
-	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中(标|选)候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
+	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
 	winnerReg5     = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
 	winnerReg6     = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)")
 	winnerReg7     = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
+	winnerReg8     = regexp.MustCompile("(第[一二三四五六七八九十]中标候选人)[::]?\n(1)单位名称:(.*)\n(2)投标报价(含税):(.*)")
+	winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
 	colonEndReg    = regexp.MustCompile("[::]$")
 	toWarpReg      = regexp.MustCompile("[,。,;;]+")
 	findamountReg  = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+")
@@ -42,8 +46,8 @@ var (
 	companyWarpReg = regexp.MustCompile("(公司)(.+?[::])")
 	findCompanyReg = regexp.MustCompile("[^::]+公司")
 	colonSpaceReg  = regexp.MustCompile("[::]\\s+")
-	findCandidate  = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合(会|体))|工作室)")
-	findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合(会|体)|工作室)$)")
+	findCandidate  = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])|工作室)")
+	findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合[会|体]|工作室|有限司)$)")
 	clearSpace1    = regexp.MustCompile("([((][\\d一二三四五六七八九十][))][\\s\u3000\u2003\u00a0\\t]*|<[^>].+?>)")
 	clearSpace2    = regexp.MustCompile("</?[^>]+>")
 	offerReg       = regexp.MustCompile("(中标|磋商|投标|报|单|成交)总?(价|金额)")
@@ -55,10 +59,15 @@ var (
  *from 来源
  */
 func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool, codeSite string) []map[string]interface{} {
-	if clearSpace2.MatchString(text) {
-		return  []map[string]interface{}{}
+	if clearSpace2.MatchString(text){
+		text = TextAfterRemoveTable(text)
+	}
+	text = winnerRegclear.ReplaceAllString(text,"")
+	if nswinnertabletag.MatchString(text) {
+		return []map[string]interface{}{}
 	}
 	text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
+	text = winnerReg8.ReplaceAllString(text,"\n${1}:${2}\n中标金额:${3}\n")
 	/*
 		"_id" : ObjectId("5c2c6f60a5cb26b9b7b62cd8")
 
@@ -240,7 +249,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				val := wo.clear("中标单位", v)
 				if val != nil && utf8.RuneCountInString(qutil.ObjToString(val)) > 5 {
 					count++
-					object["entname"] = strings.TrimSpace(qutil.ObjToString(val))
+					object["entname"] = strings.TrimRight(strings.ReplaceAll(strings.TrimSpace(qutil.ObjToString(val)), "公司", "公司,"), ",")
 					object["sort"] = wo.toNumber(k, count)
 					object["sortstr"] = thisNumberReg.FindString(k)
 					object["type"] = i
@@ -257,9 +266,14 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				}
 				//找到了中标金额
 				if findOfferFlag && object["entname"] != nil {
-					val := wo.clear("中标金额", v)
+					val := wo.clear("中标金额", v+GetMoneyUnit(k, v))
 					if val != nil {
-						object["price"] = val
+						moneys := clear.ObjToMoney([]interface{}{val, ""})
+						if len(moneys) > 0 {
+							if vf, ok := moneys[0].(float64); ok &&  moneys[len(moneys)-1].(bool){
+								object["price"] = float64(vf)
+							}
+						}
 					}
 					winners = append(winners, object)
 					object = map[string]interface{}{}