|
@@ -14,25 +14,34 @@ import (
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
)
|
|
|
|
|
|
// Previous yjReg pattern (the line removed by this change); it lacked the
// 投标人业绩 and 唱标记录 alternatives that the replacement below adds.
-var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|主要人员相关资料|投标文件格式|项目业绩|否决投标的?情况说明")
|
|
|
// yjReg matches any one of a fixed list of Chinese heading phrases (单位业绩,
// 个人业绩, 投标人业绩, 主要人员相关资料, 投标文件格式, 唱标记录, 项目业绩,
// 否决投标情况说明 / 否决投标的情况说明). AnalyStart tests a block's text against
// it and, on a match, cuts the text at the first "业绩" when that index is > 1.
+var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|投标人业绩|主要人员相关资料|投标文件格式|唱标记录|项目业绩|否决投标的?情况说明")
|
|
|
// hisReg matches a span that starts at an opening <td>, <tr> or <table> tag,
// contains one of the keywords 开标记录 / 业绩 / "[得评]+[审打]{0,2}分情况" /
// 无效标, and runs to a closing </tr>, </table> or </td>. AnalyStart replaces
// every match with the empty string. The first ".*" is greedy and, as "." does
// not match a newline by default, stays within one line.
+var hisReg = regexp.MustCompile("(<td>|<tr>|<table>).*(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
|
|
|
// hisReg2 is the same keyword-to-closing-tag pattern as hisReg without the
// leading opening tag. AnalyStart replaces each match with "${2}", i.e. only
// the closing tag (capture group 2) is kept.
+var hisReg2 = regexp.MustCompile("(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
|
|
|
// formattext matches the literal 投标总价 immediately followed by a run of
// digits, ",", ".", 万 or 元. AnalyStart rewrites it as "${1}:${2}", inserting
// an ASCII colon between label and value. The value run may be empty, so a
// bare 投标总价 also matches and becomes "投标总价:".
+var formattext = regexp.MustCompile("(投标总价)([0-9,.万元]*)")
|
|
|
// formattext2 matches text from 中标单价 through the rest of the line and
// captures the part starting at 中标总价 (the last occurrence on the line,
// because the preceding ".*" is greedy). AnalyStart replaces the whole match
// with that capture, dropping the 中标单价 portion.
+var formattext2 = regexp.MustCompile("中标单价.*(中标总价.*)")
|
|
|
|
|
|
func AnalyStart(job *util.Job, isSite bool, codeSite string) {
|
|
|
con := job.Content
|
|
|
//全文的需要修复表格
|
|
|
con = RepairCon(con)
|
|
|
//格式化正文
|
|
|
+ con = hisReg.ReplaceAllString(con, "")
|
|
|
+ con = hisReg2.ReplaceAllString(con, "${2}")
|
|
|
+ con = formattext.ReplaceAllString(con, "${1}:${2}")
|
|
|
+ con = formattext2.ReplaceAllString(con, "${1}")
|
|
|
con = formatText(con, "all")
|
|
|
job.Content = con
|
|
|
//计算表格占比,返回表格数组、占比
|
|
|
- tabs, ration := ComputeConRatio(con, 1)
|
|
|
- if len(tabs) > 0 {
|
|
|
+ tabs, _ := ComputeConRatio(con, 1)
|
|
|
+ /*if len(tabs) > 0 {
|
|
|
newcon, newtabs, newration := FindBigText(con, ration, tabs)
|
|
|
if newcon != "" {
|
|
|
con = newcon
|
|
|
+ con = formatText(con, "all")
|
|
|
tabs = newtabs
|
|
|
ration = newration
|
|
|
}
|
|
|
- }
|
|
|
+ }*/
|
|
|
job.BlockPackage = map[string]*util.BlockPackage{}
|
|
|
blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock, isSite, codeSite) //分块
|
|
|
if len(blockArrays) > 0 { //有分块
|
|
@@ -66,13 +75,18 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
|
|
|
job.HasTable = 1 //添加标识:文本中有table
|
|
|
newCon = TextAfterRemoveTable(con)
|
|
|
//log.Println(newCon)
|
|
|
- if newCon!=""{
|
|
|
+ if newCon != "" {
|
|
|
job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
|
|
|
}
|
|
|
for i := 0; i < len(tabs); i++ {
|
|
|
- //fmt.Println(tabs[i].Html())
|
|
|
+ blockTag := ""
|
|
|
+ if len(tabs[i].Nodes) > 0 {
|
|
|
+ if tabs[i].Nodes[0].PrevSibling != nil {
|
|
|
+ blockTag = tabs[i].Nodes[0].PrevSibling.Data
|
|
|
+ }
|
|
|
+ }
|
|
|
//添加标识:文本中有table
|
|
|
- tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
|
|
|
+ tabres := AnalyTableV2(tabs[i], job.Category, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
|
|
|
processTableResult(tabres, bl, job, isSite, codeSite)
|
|
|
}
|
|
|
} else {
|
|
@@ -83,11 +97,12 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
|
|
|
//log.Println(bl.Text)
|
|
|
FindProjectCode(bl.Text, job) //匹配项目编号
|
|
|
if yjReg.MatchString(bl.Text) {
|
|
|
- if strings.Index(bl.Text, "业绩")>1{
|
|
|
+ if strings.Index(bl.Text, "业绩") > 1 {
|
|
|
bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
|
|
|
}
|
|
|
}
|
|
|
//调用kv解析
|
|
|
+ bl.Text = formatText(bl.Text, "all")
|
|
|
bl.ColonKV = GetKVAll(bl.Text, "", nil, 1, isSite, codeSite)
|
|
|
bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil, isSite, codeSite)
|
|
|
//新加 未分块table中未能解析到中标候选人,从正文中解析
|
|
@@ -106,7 +121,9 @@ func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite st
|
|
|
for _, tab := range tabs {
|
|
|
job.HasTable = 1
|
|
|
tmptag := ""
|
|
|
- if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
|
|
|
+ if bl.Title != "" && len(bl.Title) < 20 {
|
|
|
+ tmptag = bl.Title
|
|
|
+ } else if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
|
|
|
tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)
|
|
|
}
|
|
|
//添加标识:文本中有table
|
|
@@ -290,8 +307,10 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
|
|
|
wror = append(wror, v)
|
|
|
continue
|
|
|
} else if tmpWins[v["entname"].(string)] > 0 && tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] != nil {
|
|
|
- if tmpWins[v["entname"].(string)]-1 >= 0 {
|
|
|
- job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
|
|
|
+ if tmpWins[v["entname"].(string)]-1 >= 0 && len(job.Winnerorder) > 0 {
|
|
|
+ if len(job.Winnerorder) > (tmpWins[v["entname"].(string)] - 1) {
|
|
|
+ job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
|
|
|
+ }
|
|
|
continue
|
|
|
}
|
|
|
}
|