|
@@ -2,6 +2,7 @@ package pretreated
|
|
|
|
|
|
import (
|
|
|
"fmt"
|
|
|
+ "jy/clear"
|
|
|
"jy/util"
|
|
|
|
|
|
qutil "qfw/util"
|
|
@@ -55,7 +56,8 @@ var (
|
|
|
regStartWrap = regexp.MustCompile("^[\r\n]")
|
|
|
regEndWrap = regexp.MustCompile("[\r\n]$")
|
|
|
regMoreWrap = regexp.MustCompile("[\r\n]{2,}")
|
|
|
- regStrWrap = regexp.MustCompile("分包名称[::]")
|
|
|
+ regStrWrap = regexp.MustCompile("分包名称[::]")
|
|
|
+ regBZJWarap = regexp.MustCompile("保证金.*")
|
|
|
replSerial = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
|
|
|
moreColonReg = regexp.MustCompile("[::]+")
|
|
|
regFilter = regexp.MustCompile("等$")
|
|
@@ -90,7 +92,7 @@ var (
|
|
|
)
|
|
|
|
|
|
//分块
|
|
|
-func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite bool,codeSite string) ([]*util.Block, int) {
|
|
|
+func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite bool, codeSite string) ([]*util.Block, int) {
|
|
|
defer qutil.Catch()
|
|
|
returnValue := 0
|
|
|
var blocks []*util.Block
|
|
@@ -270,7 +272,7 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite
|
|
|
}
|
|
|
block.Title = title
|
|
|
block.Titles = titles
|
|
|
- if ruleBlock != nil{
|
|
|
+ if ruleBlock != nil {
|
|
|
block.Classify, block.NotClassifyTitles = ruleBlock.Classify.GetClassify(tp, titles)
|
|
|
}
|
|
|
tagsToBlocks(blocks, block)
|
|
@@ -304,8 +306,8 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite
|
|
|
for _, bl := range returnBlocks {
|
|
|
//解析kv
|
|
|
newText := TextAfterRemoveTable(bl.Text)
|
|
|
- bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from,isSite,codeSite)
|
|
|
- bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat,isSite,codeSite)
|
|
|
+ bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from, isSite, codeSite)
|
|
|
+ bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat, isSite, codeSite)
|
|
|
//正则抽取的时候有时需要匹配换行或者句号,这里在解析完kv之后,在块结尾添加换行和句号
|
|
|
bl.Text = appendWarpStop(bl.Text)
|
|
|
}
|
|
@@ -548,7 +550,7 @@ func filterTitle(title string) string {
|
|
|
}
|
|
|
|
|
|
//从块里面找分包
|
|
|
-func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (blockPackage map[string]*util.BlockPackage) {
|
|
|
+func FindPackageFromBlocks(blocks *[]*util.Block, isSite bool, codeSite string) (blockPackage map[string]*util.BlockPackage) {
|
|
|
blockPackage = map[string]*util.BlockPackage{}
|
|
|
//块分包
|
|
|
for _, v := range *blocks {
|
|
@@ -559,7 +561,7 @@ func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (b
|
|
|
}
|
|
|
//var ok bool
|
|
|
//var surplusText string
|
|
|
- divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"],isSite,codeSite)
|
|
|
+ divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"], isSite, codeSite)
|
|
|
////把分包内容摘除掉有问题 有的项目名称中包含二标段
|
|
|
//if ok && false {
|
|
|
// v.Text = surplusText
|
|
@@ -567,23 +569,81 @@ func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (b
|
|
|
// v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
|
|
|
//}
|
|
|
}
|
|
|
+ for k, v := range blockPackage {
|
|
|
+ if v.ColonKV != nil && v.ColonKV.KvTags != nil {
|
|
|
+ for kc, cv := range v.ColonKV.KvTags {
|
|
|
+ if kc == "预算" && v.Budget <= 0 {
|
|
|
+ moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
|
|
|
+ if len(moneys) > 0 {
|
|
|
+ if vf, ok := moneys[0].(float64); ok {
|
|
|
+ blockPackage[k].Budget = vf
|
|
|
+ blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
|
|
|
+ } else if vi, ok := moneys[0].(int); ok {
|
|
|
+ blockPackage[k].Budget = float64(vi)
|
|
|
+ blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else if kc == "中标金额" && v.Bidamount <= 0 {
|
|
|
+ moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
|
|
|
+ if len(moneys) > 0 {
|
|
|
+ if vf, ok := moneys[0].(float64); ok {
|
|
|
+ blockPackage[k].Bidamount = vf
|
|
|
+ blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
|
|
|
+ } else if vi, ok := moneys[0].(int); ok {
|
|
|
+ blockPackage[k].Bidamount = float64(vi)
|
|
|
+ blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
|
|
|
+ for kc, cv := range v.SpaceKV.KvTags {
|
|
|
+ if kc == "预算" && v.Budget <= 0 {
|
|
|
+ moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
|
|
|
+ if len(moneys) > 0 {
|
|
|
+ if vf, ok := moneys[0].(float64); ok {
|
|
|
+ blockPackage[k].Budget = vf
|
|
|
+ blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
|
|
|
+ } else if vi, ok := moneys[0].(int); ok {
|
|
|
+ blockPackage[k].Budget = float64(vi)
|
|
|
+ blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ } else if kc == "中标金额" && v.Bidamount <= 0 {
|
|
|
+ moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
|
|
|
+ if len(moneys) > 0 {
|
|
|
+ if vf, ok := moneys[0].(float64); ok {
|
|
|
+ blockPackage[k].Bidamount = vf
|
|
|
+ blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
|
|
|
+ } else if vi, ok := moneys[0].(int); ok {
|
|
|
+ blockPackage[k].Bidamount = float64(vi)
|
|
|
+ blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
return
|
|
|
}
|
|
|
|
|
|
// FindPackageFromText looks for sub-packages (分包) directly in the body text.
//
// It allocates an empty result map, hands a pointer to it to
// divisionPackageChild together with content and title, and returns the map.
// The call passes isFindWinnerOrder=true and accuracy=false, and forwards
// isSite and codeSite unchanged. The (bool, string) results of
// divisionPackageChild are discarded; only the populated map is returned.
//
// The "-" and "+" prefixed lines below are the old and new renderings of the
// same statements; they differ only in the spacing after commas.
|
|
|
-func FindPackageFromText(title string, content string,isSite bool,codeSite string) (blockPackage map[string]*util.BlockPackage) {
|
|
|
+func FindPackageFromText(title string, content string, isSite bool, codeSite string) (blockPackage map[string]*util.BlockPackage) {
|
|
|
blockPackage = map[string]*util.BlockPackage{}
|
|
|
// Search the body text for sub-packages; results are written into blockPackage through the pointer.
|
|
|
- divisionPackageChild(&blockPackage, content, title, true, false,isSite,codeSite)
|
|
|
+ divisionPackageChild(&blockPackage, content, title, true, false, isSite, codeSite)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
//分块之后分包
|
|
|
-func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool,isSite bool,codeSite string) (bool, string) {
|
|
|
+func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool, isSite bool, codeSite string) (bool, string) {
|
|
|
// Check whether the content contains sub-packages (分包).
|
|
|
content = regStrWrap.ReplaceAllString(content, "\n")
|
|
|
content = regMoreWrap.ReplaceAllString(content, "\n")
|
|
|
content = regEndWrap.ReplaceAllString(content, "")
|
|
|
+ content = regBZJWarap.ReplaceAllString(content, "")
|
|
|
con, pkg, flag := CheckMultiPackage(content, title)
|
|
|
if !flag {
|
|
|
return false, ""
|
|
@@ -597,7 +657,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
if len(pkg) == 1 && strings.HasSuffix(con, v[0]) {
|
|
|
return false, ""
|
|
|
}
|
|
|
- is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
|
|
|
+ is := regexp.MustCompile(v[0] + "[::]*").FindAllStringIndex(con, -1)
|
|
|
for _, sv := range is {
|
|
|
appendWarpIndex = append(appendWarpIndex, sv[0])
|
|
|
}
|
|
@@ -637,13 +697,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
indexPkgMap[sv[0]] = v[0]
|
|
|
}
|
|
|
//key在包前面,并且在一行的开头
|
|
|
- keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)" + pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
if len(keys) == 0 {
|
|
|
//key在包前面,并且key以冒号结尾
|
|
|
- keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
}
|
|
|
if len(keys) == 0 {
|
|
|
- keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
}
|
|
|
for _, key := range keys {
|
|
|
startEndMap[key[5]] = key[4]
|
|
@@ -697,7 +757,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
}
|
|
|
index := util.PackageNumberConvert(bk)
|
|
|
//去掉前缀,空格必须要加,分kv的时候要用
|
|
|
- text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
|
|
|
+ text = regexp.MustCompile(bv[0] + "[::]*").ReplaceAllString(text, "")
|
|
|
headKey := ""
|
|
|
if indexKeyStringMap[iv] != "" {
|
|
|
//if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
|
|
@@ -706,6 +766,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
//}
|
|
|
for _, pkgIndexMap_v := range pkgIndexMap[bv[0]] {
|
|
|
delete(indexKeyStringMap, pkgIndexMap_v)
|
|
|
+ break
|
|
|
}
|
|
|
}
|
|
|
//如果一块中有多个相同的包,合并到一个
|
|
@@ -713,31 +774,32 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
//合并文本
|
|
|
(*blockPackage)[index].Text += "\n" + text
|
|
|
//合并冒号kv
|
|
|
- colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1,isSite,codeSite)
|
|
|
+ colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1, isSite, codeSite)
|
|
|
if headKey != "" {
|
|
|
- kvAgain := GetKVAll(text, "", nil, 4,isSite,codeSite)
|
|
|
+ kvAgain := GetKVAll(text, "", nil, 4, isSite, codeSite)
|
|
|
MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
|
|
|
}
|
|
|
MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
|
|
|
//合并空格kv
|
|
|
- spaceJobKv := SspacekvEntity.Entrance(text, "", nil,isSite,codeSite)
|
|
|
+ spaceJobKv := SspacekvEntity.Entrance(text, "", nil, isSite, codeSite)
|
|
|
MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
|
|
|
} else {
|
|
|
newBpkg := &util.BlockPackage{
|
|
|
Origin: bk,
|
|
|
Text: text,
|
|
|
Index: index,
|
|
|
+ Name: bv[0],
|
|
|
Type: bv[1],
|
|
|
Accuracy: accuracy,
|
|
|
}
|
|
|
//fmt.Println(text)
|
|
|
- finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4,isSite,codeSite)
|
|
|
+ finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4, isSite, codeSite)
|
|
|
if headKey != "" {
|
|
|
- kvAgain := GetKVAll(text, "", nil, 4,isSite,codeSite)
|
|
|
+ kvAgain := GetKVAll(text, "", nil, 4, isSite, codeSite)
|
|
|
MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
|
|
|
}
|
|
|
newBpkg.ColonKV = finalKv
|
|
|
- newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil,isSite,codeSite)
|
|
|
+ newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil, isSite, codeSite)
|
|
|
(*blockPackage)[index] = newBpkg
|
|
|
}
|
|
|
}
|
|
@@ -745,7 +807,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
//中标人排序
|
|
|
if isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
|
|
|
for _, v := range *blockPackage {
|
|
|
- v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2,isSite,codeSite)
|
|
|
+ v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2, isSite, codeSite)
|
|
|
}
|
|
|
}
|
|
|
return true, surplusText
|
|
@@ -792,21 +854,21 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
|
|
|
//} else
|
|
|
if strings.Contains(text, "\n") {
|
|
|
texts := strings.Split(text, "\n")
|
|
|
- text2 :=""
|
|
|
+ text2 := ""
|
|
|
if ik+1 < len(indexs)-1 {
|
|
|
if startEndMap[indexs[ik+1+1]] != 0 {
|
|
|
text2 = con[startEndMap[indexs[ik+1]]:startEndMap[indexs[ik+1+1]]]
|
|
|
} else {
|
|
|
text2 = con[indexs[ik+1]:indexs[ik+1+1]]
|
|
|
}
|
|
|
- if texts[len(texts)-1] == text2{
|
|
|
+ if texts[len(texts)-1] == text2 {
|
|
|
text = texts[0]
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if utf8.RuneCountInString(text)<5{
|
|
|
+ if utf8.RuneCountInString(text) < 5 {
|
|
|
indexTextMap[iv] = tmptext
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
indexTextMap[iv] = text
|
|
|
}
|
|
|
warpCount := len(regSpliteSegment.FindAllStringIndex(text, -1))
|
|
@@ -876,8 +938,8 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
|
|
|
}
|
|
|
|
|
|
//分块之后的kv
|
|
|
-func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock,isSite bool,codeSite string) []*util.Kv {
|
|
|
- blocks, _ := DivideBlock(tp, text, from, ruleBlock,isSite,codeSite)
|
|
|
+func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock, isSite bool, codeSite string) []*util.Kv {
|
|
|
+ blocks, _ := DivideBlock(tp, text, from, ruleBlock, isSite, codeSite)
|
|
|
kvs := []*util.Kv{}
|
|
|
for _, v := range blocks {
|
|
|
//util.Debug(v.Text)
|