|
@@ -543,7 +543,7 @@ func filterTitle(title string) string {
|
|
|
}
|
|
|
|
|
|
//从块里面找分包
|
|
|
-func FindPackageFromBlocks(blocks *[]*util.Block, title string) (blockPackage map[string]*util.BlockPackage) {
|
|
|
+func FindPackageFromBlocks(blocks *[]*util.Block) (blockPackage map[string]*util.BlockPackage) {
|
|
|
blockPackage = map[string]*util.BlockPackage{}
|
|
|
//块分包
|
|
|
for _, v := range *blocks {
|
|
@@ -552,13 +552,15 @@ func FindPackageFromBlocks(blocks *[]*util.Block, title string) (blockPackage ma
|
|
|
if text == "" {
|
|
|
continue
|
|
|
}
|
|
|
- ok, surplusText := divisionPackageChild(&blockPackage, text, title, true, v.Tag["中标单位"])
|
|
|
- //把分包内容摘除掉有问题 有的项目名称中包含二标段
|
|
|
- if ok && false {
|
|
|
- v.Text = surplusText
|
|
|
- v.ColonKV = GetKVAll(surplusText, v.Title, nil, 1)
|
|
|
- v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
|
|
|
- }
|
|
|
+ //var ok bool
|
|
|
+ //var surplusText string
|
|
|
+ divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"])
|
|
|
+ ////把分包内容摘除掉有问题 有的项目名称中包含二标段
|
|
|
+ //if ok && false {
|
|
|
+ // v.Text = surplusText
|
|
|
+ // v.ColonKV = GetKVAll(surplusText, v.Title, nil, 1)
|
|
|
+ // v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
|
|
|
+ //}
|
|
|
}
|
|
|
return
|
|
|
}
|
|
@@ -583,164 +585,198 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
|
|
|
// util.Debug(con)
|
|
|
// util.Debug(pkg)
|
|
|
//分包前面添加换行
|
|
|
- appendWarpIndex := []int{}
|
|
|
- for _, v := range pkg {
|
|
|
+ //log.Println(con)
|
|
|
+ bools := make(map[string]bool)
|
|
|
+ for k, v := range pkg {
|
|
|
//如果文本内容以识别出来的分包标识结尾,不是分包
|
|
|
if len(pkg) == 1 && strings.HasSuffix(con, v[0]) {
|
|
|
return false, ""
|
|
|
}
|
|
|
//
|
|
|
- is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
|
|
|
- for _, sv := range is {
|
|
|
- appendWarpIndex = append(appendWarpIndex, sv[0])
|
|
|
- }
|
|
|
- }
|
|
|
- appendWarpIndex = getPkgIndex(appendWarpIndex)
|
|
|
- conTemp := ""
|
|
|
- for k, v := range appendWarpIndex {
|
|
|
- if k == 0 {
|
|
|
- conTemp += con[:v] + "\n"
|
|
|
- } else {
|
|
|
- conTemp += "\n" + con[appendWarpIndex[k-1]:v]
|
|
|
- }
|
|
|
- if k == len(appendWarpIndex)-1 {
|
|
|
- conTemp += "\n" + con[v:]
|
|
|
- }
|
|
|
- }
|
|
|
- con = conTemp
|
|
|
- con = replSerial.ReplaceAllString(con, "\n")
|
|
|
- con = regMoreWrap.ReplaceAllString(con, "\n")
|
|
|
- //util.Debug(con)
|
|
|
- //根据分包,找索引位置
|
|
|
- indexMap := map[int]int{}
|
|
|
- indexKeyStringMap := map[int]string{}
|
|
|
- indexKeyIntMap := map[int]int{}
|
|
|
- indexs := []int{}
|
|
|
- startEndMap := map[int]int{}
|
|
|
- pkgIndexMap := map[string][]int{}
|
|
|
- indexPkgMap := map[int]string{}
|
|
|
- //遍历分包,把kv在包前面的移动到包后面
|
|
|
- for _, v := range pkg {
|
|
|
- pgflag := v[0] + "[::]*"
|
|
|
- is := regexp.MustCompile(pgflag).FindAllStringIndex(con, -1)
|
|
|
+ is := regexp.MustCompile(v[0] + "[::]*").FindAllString(con, -1)
|
|
|
for _, sv := range is {
|
|
|
- indexMap[sv[0]] = sv[1]
|
|
|
- indexs = append(indexs, sv[0])
|
|
|
- pkgIndexMap[v[0]] = append(pkgIndexMap[v[0]], sv[0])
|
|
|
- indexPkgMap[sv[0]] = v[0]
|
|
|
- }
|
|
|
- //key在包前面,并且在一行的开头
|
|
|
- keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
- if len(keys) == 0 {
|
|
|
- //key在包前面,并且key以冒号结尾
|
|
|
- keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
- }
|
|
|
- if len(keys) == 0 {
|
|
|
- keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
- }
|
|
|
- for _, key := range keys {
|
|
|
- startEndMap[key[5]] = key[4]
|
|
|
- //
|
|
|
- headkey := con[key[4]:key[5]]
|
|
|
- headkey = regReplAllSpace.ReplaceAllString(headkey, "")
|
|
|
- if !regDivision.MatchString(headkey) {
|
|
|
- headkey += ":"
|
|
|
+ newBpkg := &util.BlockPackage{
|
|
|
+ Origin: sv,
|
|
|
+ Text: con,
|
|
|
+ Index: k,
|
|
|
+ Accuracy: false,
|
|
|
}
|
|
|
- headkey = moreColonReg.ReplaceAllString(headkey, ":")
|
|
|
- colonIndexs := regDivision.FindAllStringIndex(headkey, -1)
|
|
|
- if len(colonIndexs) > 1 {
|
|
|
- headkey = headkey[colonIndexs[len(colonIndexs)-2][1]:colonIndexs[len(colonIndexs)-1][1]]
|
|
|
+ if (*blockPackage)[k] == nil && !bools[sv] {
|
|
|
+ bools[sv] = true
|
|
|
+ con = strings.ReplaceAll(con, sv, "\n")
|
|
|
+ //log.Println(k, con)
|
|
|
+ kvAll := GetKVAll(con, title, nil, 4)
|
|
|
+ newBpkg.ColonKV = kvAll
|
|
|
+ newBpkg.SpaceKV = SspacekvEntity.Entrance(con, "", nil)
|
|
|
+ (*blockPackage)[k] = newBpkg
|
|
|
+ } else if (*blockPackage)[k].ColonKV != nil {
|
|
|
+ kvAll := GetKVAll(con, title, nil, 4)
|
|
|
+ MergeKvTags((*blockPackage)[k].ColonKV.KvTags, kvAll.KvTags)
|
|
|
}
|
|
|
- indexKeyStringMap[key[5]] = headkey
|
|
|
- indexKeyIntMap[key[5]] = key[1]
|
|
|
- }
|
|
|
- }
|
|
|
- indexs = getPkgIndex(indexs)
|
|
|
- for ik, iv := range indexs {
|
|
|
- if indexKeyStringMap[iv] != "" {
|
|
|
- continue
|
|
|
- }
|
|
|
- if indexKeyIntMap[iv] == indexMap[iv] {
|
|
|
- continue
|
|
|
- }
|
|
|
- if ik > 0 {
|
|
|
- indexKeyStringMap[iv] = indexKeyStringMap[indexs[ik-1]]
|
|
|
- }
|
|
|
- }
|
|
|
- //
|
|
|
- //获取截取标识
|
|
|
- surplusText, maxWarpCount, indexTextMap, indexWarpMap := interceptText(indexs, indexPkgMap, pkgIndexMap, startEndMap, con)
|
|
|
- //查找分包内容,分kv
|
|
|
- for _, iv := range indexs {
|
|
|
- text := indexTextMap[iv]
|
|
|
- //
|
|
|
- warpIndex := regSpliteSegment.FindAllStringIndex(text, -1)
|
|
|
- if len(indexWarpMap) > 0 {
|
|
|
- maxWarpCount = indexWarpMap[iv]
|
|
|
- }
|
|
|
- if maxWarpCount > 0 && len(warpIndex) >= 5 && len(warpIndex) > maxWarpCount {
|
|
|
- textTemp := text
|
|
|
- text = textTemp[:warpIndex[maxWarpCount-1][1]]
|
|
|
- surplusText += textTemp[warpIndex[maxWarpCount-1][0]:]
|
|
|
- }
|
|
|
- for bk, bv := range pkg {
|
|
|
- //判断分包如果在这段文字里面,该段文字就属于该包的
|
|
|
- if !strings.HasPrefix(text, bv[0]) {
|
|
|
- continue
|
|
|
- }
|
|
|
- index := util.PackageNumberConvert(bk)
|
|
|
- //去掉前缀,空格必须要加,分kv的时候要用
|
|
|
- text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
|
|
|
- headKey := ""
|
|
|
- if indexKeyStringMap[iv] != "" {
|
|
|
- //if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
|
|
|
- headKey = indexKeyStringMap[iv]
|
|
|
- text = indexKeyStringMap[iv] + " " + text
|
|
|
- //}
|
|
|
- for _, pkgIndexMap_v := range pkgIndexMap[bv[0]] {
|
|
|
- delete(indexKeyStringMap, pkgIndexMap_v)
|
|
|
- }
|
|
|
- }
|
|
|
- //如果一块中有多个相同的包,合并到一个
|
|
|
- if (*blockPackage)[index] != nil {
|
|
|
- //合并文本
|
|
|
- (*blockPackage)[index].Text += "\n" + text
|
|
|
- //合并冒号kv
|
|
|
- colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
|
|
|
- if headKey != "" {
|
|
|
- kvAgain := GetKVAll(text, "", nil, 4)
|
|
|
- MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
|
|
|
- }
|
|
|
- MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
|
|
|
- //合并空格kv
|
|
|
- spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
|
|
|
- MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
|
|
|
- } else {
|
|
|
- newBpkg := &util.BlockPackage{
|
|
|
- Origin: bk,
|
|
|
- Text: text,
|
|
|
- Index: index,
|
|
|
- Type: bv[1],
|
|
|
- Accuracy: accuracy,
|
|
|
- }
|
|
|
- finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
|
|
|
- if headKey != "" {
|
|
|
- kvAgain := GetKVAll(text, "", nil, 4)
|
|
|
- MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
|
|
|
+ for kk, vv := range (*blockPackage)[k].ColonKV.KvTags {
|
|
|
+ for _, vvvv := range vv {
|
|
|
+ //log.Println(kk,vv,kkk,vvvv.Value,vvvv.Key)
|
|
|
+ //if kk == "项目编号" && (*blockPackage)[k].Origin == "" {
|
|
|
+ // (*blockPackage)[k].Origin = strings.TrimRightFunc(vvvv.Value, func(r rune) bool {
|
|
|
+ // return r == 65289 || r == 41 || r == 46 || r == 12290
|
|
|
+ // })
|
|
|
+ // break
|
|
|
+ //} else
|
|
|
+ if kk == "项目名称" && (*blockPackage)[k].Name == "" {
|
|
|
+ (*blockPackage)[k].Name = vvvv.Value
|
|
|
+ break
|
|
|
+ } else if kk == "预算" && (*blockPackage)[k].Budget == 0 {
|
|
|
+ (*blockPackage)[k].Budget = qutil.Float64All(vvvv.Value)
|
|
|
+ break
|
|
|
+ } else if kk == "中标单位" && (*blockPackage)[k].Winner == "" {
|
|
|
+ (*blockPackage)[k].Winner = vvvv.Value
|
|
|
+ break
|
|
|
+ } else if kk == "标段类型" && (*blockPackage)[k].Type == "" {
|
|
|
+ (*blockPackage)[k].Type = vvvv.Value
|
|
|
+ break
|
|
|
+ } else if kk == "中标金额" && (*blockPackage)[k].Bidamount == 0 {
|
|
|
+ (*blockPackage)[k].Bidamount = qutil.Float64All(vvvv.Value)
|
|
|
+ break
|
|
|
+ }
|
|
|
}
|
|
|
- newBpkg.ColonKV = finalKv
|
|
|
- newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)
|
|
|
- (*blockPackage)[index] = newBpkg
|
|
|
+
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ //log.Println(con)
|
|
|
+ //con = replSerial.ReplaceAllString(con, "\n")
|
|
|
+ //con = regMoreWrap.ReplaceAllString(con, "\n")
|
|
|
+ ////util.Debug(con)
|
|
|
+ ////根据分包,找索引位置
|
|
|
+ //indexMap := map[int]int{}
|
|
|
+ //indexKeyStringMap := map[int]string{}
|
|
|
+ //indexKeyIntMap := map[int]int{}
|
|
|
+ //indexs := []int{}
|
|
|
+ //startEndMap := map[int]int{}
|
|
|
+ //pkgIndexMap := map[string][]int{}
|
|
|
+ //indexPkgMap := map[int]string{}
|
|
|
+ ////遍历分包,把kv在包前面的移动到包后面
|
|
|
+ //for _, v := range pkg {
|
|
|
+ // pgflag := v[0] + "[::]*"
|
|
|
+ // is := regexp.MustCompile(pgflag).FindAllStringIndex(con, -1)
|
|
|
+ // for _, sv := range is {
|
|
|
+ // indexMap[sv[0]] = sv[1]
|
|
|
+ // indexs = append(indexs, sv[0])
|
|
|
+ // pkgIndexMap[v[0]] = append(pkgIndexMap[v[0]], sv[0])
|
|
|
+ // indexPkgMap[sv[0]] = v[0]
|
|
|
+ // }
|
|
|
+ // //key在包前面,并且在一行的开头
|
|
|
+ // keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ // if len(keys) == 0 {
|
|
|
+ // //key在包前面,并且key以冒号结尾
|
|
|
+ // keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ // }
|
|
|
+ // if len(keys) == 0 {
|
|
|
+ // keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
|
|
|
+ // }
|
|
|
+ // for _, key := range keys {
|
|
|
+ // startEndMap[key[5]] = key[4]
|
|
|
+ // //
|
|
|
+ // headkey := con[key[4]:key[5]]
|
|
|
+ // headkey = regReplAllSpace.ReplaceAllString(headkey, "")
|
|
|
+ // if !regDivision.MatchString(headkey) {
|
|
|
+ // headkey += ":"
|
|
|
+ // }
|
|
|
+ // headkey = moreColonReg.ReplaceAllString(headkey, ":")
|
|
|
+ // colonIndexs := regDivision.FindAllStringIndex(headkey, -1)
|
|
|
+ // if len(colonIndexs) > 1 {
|
|
|
+ // headkey = headkey[colonIndexs[len(colonIndexs)-2][1]:colonIndexs[len(colonIndexs)-1][1]]
|
|
|
+ // }
|
|
|
+ // indexKeyStringMap[key[5]] = headkey
|
|
|
+ // indexKeyIntMap[key[5]] = key[1]
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+ //indexs = getPkgIndex(indexs)
|
|
|
+ //for ik, iv := range indexs {
|
|
|
+ // if indexKeyStringMap[iv] != "" {
|
|
|
+ // continue
|
|
|
+ // }
|
|
|
+ // if indexKeyIntMap[iv] == indexMap[iv] {
|
|
|
+ // continue
|
|
|
+ // }
|
|
|
+ // if ik > 0 {
|
|
|
+ // indexKeyStringMap[iv] = indexKeyStringMap[indexs[ik-1]]
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+ ////
|
|
|
+ ////获取截取标识
|
|
|
+ //surplusText, maxWarpCount, indexTextMap, indexWarpMap := interceptText(indexs, indexPkgMap, pkgIndexMap, startEndMap, con)
|
|
|
+ ////查找分包内容,分kv
|
|
|
+ //for _, iv := range indexs {
|
|
|
+ // text := indexTextMap[iv]
|
|
|
+ // //
|
|
|
+ // warpIndex := regSpliteSegment.FindAllStringIndex(text, -1)
|
|
|
+ // if len(indexWarpMap) > 0 {
|
|
|
+ // maxWarpCount = indexWarpMap[iv]
|
|
|
+ // }
|
|
|
+ // if maxWarpCount > 0 && len(warpIndex) >= 5 && len(warpIndex) > maxWarpCount {
|
|
|
+ // textTemp := text
|
|
|
+ // text = textTemp[:warpIndex[maxWarpCount-1][1]]
|
|
|
+ // surplusText += textTemp[warpIndex[maxWarpCount-1][0]:]
|
|
|
+ // }
|
|
|
+ // for bk, bv := range pkg {
|
|
|
+ // //判断分包如果在这段文字里面,该段文字就属于该包的
|
|
|
+ // if !strings.HasPrefix(text, bv[0]) {
|
|
|
+ // continue
|
|
|
+ // }
|
|
|
+ // index := util.PackageNumberConvert(bk)
|
|
|
+ // //去掉前缀,空格必须要加,分kv的时候要用
|
|
|
+ // text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
|
|
|
+ // headKey := ""
|
|
|
+ // if indexKeyStringMap[iv] != "" {
|
|
|
+ // //if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
|
|
|
+ // headKey = indexKeyStringMap[iv]
|
|
|
+ // text = indexKeyStringMap[iv] + " " + text
|
|
|
+ // //}
|
|
|
+ // for _, pkgIndexMap_v := range pkgIndexMap[bv[0]] {
|
|
|
+ // delete(indexKeyStringMap, pkgIndexMap_v)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // //如果一块中有多个相同的包,合并到一个
|
|
|
+ // if (*blockPackage)[index] != nil {
|
|
|
+ // //合并文本
|
|
|
+ // (*blockPackage)[index].Text += "\n" + text
|
|
|
+ // //合并冒号kv
|
|
|
+ // colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
|
|
|
+ // if headKey != "" {
|
|
|
+ // kvAgain := GetKVAll(text, "", nil, 4)
|
|
|
+ // MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
|
|
|
+ // }
|
|
|
+ // MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
|
|
|
+ // //合并空格kv
|
|
|
+ // spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
|
|
|
+ // MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
|
|
|
+ // } else {
|
|
|
+ // newBpkg := &util.BlockPackage{
|
|
|
+ // Origin: bk,
|
|
|
+ // Text: text,
|
|
|
+ // Index: index,
|
|
|
+ // Type: bv[1],
|
|
|
+ // Accuracy: accuracy,
|
|
|
+ // }
|
|
|
+ // finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
|
|
|
+ // if headKey != "" {
|
|
|
+ // kvAgain := GetKVAll(text, "", nil, 4)
|
|
|
+ // MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
|
|
|
+ // }
|
|
|
+ // newBpkg.ColonKV = finalKv
|
|
|
+ // newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)
|
|
|
+ // (*blockPackage)[index] = newBpkg
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //}
|
|
|
//中标人排序
|
|
|
- if isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
|
|
|
+ if !strings.Contains(title, "招标") && isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
|
|
|
for _, v := range *blockPackage {
|
|
|
v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2)
|
|
|
}
|
|
|
}
|
|
|
- return true, surplusText
|
|
|
+ return true, con
|
|
|
}
|
|
|
func getPkgIndex(indexs []int) []int {
|
|
|
sort.Ints(indexs)
|
|
@@ -777,6 +813,11 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
|
|
|
} else {
|
|
|
text = con[iv:]
|
|
|
}
|
|
|
+ if strings.Contains(text, "、") {
|
|
|
+ text = strings.Split(text, "、")[0]
|
|
|
+ } else if strings.Contains(text, "\n") {
|
|
|
+ text = strings.Split(text, "\n")[0]
|
|
|
+ }
|
|
|
indexTextMap[iv] = text
|
|
|
warpCount := len(regSpliteSegment.FindAllStringIndex(text, -1))
|
|
|
if warpCount > maxWarpCount {
|