|
- package pretreated
- import (
- "jy/clear"
- //"jy/clear"
- "jy/util"
- qutil "qfw/util"
- "regexp"
- "strconv"
- "strings"
- "unicode/utf8"
- )
// WinnerOrderEntity is a stateless receiver that groups the methods used to
// extract, sort and merge ranked winning-bid candidates.
type WinnerOrderEntity struct{}
// WinnerFlag describes either a single candidate label found in a text block
// or, when returned by getMax, a whole run of consecutive labels.
type WinnerFlag struct {
	// index is the rank number parsed from a label (getText fills it via toNumber).
	index int
	// start and end are the positions of the first and last label of a run
	// inside the slice handed to getMax.
	start, end int
	// textStart and textEnd are byte offsets into the block text: the start of
	// the first label and the end of the last label.
	textStart, textEnd int
	// max is end-start for the run, i.e. the run length minus one.
	max int
	// indexs is a flat list of textStart/textEnd offset pairs, one pair per label.
	indexs []int
}
- var (
- winnerOrderEntity = &WinnerOrderEntity{}
- numberReg = regexp.MustCompile("[一二三四五六七八九十0-9]+")
- numberReg2 = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+")
- thisNumberReg = regexp.MustCompile("第" + numberReg.String())
- winnerReg0 = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选(人|供应商)|中标人[1-9]|[第|弟][一二三四五0-9]中标人)")
- winnerReg1 = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
- winnerReg2 = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|[第|弟][一二三四五六七八九十1-9]+(中标|中选)?[候|侯]选人|中标候选人排名[::]\\d)")
- winnerReg3 = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+名|(中标候选人)[1-9])")
- winnerReg4 = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
- winnerReg5 = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
- winnerReg6 = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)")
- winnerReg7 = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
- //带金额
- winnerReg8 = regexp.MustCompile("(第[一二三四五六七八九十]中选候选人)[::\\s]+?[((]1[))][\\s]+?(单位名称)[::]?(.*)[\\s]+?[((]2[))][\\s]+(参选报价|投标报价(含税))[::]?(.*)")
- winnerReg9 = regexp.MustCompile("(第[一二三四五六七八九十]中[选|标]?候选人|中标人[1-9])[::\\s]+?([\u4E00-\u9FA5]{4,20})[\\s]+([0-9\\.\\s万元]+)")
- winnerReg10 = regexp.MustCompile("(第[一二三四五六七八九十]中标人)[::\\s]+?报价[¥]?([0-9\\.\\s万元]+)[;;]([\u4E00-\u9FA5]{4,20})")
- winnerReg11 = regexp.MustCompile("([弟|第][一二三四五六七八九十]中[标|选]候选人)[::\\s]+?(单位名称|投标人名称)[::]?(.*)[\\s]+?(参选报价|投标报价[((]含税[))]|投标报价[((]元[))])[::]?(.*)")
- winnerReg12 = regexp.MustCompile("(中[标|选]候选人[弟|第][一二三四五六七八九十0-9]名|[弟|第][一二三四五六七八九十0-9](中标)?候选人)[::\\s ]+?(.*)[ \\s,,]+?(投标报价|投标总报价|金额)[::]?([0-9\\.\\s万元]+)")
- winnerReg13 = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9])\n(成交候选人|成交供应商)\n(.*)\n([0-9\\.\\s万元]+)")
- winnerReg14 = regexp.MustCompile("(中标候选人|成交候选人)\n.*\n.*\n第[1-9][\\s]+?名")
- winnerReg14_1 = regexp.MustCompile("(第[1-9])[\\s]+?名[::](.{4,20}公司)[\\s]+中标价[::]([0-9\\.\\s万元]+)")
- winnerReg15 = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9](中标|中选)?候选人)[::](.*)[ \\s\\n,,]+(最终报价[::\\s]+不含税单价.*)?不含税总价[::]?([0-9\\.()\\s万元]+)")
- winnerReg16 = regexp.MustCompile("(中[标|选]候选人)排序[::]([1-9一二三四五六七])[\\s]+.{1,4}名称[::](.*公司)[\\s]+.{1,4}报价[::]([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
- winnerReg17 = regexp.MustCompile("(报价金额|应答含税总价)[::]?([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
- winnerReg18 = regexp.MustCompile("([中|投]标候选人[弟|第])([1-9一二三四五])[\\s]?名[::]([\u4E00-\u9FA5]{4,20})([((].*公司[))])?[,,\\s]+投标报价[::]([0-9\\.\\s万元]+)")
- //格式化中标金额换行
- winnerReg100 = regexp.MustCompile("中标金额:[\\s]+([0-9\\.万元]+)")
- //不带金额
- winnerReg20 = regexp.MustCompile("(中标单位候选人名称)[\\s]+(.*)[\\s]+(中标候选人单位名次)[\\s]+([弟|第][一二三四五六七八九十0-9]中标人)")
- winnerReg21 = regexp.MustCompile("(石城(.*公司|.*厂|.*有\n限公司))[0-9.]+([弟|第][一二三四五六七八九十0-9])成交[\n]?候选人")
- winnerReg22 = regexp.MustCompile("投标人[::](.{4,20}公司)[\\s-]+标段[::][1-3][\\s-]+排名[::]([1-9])")
- winnerReg23 = regexp.MustCompile("([\u4E00-\u9FA5]{4,20})\n(有限公司|公司)[\\s]+(第[一二三四五1-9]中[选|标]候选人)")
- winnerReg24 = regexp.MustCompile("[\\s\\n]+([\u4E00-\u9FA5]{4,20}公司)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
- winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
- colonEndReg = regexp.MustCompile("[::]$")
- toWarpReg = regexp.MustCompile("[,。,;;]+")
- findamountReg = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+")
- amountReg = regexp.MustCompile("^\\d+(\\.\\d+)?([百|千]?元|[百|千]?[万|亿]元?)$")
- companyWarpReg = regexp.MustCompile("(公司)(.+?[::])")
- findCompanyReg = regexp.MustCompile("[^::]+公司")
- colonSpaceReg = regexp.MustCompile("[::]\\s+")
- findCandidate = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])|工作室)")
- findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合[会|体]|工作室|有限司)$)")
- clearSpace1 = regexp.MustCompile("([((][\\d一二三四五六七八九十][))][\\s\u3000\u2003\u00a0\\t]*|<[^>].+?>)")
- clearSpace2 = regexp.MustCompile("</?[^>]+>")
- offerReg = regexp.MustCompile("(中标|磋商|投标|报|单|成交)总?(价|金额)")
- nofferReg = regexp.MustCompile("(费率|折扣率)")
- nobidValReg = regexp.MustCompile("^\\d{2}%$")
- )
- /*
- *查找分包中的中标人排序
- *text文本,flag非否精确查找
- *from 来源
- */
- func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool, codeSite string) []map[string]interface{} {
- if clearSpace2.MatchString(text){
- text = TextAfterRemoveTable(text)
- }//评得分估|标的|班子成员|人员
- text = winnerRegclear.ReplaceAllString(text,"")
- if nswinnertabletag.MatchString(text) && !winnerReg0.MatchString(text){
- return []map[string]interface{}{}
- }
- text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
- text = winnerReg20.ReplaceAllString(text,"\n${4}:${2}\n")
- text = winnerReg21.ReplaceAllString(text,"\n${3}成交候选人:${1}\n")
- text = strings.ReplaceAll(text,"有\n限公司","有限公司")
- text = winnerReg22.ReplaceAllString(text,"\n中标候选人第${2}名:${1}\n")
- text = winnerReg23.ReplaceAllString(text,"\n${3}:${1}${2}\n")
- text = winnerReg24.ReplaceAllString(text,"\n${2}:${1}\n")
- text = winnerReg8.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
- text = winnerReg9.ReplaceAllString(text,"\n${1}:${2}\n中标金额:${3}\n")
- text = winnerReg10.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${2}\n")
- text = winnerReg11.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
- text = winnerReg12.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
- text = winnerReg13.ReplaceAllString(text,"\n${1}${2}:${3}\n中标金额:${4}\n")
- text = winnerReg15.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
- text = winnerReg16.ReplaceAllString(text,"\n第${2}${1}:${3}\n中标金额:${4}\n")
- text = winnerReg17.ReplaceAllString(text,"\n中标金额:${2}\n")
- text = winnerReg18.ReplaceAllString(text,"\n${1}${2}名:${3}\n中标金额:${5}\n")
- //中标金额格式化
- text = winnerReg100.ReplaceAllString(text,"中标金额:${1}")
- //特殊格式
- if winnerReg14.MatchString(text) {
- text = winnerReg14_1.ReplaceAllString(text,"\n中标候选人${1}名:${2}\n中标金额:${3}\n")
- }
- text = clearSpace1.ReplaceAllString(text, "") //清理(1) 单位名称:成都维诺信科技有限公司-->单位名称:成都维诺信科技有限公司
- if strings.TrimSpace(text) == "" {
- return []map[string]interface{}{}
- }
- blocks := winnerReg4.Split(text, -1)
- if len(blocks) == 0 {
- blocks = append(blocks, text)
- } else if len(blocks) == 1 {
- blocks = winnerReg7.Split(text, -1)
- }
- winners := wo.findByReg(text, blocks, winnerReg0, from, isSite, codeSite)
- if len(winners) == 0 {
- winners = wo.findByReg(text, blocks, winnerReg2, from, isSite, codeSite)
- }
- if len(winners) == 0 {
- if flag {
- //异常
- winners = wo.findByReg(text, blocks, winnerReg3, from, isSite, codeSite)
- } else {
- indexs_4 := winnerReg4.Split(text, -1)
- if len(indexs_4) > 1 {
- for _, v_4 := range indexs_4 {
- indexs_3 := winnerReg3.FindAllStringIndex(v_4, -1)
- if len(indexs_3) < 2 {
- continue
- }
- for _, v_3 := range indexs_3 {
- if strings.Count(v_4[:v_3[1]], "\n") <= 3 {
- winners = wo.findByReg(text, blocks, winnerReg3, from, isSite, codeSite)
- break
- }
- }
- }
- }
- }
- }
- //候选人有一半以上是错误的话,那么就认为全部抽错了
- invalidCount := 0
- for _, v := range winners {
- if !findCandidate.MatchString(qutil.ObjToString(v["entname"])) {
- invalidCount++
- }
- }
- if invalidCount > len(winners)/2 {
- return []map[string]interface{}{}
- }
- return winners
- }
- //获取中标人排序文本
- func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp.Regexp, from int) []string {
- isWinnerReg1 := reg_2 == winnerReg1
- rdata := []string{}
- //确定中标候选人排序在哪个块里面
- rightIndex, prevMax := -1, -1
- rightIndexs := []int{}
- var winnerFlag []*WinnerFlag
- for b_k, b_v := range blocks {
- indexs := []*WinnerFlag{}
- array := reg_2.FindAllStringSubmatchIndex(b_v, -1)
- for _, v := range array {
- var wrfg *WinnerFlag
- if isWinnerReg1 {
- if v[4]<0 || v[5]<0 {
- continue
- }
- wrfg = &WinnerFlag{
- index: wo.toNumber(b_v[v[4]:v[5]], 0),
- textStart: v[4],
- textEnd: v[5],
- }
- } else {
- if v[2]<0 || v[3]<0 {
- continue
- }
- wrfg = &WinnerFlag{
- index: wo.toNumber(b_v[v[2]:v[3]], 0),
- textStart: v[2],
- textEnd: v[3],
- }
- }
- indexs = append(indexs, wrfg)
- }
- wf := wo.getMax(indexs, from)
- if wf != nil && wf.max >= prevMax {
- prevMax = wf.max
- rightIndex = b_k
- rightIndexs = append(rightIndexs, b_k)
- winnerFlag = append(winnerFlag, wf)
- }
- }
- ////在这个块里面,截取
- if rightIndex == -1 {
- return rdata
- }
- for i, rightIndex := range rightIndexs {
- text = blocks[rightIndex]
- warpCount := wo.interceptText(winnerFlag[i].indexs, text)
- if warpCount == 0 {
- warpCount = 1
- }
- textEnd := text[winnerFlag[i].textEnd:]
- text = text[winnerFlag[i].textStart:winnerFlag[i].textEnd]
- warpIndex := regSpliteSegment.FindAllStringIndex(textEnd, -1)
- if len(warpIndex) >= warpCount {
- textEnd = textEnd[:warpIndex[warpCount-1][1]]
- }
- text = text + textEnd
- if isWinnerReg1 {
- text = reg_2.ReplaceAllString(text, "$1\n$2$15")
- } else {
- text = reg_2.ReplaceAllString(text, "\n$1")
- }
- text = regReplWrapSpace.ReplaceAllString(text, "")
- lines := SspacekvEntity.getLines(text)
- text = ""
- for k, v := range lines {
- v = strings.TrimSpace(v)
- v = colonSpaceReg.ReplaceAllString(v, ":")
- if reg_2.MatchString(v) && !regDivision.MatchString(v) {
- if isWinnerReg1 {
- v = reg_2.ReplaceAllString(v, "$1$2:$15")
- } else {
- v = reg_2.ReplaceAllString(v, "$1:")
- }
- }
- //逗号之类符号的分割,查找紧跟在中标候选人之后的中标金额
- //如果后面没有什么标识,只有金额的情况下,把中标金额加到金额前面
- if reg_2.MatchString(v) {
- //两个kv连到一起
- if len(regDivision.FindAllString(v, -1)) > 1 && !findamountReg.MatchString(v) {
- v = companyWarpReg.ReplaceAllString(v, "$1\n$2")
- }
- vs := findamountReg.Split(v, -1)
- if len(vs) > 1 {
- vs_1 := strings.TrimSpace(vs[1])
- if amountReg.MatchString(vs_1) {
- v = strings.Replace(v, vs[1], "中标金额:"+vs_1, 1)
- }
- }
- }
- v = toWarpReg.ReplaceAllString(v, "\n")
- text += v
- if (!reg_2.MatchString(v) || !colonEndReg.MatchString(v)) && k < len(lines)-1 {
- text += "\n"
- }
- }
- rdata = append(rdata, text)
- }
- return rdata
- }
- //抽取对应的排序结果
- func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int, isSite bool, codeSite string) []map[string]interface{} {
- text := wo.getText(content, blocks, reg_2, from)
- winners := []map[string]interface{}{}
- if len(text) < 1 {
- return winners
- }
- for i, v := range text {
- object := map[string]interface{}{}
- count := 0
- kvs := colonkvEntity.getColonSpaceKV(v, isSite, codeSite)
- for _, kv := range kvs {
- k, v := kv.Key, kv.Value
- if regDivision.MatchString(v) {
- v_k := regDivision.Split(v, -1)[0]
- if reg_2.MatchString(v_k) {
- k = v_k
- }
- }
- if reg_2.MatchString(k) { //中标人
- if len(object) > 0 {
- winners = append(winners, object)
- object = map[string]interface{}{}
- }
- val := wo.clear("中标单位", v)
- if val != nil && utf8.RuneCountInString(qutil.ObjToString(val)) > 5 {
- count++
- object["entname"] = strings.TrimRight(strings.ReplaceAll(strings.TrimSpace(qutil.ObjToString(val)), "公司", "公司,"), ",")
- object["sort"] = wo.toNumber(k, count)
- object["sortstr"] = thisNumberReg.FindString(k)
- object["type"] = i
- }
- }else { //中标金额 - 折扣率系数-待定
- findOfferFlag := false
- if offerReg.MatchString(k) && !nofferReg.MatchString(k){
- findOfferFlag = true
- } else {
- kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"}, isSite, codeSite)
- if len(kvTags["中标金额"]) > 0 {
- findOfferFlag = true
- }
- }
- //找到了中标金额
- if findOfferFlag && object["entname"] != nil {
- val := wo.clear("中标金额", v+GetMoneyUnit(k, v))
- if val != nil && !nobidValReg.MatchString(qutil.ObjToString(val)) {
- moneys := clear.ObjToMoney([]interface{}{val, ""})
- if len(moneys) > 0 {
- if vf, ok := moneys[0].(float64); ok && moneys[len(moneys)-1].(bool){
- object["price"] = float64(vf)
- }
- }
- }
- winners = append(winners, object)
- object = map[string]interface{}{}
- }
- }
- }
- if len(object) > 0 {
- winners = append(winners, object)
- }
- indexs := []*WinnerFlag{}
- //tym := make(map[int]bool, 0)
- for _, winner := range winners {
- indexs = append(indexs, &WinnerFlag{
- index: winner["sort"].(int),
- //ttype: winner["type"].(int),
- })
- //tym[winner["type"].(int)] = true
- }
- //winnerFlag := wo.getMax(indexs, from)
- //if winnerFlag != nil {
- // winners = winners[winnerFlag.start : winnerFlag.end+1]
- //} else {
- // winners = []map[string]interface{}{}
- //}
- }
- return winners
- }
- //清理结果
- func (wo *WinnerOrderEntity) clear(typ, v string) interface{} {
- if typ == "中标单位" && regDivision.MatchString(v) {
- v = findCompanyReg.FindString(v)
- v = filterWinner.FindString(v)
- }
- v = filterValue.ReplaceAllString(v, "")
- //过滤
- return v //clear.ClearResult(typ, v)
- }
- //
- func (wo *WinnerOrderEntity) toNumber(value string, defaultNum int) int {
- value = numberReg.FindString(value)
- if value == "" {
- return defaultNum
- }
- v := util.ChineseNumberToInt(value)
- if v < 1 {
- v, _ = strconv.Atoi(value)
- }
- if v > 0 {
- return v
- }
- return defaultNum
- }
- //
- func (wo *WinnerOrderEntity) getMax(indexs []*WinnerFlag, from int) *WinnerFlag {
- allMap := map[int]*WinnerFlag{}
- max, start, textStart := -1, -1, -1
- isContinue := false
- flag := false
- is := []int{}
- for k, winnerFlag := range indexs {
- v := winnerFlag.index
- //从1开始,1前面的过滤掉
- if v == 1 {
- flag = true
- }
- if !flag {
- continue
- }
- if v == 1 {
- start = k
- textStart = winnerFlag.textStart
- isContinue = false
- }
- if isContinue {
- continue
- }
- nextIndex := 0
- if k < len(indexs)-1 {
- nextIndex = indexs[k+1].index
- }
- //从1-n是一组,遇到小于n的从新添加分组,分组不是1开头的过滤掉
- is = append(is, winnerFlag.textStart, winnerFlag.textEnd)
- if nextIndex-v != 1 {
- isContinue = true
- if max < k-start {
- max = k - start
- allMap[k-start] = &WinnerFlag{
- textStart: textStart,
- textEnd: winnerFlag.textEnd,
- start: start,
- end: k,
- max: max,
- indexs: is,
- }
- }
- }
- }
- if max != -1 {
- if from != 3 && len(allMap[max].indexs) <= 2 {
- return nil
- }
- return allMap[max]
- }
- return nil
- }
- //如果有两个,看第一个有几个换行,用第一个里面的最后一个换行作为第二个的结束位置
- //如果有两个以上,取前两个中换行最多的最后一个换行,作为其他的结束位置
- func (wo *WinnerOrderEntity) interceptText(indexs []int, con string) int {
- if len(indexs) == 1 {
- return 0
- }
- count := 0
- for ik, iv := range indexs {
- text := ""
- if ik < len(indexs)-1 {
- text = con[iv:indexs[ik+1]]
- } else {
- text = con[iv:]
- }
- //如果两个
- if len(indexs) == 2 {
- //取第一个有几个换行符
- if ik == 0 {
- count = len(regSpliteSegment.FindAllStringIndex(text, -1))
- }
- } else {
- //多个,取前两个中换行符最多的
- if ik <= 1 {
- thisCount := len(regSpliteSegment.FindAllStringIndex(text, -1))
- if thisCount > count {
- count = thisCount
- }
- }
- }
- }
- return count
- }
- //排序
- func (wo *WinnerOrderEntity) Order(winnerOrder []map[string]interface{}) {
- if winnerOrder == nil || len(winnerOrder) <= 1 {
- return
- }
- for x, _ := range winnerOrder {
- for y := 0; y < len(winnerOrder)-x-1; y++ {
- dt1, xok := winnerOrder[y]["sort"].(int)
- dt2, yok := winnerOrder[y+1]["sort"].(int)
- if xok && yok && dt1 > dt2 {
- temp := winnerOrder[y]
- winnerOrder[y] = winnerOrder[y+1]
- winnerOrder[y+1] = temp
- }
- }
- }
- }
- //合并
- func (wo *WinnerOrderEntity) Merge(winnerOrder, wors []map[string]interface{}) {
- if wors == nil || len(wors) == 0 {
- return
- }
- for _, v := range wors {
- for _, tv := range winnerOrder {
- sort, _ := v["sort"].(int)
- t_sort, _ := tv["sort"].(int)
- if sort == 0 || sort != t_sort {
- continue
- }
- if qutil.ObjToString(tv["entname"]) == "" && qutil.ObjToString(v["entname"]) != "" {
- tv["entname"] = v["entname"]
- }
- t_price, _ := tv["price"].(float64)
- price, _ := v["price"].(float64)
- if t_price == 0 && price != 0 {
- tv["price"] = v["price"]
- }
- }
- }
- }
|