package pretreated import ( "jy/clear" //"jy/clear" "jy/util" qutil "qfw/util" "regexp" "strconv" "strings" "unicode/utf8" ) type WinnerOrderEntity struct { } type WinnerFlag struct { index int //数组索引 start int //数组开始索引 end int //数组结束索引 textStart int //文本开始索引 textEnd int //文本结束索引 max int indexs []int } var ( winnerOrderEntity = &WinnerOrderEntity{} numberReg = regexp.MustCompile("[一二三四五六七八九十0-9]+") numberReg2 = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+") thisNumberReg = regexp.MustCompile("第" + numberReg.String()) winnerReg0 = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选(人|供应商)|中标人[1-9]|[第|弟][一二三四五0-9]中标人)") winnerReg1 = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])") winnerReg2 = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|[第|弟][一二三四五六七八九十1-9]+(中标|中选)?[候|侯]选人|中标候选人排名[::]\\d)") winnerReg3 = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+名|(中标候选人)[1-9])") winnerReg4 = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)") winnerReg5 = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)") winnerReg6 = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)") winnerReg7 = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]") //带金额 winnerReg8 = regexp.MustCompile("(第[一二三四五六七八九十]中选候选人)[::\\s]+?[((]1[))][\\s]+?(单位名称)[::]?(.*)[\\s]+?[((]2[))][\\s]+(参选报价|投标报价(含税))[::]?(.*)") winnerReg9 = regexp.MustCompile("(第[一二三四五六七八九十]中[选|标]?候选人|中标人[1-9])[::\\s]+?([\u4E00-\u9FA5]{4,20})[\\s]+([0-9\\.\\s万元]+)") winnerReg10 = regexp.MustCompile("(第[一二三四五六七八九十]中标人)[::\\s]+?报价[¥]?([0-9\\.\\s万元]+)[;;]([\u4E00-\u9FA5]{4,20})") winnerReg11 = regexp.MustCompile("([弟|第][一二三四五六七八九十]中[标|选]候选人)[::\\s]+?(单位名称|投标人名称)[::]?(.*)[\\s]+?(参选报价|投标报价[((]含税[))]|投标报价[((]元[))])[::]?(.*)") winnerReg12 = regexp.MustCompile("(中[标|选]候选人[弟|第][一二三四五六七八九十0-9]名|[弟|第][一二三四五六七八九十0-9](中标)?候选人)[::\\s ]+?(.*)[ \\s,,]+?(投标报价|投标总报价|金额)[::]?([0-9\\.\\s万元]+)") winnerReg13 = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9])\n(成交候选人|成交供应商)\n(.*)\n([0-9\\.\\s万元]+)") winnerReg14 = regexp.MustCompile("(中标候选人|成交候选人)\n.*\n.*\n第[1-9][\\s]+?名") winnerReg14_1 = regexp.MustCompile("(第[1-9])[\\s]+?名[::](.{4,20}公司)[\\s]+中标价[::]([0-9\\.\\s万元]+)") winnerReg15 = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9](中标|中选)?候选人)[::](.*)[ \\s\\n,,]+(最终报价[::\\s]+不含税单价.*)?不含税总价[::]?([0-9\\.()\\s万元]+)") winnerReg16 = regexp.MustCompile("(中[标|选]候选人)排序[::]([1-9一二三四五六七])[\\s]+.{1,4}名称[::](.*公司)[\\s]+.{1,4}报价[::]([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)") winnerReg17 = regexp.MustCompile("(报价金额|应答含税总价)[::]?([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)") winnerReg18 = regexp.MustCompile("([中|投]标候选人[弟|第])([1-9一二三四五])[\\s]?名[::]([\u4E00-\u9FA5]{4,20})([((].*公司[))])?[,,\\s]+投标报价[::]([0-9\\.\\s万元]+)") //格式化中标金额换行 winnerReg100 = regexp.MustCompile("中标金额:[\\s]+([0-9\\.万元]+)") //不带金额 winnerReg20 = regexp.MustCompile("(中标单位候选人名称)[\\s]+(.*)[\\s]+(中标候选人单位名次)[\\s]+([弟|第][一二三四五六七八九十0-9]中标人)") winnerReg21 = regexp.MustCompile("(石城(.*公司|.*厂|.*有\n限公司))[0-9.]+([弟|第][一二三四五六七八九十0-9])成交[\n]?候选人") winnerReg22 = regexp.MustCompile("投标人[::](.{4,20}公司)[\\s-]+标段[::][1-3][\\s-]+排名[::]([1-9])") winnerReg23 = regexp.MustCompile("([\u4E00-\u9FA5]{4,20})\n(有限公司|公司)[\\s]+(第[一二三四五1-9]中[选|标]候选人)") winnerReg24 = regexp.MustCompile("[\\s\\n]+([\u4E00-\u9FA5]{4,20}公司)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)") winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)") colonEndReg = regexp.MustCompile("[::]$") toWarpReg = regexp.MustCompile("[,。,;;]+") findamountReg = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+") amountReg = regexp.MustCompile("^\\d+(\\.\\d+)?([百|千]?元|[百|千]?[万|亿]元?)$") companyWarpReg = regexp.MustCompile("(公司)(.+?[::])") findCompanyReg = regexp.MustCompile("[^::]+公司") colonSpaceReg = regexp.MustCompile("[::]\\s+") findCandidate = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])|工作室)") findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合[会|体]|工作室|有限司)$)") clearSpace1 = regexp.MustCompile("([((][\\d一二三四五六七八九十][))][\\s\u3000\u2003\u00a0\\t]*|<[^>].+?>)") clearSpace2 = regexp.MustCompile("]+>") offerReg = regexp.MustCompile("(中标|磋商|投标|报|单|成交)总?(价|金额)") nofferReg = regexp.MustCompile("(费率|折扣率)") nobidValReg = regexp.MustCompile("^\\d{2}%$") ) /* *查找分包中的中标人排序 *text文本,flag非否精确查找 *from 来源 */ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool, codeSite string) []map[string]interface{} { if clearSpace2.MatchString(text){ text = TextAfterRemoveTable(text) }//评得分估|标的|班子成员|人员 text = winnerRegclear.ReplaceAllString(text,"") if nswinnertabletag.MatchString(text) && !winnerReg0.MatchString(text){ return []map[string]interface{}{} } text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n") text = winnerReg20.ReplaceAllString(text,"\n${4}:${2}\n") text = winnerReg21.ReplaceAllString(text,"\n${3}成交候选人:${1}\n") text = strings.ReplaceAll(text,"有\n限公司","有限公司") text = winnerReg22.ReplaceAllString(text,"\n中标候选人第${2}名:${1}\n") text = winnerReg23.ReplaceAllString(text,"\n${3}:${1}${2}\n") text = winnerReg24.ReplaceAllString(text,"\n${2}:${1}\n") text = winnerReg8.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n") text = winnerReg9.ReplaceAllString(text,"\n${1}:${2}\n中标金额:${3}\n") text = winnerReg10.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${2}\n") text = winnerReg11.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n") text = winnerReg12.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n") text = winnerReg13.ReplaceAllString(text,"\n${1}${2}:${3}\n中标金额:${4}\n") text = winnerReg15.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n") text = winnerReg16.ReplaceAllString(text,"\n第${2}${1}:${3}\n中标金额:${4}\n") text = winnerReg17.ReplaceAllString(text,"\n中标金额:${2}\n") text = winnerReg18.ReplaceAllString(text,"\n${1}${2}名:${3}\n中标金额:${5}\n") //中标金额格式化 text = winnerReg100.ReplaceAllString(text,"中标金额:${1}") //特殊格式 if winnerReg14.MatchString(text) { text = winnerReg14_1.ReplaceAllString(text,"\n中标候选人${1}名:${2}\n中标金额:${3}\n") } text = clearSpace1.ReplaceAllString(text, "") //清理(1) 单位名称:成都维诺信科技有限公司-->单位名称:成都维诺信科技有限公司 if strings.TrimSpace(text) == "" { return []map[string]interface{}{} } blocks := winnerReg4.Split(text, -1) if len(blocks) == 0 { blocks = append(blocks, text) } else if len(blocks) == 1 { blocks = winnerReg7.Split(text, -1) } winners := wo.findByReg(text, blocks, winnerReg0, from, isSite, codeSite) if len(winners) == 0 { winners = wo.findByReg(text, blocks, winnerReg2, from, isSite, codeSite) } if len(winners) == 0 { if flag { //异常 winners = wo.findByReg(text, blocks, winnerReg3, from, isSite, codeSite) } else { indexs_4 := winnerReg4.Split(text, -1) if len(indexs_4) > 1 { for _, v_4 := range indexs_4 { indexs_3 := winnerReg3.FindAllStringIndex(v_4, -1) if len(indexs_3) < 2 { continue } for _, v_3 := range indexs_3 { if strings.Count(v_4[:v_3[1]], "\n") <= 3 { winners = wo.findByReg(text, blocks, winnerReg3, from, isSite, codeSite) break } } } } } } //候选人有一半以上是错误的话,那么就认为全部抽错了 invalidCount := 0 for _, v := range winners { if !findCandidate.MatchString(qutil.ObjToString(v["entname"])) { invalidCount++ } } if invalidCount > len(winners)/2 { return []map[string]interface{}{} } return winners } //获取中标人排序文本 func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp.Regexp, from int) []string { isWinnerReg1 := reg_2 == winnerReg1 rdata := []string{} //确定中标候选人排序在哪个块里面 rightIndex, prevMax := -1, -1 rightIndexs := []int{} var winnerFlag []*WinnerFlag for b_k, b_v := range blocks { indexs := []*WinnerFlag{} array := reg_2.FindAllStringSubmatchIndex(b_v, -1) for _, v := range array { var wrfg *WinnerFlag if isWinnerReg1 { if v[4]<0 || v[5]<0 { continue } wrfg = &WinnerFlag{ index: wo.toNumber(b_v[v[4]:v[5]], 0), textStart: v[4], textEnd: v[5], } } else { if v[2]<0 || v[3]<0 { continue } wrfg = &WinnerFlag{ index: wo.toNumber(b_v[v[2]:v[3]], 0), textStart: v[2], textEnd: v[3], } } indexs = append(indexs, wrfg) } wf := wo.getMax(indexs, from) if wf != nil && wf.max >= prevMax { prevMax = wf.max rightIndex = b_k rightIndexs = append(rightIndexs, b_k) winnerFlag = append(winnerFlag, wf) } } ////在这个块里面,截取 if rightIndex == -1 { return rdata } for i, rightIndex := range rightIndexs { text = blocks[rightIndex] warpCount := wo.interceptText(winnerFlag[i].indexs, text) if warpCount == 0 { warpCount = 1 } textEnd := text[winnerFlag[i].textEnd:] text = text[winnerFlag[i].textStart:winnerFlag[i].textEnd] warpIndex := regSpliteSegment.FindAllStringIndex(textEnd, -1) if len(warpIndex) >= warpCount { textEnd = textEnd[:warpIndex[warpCount-1][1]] } text = text + textEnd if isWinnerReg1 { text = reg_2.ReplaceAllString(text, "$1\n$2$15") } else { text = reg_2.ReplaceAllString(text, "\n$1") } text = regReplWrapSpace.ReplaceAllString(text, "") lines := SspacekvEntity.getLines(text) text = "" for k, v := range lines { v = strings.TrimSpace(v) v = colonSpaceReg.ReplaceAllString(v, ":") if reg_2.MatchString(v) && !regDivision.MatchString(v) { if isWinnerReg1 { v = reg_2.ReplaceAllString(v, "$1$2:$15") } else { v = reg_2.ReplaceAllString(v, "$1:") } } //逗号之类符号的分割,查找紧跟在中标候选人之后的中标金额 //如果后面没有什么标识,只有金额的情况下,把中标金额加到金额前面 if reg_2.MatchString(v) { //两个kv连到一起 if len(regDivision.FindAllString(v, -1)) > 1 && !findamountReg.MatchString(v) { v = companyWarpReg.ReplaceAllString(v, "$1\n$2") } vs := findamountReg.Split(v, -1) if len(vs) > 1 { vs_1 := strings.TrimSpace(vs[1]) if amountReg.MatchString(vs_1) { v = strings.Replace(v, vs[1], "中标金额:"+vs_1, 1) } } } v = toWarpReg.ReplaceAllString(v, "\n") text += v if (!reg_2.MatchString(v) || !colonEndReg.MatchString(v)) && k < len(lines)-1 { text += "\n" } } rdata = append(rdata, text) } return rdata } //抽取对应的排序结果 func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int, isSite bool, codeSite string) []map[string]interface{} { text := wo.getText(content, blocks, reg_2, from) winners := []map[string]interface{}{} if len(text) < 1 { return winners } for i, v := range text { object := map[string]interface{}{} count := 0 kvs := colonkvEntity.getColonSpaceKV(v, isSite, codeSite) for _, kv := range kvs { k, v := kv.Key, kv.Value if regDivision.MatchString(v) { v_k := regDivision.Split(v, -1)[0] if reg_2.MatchString(v_k) { k = v_k } } if reg_2.MatchString(k) { //中标人 if len(object) > 0 { winners = append(winners, object) object = map[string]interface{}{} } val := wo.clear("中标单位", v) if val != nil && utf8.RuneCountInString(qutil.ObjToString(val)) > 5 { count++ object["entname"] = strings.TrimRight(strings.ReplaceAll(strings.TrimSpace(qutil.ObjToString(val)), "公司", "公司,"), ",") object["sort"] = wo.toNumber(k, count) object["sortstr"] = thisNumberReg.FindString(k) object["type"] = i } }else { //中标金额 - 折扣率系数-待定 findOfferFlag := false if offerReg.MatchString(k) && !nofferReg.MatchString(k){ findOfferFlag = true } else { kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"}, isSite, codeSite) if len(kvTags["中标金额"]) > 0 { findOfferFlag = true } } //找到了中标金额 if findOfferFlag && object["entname"] != nil { val := wo.clear("中标金额", v+GetMoneyUnit(k, v)) if val != nil && !nobidValReg.MatchString(qutil.ObjToString(val)) { moneys := clear.ObjToMoney([]interface{}{val, ""}) if len(moneys) > 0 { if vf, ok := moneys[0].(float64); ok && moneys[len(moneys)-1].(bool){ object["price"] = float64(vf) } } } winners = append(winners, object) object = map[string]interface{}{} } } } if len(object) > 0 { winners = append(winners, object) } indexs := []*WinnerFlag{} //tym := make(map[int]bool, 0) for _, winner := range winners { indexs = append(indexs, &WinnerFlag{ index: winner["sort"].(int), //ttype: winner["type"].(int), }) //tym[winner["type"].(int)] = true } //winnerFlag := wo.getMax(indexs, from) //if winnerFlag != nil { // winners = winners[winnerFlag.start : winnerFlag.end+1] //} else { // winners = []map[string]interface{}{} //} } return winners } //清理结果 func (wo *WinnerOrderEntity) clear(typ, v string) interface{} { if typ == "中标单位" && regDivision.MatchString(v) { v = findCompanyReg.FindString(v) v = filterWinner.FindString(v) } v = filterValue.ReplaceAllString(v, "") //过滤 return v //clear.ClearResult(typ, v) } // func (wo *WinnerOrderEntity) toNumber(value string, defaultNum int) int { value = numberReg.FindString(value) if value == "" { return defaultNum } v := util.ChineseNumberToInt(value) if v < 1 { v, _ = strconv.Atoi(value) } if v > 0 { return v } return defaultNum } // func (wo *WinnerOrderEntity) getMax(indexs []*WinnerFlag, from int) *WinnerFlag { allMap := map[int]*WinnerFlag{} max, start, textStart := -1, -1, -1 isContinue := false flag := false is := []int{} for k, winnerFlag := range indexs { v := winnerFlag.index //从1开始,1前面的过滤掉 if v == 1 { flag = true } if !flag { continue } if v == 1 { start = k textStart = winnerFlag.textStart isContinue = false } if isContinue { continue } nextIndex := 0 if k < len(indexs)-1 { nextIndex = indexs[k+1].index } //从1-n是一组,遇到小于n的从新添加分组,分组不是1开头的过滤掉 is = append(is, winnerFlag.textStart, winnerFlag.textEnd) if nextIndex-v != 1 { isContinue = true if max < k-start { max = k - start allMap[k-start] = &WinnerFlag{ textStart: textStart, textEnd: winnerFlag.textEnd, start: start, end: k, max: max, indexs: is, } } } } if max != -1 { if from != 3 && len(allMap[max].indexs) <= 2 { return nil } return allMap[max] } return nil } //如果有两个,看第一个有几个换行,用第一个里面的最后一个换行作为第二个的结束位置 //如果有两个以上,取前两个中换行最多的最后一个换行,作为其他的结束位置 func (wo *WinnerOrderEntity) interceptText(indexs []int, con string) int { if len(indexs) == 1 { return 0 } count := 0 for ik, iv := range indexs { text := "" if ik < len(indexs)-1 { text = con[iv:indexs[ik+1]] } else { text = con[iv:] } //如果两个 if len(indexs) == 2 { //取第一个有几个换行符 if ik == 0 { count = len(regSpliteSegment.FindAllStringIndex(text, -1)) } } else { //多个,取前两个中换行符最多的 if ik <= 1 { thisCount := len(regSpliteSegment.FindAllStringIndex(text, -1)) if thisCount > count { count = thisCount } } } } return count } //排序 func (wo *WinnerOrderEntity) Order(winnerOrder []map[string]interface{}) { if winnerOrder == nil || len(winnerOrder) <= 1 { return } for x, _ := range winnerOrder { for y := 0; y < len(winnerOrder)-x-1; y++ { dt1, xok := winnerOrder[y]["sort"].(int) dt2, yok := winnerOrder[y+1]["sort"].(int) if xok && yok && dt1 > dt2 { temp := winnerOrder[y] winnerOrder[y] = winnerOrder[y+1] winnerOrder[y+1] = temp } } } } //合并 func (wo *WinnerOrderEntity) Merge(winnerOrder, wors []map[string]interface{}) { if wors == nil || len(wors) == 0 { return } for _, v := range wors { for _, tv := range winnerOrder { sort, _ := v["sort"].(int) t_sort, _ := tv["sort"].(int) if sort == 0 || sort != t_sort { continue } if qutil.ObjToString(tv["entname"]) == "" && qutil.ObjToString(v["entname"]) != "" { tv["entname"] = v["entname"] } t_price, _ := tv["price"].(float64) price, _ := v["price"].(float64) if t_price == 0 && price != 0 { tv["price"] = v["price"] } } } }