package pretreated

import (
	"fmt"
	"jy/clear"
	u "jy/util"
	qutil "qfw/util"
	"regexp"
	"strings"
	"unicode/utf8"

	"github.com/PuerkitoBio/goquery"
)

// Package-level state: mostly the regular expressions used to classify table
// keys, values and tags while extracting key/value data from HTML tables.
var (
	// Date unit found in a key (year / month / day). The original pattern was
	// `[年|月|日|天]`; inside a character class `|` is a literal, so a key
	// containing `|` produced "|" as its date unit.
	dateReg *regexp.Regexp = regexp.MustCompile(`[年月日天]`)
	// Strips a leading ordinal number from an item name.
	numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
	num1     = regexp.MustCompile("(\\d)")
	// Characters removed from table titles.
	tabletitleclear  = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、.,.。_/((人民币万元件个公斤户))]")
	tabletitleclear2 = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕]*")
	// Whitespace, leading ordinals and bracketed text removed from table keys.
	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
	// Symbols removed from td content.
	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
	// Key denotes an amount (used to decide whether a money unit is appended).
	moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
	// Keys that must not be cleaned, e.g. a rate or a duration unit in brackets.
	no_clear_key_reg = regexp.MustCompile(`[((](费率|年|月|日|天|日历天|历天)[))]`)
	// A cell that consists only of an amount is not a header cell.
	MoneyReg = regexp.MustCompile("^[\\s ::0-9.万元()()人民币¥$]+$")
	GSReg    = regexp.MustCompile(".*公司.*")
	// Used when deciding on packages.
	moneyNum = regexp.MustCompile("[元整¥万]")
	// Detects a hidden element through its inline style.
	display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
	// ---------------
	// Multi-package probability: scores derived from the table tag.
	TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)")
	TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)")
	// Keys filtered out before package scoring.
	FilterKey_2 = regexp.MustCompile("招标|投标|项目")
	// Package scoring based on table keys.
	FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数]|包[组件])")
	FindKey_3 = regexp.MustCompile("(标段编号|标包)")
	// Package detection based on values.
	FindVal_1  = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[ \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
	// Keys excluded before package detection.
	excludeKey  = regexp.MustCompile("(标识|数量|分包个数|标段代码|涉及包号|分包数量|项目标号|规格|型号|招标范围|业绩|废标|标段选择要求)|(^编号$)|([^包段标]编号)")
	excludeKey2 = regexp.MustCompile("包/[0-9]{0,4}[箱纸张]")
	excludeKey3 = regexp.MustCompile("(分包个数|每包[0-9]*元|标线|国标|享受一包服务)")
	// -------------
	cut = u.NewCut()
	// Removes HTML tags and trailing whitespace from a table tag.
	ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
	// Locates the table tag in the text preceding a table.
	ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
	// Threshold used when deciding whether a row/column is a header.
	checkval = float32(0.6)
	// Whitespace / colon replacement.
	repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+")
	// --- handling of table key/values that cannot be standardised ---
	filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)?")
	// Winning amount: keys containing these words are standardised.
	filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
	// Simple amount check.
	filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
	// Budget amount.
	filter_ysje_jd = regexp.MustCompile("(预算|预控价|项目概.|项目信息)")
	// ...and keys containing these words are excluded.
	filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|电话|要求|需求数量|发布规模$|第[2二3三4四5五]|地址|询价保证金|行号")
	// ...and the value must look like an amount.
	filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
	// Winning bidder: keys containing these words are standardised.
	filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$")
	// Recognises winning-bidder related information.
	filter_zbdw_info = regexp.MustCompile("(中标|成交|中选|供(货|应))[^候选]{0,}")
	// Simple check.
	filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
	// ...and keys containing these words are excluded.
	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金")
	// ...and the value must look like an organisation name.
	filter_zbdw_v  = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
	filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
	// Some tables have an empty header and ranked values; such values are
	// stored under the NullTxtBid key.
	NullTdReg      = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
	NullTxtBid     = "成交供应商排名"
	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
	MhSpilt        = regexp.MustCompile("[::]")
	// Recognises contact person / phone of the buyer and the agency.
	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//及]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
	ContactType         = map[string]*regexp.Regexp{
		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
		"中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应))((成交))?)[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
		"监督部门": regexp.MustCompile("投诉受理部门"),
	}
	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
	MultipleValueSplitReg       = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
	BuyerContacts               = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
	FilterSerial                = regexp.MustCompile(".+[、..::,]")
	underline                   = regexp.MustCompile("_+$")
	iswinnertabletag            = regexp.MustCompile("(中标|候选人|成交|结果|磋商情况)")
	nswinnertabletag            = regexp.MustCompile("评得分估|标的信息|班子成员")
	jsonReg                     = regexp.MustCompile(`\{.+:[^}]*\}`) // \{".*\":\".+\"}
	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")
	winnerOrderAndBidResult     = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
	WinnerOrderStr              = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|[大中小]+学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
	DoubtReg                    = regexp.MustCompile("(我中心|有(疑问|质疑|异议|意见)|(书面)?提出|不再受理|投诉|质疑|书面形式|监督|公示期(限)?)")
)

// IsHide reports whether the element's inline style attribute contains
// "display: none" (case-insensitive), i.e. the element is hidden.
func IsHide(g *goquery.Selection) (b bool) {
	if style, exists := g.Attr("style"); exists {
		b = display.MatchString(style)
	}
	return
}

// clearnum matches comma-separated number runs such as
// "59.992664,33.495715,20.001306".
var clearnum *regexp.Regexp =
regexp.MustCompile("(([0-9.]{1,6}[,,]+){4,}|(\\d{6}[,,]\\d{2}.){2,})") //对表格的key进行标准化处理,多个k相同时,出现覆盖问题 //待扩展,暂不支持正则标签库 清理key func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool, codeSite string) (kvTags map[string][]*u.Tag, returntag string) { kvTags = map[string][]*u.Tag{} v1 := "" if sv, sok := v.(string); sok { //取KV v1 = sv } else if sv, sok := v.([]string); sok { //是数组先默认取第一个 if len(sv) >= 1 { v1 = sv[0] } } //对值单位的处理 (预算|费|价|额|规模|投资) if moneyreg.MatchString(k) { v1 += GetMoneyUnit(k, v1) } //先清理key //u.Debug(1, k, v1) //指定-key不清理 拦标价(费率或单价等) k1:="" if !no_clear_key_reg.MatchString(k) { k1 = ClearKey(k, 2) } //u.Debug(2, k) //取标准key if tabletag == "中标情况" { if k1=="价格" { k1="中标金额" } } res := u.GetTags(k1, isSite, codeSite) if len(res) == 0 && k1 != k { res = u.GetTags(k, isSite, codeSite) k1 = k } //log.Println(k, res) // if len(res) == 0 { // go u.AddtoNoMatchMap(tk) // } //当取到标准化值时,放入数组 if len(res) > 0 { for _, t1 := range res { //降低冒号值的权重 if MhSpilt.MatchString(v1) { t1.Weight -= 50 } if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人 kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true}) } else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" { kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 150}) } else { kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight}) } } res[0].IsInvalid = true //k1 = res[0].Value } /*else { kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true}) //没有取到标准化key时,对中标金额和中标单位的逻辑处理 if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) && utf8.RuneCountInString(v1) < 20 { if tabletag == "" { returntag = "中标情况" } kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100, IsInvalid: true}) } else if filter_zbdw_ky.MatchString(k) && 
!filter_zbdw_kn.MatchString(k) && filter_zbdw_v.MatchString(v1) { kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: k, Value: v1, Weight: -100, IsInvalid: true}) if tabletag == "" { returntag = "中标情况" } } else if !filter_zbje_jd.MatchString(tabletag) && !filter_zbje_jd.MatchString(k) && utf8.RuneCountInString(v1) < 13 { //对上一步没有取到标准化key的进一步处理 if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) { //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1)) if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) { if filter_ysje_jd.MatchString(k) { kvTags["预算金额"] = append(kvTags["预算金额"], &u.Tag{Key: k, Value: v1, Weight: -100}) } else if !filter_zbdw_kn.MatchString(k) { kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100}) } } } } }*/ return } var glRex *regexp.Regexp = regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序|中标候选人|名单及其排序|排序)") var djReg *regexp.Regexp = regexp.MustCompile("^单价") //对解析后的表格的kv进行过滤 func (table *Table) KVFilter(isSite bool, codeSite string) { //1.标准化值查找 //2.对数组的处理 //3.对分包的处理 //4.对KV的处理 //判断表格是否有用,调用abandontable正则数组进行判断 //遍历每一行 table.analyTdKV() //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理 as := NewSortMap() //遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理 for _, k := range table.SortKV.Keys { //遍历所有key sort.kv //表格描述处理,对成交结果的处理 if k=="第一询价结果候选人" { //fmt.Println("标准化key") } if glRex.MatchString(k) { table.Desc += "成交结果," } if djReg.MatchString(k) { continue } v := table.SortKV.Map[k] if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight k = pkgFilter.ReplaceAllString(k, "") k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "") kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite) //对key标准化处理,没有找到会走中标 if tag != "" && table.Tag == "" { table.Tag = tag } MergeKvTags(table.StandKV, kvTags) } else { as.AddKey(k, 
v) } } //核心-候选人-相关 //处理值是数组的kv 放入标准化kv中 standKV //处理table.SortKV.value为数组的情况 table.sortKVArr(as, isSite, codeSite) // if len(table.WinnerOrder) > 0 || !table.BPackage { winnerOrder := []map[string]interface{}{} maxSort := 0 //调整顺序 for i := 0; i < 2; i++ { for _, v := range table.WinnerOrder { sortstr, _ := v["sortstr"].(string) if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") { continue } sort, _ := v["sort"].(int) if i == 0 { if maxSort == 0 || sort > maxSort { maxSort = sort } } else { maxSort++ v["sort"] = maxSort } winnerOrder = append(winnerOrder, v) } if len(winnerOrder) == len(table.WinnerOrder) { break } } table.WinnerOrder = winnerOrder if len(table.WinnerOrder) == 0 { winnerOrder = []map[string]interface{}{} //遍历每个td,查询中标人 for _, tr := range table.TRs { for _, td := range tr.TDs { winnerOrder = winnerOrderEntity.Find(td.Val, true, 3, isSite, codeSite) if len(winnerOrder) > 0 { //中标候选人合并 winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder) } } } } if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder if len(winnerOrder) > 1 { table.WinnerOrder = winnerOrder } } } //对中标候选人进行排序 winnerOrderEntity.Order(table.WinnerOrder) //该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面 if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 { if table.BlockPackage.Map != nil { onePkgKey := table.BlockPackage.Keys[0] onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage) if onePkg != nil && (onePkg.WinnerOrder != nil || len(onePkg.WinnerOrder) == 0) { onePkg.WinnerOrder = table.WinnerOrder table.BlockPackage.AddKey(onePkgKey, onePkg) } } } } var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标总价)") //处理table.SortKV.value为数组的情况 func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) { winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签 if !winnertag { winnertag = 
			iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) // block tag
	}
	// Fall back to the table description, then to the raw HTML.
	if !winnertag {
		winnertag = iswinnertabletag.MatchString(table.Desc)
	}
	if !winnertag {
		winnertag = iswinnertabletag.MatchString(table.Html)
	}
	// Indexes into as.Keys that were already consumed while building a
	// candidate ranking, so they are not used to start another one.
	checkKey := map[int]bool{}
	for kn, k := range as.Keys { // every key whose value is a slice
		v := as.Map[k]
		if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
			// Ranking that was already extracted for a header-less table.
			if table.WinnerOrder == nil {
				table.WinnerOrder = []map[string]interface{}{}
			}
			table.WinnerOrder = append(table.WinnerOrder, vm...)
		} else {
			// Candidate ranking: starting at this key, walk the following
			// keys and collect rank / name / price columns of equal length.
			if (table.WinnerOrder == nil || len(table.WinnerOrder) == 0) && !checkKey[kn] {
				if vs1, ok := v.([]string); ok {
					// One map per candidate row.
					smap := make([]map[string]interface{}, len(vs1))
					for n1, _ := range vs1 {
						smap[n1] = map[string]interface{}{}
					}
					tmpEntname := make([]string, len(vs1))
					tmpPrice := make([]string, len(vs1))
					for kn1, k := range as.Keys[kn:] {
						v := as.Map[k]
						// Buyer / agency columns are standardised directly and
						// never take part in the ranking.
						if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) {
							kvTags, _ := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite)
							for k := range kvTags {
								if table.StandKV[k] == nil {
									MergeKvTags(table.StandKV, kvTags)
								}
							}
							continue
						}
						// The decision is currently based on the key; some
						// tables carry the ranking in the values instead, e.g.
						// "载明内容: [第一中标候选人 第二中标候选人]".
						if vs, ok := v.([]string); ok && len(vs) == len(vs1) { // same number of values
							res, _, _, _, repl := CheckCommon(k, "bidorder")
							kv := ""
							if !res {
								kt := u.GetTags(k, isSite, codeSite)
								if kt.Len() > 0 {
									// In a winner table a per-item quote counts
									// as the winning amount.
									if kt[0].Value == "单品报价" && winnertag {
										kv = "中标金额"
									} else {
										kv = kt[0].Value
									}
								}
							}
							if !res && kv == "" { // key not recognised: inspect the values
								checkKey[kn+kn1] = true
								if winnertag { // winner table: derive candidates from the values
									for vsk, vsv := range vs {
										if NullTdReg.MatchString(vsv) { // value is a rank
											smap[vsk]["sortstr"] = vsv
											smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
										} else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" {
											// value is a candidate name
											entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
											if entname != "" {
												tmpEntname[vsk] = entname
											}
										} else if winMoneyReg.MatchString(k) && len(tmpPrice[vsk]) == 0 {
											// Price-like key: handled by the
											// "中标金额" branch below.
											kv = "中标金额"
										} else {
											// A value that is neither a rank nor
											// a name invalidates this column.
											break
										}
									}
								}
							}
							if res || kv != "" { // recognised column: consume it
								checkKey[kn+kn1] = true
							SORT:
								if repl == "sort" {
									for vsk, vsv := range vs {
										smap[vsk]["sortstr"] = vsv
										smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
										if findCandidate2.MatchString(vsv) && kv == "中标单位" && tmpEntname[vsk] == "" {
											// the rank value also names the candidate
											entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
											if entname != "" {
												tmpEntname[vsk] = entname
											}
										}
									}
								} else if repl == "entname" || kv == "中标单位" {
									for vsk, vsv := range vs {
										// Key says "candidate" but the values
										// are ranks, e.g. ["第一名","第二名"]:
										// re-run this column as a rank column.
										if winnerReg6.MatchString(vsv) {
											repl = "sort"
											goto SORT
										}
										// Skip columns such as units
										// (["台","个","套"]) and rows that
										// already have a name.
										if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 {
											break
										}
										entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
										if entname != "" {
											tmpEntname[vsk] = entname
										}
									}
								} else if kv == "中标金额" {
									for vsk, vsv := range vs {
										// Keep the larger of the stored and
										// the new price, comparing the numeric
										// part only.
										tmp1 := tmpPrice[vsk]
										p1num := numberReg2.FindString(tmp1)
										p2num := numberReg2.FindString(vsv)
										p1 := qutil.Float64All(p1num)
										p2 := qutil.Float64All(p2num)
										if p2 > p1 {
											price := winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
											if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
												tmpPrice[vsk] = pricestr
											}
										}
									}
								} else if kv == "预算" {
									// Propagate the unit from the key to
									// values that lack it (writes into vs).
									if strings.Contains(k, "万元") {
										for vsk, vsv := range vs {
											if !strings.Contains(vsv, "万元") {
												vs[vsk] = vsv + "万元"
											}
										}
									}
								}
							}
						} else {
							// Columns of a different length are ignored; the
							// scan continues with the next key.
						}
					}
					newSmap := []map[string]interface{}{}
					for n, smap_v := range smap {
						// Attach name and price; a price is only stored when
						// the row has a name.
						if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" {
							smap_v["entname"] = tmpEntname[n]
							if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" {
								smap_v["price"] = tmpPrice[n]
							}
						}
						// Rows holding only sort/sortstr are dropped.
						if len(smap_v) > 2 {
							newSmap = append(newSmap, smap_v)
						}
					}
					if len(newSmap) > 0 {
						table.WinnerOrder = newSmap
					}
				}
			} else if vsss, ok := v.([]string); ok {
				// A ranking already exists: a column of equal length may still
				// supply prices when none are set, or when the key is a total
				// quote.
				if (len(table.WinnerOrder) > 0 && table.WinnerOrder[0]["price"] == nil && len(vsss) == len(table.WinnerOrder)) || (len(table.WinnerOrder) > 0 && strings.Contains(k, "总报价") && len(vsss) == len(table.WinnerOrder)) {
					kv := ""
					if winMoneyReg.MatchString(k) {
						kv = "中标金额"
					} else {
						kt := u.GetTags(k, isSite, codeSite)
						if kt.Len() > 0 {
							if kt[0].Value == "单品报价" && winnertag {
								kv = "中标金额"
							} else {
								kv = kt[0].Value
							}
						}
					}
					if kv == "中标金额" {
						for i, vx := range vsss {
							p1num := numberReg2.FindString(vx)
							// "1,234.56": drop the thousands separators.
							if strings.Contains(p1num, ",") && strings.Contains(p1num, ".") {
								p1num = strings.ReplaceAll(p1num, ",", "")
							}
							p1 := qutil.Float64All(p1num)
							if p1 > 0 {
								price := winnerOrderEntity.clear(kv, vx+GetMoneyUnit(k, vx))
								// Reject over-long strings and number lists.
								if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 && !clearnum.MatchString(pricestr) {
									table.WinnerOrder[i]["price"] = pricestr
								}
							}
						}
					}
				} else if table.StandKV[k] == nil {
					// Otherwise standardise the column like a plain key (the
					// first slice element is used as the value).
					kvTags, _ := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite)
					MergeKvTags(table.StandKV, kvTags)
				}
			}
		}
	}
}

// analyTdKV (1) adds the key/values embedded in each td to table.SortKV and
// (2) merges the results of nested tables into the table.
func (table *Table) analyTdKV() {
	// walk every row
	for
_, tr := range table.TRs { for _, td := range tr.TDs { //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map) bc := false if !td.BH { //表头是否是无用内容 if td.HeadTd != nil { bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable") } } if !bc { //td元素有内嵌kv,遍历放入table的Kv中 if len(td.SortKV.Keys) > 0 { for _, k3 := range td.SortKV.Keys { _val := td.SortKV.Map[k3] //thisFlag := false if td.HeadTd != nil && len([]rune(k3)) < 4 { k3 = td.HeadTd.Val + k3 } if table.SortKV.Map[k3] == nil && _val != nil && _val != "" { //u.Debug(k3, _val) //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) { table.SortKV.AddKey(k3, _val) } } } } //td有子表格的处理 //u.Debug(td.BH, td.Val, td.SonTableResult) if td.SonTableResult != nil { //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs) for k3, v3 := range td.SonTableResult.KvTags { table.StandKV[k3] = append(table.StandKV[k3], v3...) } //中标候选人排序 if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 { table.WinnerOrder = td.SonTableResult.WinnerOrder } else { winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder) } } } } } //表格结果合并到父表格集中 func (table *Table) MergerToTableresult() { //对多包表格的多包值的合并处理 if table.BPackage { table.TableResult.IsMultiPackage = true for _, v2 := range table.BlockPackage.Keys { package1 := table.TableResult.PackageMap.Map[v2] if package1 == nil { table.TableResult.PackageMap.AddKey(v2, table.BlockPackage.Map[v2]) if vvv, ok := table.BlockPackage.Map[v2].(*u.BlockPackage); ok { if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 { MergeKvTags(table.TableResult.KvTags, vvv.TableKV.KvTags) } } } else { bp := package1.(*u.BlockPackage) if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } v1 := table.BlockPackage.Map[v2].(*u.BlockPackage) if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 { for k2, v2 := range v1.TableKV.KvTags { if k2 == "" { continue } isExists := false for _, v2v := range v2 { if v2v.Value == "" { continue } for _, v2vv := range bp.TableKV.KvTags[k2] { if 
v2v.Value == v2vv.Value { isExists = true break } } if !isExists { bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v) bp.Text += fmt.Sprintf("%v:%v\n", k2, v2) } } } } if bp.Bidamount <= 0 && !bp.IsTrueBidamount { bp.Bidamount = v1.Bidamount bp.IsTrueBidamount = v1.IsTrueBidamount } if bp.Budget <= 0 && !bp.IsTrueBudget { bp.Budget = v1.Budget bp.IsTrueBudget = v1.IsTrueBudget } bp.Text += bp.Text if len(v1.WinnerOrder) > 0 && len(bp.WinnerOrder) == 0 { bp.WinnerOrder = v1.WinnerOrder } } } } //遍历标准key到tableresult.sortkv中 for fieldKey, v := range table.StandKV { for _, vv := range v { if fieldKey=="项目周期"||fieldKey=="工期单位"||fieldKey=="工期时长" { dateStr := dateReg.FindString(vv.Key) if dateStr !="" && !strings.Contains(vv.Value,dateStr) { vv.Value = vv.Value+dateStr } } vv.Value = strings.Replace(vv.Value, "__", "", -1) } } MergeKvTags(table.TableResult.KvTags, table.StandKV) //表格的块标签 if table.TableResult.BlockTag == "" && table.Tag != "" { table.TableResult.BlockTag = table.Tag } //中标候选人(多个table,现在默认取第一个table的信息,考虑需不需要多个table分析合并数据?) 
if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 { table.TableResult.WinnerOrder = table.WinnerOrder } //增加brand 并列table if len(table.BrandData) > 0 { for _, v := range table.BrandData { if len(v) > 0 { table.TableResult.BrandData = append(table.TableResult.BrandData, v) } } } //抽取prince和number 并列table if len(table.PriceNumberData) > 0 { for _, v := range table.PriceNumberData { if len(v) > 0 { table.TableResult.PriceNumberData = append(table.TableResult.PriceNumberData, v) } } } if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 0 { for _, v := range table.BlockPackage.Keys { if table.BlockPackage.Map[v] != nil { if vvv, ok := table.BlockPackage.Map[v].((*u.BlockPackage)); ok { if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 { for kk, vv := range vvv.TableKV.KvTags { if kk == "" { continue } if len(table.TableResult.KvTags[kk]) == 0 { table.TableResult.KvTags[kk] = vv } } } } } } } } /** 解析表格入口 返回:汇总表格对象 **/ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock, isSite bool, codeSite string) (tabres *TableResult) { defer qutil.Catch() //u.Debug(con) if itype == 1 { //修复表格 con = RepairCon(con) } //生成tableresult对象 tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock) if fblbReg.MatchString(blockTag) { return } //可以有多个table //for _, table := range tabs { //隐藏表格跳过 if IsHide(tabs) { return } tabres.GoqueryTabs = tabs //} //解析表格集 tabres.Analy(isSite, codeSite) return } //开始解析表格集 func (ts *TableResult) Analy(isSite bool, codeSite string) { tabs := []*Table{} contactFormat := &u.ContactFormat{ IndexMap: map[int]string{}, MatchMap: map[string]map[string]bool{}, } //for _, table := range ts.GoqueryTabs { tn := NewTable(ts.Html, ts, ts.GoqueryTabs) //核心模块 tsw := tn.Analy(contactFormat, isSite, codeSite) for _, tab := range tsw { if len(tab.TRs) > 0 { tabs = append(tabs, tab) } //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys) } 
//tn.SonTables = append(tn.SonTables, tn) //} //统一合并,考虑统一多表格是多包的情况---新增 与子表格合并 if len(tabs) > 1 { pns := map[string]string{} pnarr := []string{} for _, table := range tabs { if len(table.StandKV["项目名称"]) == 0 { continue } pn := table.StandKV["项目名称"][0] if pn != nil && pn.Value != "" && TitleReg.MatchString(pn.Value) { pnarr = append(pnarr, pn.Value) matchres := TitleReg.FindAllStringSubmatch(pn.Value, -1) if len(matchres) == 1 && len(matchres[0]) > 0 { v1 := u.PackageNumberConvert(matchres[0][0]) pns[v1] = matchres[0][0] bp := &u.BlockPackage{} bp.Index = v1 bp.Origin = matchres[0][0] if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} { if len(table.StandKV[k]) > 0 { bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value}) } } bp.WinnerOrder = table.WinnerOrder if table.BlockPackage.Map[v1] == nil { table.BPackage = true table.BlockPackage.AddKey(v1, bp) } else { table.BlockPackage.RemoveKey(v1) table.BlockPackage.AddKey(v1, bp) } } } } if len(tabs) == len(pns) { //多个表格,每个表格都是一个分包 http://www.cxzwfw.gov.cn/info/1009/6963.htm //项目名称、项目编号、采购单位、招标机构、预算 pname := projectnameReg.ReplaceAllString(pnarr[0], "") btrue := true for _, pn := range pnarr[1:] { pn = projectnameReg.ReplaceAllString(pn, "") //u.Debug(pn, pname) if pn != pname { //项目名称不一致 btrue = false break } } if btrue { ts.KvTags["项目名称"] = append(ts.KvTags["项目名称"], &u.Tag{Key: "项目名称", Value: pname, Weight: 100}) for _, table := range tabs { table.BPackage = true //预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态 if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 1 { bp := table.BlockPackage.Map[table.BlockPackage.Keys[0]].(*u.BlockPackage) if table.TableResult.WinnerOrder != nil { bp.WinnerOrder = table.WinnerOrder } if bp != nil && table.StandKV != nil { if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} { 
if len(table.StandKV[k]) > 0 { bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value}) } if nk < 4 { delete(table.StandKV, k) } } } } } } } } for _, table := range tabs { table.MergerToTableresult() MergeKvTags(ts.KvTags, table.TableResult.KvTags) if !table.Brule { ts.isUnRulesTab = true } } } //解析表格 func (table *Table) Analy(contactFormat *u.ContactFormat, isSite bool, codeSite string) []*Table { //查找表体中的tr对象 trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr") if trs.Size() == 0 { trs = table.Goquery.ChildrenFiltered("tr") } ztb := table.Goquery.Find("table").Size() if ztb >= 9 { return []*Table{} } //遍历节点,初始化table 结构 TRs Sorts table.createTabe(trs, isSite, codeSite) if len(table.TRs) == 0 { return []*Table{} } //重置行列 table.ComputeRowColSpan() //对table结构体进行整体解析处理 ts := table.AnalyTables(contactFormat, isSite, codeSite) return ts } var fblbReg *regexp.Regexp = regexp.MustCompile("(废标|流标|负责人资格|负责人业绩|相关业绩|技术评分明细表|开标记录|附件[:0-9]|越南盾|技术分|填报项目业绩|未通过.*原因)") //遍历节点,初始化table 结构体 func (table *Table) createTabe(trs *goquery.Selection, isSite bool, codeSite string) { trs.Each(func(n int, sel *goquery.Selection) { //隐藏行不处理 if IsHide(sel) { return } //遍历每行的td tds := sel.ChildrenFiltered("td,th") TR := NewTR(table) tdTextIsNull := false var empty int tds.Each(func(m int, selm *goquery.Selection) { //对隐藏列不处理!!! 
if IsHide(selm) { return } //进入每一个单元格 td := NewTD(selm, TR, table, isSite, codeSite) //初始化td,kv处理,td中有table处理,td的方向 //num++ TR.AddTD(td) if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的 empty++ if tds.Size() == empty { tdTextIsNull = true } } }) //向table添加每行不为空的tr if !tdTextIsNull { table.AddTR(TR) } }) } //对table进行整体解析处理 func (tn *Table) AnalyTables(contactFormat *u.ContactFormat, isSite bool, codeSite string) []*Table { ts := tn.tableSubDemolitionTable() //分包,拆表 for n, table := range ts { //处理每个table if len(table.TRs) > 0 { //删除尾部空白行 table.deleteTrimTr() //table.Print() //校对表格 table.Adjust(isSite, codeSite) //查找表格的标签,table.Tag字段 table.FindTag() //分割表格 table.bSplit(n, ts, isSite, codeSite) table.TdContactFormat(contactFormat, isSite, codeSite) //contactFormat,处理采购单位,代理机构 //开始查找kv,核心模块,table.SortKV table.FindKV(isSite, codeSite) //table中抽取品牌,table.BrandData if u.IsBrandGoods { table.analyBrand() } //table中抽取单价和个数 if u.IsPriceNumber { //qutil.Debug("======================抽取price和number===========") table.extractPriceNumber() } res, _, _, _, _ := CheckCommon(table.Tag, "abandontable") if !res { //过滤、标准化、合并kv,table.StandKV,table.StandKVWeight table.KVFilter(isSite, codeSite) } //对有表头表格的处理 if table.Tag != "" { co, m, b := CheckMultiPackage(table.Tag) //分包处理 if b { table.BPackage = b if len(table.BlockPackage.Map) == 0 { for _, av := range m { kv := u.NewJobKv() kv.KvTags = table.StandKV bd := u.PackageNumberConvert(av[0]) blockPackage := &u.BlockPackage{ Origin: av[0], Name: av[0], Text: co, TableKV: kv, Index: bd, } if bd != "" { table.BlockPackage.AddKey(bd, blockPackage) } else { table.BlockPackage.AddKey(av[0], blockPackage) } } } table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -300}) } } //判断是否是多包,并处理分包的//遍历td分块 table.CheckMultiPackageByTable(isSite, codeSite) //分包处理 //MergeKvTags(table.TableResult.KvTags, table.StandKV) } } return ts } //分包,拆表 func (table *Table) 
tableSubDemolitionTable() []*Table { tm := []map[string]interface{}{} tmk := map[string]bool{} tmn := map[int]map[string]interface{}{} for rownum, tr := range table.TRs { if len(tr.TDs) == 1 && table.ColNum > 1 { //tr里面有一列,table里面有多列 td := tr.TDs[0] //取每行第一个td //td开始列等于0 && td结束列+1等于table列数 && td长度大于1小于50 if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 1 && len([]rune(td.Val)) < 50 { con, m1, b := CheckMultiPackage(td.Val) //判断分包 if b { for k, _ := range m1 { numstr := u.PackageNumberConvert(k) m2 := map[string]interface{}{ "tag": con, //"num": numstr, //"numtxt": v[0], "startrow": rownum, } tmk[numstr] = true tmn[rownum] = m2 tm = append(tm, m2) break } } } } } //拆表 ts := []*Table{} if len(tmk) > 1 && len(tmk) == len(tm) { var tab1 *Table for rownum, tr := range table.TRs { if tab1 == nil { tab1 = NewTable("", table.TableResult, table.Goquery) tab1.BSplit = true if tmn[rownum] != nil { tab1.StandKV["项目名称"] = append(tab1.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: tmn[rownum]["tag"].(string), Weight: -100}) } ts = append(ts, tab1) } if tmn[rownum] != nil { tab1.Tag = tmn[rownum]["tag"].(string) } else { tab1.AddTR(tr) } if tmn[rownum+1] != nil { tab1 = nil } } } else { ts = append(ts, table) } return ts } //分割表格 func (table *Table) bSplit(n int, ts []*Table, isSite bool, codeSite string) { if table.BSplit { if !table.BHeader && n > 0 { for i := n - 1; i > -1; i-- { if ts[i].BHeader { if ts[i].BFirstRow { //取第一行插入到 table.InsertTR(ts[i].TRs[0]) table.Adjust(isSite, codeSite) } break } } } } } //删除尾部空白行 func (table *Table) deleteTrimTr() { for len(table.TRs) > 0 { npos := len(table.TRs) tailTR := table.TRs[npos-1] //最后一个tr,取最后一行 bspace := true for _, v := range tailTR.TDs { if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 { bspace = false break } } //删除尾部空行,是空行的话就删除 if bspace { table.TRs = table.TRs[:npos-1] } else { break } } } //校对表格 func (table *Table) Adjust(isSite bool, codeSite string) { //计算行列起止位置,跨行跨列处理 
table.ComputeRowColSpan() // for k1, tr := range table.TRs { // for k2, td := range tr.TDs { // qutil.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol) // } // } //大概计算每个起止行列的概率 table.GetKeyRation() /* for k, v := range table.StartAndEndRation { for k1, v1 := range v.Poss { bs, _ := json.Marshal(v1) str := "" for _, td := range v.Tdmap[v1] { str += "__" + td.Val + fmt.Sprintf("%d_%d_%d_%d", td.StartRow, td.EndRow, td.StartCol, td.EndCol) } qutil.Debug(k, k1, string(bs), v.Rationmap[v1], str) } } */ //u.Debug("tdnum:", num, table.RowNum, table.ColNum) //是否是规则的表格,单元各个数=行数*列数 table.Brule = table.TDNum == table.RowNum*table.ColNum count := 0 for _, trs := range table.TRs { for _, td := range trs.TDs { if td.BH { count++ } } } if float32(count)/float32(table.TDNum) < 0.85 { //精确计算起止行列是表头的概率 table.ComputeRowColIsKeyRation(isSite, codeSite) bhead := false L: for i, tr := range table.TRs { for _, td := range tr.TDs { if td.BH { //qutil.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1) if i == len(table.TRs)-1 && len(table.TRs[len(table.TRs)-1].TDs) == 2 { res, _, _, _, _ := CheckCommon(td.Val, "abandontable") if res { //删除此行 table.TRs = table.TRs[:len(table.TRs)-1] table.Adjust(isSite, codeSite) return } } bhead = true break L } } } table.BHeader = bhead } } //计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数 func (table *Table) ComputeRowColSpan() { n := 0 //td总个数 mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值 for k, v := range table.TRs { n += len(v.TDs) //每行的td总数相加 nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0 ball := true rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan for k1, v1 := range v.TDs { if k1 == 0 && k == 0 { table.TRs[k].TDs[k1].MustBH = true table.TRs[k].TDs[k1].BH = true } if v1.Rowspan != rowspans { ball = false break } } for _, v1 := range v.TDs { if ball { v1.Rowspan = 1 } mc := mapRC[k] for { if mc != nil && mc[nk] > 0 { nk += mc[nk] } else { break } } v1.StartCol = nk nk += 
v1.Colspan - 1
			v1.EndCol = nk
			if nk >= table.ColNum {
				table.ColNum = nk + 1
			}
			nk++
			v1.StartRow = k
			v1.EndRow = k + v1.Rowspan - 1
			// Register the cell under its column-range key.
			ck := fmtkey("c", v1.StartCol, v1.EndCol)
			tdcs := table.StartAndEndRation[ck]
			if tdcs == nil {
				tdcs = NewTDRationScope(ck)
				table.StartAndEndRation[ck] = tdcs
				table.StartAndEndRationKSort.AddKey(ck, 1)
			}
			tdcs.Addtd(v1)
			// Register the cell under its row-range key.
			rk := fmtkey("r", v1.StartRow, v1.EndRow)
			tdrs := table.StartAndEndRation[rk]
			if tdrs == nil {
				tdrs = NewTDRationScope(rk)
				table.StartAndEndRation[rk] = tdrs
				table.StartAndEndRationKSort.AddKey(rk, 1)
			}
			tdrs.Addtd(v1)
			if v1.Rowspan > 1 {
				// Remember, for each following row covered by this cell, that
				// its start column is occupied for Colspan columns.
				for i := 1; i < v1.Rowspan; i++ {
					r := k + i
					if r < len(table.TRs) {
						mc := mapRC[r]
						if mc == nil {
							mc = map[int]int{}
						}
						mc[v1.StartCol] = v1.Colspan
						mapRC[r] = mc
					}
				}
			}
		}
	}
	table.TDNum = n               // total number of cells
	table.RowNum = len(table.TRs) // total number of rows
}

// fmtkey builds the range key "<t>_<start>_<end>", e.g. fmtkey("c", 0, 2) == "c_0_2".
func fmtkey(t string, start, end int) string {
	return fmt.Sprintf("%s_%d_%d", t, start, end)
}

// FindTag fills table.Tag from table.TableResult.BlockTag when the table has
// no tag yet; an existing tag is left untouched.
func (table *Table) FindTag() {
	// Use the table's own tag when present, otherwise fall back to the block tag.
	if table.Tag != "" {
		return
	}
	if table.Tag == "" {
		table.Tag = table.TableResult.BlockTag
	}
	//u.Debug(table.Tag)
}

// GetKeyRation recomputes, for every r/c_start_end key and every entry of its
// Poss list, the fraction of cells in Tdmap that are flagged BH and stores it
// in Rationmap.
//
// NOTE(review): an empty Tdmap entry yields 0/0 (NaN) - presumably Poss
// entries always have at least one cell; verify against Addtd.
func (table *Table) GetKeyRation() {
	for _, vn := range table.StartAndEndRationKSort.Keys {
		v := table.StartAndEndRation[vn]
		for _, v1 := range v.Poss {
			count := 0
			n := 0
			for _, td := range v.Tdmap[v1] {
				n++
				if td.BH {
					count++
				}
			}
			v.Rationmap[v1] = float32(count) / float32(n)
		}
	}
}

// ComputeRowColIsKeyRation refines which rows/columns act as headers, using
// the ratios produced by GetKeyRation, and sets BH/KeyDirect/KVDirect on the
// affected cells. It finishes by calling GetKeyRation again, setting
// table.BFirstRow, and extracting key/values from rows made of a single
// full-width cell.
func (table *Table) ComputeRowColIsKeyRation(isSite bool, codeSite string) {
	// Added restriction on rowspan correction.
	// u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum)
	bkeyfirstrow := false
	bkeyfirstcol := false
	if table.Brule { // regular table: no rowspan/colspan
		checkCompute := map[string]bool{}
		for k, tr := range table.TRs {
			rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow)
			if k == 0 { // ratio of the first row
				ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol)
				//u.Debug(table.BFirstRow, "--", table.StartAndEndRation[rk], table.StartAndEndRation[ck])
				ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
				ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0])
				if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { // first row holds the keys
					bkeyfirstrow = true
					ball := true
					for _, td := range tr.TDs {
						// A money-like value in the first row means it is not a header row.
						if MoneyReg.MatchString(td.Val) {
							bkeyfirstrow = false
							ball = false
							td.BH = false
							break
						}
					}
					for _, td := range tr.TDs {
						if ball {
							//td.BH = true
							td.KeyDirect = 1
							td.KVDirect = 2
						}
					}
				} else if ration2 > 0.55 { // first column holds the keys
					bkeyfirstcol = true
					if !checkCompute[ck] {
						checkCompute[ck] = true
						// Reset the first column.
						for _, tr1 := range table.TRs {
							for _, td1 := range tr1.TDs {
								if td1.StartCol == 0 {
									if !MoneyReg.MatchString(td1.Val) {
										//td1.BH = true
										td1.KeyDirect = 2
										td1.KVDirect = 1
									}
								}
							}
						}
					}
				}
				if !bkeyfirstrow && !bkeyfirstcol {
					if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 {
						bkeyfirstrow = true
						for _, td := range tr.TDs {
							if !MoneyReg.MatchString(td.Val) {
								//td.BH = true
								td.KeyDirect = 1
								td.KVDirect = 2
							}
						}
					} else if tr.Table.ColNum > 1 && ration2 > 0.5 {
						bkeyfirstcol = true
						if !checkCompute[ck] {
							checkCompute[ck] = true
							// Reset the first column.
							for _, tr1 := range table.TRs {
								for _, td1 := range tr1.TDs {
									if td1.StartCol == 0 {
										if !MoneyReg.MatchString(td1.Val) {
											td1.BH = true
											td1.KeyDirect = 2
											td1.KVDirect = 1
										}
									}
								}
							}
						}
					}
				}
			} else {
				if bkeyfirstrow { // the first row is the header: clear header flags on later rows
					ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
					if k == 1 || ration1 < checkval {
						for _, td := range tr.TDs {
							if !td.MustBH {
								td.BH = false
								td.KeyDirect = 0
								td.KVDirect = 0
							}
						}
					} //else {for _, td := range tr.TDs {}}
				} else { // columns drive the layout
					if bkeyfirstcol {
						for _, td := range tr.TDs {
							ck := fmtkey("c", td.StartCol, td.EndCol)
							ration1, _ := table.StartAndEndRation[ck].GetTDRation(td)
							if !checkCompute[ck] {
								checkCompute[ck] = true
								if ration1 >= checkval && td.ColPos != 1 {
									// The whole column looks like keys: flag every non-money cell in it.
									for _, tr1 := range table.TRs {
										for _, td1 := range tr1.TDs {
											if td1.StartCol == td.StartCol {
												if !MoneyReg.MatchString(td1.Val) {
													td1.BH = true
													td1.KeyDirect = 2
													td1.KVDirect = 1
												}
											}
										}
									}
								} else {
									// Otherwise clear the header flags in that column,
									// skipping the first row and each row's first cell.
									for _, tr1 := range table.TRs[1:] {
										for _, td1 := range tr1.TDs[1:] {
											if td1.StartCol == td.StartCol && !td1.MustBH {
												td1.BH = false
												td1.KeyDirect = 0
												td1.KVDirect = 0
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}
	//qutil.Debug("table.Brule", table.Brule, !bkeyfirstcol && !bkeyfirstrow)
	if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) {
		// Broken-run problem: cells share a row or column but the run is cut
		// by a row-spanning cell, so the table direction needs adjusting.
		for _, k := range table.StartAndEndRationKSort.Keys {
			v := table.StartAndEndRation[k]
			// Judge by the dominant direction; iterating the sorted key list
			// (instead of the map) keeps the outcome deterministic.
			k1 := k[:1]
			for _, v2 := range v.Poss {
				lentds := len(v.Tdmap[v2])
				if v.Rationmap[v2] > checkval {
					for _, td := range v.Tdmap[v2] {
						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) && !GSReg.MatchString(td.Val) {
							if k1 == "r" {
								ck := fmtkey("c", td.StartCol, td.EndCol)
								rt := table.StartAndEndRation[ck]
								//clen := 0
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									//clen = len(tdn)
								}
								if lentds > 1 {
									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
										td.KeyDirect = 1
										td.KVDirect = 2
										//td.BH = true
									}
								}
							} else {
								ck := fmtkey("r", td.StartRow, td.EndRow)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								//clen := 0
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									//clen = len(tdn)
								}
								if lentds > 1 {
									if td.Valtype != "NOHEAD" && utf8.RuneCountInString(td.Val) < 15 && ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
										td.KeyDirect = 2
										td.KVDirect = 1
										td.BH = true
									}
								}
							}
						} else {
							break
						}
					}
				} else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 {
					for _, td := range v.Tdmap[v2] {
						// u.Debug(td.Val, "-----", td.BH)
						if td.KeyDirect == 0 && td.BH && !td.MustBH {
							if k1 == "r" {
								ck := fmtkey("c", td.StartCol, td.EndCol)
								rt := table.StartAndEndRation[ck]
								clen := 0
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									clen = len(tdn)
								}
								if lentds >= clen && lentds > 1 {
									if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
										td.BH = false
									}
								}
							} else {
								ck := fmtkey("r", td.StartRow, td.EndRow)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								clen := 0
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									clen = len(tdn)
								}
								if lentds >= clen && lentds > 1 {
									if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
										td.BH = false
									}
								}
							}
						} else {
							break
						}
					}
				}
			}
		}
	}
	table.GetKeyRation()
	if len(table.TRs) > 0 && len(table.TRs[0].TDs) > 0 {
		t0 := table.TRs[0].TDs[0]
		key := fmtkey("r", t0.StartRow, t0.EndRow)
		r, t := table.StartAndEndRation[key].GetTDRation(t0)
		if r > 0.9 && len(t) > 1 {
			table.BFirstRow = true
		}
		for k, tr := range table.TRs {
			// A row made of one cell spanning the full table width is not a
			// header; try to extract key/values from its text instead.
			if len(tr.TDs) == 1 && tr.TDs[0].StartCol == 0 && tr.TDs[0].EndCol+1 == table.ColNum {
				tr.TDs[0].BH = false
				tr.TDs[0].KVDirect = 0
				sv := FindKv(tr.TDs[0].Val, "", 2)
				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2, isSite, codeSite)
				for k, v := range resm {
					sv.AddKey(k, v)
				}
				if len(sv.Keys) > 0 {
					for _, v1 := range sv.Keys {
						if tr.TDs[0].SortKV.Map[v1] == nil {
							table.SortKV.AddKey(v1, sv.Map[v1])
						}
					}
				} else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 {
					// No key/values found: a long first row becomes the table tag.
					table.Tag = tr.TDs[0].Val
				}
			}
		}
	}
}

// FindKV extracts the table's key/values into table.SortKV.
//
// With a header (table.BHeader) every non-header cell is resolved through
// FindTdVal; without one the table is read column-wise.
func (table *Table) FindKV(isSite bool, codeSite string) {
	// A table made only of keys is not searched.
	if table.BHeader { // true as soon as one cell is a key
		direct := If(table.BFirstRow, 2, 1).(int) // key/value lookup direction; 2 = look upward
		vdirect := If(direct == 2, 1, 2).(int)
		// Controls skipping rows after a full-width "discard" row.
		bcon := false
		// Table-block check added; only block-level packages are checked.
		// Direction of the winner ordering:
		//bodirect := 0
		// Running number of the winner ordering:
		//sort := 1
		nextdirect, nextvdirect := 0, 0
		// Start extracting.
		// When the first row is all headers, temporarily let the second row
		// also look up its left neighbour (added by zhengkun).
		tb_first_allhead := false
		for tr_index, tr := range table.TRs {
			if tr_index==6 {
				//fmt.Println("调试指定tr")
			}
			bcon = trSingleColumn(tr, bcon, table) // single-cell row: decide whether to discard what follows
			if bcon {
				continue
			}
			if tr.TDs[0].StartRow >= 0 {
				numbh := 0
				for _, td := range tr.TDs {
					//log.Println(tr_index,kkk,td.Val)
					if td.BH {
						numbh++
					}
				}
				if numbh != 0 && numbh == len(tr.TDs) { // sample id 5e0d53ef0cf41612e0640495
					if tr_index==0 {
						tb_first_allhead = true
					}
					nextdirect, nextvdirect = 2, 1
					continue
				} else if nextdirect > 0 && nextvdirect > 0 {
					direct, vdirect = 2, 1
				} else if
numbh > 0 && numbh <= len(tr.TDs)/2 {
					direct, vdirect = 1, 2
				} else {
					direct, vdirect = 2, 1
				}
			}
			for _, td := range tr.TDs {
				if !td.BH && td.KVDirect < 3 {
					if !table.FindTdVal(td, direct, vdirect) { // FindTdVal stores its result in table.SortKV
						if !table.FindTdVal(td, vdirect, direct) {
							//// When neither direction matches, handle first/second bid-winner candidates:
							//bo, res := GetBidOrder(td, bodirect, sort)
							//if res {
							//	sort++
							//	bodirect = bo
							//}
							//if len(td.SortKV.Map) > 0 {
							//	for _, tdv := range td.SortKV.Keys {
							//		if tdv == "" || td.SortKV.Map[tdv] == "" { // empty values are not added to table.SortKV
							//			continue
							//		}
							//		table.SortKV.AddKey(tdv, td.SortKV.Map[tdv])
							//	}
							//}
						}
					}
					if tb_first_allhead && tr_index==1 { // temporary: let the second row also match to the left
						if !table.FindTdVal(td, 1, 2) { // FindTdVal stores its result in table.SortKV
							if !table.FindTdVal(td, vdirect, direct) {
							}
						}
						// Only the first eligible cell of the second row gets this extra lookup.
						tb_first_allhead = false
					}
					//fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect)
				}
			}
			nextdirect, nextvdirect = 0, 0
		}
		//qutil.Debug("FindKV", table.SortKV.Map)
	} else if len(table.TRs) > 0 { // table without a header: assume a vertical layout
		res := initLongitudinalData(table) // build the per-column value arrays
		// Split values further, e.g. the second column of
		// http://www.ggzy.hi.gov.cn/cgzbgg/16553.jhtml holds several values.
		nmapkeys := []int{}
		nmap := map[int][]*u.Kv{}
	L:
		for _, r1 := range res {
			for n, r := range r1 {
				if len([]rune(r)) < 60 { // only split values shorter than 60 runes
					//res1, _ := GetKVAll(r, "", nil)
					res1, _ := colonkvEntity.entrance(r, "", nil, 2, isSite, codeSite)
					if res1 != nil {
						nmap[n] = res1
						nmapkeys = append(nmapkeys, n)
						/**
						// cut the matched part off the string
						for _k1, _ := range res1 {
							r = regexp.MustCompile(_k1+".*").ReplaceAllString(r, "")
						}
						r1[n] = r
						res[pos] = r1
						**/
					} else if nmap[n] != nil { // keep the position with an empty value
						nmap[n] = append(nmap[n], &u.Kv{})
					}
				} else {
					// One long value disables the splitting altogether.
					nmap = nil
					nmapkeys = nil
					break L
				}
			}
		}
		// Regroup the split values by key.
		if len(nmap) > 0 {
			kmapkeys := []string{}
			kmap := map[string][]string{}
			for _, mk := range nmapkeys { // entries recorded for the same position n
				for pos, m1 := range nmap[mk] {
					k, v := m1.Key, m1.Value
					kv := kmap[k]
					if kv == nil {
						kv = []string{}
					}
					kv = append(kv, v)
					kmap[k] = kv
					kmapkeys = append(kmapkeys, k)
					// NOTE(review): kmapkeys is appended to while being ranged
					// over; the range expression is evaluated once, so the loop
					// ends, but keys may be duplicated in kmapkeys.
					for _, k := range kmapkeys {
						arr := kmap[k]
						if len(arr) < pos {
							arr = append(arr, "")
							kmap[k] = arr
							kmapkeys = append(kmapkeys, k)
						}
					}
				}
			}
			if len(kmap) > 0 {
				for _, k := range kmapkeys {
					if len(kmap[k]) == 1 {
						table.SortKV.AddKey(k, kmap[k][0])
					} else if len(kmap[k]) > 1 {
						table.SortKV.AddKey(k, kmap[k])
					}
				}
			}
		}
		//=================
		// Classify each column by its first value and store it in the map.
		for _, arr := range res {
			if len(arr) > 0 {
				v1 := arr[0]
				_, _, _, _, repl := CheckCommon(v1, "con")
				if repl == "ENT" {
					table.SortKV.AddKey("中标单位", arr)
					continue
				} else if repl == "BO" {
					table.SortKV.AddKey("排名", arr)
					continue
				}
			}
		}
	}
	//qutil.Debug("Table-FindKV", table.SortKV.Map)
}

// initLongitudinalData builds the column-wise view of a table: one string
// slice per cell of the first row, holding for every row the Val of the cell
// found by GetTdByRCNo at (row's first-cell StartRow, that column's StartCol),
// or "" when no cell covers the position.
//
// NOTE(review): reads table.TRs[0] and tr.TDs[0] unconditionally; the visible
// caller only guarantees len(table.TRs) > 0.
func initLongitudinalData(table *Table) [][]string {
	res := make([][]string, len(table.TRs[0].TDs)) // one slot per cell of the first row
	for n, _ := range res {
		res[n] = []string{}
	}
	for _, tr := range table.TRs {
		for n, td := range table.TRs[0].TDs { // every cell of the first row
			td1 := table.GetTdByRCNo(tr.TDs[0].StartRow, td.StartCol) // cell at this row/column, if any
			if td1 != nil {
				res[n] = append(res[n], td1.Val)
			} else {
				res[n] = append(res[n], "")
			}
		}
	}
	return res
}

// trSingleColumn decides whether the rows following a single-cell row should
// be discarded.
//
// For a row with more than one cell the incoming bcon is returned unchanged.
// For a single-cell row the result is true only when the cell spans the whole
// table width, its text is longer than 4 and shorter than 50 runes, and
// CheckCommon(td.Val, "abandontable") reports true; otherwise it is false.
func trSingleColumn(tr *TR, bcon bool, table *Table) bool {
	if len(tr.TDs) == 1 {
		bcon = false
		td := tr.TDs[0]
		if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 4 && len([]rune(td.Val)) < 50 {
			res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
			if res { // everything below is discarded
				bcon = true
			}
		}
	}
	return bcon
}

// GetBidOrder collects bid-winner ordering entries for a cell whose Valtype is
// "BO" and appends them to the table's SortKV under NullTxtBid.
//
// direct: 0 default, 1 horizontal, 2 vertical. n is passed to GetBidSort as
// the fallback rank. It returns the direction that matched in the
// single-cell case (d: 1 = company found below, 2 = company found to the
// right) and whether any entry was recorded (res).
func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
	if td.Valtype != "BO" {
		return
	}
	if td.Rowspan > 1 {
		// The ordering cell spans several rows: scan to the right in each of them.
		for i := 0; i < td.Rowspan; i++ {
			nextcol := 1
		L1:
			for {
				vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.EndCol+nextcol)
				if vtd == nil {
					break L1
				}
				nextcol += vtd.Colspan
				if filter_zbdw_v2.MatchString(vtd.Val) {
					arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
					if arrbo == nil {
						arrbo = []map[string]interface{}{}
						td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
					}
					a1 := arrbo.([]map[string]interface{})
					a1 = append(a1, map[string]interface{}{
						"entname": vtd.Val,
						"sortstr": td.Val,
						"sort": GetBidSort(td.Val, n),
					})
					res = true
td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
				}
			}
		}
	} else if td.Colspan > 1 {
		// The ordering cell spans several columns: scan the rows below it.
		for i := 1; i < td.Colspan; i++ {
			nextcol := 0
		L2:
			for {
				vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.StartCol+nextcol)
				if vtd == nil || vtd.Colspan >= td.Colspan {
					break L2
				}
				nextcol += vtd.Colspan
				if filter_zbdw_v2.MatchString(vtd.Val) {
					arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
					if arrbo == nil {
						arrbo = []map[string]interface{}{}
						td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
					}
					a1 := arrbo.([]map[string]interface{})
					a1 = append(a1, map[string]interface{}{
						"entname": vtd.Val,
						"sortstr": td.Val,
						"sort": GetBidSort(td.Val, n),
					})
					res = true
					td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
				}
			}
		}
	} else {
		rtd := td.TR.Table.GetTdByRCNo(td.StartRow, td.EndCol+1) // cell to the right
		btd := td.TR.Table.GetTdByRCNo(td.EndRow+1, td.StartCol) // cell below
		//if ((rtd != nil && !rtd.BH && rtd.Valtype == "BO") || direct == 1) && btd != nil && filter_zbdw_v.MatchString(btd.Val) {
		if ((rtd != nil && !rtd.BH) || direct == 1) && btd != nil && filter_zbdw_v2.MatchString(btd.Val) {
			d = 1
			arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
			if arrbo == nil {
				arrbo = []map[string]interface{}{}
				td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
			}
			a1 := arrbo.([]map[string]interface{})
			a1 = append(a1, map[string]interface{}{
				"entname": btd.Val,
				"sortstr": td.Val,
				"sort": GetBidSort(td.Val, n),
			})
			res = true
			td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
			//} else if ((btd != nil && !btd.BH && btd.Valtype == "BO") || direct == 2) && rtd != nil && filter_zbdw_v.MatchString(rtd.Val) {
		} else if ((btd != nil && !btd.BH) || direct == 2) && rtd != nil && filter_zbdw_v2.MatchString(rtd.Val) {
			d = 2
			arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
			if arrbo == nil {
				arrbo = []map[string]interface{}{}
				td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
			}
			a1 := arrbo.([]map[string]interface{})
			a1 = append(a1, map[string]interface{}{
				"entname": rtd.Val,
				"sortstr": td.Val,
				"sort": GetBidSort(td.Val, n),
			})
			res = true
			td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
		}
	}
	return
}

// GetBidSort returns the rank expressed by str: 1 when str contains "首选",
// otherwise whatever winnerOrderEntity.toNumber(str, n) yields.
func GetBidSort(str string, n int) int {
	val := n
	if strings.Index(str, "首选") > -1 {
		val = 1
	} else {
		val = winnerOrderEntity.toNumber(str, n)
	}
	return val
}

// cleardwReg matches a bracketed unit annotation such as "(元/个)" so it can be
// stripped from a cell value.
var cleardwReg *regexp.Regexp = regexp.MustCompile("[((]{1}\\d*[人元件个公斤户]/[人元件个公斤户][))]")

// zbhxrReg matches header texts that name a bid-winner candidate or bidder.
var zbhxrReg *regexp.Regexp = regexp.MustCompile("(中标候选人|投标单位名称)")

// FindTdVal resolves the header of td in the given direction (via FindNear)
// and records the resulting key/value in table.SortKV.
//
// direct is the direction passed to FindNear (1 = left neighbour, 2 = upper
// neighbour); vdirect is the key direction the header must be compatible
// with. It returns true when a usable header was found and the value stored.
// Cells whose Val is empty or only whitespace are ignored.
func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
	if td.Val == "" || strings.TrimSpace(td.Val) == "" {
		return
	}
	near := table.FindNear(td, direct)
	if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
		near.KVDirect = direct
		near.KeyDirect = vdirect
		td.KVDirect = direct
		key := repSpace.ReplaceAllString(near.Val, "")
		if key == "名称" && near.StartCol == 0 && near.Rowspan > 0 {
			// A bare "名称" header is made specific by the cells of row Rowspan-1.
			// NOTE(review): table.TRs is indexed by near.Rowspan-1 without a
			// bounds check - confirm Rowspan never exceeds len(table.TRs).
			for _, vn := range table.TRs[near.Rowspan-1].TDs {
				if strings.Contains(vn.Val, "代理") {
					key = "代理机构"
					break
				} else if strings.Contains(vn.Val, "招标") {
					key = "采购单位"
					break
				} else if strings.Contains(vn.Val, "中标") {
					key = "中标单位"
					break
				}
			}
		} else if zbhxrReg.MatchString(key) && findCandidate2.MatchString(td.Val) {
			key = "中标单位"
		} else if key == "单位名称" {
			// Prefix "单位名称" with the header above it, or else the one to its left.
			tmpnewnear := table.FindNear(near, 2)
			if tmpnewnear != nil {
				if tmpnewnear.MustBH || tmpnewnear.BH {
					key = tmpnewnear.Val + near.Val
				}
			} else {
				tmpnewnear = table.FindNear(near, 1)
				if tmpnewnear != nil {
					if tmpnewnear.MustBH || tmpnewnear.BH {
						key = tmpnewnear.Val + near.Val
					}
				}
			}
		}
		if near.Val == "" {
			// Empty header: use a positional key instead.
			key = fmtkey("k", near.TR.RowPos, near.ColPos)
		}
		val := table.SortKV.Map[key]
		//qutil.Debug("====================", "key:", key, "val:", val)
		bthiskey := false
		if val != nil {
			curpos := table.SortKV.Index[key]
			thistr := table.kTD[curpos]
			if thistr != near {
				if strings.TrimSpace(near.Val) == "名称" && near.TR != nil && len(near.TR.TDs) > 0 && near.ColPos-1 >= 0 {
					rv := near.TR.TDs[near.ColPos-1].Val
					if near.ColPos > 0 && (strings.Contains(rv, "招标") || strings.Contains(rv, "代理") || strings.Contains(rv, "采购") || strings.Contains(rv, "中标")) {
						near = near.TR.TDs[near.ColPos-1]
					}
				} else {
					bthiskey = true
				}
			} else {
				bthiskey = true
			}
		}
		bfind := false
		barr := false
		varrpos := -1
		if bthiskey { // the key already has a value: handle array values and merged rows/columns through kvscope
			pos := table.SortKV.Index[key]
			mval := table.kvscope[pos]
			bvalfind := false
			if direct == 1 { // key/value laid out horizontally
			L1:
				for k3, v3 := range mval {
					for _, v4 := range v3 {
						if v4.EndRow+1 == td.StartRow && v4.EndCol == td.EndCol {
							varrpos = k3
							bvalfind = true
							break L1
						}
					}
				}
			} else { // key/value laid out vertically
			L2:
				for k3, v3 := range mval {
					for _, v4 := range v3 {
						if v4.EndCol+1 == td.StartCol && v4.EndRow == td.EndRow {
							varrpos = k3
							bvalfind = true
							break L2
						}
					}
				}
			}
			if vals, ok := val.([]string); ok {
				if near.Val == "" {
					// With an empty header, check whether every stored value
					// is non-empty and matches NullTdReg.
					bn := false
					for _, vs := range vals {
						if vs != "" && NullTdReg.MatchString(vs) {
							bn = true
						} else {
							bn = false
							break
						}
					}
					if bn {
						near.Val = NullTxtBid
						key = NullTxtBid
						bfind = true
					}
				}
				if bvalfind && varrpos > -1 && len(vals) > varrpos {
					tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
					if tmapval == "" {
						vals = append(vals, td.Val) // accumulate
					} else {
						vals = append(vals, tmapval) // accumulate
					}
					val = vals
					//vals[varrpos] = td.Val // += "__" + td.Val
				} else {
					// Drop empty entries while appending.
					newVals := []string{}
					for _, isval := range vals {
						if isval == "" {
							continue
						}
						newVals = append(newVals, isval)
					}
					//vals = append(vals, td.Val)
					if td.Val != "" {
						newVals = append(newVals, td.Val)
					}
					val = newVals
					// NOTE(review): computed from vals, not newVals - confirm intended.
					varrpos = len(vals) - 1
				}
			} else if vals, ok := val.(string); ok && vals != "" && td.Val != "" {
				tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(vals, ""))// the value already stored for this key
				tmapvaltd := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
				if bvalfind {
					//if tmapvaltd == "" {
					//	val = td.Val //vals + "__" + td.Val
					//} else {
					//	val = tmapvaltd
					//}
					if key=="中标单位" {
						// The winner must not be overwritten.
					}else {
						if tmapvaltd == "" {
							val = td.Val //vals + "__" + td.Val
						} else {
							val = tmapvaltd
						}
					}
				} else{
					if key=="中标单位" {
						// The winner must not be turned into an array.
					}else {
						tval := []string{}
						if tmapval == "" {
							tval = append(tval, vals)
						} else {
tval = append(tval, tmapval)
						}
						if tmapvaltd == "" {
							tval = append(tval, td.Val)
						} else {
							tval = append(tval, tmapvaltd)
						}
						val = tval
						varrpos = 1
					}
				}
			}
			barr = true
		} else {
			if td.Val != "" {
				tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
				if tmapval == "" {
					val = td.Val
				} else {
					val = tmapval
				}
			} else if len(near.SortKV.Map) == 1 && near.SortKV.Map[near.Val] != "" {
				val = near.SortKV.Map[near.Val]
			}
		}
		td.HeadTd = near
		if bfind {
			tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
			table.SortKV.ReplaceKey(key, val, tkey)
		} else {
			if key == "单位名称" && len(near.TR.TDs) > 1 {
				if near.TR.TDs[0].Val != "序号" {
					key = near.TR.TDs[0].Val
				}
			}
			table.SortKV.AddKey(key, val)
			pos := table.SortKV.Index[key]
			if barr {
				mval := table.kvscope[pos]
				if mval != nil {
					tds := mval[varrpos]
					if tds != nil {
						tds = append(tds, td)
					} else {
						tds = []*TD{td}
					}
					if varrpos > -1 {
						mval[varrpos] = tds
						table.kvscope[pos] = mval
					}
				}
			} else {
				table.kvscope[pos] = map[int][]*TD{
					0: []*TD{td},
				}
				table.kTD[pos] = near
			}
		}
		b = true
	}
	return
}

// FindNear returns the header cell that governs td in the given direction,
// or nil when none is found.
//
// direct == 1 looks for the left neighbour: a cell whose row range covers
// td's and whose EndCol+1 equals td.StartCol, searching rows from td's own
// row upwards. direct == 2 looks for the upper neighbour: a cell whose column
// range covers td's and whose EndRow+1 equals td.StartRow, searching the rows
// above td from nearest to farthest. Any other direct value, or a td in the
// first column (direct 1) / first row (direct 2), yields nil.
//
// When the neighbour is itself a header (BH) it is returned; otherwise the
// neighbour's own HeadTd may be inherited, subject to the conditions below.
func (table *Table) FindNear(td *TD, direct int) *TD {
	switch {
	case direct == 1 && td.StartCol > 0:
		// Left neighbour.
		rows := table.TRs[:td.TR.RowPos+1]
		for i := len(rows) - 1; i > -1; i-- {
			for _, td1 := range rows[i].TDs {
				if td1.StartRow > td.StartRow || td1.EndRow < td.EndRow || td1.EndCol+1 != td.StartCol {
					continue
				}
				// Found the left neighbour.
				if td1.BH {
					return td1
				}
				if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
					return td1.HeadTd
				}
			}
		}
	case direct == 2 && td.StartRow > 0:
		// Upper neighbour.
		rows := table.TRs[:td.TR.RowPos]
		for i := len(rows) - 1; i > -1; i-- {
			tds := rows[i].TDs
			for it, td1 := range tds {
				if td1.StartCol > td.StartCol || td1.EndCol < td.EndCol || td1.EndRow+1 != td.StartRow {
					continue
				}
				// Found the upper neighbour.
				switch {
				case td1.BH:
					return td1
				case len(tds) == len(td.TR.TDs) && td1.HeadTd != nil && td1.HeadTd.KVDirect == direct:
					return td1.HeadTd
				case it > 0 && td1.Val == "" && td1.TR.TopTR == nil &&
					// Explicit bounds on both sides before indexing td's row.
					td.StartCol > 0 && td.StartCol-1 < len(td.TR.TDs) &&
					strings.Contains(td.TR.TDs[td.StartCol-1].Val, "中标候选人"):
					return tds[it-1]
				case td1.HeadTd != nil && td1.HeadTd.KVDirect == direct &&
					// Inherit the header only from a cell of identical shape.
					// The original compared td.Rowspan with itself, which was
					// always true and made the rowspan check a no-op.
					td.Colspan == td1.Colspan && td.Rowspan == td1.Rowspan:
					return td1.HeadTd
				}
			}
		}
	}
	return nil
}

// GetTdByRCNo returns the first cell (in row order, then cell order) whose
// row and column ranges both contain the given coordinates, or nil when no
// cell covers them.
func (tn *Table) GetTdByRCNo(row, col int) *TD {
	for _, tr := range tn.TRs {
		for _, td := range tr.TDs {
			if td.StartCol <= col && td.EndCol >= col && td.StartRow <= row && td.EndRow >= row {
				return td
			}
		}
	}
	return nil
}

// CheckMultiPackageByTable decides whether the table describes several
// packages (lots) and, if so, fills tn.BlockPackage.
//
// It returns b == true when the score exceeds 4 and at least one package
// index was found, together with the normalised package indexes.
func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool, index []string) {
	pac := 0             // number of packages
	val := 0             // score
	index = []string{}   // package labels collected from the keys and values of tn.SortKV
	index_pos := []int{} // position of each label
	// Array values for which a lot-like hint can be found.
	//arr_count := 0 // counted the array values of table.SortKV; unused afterwards
	key_index := -1
	hasPkgTd := map[string]bool{}
	// Initialise the data this method needs.
	key_index, index, index_pos, val, pac, hasPkgTd = initCheckMultiPackageByTable(tn, key_index, index, index_pos, val, pac, hasPkgTd)
	// Case where the key itself names a package:
	// values recorded per key.
	commonKeyVals := map[string][]string{}
	// Number of occurrences per key.
	keyExistsCount := map[string]int{}
	if pac > 1 {
		val = 10
	} else {
		// Score by the table tag.
		if TableMultiPackageReg_4.MatchString(tn.Tag) {
			val += 4
		} else if TableMultiPackageReg_2.MatchString(tn.Tag) {
			val += 4
		}
		// Decide by the keys of table.SortKV first, then by the values.
		val, index, index_pos = foundPacBySortKV(tn, val, index, index_pos, &keyExistsCount, &commonKeyVals, key_index, hasPkgTd)
	}
	// u.Debug(index)
	// Remove duplicates and normalise.
standIndex := []string{}
	standIndex_pos := []int{}
	oldIndex := []string{} // original (un-normalised) package labels
	brepeat := map[string]bool{}
	for k, v := range index {
		v = u.PackageNumberConvert(v)
		if !brepeat[v] {
			brepeat[v] = true
			standIndex = append(standIndex, v)
			standIndex_pos = append(standIndex_pos, index_pos[k])
			oldIndex = append(oldIndex, index[k])
		}
	}
	index = standIndex
	// With more than one package, a key is treated as belonging to the
	// packages only if it occurred at least twice.
	if len(commonKeyVals) > 0 {
		for k, v := range commonKeyVals {
			if len(index) > 1 && keyExistsCount[k] < 2 {
				continue
			}
			tn.SortKV.AddKey(k, v)
		}
	}
	//
	isGoonNext := false
	if val > 4 && len(brepeat) > 0 {
		b = true
		// Parse the packages.
		if b {
			tn.BPackage = true
			//pnum := len(index)
			// Add one entry to tn.BlockPackage per package index.
			for nk, v := range index {
				if tn.BlockPackage.Map[v] == nil {
					kv := u.NewJobKv()
					for tnk, tnv := range tn.StandKV {
						if nk >= len(tnv) {
							continue
						} else if len(index) == len(tnv) {
							// Special case for one site: the per-package budget is
							// only taken over when the budget values differ.
							if tnk=="预算"&& codeSite=="ha_zmdszfcgw_cgxx" && len(tnv)>1{
								isEqErr,budget_v := false,""
								for bk,bv:=range tnv {
									if bk==0 {
										budget_v = bv.Value
									}else {
										if budget_v != bv.Value {
											isEqErr = true
											break
										}
									}
								}
								if isEqErr {
									kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
								}
							}else {
								kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
							}
						}
					}
					//kv.KvTags = tn.StandKV
					bp := &u.BlockPackage{}
					bp.Index = v             // normalised number (digits or letters only)
					bp.Origin = oldIndex[nk] // original package label
					bp.TableKV = kv          // key/values assigned to this package
					bp.Name = v
					if bp.TableKV != nil && bp.TableKV.KvTags != nil && len(bp.TableKV.KvTags) > 0 {
						for kc, cv := range bp.TableKV.KvTags {
							if kc == "预算" && bp.Budget <= 0 {
								moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
								if len(moneys) > 0 {
									if vf, ok := moneys[0].(float64); ok {
										bp.Budget = vf
										bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
									} else if vi, ok := moneys[0].(int); ok {
										bp.Budget = float64(vi)
										bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
									}
								}
							} else if kc == "中标金额" && bp.Bidamount <= 0 {
								moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
								if len(moneys) > 0 {
									if vf, ok := moneys[0].(float64); ok
{ bp.Bidamount = vf bp.IsTrueBidamount = moneys[len(moneys)-1].(bool) } else if vi, ok := moneys[0].(int); ok { bp.Bidamount = float64(vi) bp.IsTrueBidamount = moneys[len(moneys)-1].(bool) } } } else if kc == "中标单位" && bp.Winner == "" { bp.Winner = cv[0].Value } //拼接内容 if !excludeKey.MatchString(kc) { bp.Text += fmt.Sprintf("%v:%v\n", kc, cv[0].Value) } } } tn.BlockPackage.AddKey(v, bp) //table子包数组 } } isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos, isSite, codeSite) //多包处理,处理不同情况下的分包 } } else { isGoonNext = true } if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法 tn.isGoonNext(isSite, codeSite) } //查找分包中的中标人排序 if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 { for _, v := range tn.BlockPackage.Keys { vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage) if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) { vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2, isSite, codeSite) } } } return } //多包处理,处理不同情况下的分包 func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int, isSite bool, codeSite string) (isGoonNext bool) { if len(index) == 1 { //是一个的情况 if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行 beq := true for _, v2 := range tn.SortKV.Keys { if _, ok := tn.SortKV.Map[v2].(string); !ok { beq = false break } } if beq { //统一处理为数组 td := tn.GetTdByRCNo(tn.RowNum-1, 0) if !td.BH && FindVal2_1.MatchString(td.Val) { for _, v2 := range tn.SortKV.Keys { tn.SortKV.AddKey(v2, []string{tn.SortKV.Map[v2].(string)}) } } else { //没有处理成数组的情况下,继续调用正文查找分包的方法 isGoonNext = true } } } } for _, k1 := range tn.SortKV.Keys { v1 := tn.SortKV.Map[k1] var v1_arr []string if vtmpv1, ok := v1.(string); ok { v1_arr = PreCon4.FindAllString(qutil.ObjToString(vtmpv1), -1) if len(v1_arr) > 0 { if dw := Precon4dw.FindString(vtmpv1); dw != "" { for i, v := range v1_arr { v1_arr[i] = v + dw } } } } else if vtmpv1s, ok := v1.([]string); ok { v1_arr = vtmpv1s } if len(v1_arr) 
> 0 && len(v1_arr) <= len(index) { //table.SortKV.Map.value数组小于等于分包index for k, v := range v1_arr { tn.assemblePackage(k1, v, index[k], isSite, codeSite) //组装解析到的分包 } } } return isGoonNext } //没有处理成数组的情况下,继续调用正文查找分包的方法 func (tn *Table) isGoonNext(isSite bool, codeSite string) { blockPackage := map[string]*u.BlockPackage{} for _, k := range tn.SortKV.Keys { if excludeKey.MatchString(k) || strings.Contains(k, "批复") || excludeKey3.MatchString(k) { continue } str := "" //拼装为冒号kv v := tn.SortKV.Map[k] nk := regReplAllSpace.ReplaceAllString(k, "") if vs, ok := v.([]string); ok { str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " ")) } else { str += fmt.Sprintf("%s:%s\n", nk, v) } if excludeKey2.MatchString(str) { continue } b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false, isSite, codeSite) //分块之后分包 if b && len(blockPackage) > 0 { tn.BPackage = true for mk, mv := range blockPackage { if tn.BlockPackage.Map[mk] == nil { tn.BlockPackage.AddKey(mk, mv) } else { bp := tn.BlockPackage.Map[mk].(*u.BlockPackage) if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } if bp.SpaceKV == nil { bp.SpaceKV = u.NewJobKv() } for k2, v2 := range mv.ColonKV.KvTags { for _, v2v := range v2 { isExists := false for _, v2vv := range bp.TableKV.KvTags[k2] { if v2v.Value == v2vv.Value { isExists = true break } } if !isExists { bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v) bp.Text += fmt.Sprintf("%v:%v\n", k2, v2) } } } for k2, v2 := range mv.SpaceKV.KvTags { for _, v2v := range v2 { isExists := false for _, v2vv := range bp.SpaceKV.KvTags[k2] { if v2v.Value == v2vv.Value { isExists = true break } } if !isExists { bp.SpaceKV.KvTags[k2] = append(bp.SpaceKV.KvTags[k2], v2v) bp.Text += fmt.Sprintf("%v:%v\n", k2, v2) } } } } } tn.BPackage = true tn.SortKV.RemoveKey(k) } } } //根据table.SortKV的key判断是否分包,如果没有再根据value判断 func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index 
int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
	keyIsPkg := false
	for in, k := range tn.SortKV.Keys {
		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) || excludeKey3.MatchString(k) || regFJWarap.MatchString(k) || regAZWarap.MatchString(k) { // keys excluded before the package check
			continue
		}
		v := tn.SortKV.Map[k]
		// Case where the key itself names a package.
		if ismatch := FindVal_1.MatchString(k); keyIsPkg || ismatch {
			if ismatch {
				keyIsPkg = true
				val += 4
				pkgFlag := FindVal_1.FindString(k) // the package label inside the key
				k = strings.Replace(k, pkgFlag, "", -1)
				index = append(index, pkgFlag)
				index_pos = append(index_pos, len(index))
				val += 1
				//pac++
			} else {
				// Once a package key has been seen, later keys are grouped
				// under their name without trailing underscores.
				k = strings.TrimRight(k, "_")
			}
			(*keyExistsCount)[k] = (*keyExistsCount)[k] + 1
			(*commonKeyVals)[k] = append((*commonKeyVals)[k], qutil.ObjToString(v))
		} else if k1 := FilterKey_2.ReplaceAllString(k, ""); FindKey_2.MatchString(k1) {
			val += 4
			// Packages listed in an array value.
			if vs, bvs1 := v.([]string); bvs1 {
			L:
				for in2, v1 := range vs {
					if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
						// A value matching one of the block's title patterns ends the scan.
						for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
							if serial.MatchString(v1) {
								break L
							}
						}
						if key_index == -1 {
							key_index = in
						} else if key_index != in {
							break
						}
						index = append(index, v1)
						index_pos = append(index_pos, in2)
						val += 1
						//pac++
					}
				}
			} else if v1, ok := v.(string); ok && !hasPkgTd[k] {
				// Packages listed in a string value.
				v1 = replPkgConfusion(v1) // replace words that could be mistaken for package labels
				for _, v2 := range strings.Split(v1, "/") {
					if len([]rune(v2)) < 20 && !moneyNum.MatchString(v2) && FindVal2_1.MatchString(v2) {
						key_index = in
						index = append(index, v1)
						index_pos = append(index_pos, 0)
						val += 1
						//pac++
						// Also collect the values stored under the same key with
						// "_", "__", ... appended, until a suffixed key is missing.
						underline := ""
						for {
							underline += "_"
							if tn.SortKV.Map[k+underline] == nil {
								break
							} else if v3, v2_ok := tn.SortKV.Map[k+underline].(string); v2_ok && v3 != "" {
								index = append(index, v3)
								index_pos = append(index_pos, 1)
							} else if v3, v2_ok := tn.SortKV.Map[k+underline].([]string); v2_ok {
								for v2_k, v2_v := range v3 {
									index = append(index, v2_v)
									index_pos = append(index_pos, v2_k+1)
								}
							}
						}
						break
					}
				}
			}
			// Only the first matching key is used, except that a "标段" key
			// which produced no label lets the scan continue.
			if k1=="标段" && len(index)==0 {
				continue
			}else {
				break
			}
		}
	}
	return val, index, index_pos
}

// initCheckMultiPackageByTable prepares the data CheckMultiPackageByTable needs.
func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
	for in, k := range tn.SortKV.Keys {
		// Skip keys matched by excludeKey (after removing bracketed text) or
		// excludeKey3, and keys containing "批复".
		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) || excludeKey3.MatchString(k) || strings.Contains(k, "批复") {
			continue
		}
		v := tn.SortKV.Map[k]
		if vs, bvs := v.([]string); bvs {
			//arr_count++
			haspkgs := []string{}
			for in2, v1 := range vs {
				v1 = replPkgConfusion(v1) // replace words that could be mistaken for package labels
				if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
					if key_index == -1 {
						key_index = in
					} else if key_index != in {
						break
					}
					index = append(index, FindVal_1.FindString(v1))
					index_pos = append(index_pos, in2)
					val += 1
					pac++
				} else if FindKey_3.MatchString(k) { // sample id 5db2a101a5cb26b9b73054ac
					index = append(index, v1)
					index_pos = append(index_pos, in2)
					val += 1
					pac++
				} else {
					if ok, v1new := isHasOnePkgAndNoKv(v1); ok { // the cell value holds exactly one package and no colon key/value
						haspkgs = append(haspkgs, v1new)
					}
				}
			}
			/* Handle this case: