package pretreated import ( "fmt" u "jy/util" qutil "qfw/util" "regexp" "strings" "github.com/PuerkitoBio/goquery" ) /** 全局变量,主要是一堆判断正则 **/ var ( //清理品目中数字 numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+") //清理表格title中的不需要的内容 tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/((人民币万元件个公斤))]") //清理表格中是key中包含的空格或数字等 tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]") //清理表格td中的符号 tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、,。、_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*") //判断key是金额,对万元的处理 moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)") //根据表格的内容判断是不是表头,如果含有金额则不是表头 MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$") //判断分包时 moneyNum = regexp.MustCompile("[元整¥万]") //对隐藏表格的判断 display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*") //--------------- //求是分包的概率 //根据表格的标签对分包进行打分 TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)") TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)") //在判断分包打分前过虑表格key FilterKey_2 = regexp.MustCompile("招标|投标|项目") //根据表格的key进行分包打分 FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数])") //对值进行分包判断 FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)") FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$") //判断分包前排除 excludeKey = regexp.MustCompile("(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分 //------------- cut = u.NewCut() //清理表格标签正则 ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$") //查找表格标签正则 ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$") //判断表格是表头的概率 checkval = float32(0.6) //tdval_reg = regexp.MustCompile(`([\p{Han}][\p{Han}\s、()\\(\\)]{1,9})[::]([^::\\n。]{5,60})(?:[;;,,.。\\n\\t\\s])?`) //空格替换 
repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+") //对表格kv的处理 //对不能标准化的key做批识 filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)") //中标金额 //包含以下字眼做标准化处理 filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$") //简单判断金额 filter_zbje_jd = regexp.MustCompile("^[^售]{0,4}(价|额).{0,4}$") //且排队以下字眼的key filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分$") //且值包含以下字眼 filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}") //中标单位的处理 //包含以下字眼的Key标准化 filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$") //简单判断 filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$") //且不包含以下字眼 filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址") //且值包含以下字眼 filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)") //且值包含以下字眼 filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$") //Tg = map[string]interface{}{} //一些表格没有表头,是空的,对值是排序的做处理对应 NullTxBid NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))") NullTxtBid = "成交供应商排名" projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$") MhSpilt = regexp.MustCompile("[::]") //识别采购单位联系人、联系电话、代理机构联系人、联系电话 ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|(征求意见|报名审核购买)?((联系人?|办公)?((电话([//]传真)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|(项目)?经办)人)|采购方代表") ContactInfoMustReg = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$") ContactType = map[string]*regexp.Regexp{ "采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|发布人?|甲|招标(服务)?|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人"), "代理机构": regexp.MustCompile("(代理|受托).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"), } ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$") MultipleValueSplitReg = 
regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]") BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"} FilterSerial = regexp.MustCompile(".+[、..::,]") filterTableWror = regexp.MustCompile("班子成员") underline = regexp.MustCompile("_+$") iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果)") nswinnertabletag = regexp.MustCompile("[评得分估]+") ) //在解析时,判断表格元素是否隐藏 func IsHide(g *goquery.Selection) (b bool) { style, exists := g.Attr("style") if exists { b = display.MatchString(style) } return } //对表格的key进行标准化处理,多个k相同时,出现覆盖问题 //待扩展,暂不支持正则标签库 func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string, weight []int, v1, returntag string, b bool) { k1 = []string{} weight = []int{} tk := k if sv, sok := v.(string); sok { //取KV v1 = sv } else if sv, sok := v.([]string); sok { //是数组先默认取第一个 v1 = sv[0] } //对值单位的处理 if moneyreg.MatchString(tk) { v1 += GetMoneyUnit(tk, v1) } //先清理key //u.Debug(1, k, v1) k = ClearKey(k, 2) //u.Debug(2, k) //取标准key res := u.GetTags(k) if len(res) == 0 && tk != k { res = u.GetTags(tk) } //log.Println(k, res) // if len(res) == 0 { // go u.AddtoNoMatchMap(tk) // } //当取到标准化值时,放入数组 if len(res) > 0 { b = true for _, t1 := range res { k1 = append(k1, t1.Value) weight = append(weight, t1.Weight) } //k1 = res[0].Value } //没有取到标准化key时,对中标金额和中标单位的逻辑处理 if !b { if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) { if tabletag == "" { returntag = "中标情况" } k1 = append(k1, "中标金额") weight = append(weight, -100) b = true } else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) && filter_zbdw_v.MatchString(v1) { k1 = append(k1, "中标单位") weight = append(weight, -100) if tabletag == "" { returntag = "中标情况" } b = true } } //对上一步没有取到标准化key的进一步处理 if !b { if tabletag == "" { } if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) { //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1)) if filter_zbje_jd.MatchString(k) && 
!filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) { k1 = append(k1, "中标金额") weight = append(weight, -100) b = true } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) { k1 = append(k1, "中标单位") weight = append(weight, -100) b = true }*/ } } return } //对解析后的表格的kv进行过滤 func (table *Table) KVFilter() { //1.标准化值查找 //2.对数组的处理 //3.对分包的处理 //4.对KV的处理 //判断表格是否有用,调用abandontable正则数组进行判断 //遍历每一行 winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签 if !winnertag { winnertag = iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签 } for _, tr := range table.TRs { for _, td := range tr.TDs { //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map) bc := false if !td.BH { //表头是否是无用内容 if td.HeadTd != nil { bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable") } } if !bc { //td元素有内嵌kv,遍历放入table的Kv中 if len(td.SortKV.Keys) > 0 { for _, k3 := range td.SortKV.Keys { _val := td.SortKV.Map[k3] //thisFlag := false if td.HeadTd != nil && len([]rune(k3)) < 4 { k3 = td.HeadTd.Val + k3 } if table.SortKV.Map[k3] == nil { //u.Debug(k3, _val) //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) { table.SortKV.AddKey(k3, _val) } } } } //td有子表格的处理 //u.Debug(td.BH, td.Val, td.SonTableResult) if td.SonTableResult != nil { //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs) for _, k3 := range td.SonTableResult.SortKV.Keys { if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] { table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3]) table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3] } } //中标候选人排序 if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 { table.WinnerOrder = td.SonTableResult.WinnerOrder } else { winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder) } } } } as := NewSortMap() //表格描述处理,对成交结果的处理 for _, k := range table.SortKV.Keys 
{ if regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序)").MatchString(k) { table.Desc += "成交结果," } } //遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理 for _, k := range table.SortKV.Keys { if regexp.MustCompile("^单价").MatchString(k) { continue } v := table.SortKV.Map[k] if _, ok := v.(string); ok { k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "") k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //qutil.Debug(k, v, k1, w1, v1, tag, b) if b { //降低冒号值的权重 if MhSpilt.MatchString(v1) { for pos, _ := range k1 { w1[pos] -= 50 } } if tag != "" && table.Tag == "" { table.Tag = tag } for pos, k2 := range k1 { if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] { table.StandKV[k2] = v1 //本节点 table.StandKVWeight[k2] = w1[pos] } // else if k2 == "中标金额" { // // u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2])) // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) { // table.StandKV[k2] = v1 // } // } } } } else { //u.Debug(k, v, "---------") as.AddKey(k, v) } } //处理值是数组的kv放入标准化kv中 checkKey := map[int]bool{} for kn, k := range as.Keys { v := as.Map[k] if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid { if table.WinnerOrder == nil { table.WinnerOrder = []map[string]interface{}{} } table.WinnerOrder = append(table.WinnerOrder, vm...) 
} else { //增加候选人排序逻辑 if table.WinnerOrder == nil && !checkKey[kn] { if vs1, ok := v.([]string); ok { smap := make([]map[string]interface{}, len(vs1)) for n1, _ := range vs1 { smap[n1] = map[string]interface{}{} } //hadSort := false tmpEntname := make([]string, len(vs1)) tmpPrice := make([]string, len(vs1)) for kn1, k := range as.Keys[kn:] { v := as.Map[k] if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) { continue } //目前对数组数据的key做判断,但是某些额可以是不满足情况的 //载明内容:[第一中标候选人 第二中标候选人] id:5d00587da5cb26b9b75e367b if vs, ok := v.([]string); ok && len(vs) == len(vs1) { //数组值的个数相同 res, _, _, _, repl := CheckCommon(k, "bidorder") kv := "" if !res { kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), "")) if kt.Len() > 0 { kv = kt[0].Value } } //qutil.Debug(k, res, repl, kv, "--", vs) if !res && kv == "" { //key未验证出,验证数组的val值 checkKey[kn+kn1] = true if winnertag { //如果是中标信息 在根据val数组信息解析候选人 for vsk, vsv := range vs { if NullTdReg.MatchString(vsv) { //数据先验证val是否有排序 //hadSort = true smap[vsk]["sortstr"] = vsv smap[vsk]["sort"] = GetBidSort(vsv, vsk+1) } else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" { //数据验证val是否是候选人 entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string) if entname != "" { tmpEntname[vsk] = entname } } else { //验证val时如果数组中的第一条数据既不满足sort或者entname 判定此数组数据错误 break } } } } if res || kv != "" { //连续往下找几个key checkKey[kn+kn1] = true SORT: if repl == "sort" { //hadSort = true for vsk, vsv := range vs { smap[vsk]["sortstr"] = vsv smap[vsk]["sort"] = GetBidSort(vsv, vsk+1) } } else if repl == "entname" || kv == "中标单位" { for vsk, vsv := range vs { if winnerReg6.MatchString(vsv) { //k:中标候选人 v:["第一名","第二名"] repl = "sort" goto SORT } // if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 { // break // } // entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string) // if entname != "" { // smap[vsk]["entname"] = entname // if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 { //排除 
单位:["台","个","套"] break } entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string) if entname != "" { tmpEntname[vsk] = entname } } } else if kv == "中标金额" { for vsk, vsv := range vs { //过滤price 2348273.432元(万元)-->2348273.432 //tmp1, _ := smap[vsk]["price"].(string) tmp1 := tmpPrice[vsk] p1num := numberReg2.FindString(tmp1) p2num := numberReg2.FindString(vsv) p1 := qutil.Float64All(p1num) p2 := qutil.Float64All(p2num) if p2 > p1 { //smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv)) price := winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv)) if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 { tmpPrice[vsk] = pricestr } } } } } } else { //break } } newSmap := []map[string]interface{}{} //qutil.Debug("smap=======", smap) //qutil.Debug("tmpEntname--", len(tmpEntname), tmpEntname) //qutil.Debug("tmpPrice--", len(tmpPrice), tmpPrice) for n, smap_v := range smap { //if hadSort { //有排序,再添加entname和price if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" { smap_v["entname"] = tmpEntname[n] if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" { smap_v["price"] = tmpPrice[n] } } //} else if len(tmpEntname) > 0 { //fmt.Println("table winnerorder only has entname", tmpEntname) //} if len(smap_v) > 2 { //只有排序信息 sort和sortstr newSmap = append(newSmap, smap_v) } } if len(newSmap) > 0 { table.WinnerOrder = newSmap } } } k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) if b { if tag != "" && table.Tag == "" { table.Tag = tag } for pos, k2 := range k1 { if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] { table.StandKV[k2] = v1 //本节点 table.StandKVWeight[k2] = w1[pos] } // else if k2 == "中标金额" { // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) { // table.StandKV[k2] = v1 // } // } } } } } // if filterTableWror.MatchString(table.Tag) { table.WinnerOrder = nil } // if len(table.WinnerOrder) > 0 || !table.BPackage { winnerOrder := 
[]map[string]interface{}{} maxSort := 0 //调整顺序 for i := 0; i < 2; i++ { for _, v := range table.WinnerOrder { sortstr, _ := v["sortstr"].(string) if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") { continue } sort, _ := v["sort"].(int) if i == 0 { if maxSort == 0 || sort > maxSort { maxSort = sort } } else { maxSort++ v["sort"] = maxSort } winnerOrder = append(winnerOrder, v) } if len(winnerOrder) == len(table.WinnerOrder) { break } } table.WinnerOrder = winnerOrder winnerOrder = []map[string]interface{}{} L: for _, tr := range table.TRs { for _, td := range tr.TDs { winnerOrder = winnerOrderEntity.Find(td.Val, true, 3) if len(winnerOrder) > 0 { break L } } } if len(table.WinnerOrder) > 0 { //中标候选人合并 winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder) if table.StandKV["中标单位"] == "" { ent := table.WinnerOrder[0]["entname"] if ent != nil { table.StandKV["中标单位"], _ = ent.(string) table.StandKVWeight["中标单位"] = -25 } } } else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder if len(winnerOrder) > 1 { table.WinnerOrder = winnerOrder } } } //对中标候选人进行排序 winnerOrderEntity.Order(table.WinnerOrder) //该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面 if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 { if table.BlockPackage.Map != nil { onePkgKey := table.BlockPackage.Keys[0] onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage) if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 { onePkg.WinnerOrder = table.WinnerOrder table.BlockPackage.Map[onePkgKey] = onePkg } } } } //表格结果合并到父表格集中 func (table *Table) MergerToTableresult() { //对多包表格的多包值的合并处理 if table.BPackage { table.TableResult.IsMultiPackage = true for k, v := range table.BlockPackage.Map { package1 := table.TableResult.PackageMap.Map[k] if package1 == nil { table.TableResult.PackageMap.AddKey(k, v) } else { bp := package1.(*u.BlockPackage) if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } v1 := 
v.(*u.BlockPackage) if v1.TableKV != nil && v1.TableKV.Kv != nil { for k2, v2 := range v1.TableKV.Kv { if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } if bp.TableKV.Kv[k2] == "" || (v1.TableKV.KvTag[k2] != nil && bp.TableKV.KvTag[k2] != nil && v1.TableKV.KvTag[k2].Weight > bp.TableKV.KvTag[k2].Weight) { //可能会报错 assignment to entry in nil map bp.TableKV.Kv[k2] = v2 bp.Text += fmt.Sprintf("%v:%v\n", k2, v2) } } } bp.WinnerOrder = v1.WinnerOrder //table.TableResult.PackageMap.AddKey(k, v) } } // str := "" // for _, k := range table.TableResult.PackageMap.Keys { // v := table.TableResult.PackageMap.Map[k].(*u.BlockPackage) // str += fmt.Sprintf("包号:%s,中标人:%s,中标价:%s,预算:%s,文本:%s,排名:%v ---\t", v.Index, v.TableKV["中标单位"]+v.ColonKV["中标单位"], v.TableKV["中标金额"]+v.ColonKV["中标金额"], v.TableKV["预算"]+v.ColonKV["预算"], v.Text, v.WinnerOrder) // } // u.Debug(table, table.TableResult, str) } //遍历标准key到tableresult.sortkv中 for k, v := range table.StandKV { if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] || strings.Contains(table.Tag, "变更") { v = strings.Replace(v, "__", "", -1) if table.TableResult.SortKV.Map[k] == nil { table.TableResult.SortKV.AddKey(k, v) //父集 } else { table.TableResult.SortKV.ReplaceKey(k, v, k) } table.TableResult.SortKVWeight[k] = table.StandKVWeight[k] } else if table.TableResult.SortKV.Map[k] != nil { //u.Debug(k, v, table.TableResult.SortKV.Map[k], "..............") } } //表格的块标签 if table.TableResult.BlockTag == "" && table.Tag != "" { table.TableResult.BlockTag = table.Tag } //中标候选人(多个table,现在默认取第一个table的信息,考虑需不需要多个table分析合并数据?) 
if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 { table.TableResult.WinnerOrder = table.WinnerOrder } //增加brand 并列table if len(table.BrandData) > 0 { for _, v := range table.BrandData { if len(v) > 0 { table.TableResult.BrandData = append(table.TableResult.BrandData, v) } } } } /** 解析表格入口 返回:汇总表格对象 **/ func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}) (tabres *TableResult) { defer qutil.Catch() //u.Debug(con) if itype == 1 { //修复表格 con = RepairCon(con) } //生成tableresult对象 tabres = NewTableResult(_id, toptype, blockTag, con, itype) //可以有多个table for _, table := range tabs { //隐藏表格跳过 if IsHide(table) { continue } tabres.GoqueryTabs = append(tabres.GoqueryTabs, table) } //解析表格集 tabres.Analy() return } //开始解析表格集 func (ts *TableResult) Analy() { tabs := []*Table{} contactFormat := &u.ContactFormat{ IndexMap: map[int]string{}, MatchMap: map[string]map[string]bool{}, } for _, table := range ts.GoqueryTabs { tn := NewTable(ts.Html, ts, table) //核心模块 ts := tn.Analy(contactFormat) for _, tab := range ts { tabs = append(tabs, tab) //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys) } //tn.SonTables = append(tn.SonTables, tn) } //统一合并,考虑统一多表格是多包的情况---新增 if len(tabs) > 1 { pns := map[string]string{} pnarr := []string{} for _, table := range tabs { pn := table.StandKV["项目名称"] if pn != "" && TitleReg.MatchString(pn) { pnarr = append(pnarr, pn) matchres := TitleReg.FindAllStringSubmatch(pn, -1) if len(matchres) == 1 && len(matchres[0]) > 0 { v1 := u.PackageNumberConvert(matchres[0][0]) pns[v1] = matchres[0][0] bp := &u.BlockPackage{} bp.Index = v1 bp.Origin = matchres[0][0] bp.TableKV = u.NewJobKv() for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} { bp.TableKV.Kv[k] = table.StandKV[k] } bp.WinnerOrder = table.WinnerOrder if table.BlockPackage.Map[v1] == nil { table.BPackage = true table.BlockPackage.AddKey(v1, bp) } } } } if len(tabs) == len(pns) { //多个表格,每个表格都是一个分包 
http://www.cxzwfw.gov.cn/info/1009/6963.htm //项目名称、项目编号、采购单位、招标机构、预算 pname := projectnameReg.ReplaceAllString(pnarr[0], "") btrue := true for _, pn := range pnarr[1:] { pn = projectnameReg.ReplaceAllString(pn, "") //u.Debug(pn, pname) if pn != pname { //项目名称不一致 btrue = false break } } if btrue { ts.SortKV.AddKey("项目名称", pname) ts.SortKVWeight["项目名称"] = 100 for _, table := range tabs { table.BPackage = true //预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态 if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 1 { bp := table.BlockPackage.Map[table.BlockPackage.Keys[0]].(*u.BlockPackage) if table.TableResult.WinnerOrder != nil { bp.WinnerOrder = table.WinnerOrder } if bp != nil && table.StandKV != nil { if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} { bp.TableKV.Kv[k] = table.StandKV[k] if nk < 4 { delete(table.StandKV, k) } } } } } } } } for _, table := range tabs { table.MergerToTableresult() // for k, v := range table.TableResult.SortKV.Map { // qutil.Debug(k, "=====", v) // } } } //解析表格 func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table { //查找表体中的tr对象 trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr") if trs.Size() == 0 { trs = table.Goquery.ChildrenFiltered("tr") } //num := 0 //遍历tr数组 trs.Each(func(n int, sel *goquery.Selection) { //隐藏行不处理 if IsHide(sel) { return } //遍历每行的td tds := sel.ChildrenFiltered("td,th") TR := NewTR(table) tdTextIsNull := true tds.Each(func(m int, selm *goquery.Selection) { //对隐藏列不处理!!! 
if IsHide(selm) { return } //进入每一个单元格 td := NewTD(selm, TR, table) //num++ TR.AddTD(td) if td.Val != "" { //删除一个tr,tr中所有td是空值的 tdTextIsNull = false } }) //tr中所有td的内容为空 将tr删除 if !tdTextIsNull { table.AddTR(TR) } }) //重置行列 table.ComputeRowColSpan() // for n, tr := range table.TRs { // for m, td := range tr.TDs { // qutil.Debug(td.BH, n, m, td.Text, td.StartRow, td.EndRow, td.StartCol, td.EndCol) // } // } tm := []map[string]interface{}{} tmk := map[string]bool{} tmn := map[int]map[string]interface{}{} for rownum, tr := range table.TRs { if len(tr.TDs) == 1 && table.ColNum > 1 { td := tr.TDs[0] if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 1 && len([]rune(td.Val)) < 50 { con, m1, b := CheckMultiPackage(td.Val, "") if b { for k, _ := range m1 { numstr := u.PackageNumberConvert(k) m2 := map[string]interface{}{ "tag": con, //"num": numstr, //"numtxt": v[0], "startrow": rownum, } tmk[numstr] = true tmn[rownum] = m2 tm = append(tm, m2) break } } } } } //拆表 ts := []*Table{} if len(tmk) > 1 && len(tmk) == len(tm) { var tab1 *Table for rownum, tr := range table.TRs { if tab1 == nil { tab1 = NewTable("", table.TableResult, table.Goquery) tab1.BSplit = true if tmn[rownum] != nil { tab1.StandKV["项目名称"] = tmn[rownum]["tag"].(string) tab1.StandKVWeight["项目名称"] = -100 } ts = append(ts, tab1) } if tmn[rownum] != nil { tab1.Tag = tmn[rownum]["tag"].(string) } else { tab1.AddTR(tr) } if tmn[rownum+1] != nil { tab1 = nil } } } else { ts = append(ts, table) } for n, table := range ts { if len(table.TRs) > 0 { //删除尾部空行 for len(table.TRs) > 0 { npos := len(table.TRs) tailTR := table.TRs[npos-1] //最后一个tr bspace := true for _, v := range tailTR.TDs { if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 { bspace = false break } } if bspace { table.TRs = table.TRs[:npos-1] } else { break } } //table.Print() //校对表格 table.Adjust() //查找表格的标签 table.FindTag() //log.Println(table.TableResult.Id, table.Html) //分割表格 if table.BSplit { if !table.BHeader && n 
> 0 { for i := n - 1; i > -1; i-- { if ts[i].BHeader { if ts[i].BFirstRow { //取第一行插入到 table.InsertTR(ts[i].TRs[0]) table.Adjust() } break } } } } //对没有表头表格的处理 _, _, b := CheckMultiPackage(table.Tag, "") if b { table.StandKV["项目名称"] = table.Tag table.StandKVWeight["项目名称"] = -100 } table.TdContactFormat(contactFormat) //开始查找kv,核心模块 table.FindKV() //table中抽取品牌 if u.IsBrandGoods { table.analyBrand() } //判断是否是多包,并处理分包的 table.CheckMultiPackageByTable() str := "\n" for k, v := range table.StandKV { str += fmt.Sprintf("_==___%s:%v\n", k, v) if table.TableResult.SortKV.Map[k] == nil { table.TableResult.SortKV.AddKey(k, v) table.TableResult.SortKVWeight[k] = table.StandKVWeight[k] } } res, _, _, _, _ := CheckCommon(table.Tag, "abandontable") if !res { //过滤、标准化、合并kv table.KVFilter() } str = "\n" for k, v := range table.StandKV { str += fmt.Sprintf("_____%s:%v\n", k, v) if table.TableResult.SortKV.Map[k] == nil { table.TableResult.SortKV.AddKey(k, v) table.TableResult.SortKVWeight[k] = table.StandKVWeight[k] } } //u.Debug(str) } } return ts } func (table *Table) Adjust() { table.TDNum = func() int { n := 0 for _, tr := range table.TRs { n += len(tr.TDs) } return n }() //有多少行 table.RowNum = len(table.TRs) // for k1, tr := range table.TRs { // for k2, td := range tr.TDs { // qutil.Debug(k1, k2, td.Val, td.Rowspan, td.Colspan, td.ColPos, tr.RowPos) // } // } //计算行列起止位置,跨行跨列处理 table.ComputeRowColSpan() // for k1, tr := range table.TRs { // for k2, td := range tr.TDs { // qutil.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol) // } // } //大概计算每个起止行列的概率 table.GetKeyRation() /* for k, v := range table.StartAndEndRation { for k1, v1 := range v.Poss { bs, _ := json.Marshal(v1) str := "" for _, td := range v.Tdmap[v1] { str += "__" + td.Val + fmt.Sprintf("%d_%d_%d_%d", td.StartRow, td.EndRow, td.StartCol, td.EndCol) } qutil.Debug(k, k1, string(bs), v.Rationmap[v1], str) } } */ //u.Debug("tdnum:", num, table.RowNum, table.ColNum) //是否是规则的表格,单元各个数=行数*列数 table.Brule = 
table.TDNum == table.RowNum*table.ColNum count := 0 for _, trs := range table.TRs { for _, td := range trs.TDs { if td.BH { count++ } } } if float32(count)/float32(table.TDNum) < 0.85 { //精确计算起止行列是表头的概率 table.ComputeRowColIsKeyRation() bhead := false L: for i, tr := range table.TRs { for _, td := range tr.TDs { if td.BH { //qutil.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1) if i == len(table.TRs)-1 && len(table.TRs[len(table.TRs)-1].TDs) == 2 { res, _, _, _, _ := CheckCommon(td.Val, "abandontable") if res { //删除此行 table.TRs = table.TRs[:len(table.TRs)-1] table.Adjust() return } } bhead = true break L } } } table.BHeader = bhead } } //计算行/列表格的结束位置 StartRow=0 EndRow=0 func (table *Table) ComputeRowColSpan() { mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值 for k, v := range table.TRs { nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0 ball := true rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan for _, v1 := range v.TDs { if v1.Rowspan != rowspans { ball = false break } } for _, v1 := range v.TDs { if ball { v1.Rowspan = 1 } mc := mapRC[k] for { if mc != nil && mc[nk] > 0 { nk += mc[nk] } else { break } } v1.StartCol = nk nk += v1.Colspan - 1 v1.EndCol = nk if nk >= table.ColNum { table.ColNum = nk + 1 } nk++ v1.StartRow = k v1.EndRow = k + v1.Rowspan - 1 ck := fmtkey("c", v1.StartCol, v1.EndCol) tdcs := table.StartAndEndRation[ck] if tdcs == nil { tdcs = NewTDRationScope(ck) table.StartAndEndRation[ck] = tdcs table.StartAndEndRationKSort.AddKey(ck, 1) } tdcs.Addtd(v1) rk := fmtkey("r", v1.StartRow, v1.EndRow) tdrs := table.StartAndEndRation[rk] if tdrs == nil { tdrs = NewTDRationScope(rk) table.StartAndEndRation[rk] = tdrs table.StartAndEndRationKSort.AddKey(rk, 1) } tdrs.Addtd(v1) if v1.Rowspan > 1 { for i := 1; i < v1.Rowspan; i++ { r := k + i if r < len(table.TRs) { mc := mapRC[r] if mc == nil { mc = map[int]int{} } mc[v1.StartCol] = v1.Colspan mapRC[r] = mc } } } } } } func fmtkey(t string, start, end int) string { return 
fmt.Sprintf("%s_%d_%d", t, start, end) } func (table *Table) FindTag() { //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断 if table.Tag != "" { return } t1, _ := table.Goquery.OuterHtml() html := table.Html pos := strings.Index(html, t1) if pos <= 0 { doc, _ := goquery.NewDocumentFromReader(strings.NewReader(table.Html)) html, _ = doc.Html() pos = strings.Index(html, t1) } //u.Debug("--------", t1, "====\n\n\n\n=====", html) if pos > 0 { tcon := html[:pos] tcon = cut.ClearHtml(tcon) tcon = ClearTagReg.ReplaceAllString(tcon, "") //u.Debug(pos, "-----------", tcon) strs := ttagreg.FindStringSubmatch(tcon) if len(strs) > 0 { table.Tag = strs[0] //u.Debug(table.Tag) } } if table.Tag == "" { table.Tag = table.TableResult.BlockTag } //u.Debug(table.Tag) } //计算r/c_start_end的概率 func (table *Table) GetKeyRation() { for _, vn := range table.StartAndEndRationKSort.Keys { v := table.StartAndEndRation[vn] for _, v1 := range v.Poss { count := 0 n := 0 for _, td := range v.Tdmap[v1] { n++ if td.BH { count++ } } v.Rationmap[v1] = float32(count) / float32(n) } } } //计算行列是表头的概率调用GetKeyRation func (table *Table) ComputeRowColIsKeyRation() { //增加对跨行校正限止 // u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum) bkeyfirstrow := false bkeyfirstcol := false if table.Brule { //不存在跨行跨列的情况,规则表格 checkCompute := map[string]bool{} for k, tr := range table.TRs { rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow) if k == 0 { //第1行的概率 ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol) //u.Debug(table.BFirstRow, "--", table.StartAndEndRation[rk], table.StartAndEndRation[ck]) ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0]) ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0]) if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { //第一行为key bkeyfirstrow = true ball := true for _, td := range tr.TDs { if MoneyReg.MatchString(td.Val) { bkeyfirstrow = false ball = false td.BH = false break } } for _, td := range tr.TDs { if ball { td.BH = true 
td.KeyDirect = 1 td.KVDirect = 2 } } } else if ration2 > 0.55 { //第1列 bkeyfirstcol = true if !checkCompute[ck] { checkCompute[ck] = true //重置第1列 for _, tr1 := range table.TRs { for _, td1 := range tr1.TDs { if td1.StartCol == 0 { if !MoneyReg.MatchString(td1.Val) { td1.BH = true td1.KeyDirect = 2 td1.KVDirect = 1 } } } } } } if !bkeyfirstrow && !bkeyfirstcol { if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 { bkeyfirstrow = true for _, td := range tr.TDs { if !MoneyReg.MatchString(td.Val) { td.BH = true td.KeyDirect = 1 td.KVDirect = 2 } } } else if tr.Table.ColNum > 1 && ration2 > 0.5 { bkeyfirstcol = true if !checkCompute[ck] { checkCompute[ck] = true //重置第1列 for _, tr1 := range table.TRs { for _, td1 := range tr1.TDs { if td1.StartCol == 0 { if !MoneyReg.MatchString(td1.Val) { td1.BH = true td1.KeyDirect = 2 td1.KVDirect = 1 } } } } } } } } else { if bkeyfirstrow { //第一列的概率 ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0]) if k == 1 || ration1 < checkval { for _, td := range tr.TDs { if !td.MustBH { td.BH = false td.KeyDirect = 0 td.KVDirect = 0 } } } //else {for _, td := range tr.TDs {}} } else { //列在起作用 if bkeyfirstcol { for _, td := range tr.TDs { ck := fmtkey("c", td.StartCol, td.EndCol) ration1, _ := table.StartAndEndRation[ck].GetTDRation(td) if !checkCompute[ck] { checkCompute[ck] = true if ration1 >= checkval && td.ColPos != 1 { for _, tr1 := range table.TRs { for _, td1 := range tr1.TDs { if td1.StartCol == td.StartCol { if !MoneyReg.MatchString(td1.Val) { td1.BH = true td1.KeyDirect = 2 td1.KVDirect = 1 } } } } } else { for _, tr1 := range table.TRs[1:] { for _, td1 := range tr1.TDs[1:] { if td1.StartCol == td.StartCol && !td1.MustBH { td1.BH = false td1.KeyDirect = 0 td1.KVDirect = 0 } } } } } } } } } } } //qutil.Debug("table.Brule", table.Brule, !bkeyfirstcol && !bkeyfirstrow) if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) { //断行问题,虽然同列或同行,但中间被跨行截断,表格方向调整 for _, k := range table.StartAndEndRationKSort.Keys { v := 
table.StartAndEndRation[k] //横向判断,要判断最多的方向,否则会出现不定的情况(map遍历问题) k1 := k[:1] for _, v2 := range v.Poss { lentds := len(v.Tdmap[v2]) if v.Rationmap[v2] > checkval { for _, td := range v.Tdmap[v2] { if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) { if k1 == "r" { ck := fmtkey("c", td.StartCol, td.EndCol) rt := table.StartAndEndRation[ck] //clen := 0 var fv float32 var tdn []*TD if rt != nil { fv, tdn = rt.GetTDRation(td) //clen = len(tdn) } if lentds > 1 { if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" { td.KeyDirect = 1 td.KVDirect = 2 td.BH = true } } } else { ck := fmtkey("r", td.StartRow, td.EndRow) rt := table.StartAndEndRation[ck] var fv float32 var tdn []*TD //clen := 0 if rt != nil { fv, tdn = rt.GetTDRation(td) //clen = len(tdn) } if lentds > 1 { if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" { td.KeyDirect = 2 td.KVDirect = 1 td.BH = true } } } } else { break } } } else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 { for _, td := range v.Tdmap[v2] { // u.Debug(td.Val, "-----", td.BH) if td.KeyDirect == 0 && td.BH && !td.MustBH { if k1 == "r" { ck := fmtkey("c", td.StartCol, td.EndCol) rt := table.StartAndEndRation[ck] clen := 0 var fv float32 var tdn []*TD if rt != nil { fv, tdn = rt.GetTDRation(td) clen = len(tdn) } if lentds >= clen && lentds > 1 { if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil { td.BH = false } } } else { ck := fmtkey("r", td.StartRow, td.EndRow) rt := table.StartAndEndRation[ck] var fv float32 var tdn []*TD clen := 0 if rt != nil { fv, tdn = rt.GetTDRation(td) clen = len(tdn) } if lentds >= clen && lentds > 1 { if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil { td.BH = false } } } } else { break } } } } } } table.GetKeyRation() if len(table.TRs) > 0 && len(table.TRs[0].TDs) > 0 { t0 := table.TRs[0].TDs[0] key := fmtkey("r", t0.StartRow, t0.EndRow) r, t := table.StartAndEndRation[key].GetTDRation(t0) if r > 0.9 && len(t) > 1 { table.BFirstRow = true } for k, 
tr := range table.TRs { if len(tr.TDs) == 1 && tr.TDs[0].StartCol == 0 && tr.TDs[0].EndCol+1 == table.ColNum { tr.TDs[0].BH = false tr.TDs[0].KVDirect = 0 sv := FindKv(tr.TDs[0].Val, "", 2) _, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", 2) for k, v := range resm { sv.AddKey(k, v) } if len(sv.Keys) > 0 { for k1, v1 := range sv.Map { if tr.TDs[0].SortKV.Map[k1] == nil { table.SortKV.AddKey(k1, v1) } } } else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 { table.Tag = tr.TDs[0].Val } // subVal := tdval_reg.FindAllStringSubmatch(tr.TDs[0].Val, -1) // //u.Debug(tr.TDs[0].Val, subVal) // if len(subVal) > 0 { // for _, subv1 := range subVal { // if len(subv1) == 3 { // table.SortKV.AddKey(subv1[1], subv1[2]) // } // } // } else if k == 0 && len(tr.TDs[0].Val) > 11 { // table.Tag = tr.TDs[0].Val // } } // for _, td := range tr.TDs { // u.Debug(td.BH, td.Val, "----") // } } } } //查找表格的kv,调用FindTdVal func (table *Table) FindKV() { //判断全是key的表格不再查找 if table.BHeader { //只要一个是key即为true direct := If(table.BFirstRow, 2, 1).(int) //kv,2查找方向,向上查找 vdirect := If(direct == 2, 1, 2).(int) //控制跨行表格 bcon := false //增加表格切块判断,只判断切块分包 //控制中标人排序方向 bodirect := 0 //控制中标人排序数值 sort := 1 //开始抽取 for _, tr := range table.TRs { if len(tr.TDs) == 1 { bcon = false td := tr.TDs[0] if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 4 && len([]rune(td.Val)) < 50 { res, _, _, _, _ := CheckCommon(td.Val, "abandontable") if res { //以下内容丢弃 bcon = true } } } if bcon { continue } if tr.TDs[0].StartRow > 0 { numbh := 0 for _, td := range tr.TDs { if td.BH { numbh++ } } if numbh > 0 && numbh <= len(tr.TDs)/2 { direct, vdirect = 1, 2 } else { direct, vdirect = 2, 1 } } for _, td := range tr.TDs { /** rt := table.StartAndEndRation[fmtkey("r", td.StartCol, td.EndCol)] if rt != nil { r, t := rt.GetTDRation(td) u.Debug(td.BH, td.Val, r, t) } **/ // if td.Val == "电视" || td.Val == "电话机" || td.Val == "传真机" || td.Val == "音响" { //qutil.Debug("----td.Valtype", td.Valtype, "td.BH:", 
td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect) // } if !td.BH && td.KVDirect < 3 { if !table.FindTdVal(td, direct, vdirect) { if !table.FindTdVal(td, vdirect, direct) { //都识别不到时,对第一、二中标候选人的处理 bo, res := GetBidOrder(td, bodirect, sort) if res { sort++ bodirect = bo } } } //fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect) } } } //qutil.Debug("FindKV", table.SortKV.Map) } else if len(table.TRs) > 0 { //没有表头的表格处理,默认纵向吧 res := make([][]string, len(table.TRs[0].TDs)) for n, _ := range res { res[n] = []string{} } for _, tr := range table.TRs { for n, td := range table.TRs[0].TDs { //第一行的所有td td1 := table.GetTdByRCNo(tr.TDs[0].StartRow, td.StartCol) if td1 != nil { res[n] = append(res[n], td1.Val) } else { res[n] = append(res[n], "") } } } //再拆值,类似http://www.ggzy.hi.gov.cn/cgzbgg/16553.jhtml第二列,有多个值 nmapkeys := []int{} nmap := map[int][]*u.Kv{} L: for _, r1 := range res { for n, r := range r1 { if len([]rune(r)) < 60 { // 长度小于60才去分 //res1, _ := GetKVAll(r, "", nil) res1, _ := colonkvEntity.entrance(r, "", 2) if res1 != nil { nmap[n] = res1 nmapkeys = append(nmapkeys, n) /** //截取串 for _k1, _ := range res1 { r = regexp.MustCompile(_k1+".*").ReplaceAllString(r, "") } r1[n] = r res[pos] = r1 **/ } else if nmap[n] != nil { //放空值 nmap[n] = append(nmap[n], &u.Kv{}) } } else { nmap = nil nmapkeys = nil break L } } } //调整 if len(nmap) > 0 { kmapkeys := []string{} kmap := map[string][]string{} for _, mk := range nmapkeys { //同是第n列 for pos, m1 := range nmap[mk] { k, v := m1.Key, m1.Value kv := kmap[k] if kv == nil { kv = []string{} } kv = append(kv, v) kmap[k] = kv kmapkeys = append(kmapkeys, k) for _, k := range kmapkeys { arr := kmap[k] if len(arr) < pos { arr = append(arr, "") kmap[k] = arr kmapkeys = append(kmapkeys, k) } } } } if len(kmap) > 0 { for _, k := range kmapkeys { table.SortKV.AddKey(k, kmap[k]) } } } //================= //解析值放到map中 for _, arr := range res { if len(arr) > 0 { v1 := arr[0] _, _, _, _, repl := 
CheckCommon(v1, "con") if repl == "ENT" { table.SortKV.AddKey("中标单位", arr) continue } else if repl == "BO" { table.SortKV.AddKey("排名", arr) continue } } } } //qutil.Debug("FindKV", table.SortKV.Map) } //获取中标人顺序 //direct 0默认 1横向 2纵向 func GetBidOrder(td *TD, direct, n int) (d int, res bool) { if td.Valtype != "BO" { return } if td.Rowspan > 1 { for i := 0; i < td.Rowspan; i++ { nextcol := 1 L1: for { vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.EndCol+nextcol) if vtd == nil { break L1 } nextcol += vtd.Colspan if filter_zbdw_v2.MatchString(vtd.Val) { arrbo := td.TR.Table.SortKV.Map[NullTxtBid] if arrbo == nil { arrbo = []map[string]interface{}{} td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo) } a1 := arrbo.([]map[string]interface{}) a1 = append(a1, map[string]interface{}{ "entname": vtd.Val, "sortstr": td.Val, "sort": GetBidSort(td.Val, n), }) res = true td.TR.Table.SortKV.Map[NullTxtBid] = a1 } } } } else if td.Colspan > 1 { for i := 1; i < td.Colspan; i++ { nextcol := 0 L2: for { vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.StartCol+nextcol) if vtd == nil || vtd.Colspan >= td.Colspan { break L2 } nextcol += vtd.Colspan if filter_zbdw_v2.MatchString(vtd.Val) { arrbo := td.TR.Table.SortKV.Map[NullTxtBid] if arrbo == nil { arrbo = []map[string]interface{}{} td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo) } a1 := arrbo.([]map[string]interface{}) a1 = append(a1, map[string]interface{}{ "entname": vtd.Val, "sortstr": td.Val, "sort": GetBidSort(td.Val, n), }) res = true td.TR.Table.SortKV.Map[NullTxtBid] = a1 } } } } else { rtd := td.TR.Table.GetTdByRCNo(td.StartRow, td.EndCol+1) btd := td.TR.Table.GetTdByRCNo(td.EndRow+1, td.StartCol) //if ((rtd != nil && !rtd.BH && rtd.Valtype == "BO") || direct == 1) && btd != nil && filter_zbdw_v.MatchString(btd.Val) { if ((rtd != nil && !rtd.BH) || direct == 1) && btd != nil && filter_zbdw_v2.MatchString(btd.Val) { d = 1 arrbo := td.TR.Table.SortKV.Map[NullTxtBid] if arrbo == nil { arrbo = []map[string]interface{}{} 
td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo) } a1 := arrbo.([]map[string]interface{}) a1 = append(a1, map[string]interface{}{ "entname": btd.Val, "sortstr": td.Val, "sort": GetBidSort(td.Val, n), }) res = true td.TR.Table.SortKV.Map[NullTxtBid] = a1 //} else if ((btd != nil && !btd.BH && btd.Valtype == "BO") || direct == 2) && rtd != nil && filter_zbdw_v.MatchString(rtd.Val) { } else if ((btd != nil && !btd.BH) || direct == 2) && rtd != nil && filter_zbdw_v2.MatchString(rtd.Val) { d = 2 arrbo := td.TR.Table.SortKV.Map[NullTxtBid] if arrbo == nil { arrbo = []map[string]interface{}{} td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo) } a1 := arrbo.([]map[string]interface{}) a1 = append(a1, map[string]interface{}{ "entname": rtd.Val, "sortstr": td.Val, "sort": GetBidSort(td.Val, n), }) res = true td.TR.Table.SortKV.Map[NullTxtBid] = a1 } } return }

// GetBidSort converts a bid-ranking label into its numeric position.
//
// A label containing "首选" always yields 1. Any other label is passed,
// together with n, to winnerOrderEntity.toNumber and that result is returned
// unchanged.
func GetBidSort(str string, n int) int {
	if strings.Contains(str, "首选") {
		return 1
	}
	return winnerOrderEntity.toNumber(str, n)
}

//查找每一个单元格的表头,调用FindNear func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) { near := table.FindNear(td, direct) // if near != nil { // fmt.Println("near----", near.Val, td.Val) // } // qutil.Debug(near != nil) // qutil.Debug(near.BH) // qutil.Debug(near.KeyDirect == vdirect, near.KeyDirect == 0) // qutil.Debug(near.KVDirect == direct, near.KVDirect == 0) // qutil.Debug(near.KVDirect < 3) if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 { near.KVDirect = direct near.KeyDirect = vdirect td.KVDirect = direct key := near.Val if near.Val == "" { key = fmtkey("k", near.TR.RowPos, near.ColPos) } val := table.SortKV.Map[key] //qutil.Debug("====================", "key:", key, "val:", val) bthiskey := false if val != nil { curpos := table.SortKV.Index[key] thistr := table.kTD[curpos] if thistr != near { near.Val += "_" for table.SortKV.Map[near.Val] != nil { 
near.Val += "_" } key = near.Val //之前这个地方没有重置,导致把之前结果覆盖了 } else { bthiskey = true } } bfind := false barr := false varrpos := -1 if bthiskey { //处理是数组值,且有合并行或合并列的情况 kvscope,对数组值的处理 pos := table.SortKV.Index[key] mval := table.kvscope[pos] bvalfind := false if direct == 1 { //kv是横向 L1: for k3, v3 := range mval { for _, v4 := range v3 { if v4.EndRow+1 == td.StartRow && v4.EndCol == td.EndCol { varrpos = k3 bvalfind = true break L1 } } } } else { //kv是纵向 L2: for k3, v3 := range mval { for _, v4 := range v3 { if v4.EndCol+1 == td.StartCol && v4.EndRow == td.EndRow { varrpos = k3 bvalfind = true break L2 } } } } if vals, ok := val.([]string); ok { if near.Val == "" { bn := false for _, vs := range vals { if vs != "" && NullTdReg.MatchString(vs) { bn = true } else { bn = false break } } if bn { near.Val = NullTxtBid key = NullTxtBid bfind = true } } if bvalfind { vals[varrpos] = td.Val // += "__" + td.Val } else { vals = append(vals, td.Val) val = vals varrpos = len(vals) - 1 } } else if vals, ok := val.(string); ok { if bvalfind { val = td.Val //vals + "__" + td.Val } else { tval := []string{vals} tval = append(tval, td.Val) val = tval varrpos = 1 } } barr = true } else { val = td.Val } td.HeadTd = near if bfind { tkey := fmtkey("k", near.TR.RowPos, near.ColPos) table.SortKV.ReplaceKey(key, val, tkey) } else { table.SortKV.AddKey(key, val) //if table.SortKV.Map[key] != nil { pos := table.SortKV.Index[key] //qutil.Debug("=========", "key:", key, "val:", val, "pos:", pos) if barr { mval := table.kvscope[pos] if mval != nil { tds := mval[varrpos] if tds != nil { tds = append(tds, td) } else { tds = []*TD{td} } if varrpos > -1 { mval[varrpos] = tds table.kvscope[pos] = mval } } } else { table.kvscope[pos] = map[int][]*TD{ 0: []*TD{td}, } table.kTD[pos] = near } //} } b = true } return } //查找单元格的表头时,横向或纵向 func (table *Table) FindNear(td *TD, direct int) *TD { if direct == 1 && td.StartCol > 0 { //左临 tr := table.TRs[:td.TR.RowPos+1] for i := len(tr) - 1; i > -1; i-- { tds := 
tr[i].TDs for _, td1 := range tds { if td1.StartRow <= td.StartRow && td1.EndRow >= td.EndRow && td1.EndCol+1 == td.StartCol { //找到左临节点 if td1.BH { return td1 } else { if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct { return td1.HeadTd } } } } } } else if direct == 2 && td.StartRow > 0 { //上临 tr := table.TRs[:td.TR.RowPos] for i := len(tr) - 1; i > -1; i-- { tds := tr[i].TDs for _, td1 := range tds { if td1.StartCol <= td.StartCol && td1.EndCol >= td.EndCol && td1.EndRow+1 == td.StartRow { //找到左临节点 if td1.BH { return td1 } else { if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct { return td1.HeadTd } } } } } } return nil }

// GetTdByRCNo looks up the cell that covers grid position (row, col).
//
// Rows are scanned in stored order, then the cells of each row in stored
// order; the first cell whose StartRow..EndRow and StartCol..EndCol ranges
// (both inclusive) contain the position is returned. The result is nil when
// no cell covers it.
func (tn *Table) GetTdByRCNo(row, col int) *TD {
	for i := range tn.TRs {
		for _, cell := range tn.TRs[i].TDs {
			if row < cell.StartRow || row > cell.EndRow {
				continue
			}
			if col < cell.StartCol || col > cell.EndCol {
				continue
			}
			return cell
		}
	}
	return nil
}

//判断表格是否是分包 func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) { pac := 0 val := 0 index = []string{} index_pos := []int{} //是数组且能找到标段之类的提示 arr_count := 0 key_index := -1 hasPkgTd := map[string]bool{} for in, k := range tn.SortKV.Keys { if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) { continue } v := tn.SortKV.Map[k] vs, bvs := v.([]string) if bvs { arr_count++ haspkgs := []string{} for in2, v1 := range vs { v1 = replPkgConfusion(v1) if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) { if key_index == -1 { key_index = in } else if key_index != in { break } index = append(index, FindVal_1.FindString(v1)) index_pos = append(index_pos, in2) val += 1 pac++ } else { if ok, v1new := isHasOnePkgAndNoKv(v1); ok { haspkgs = append(haspkgs, v1new) } } } /*处理这种情况: 包一:xxxxxxxxx 包二:xxxxxxxxx */ if len(index) == 0 && len(haspkgs) > 0 && len(haspkgs) == len(vs) { for in2, v1 := range haspkgs { if key_index == -1 { key_index = in } else if key_index != in { break } index = append(index, v1) index_pos = append(index_pos, in2) val += 1 pac++ } } } else if 
v1, ok := v.(string); ok { v1 = replPkgConfusion(v1) if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) { key_index = in index = append(index, FindVal_1.FindString(v1)) index_pos = append(index_pos, 0) val += 1 pac++ } else if getTd := tn.GetTdByRCNo(0, tn.SortKV.Index[k]); getTd != nil && getTd.KVDirect == 2 { /*处理这种情况: 包一:xxxxxxxxx */ if ok, v1new := isHasOnePkgAndNoKv(v1); ok { hasPkgTd[k] = true key_index = in index = append(index, v1new) index_pos = append(index_pos, 0) val += 1 pac++ } } } } //key是分包的情况 //记录key对应的值 commonKeyVals := map[string][]string{} //记录key出现的次数 keyExistsCount := map[string]int{} if pac > 1 { val = 10 } else { //查找标签 if TableMultiPackageReg_4.MatchString(tn.Tag) { val += 4 } else if TableMultiPackageReg_2.MatchString(tn.Tag) { val += 4 } keyIsPkg := false for in, k := range tn.SortKV.Keys { if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) { continue } v := tn.SortKV.Map[k] //key是分包的情况 if ismatch := FindVal_1.MatchString(k); keyIsPkg || ismatch { if ismatch { keyIsPkg = true val += 4 pkgFlag := FindVal_1.FindString(k) k = strings.Replace(k, pkgFlag, "", -1) index = append(index, pkgFlag) index_pos = append(index_pos, len(index)) val += 1 pac++ } else { k = strings.TrimRight(k, "_") } keyExistsCount[k] = keyExistsCount[k] + 1 commonKeyVals[k] = append(commonKeyVals[k], qutil.ObjToString(v)) } else if k1 := FilterKey_2.ReplaceAllString(k, ""); FindKey_2.MatchString(k1) { val += 4 if vs, bvs1 := v.([]string); bvs1 { L: for in2, v1 := range vs { if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) { for _, serial := range regSerialTitles_2 { if serial.MatchString(v1) { break L } } if key_index == -1 { key_index = in } else if key_index != in { break } index = append(index, v1) index_pos = append(index_pos, in2) val += 1 pac++ } } } else if v1, ok := v.(string); ok && !hasPkgTd[k] { v1 = replPkgConfusion(v1) for _, v2 := range strings.Split(v1, "/") { if 
len([]rune(v2)) < 20 && !moneyNum.MatchString(v2) && FindVal2_1.MatchString(v2) { key_index = in index = append(index, v1) index_pos = append(index_pos, 0) val += 1 pac++ underline := "" for { underline += "_" if tn.SortKV.Map[k+underline] == nil { break } else if v3, v2_ok := tn.SortKV.Map[k+underline].(string); v2_ok && v3 != "" { index = append(index, v3) index_pos = append(index_pos, 1) } else if v3, v2_ok := tn.SortKV.Map[k+underline].([]string); v2_ok { for v2_k, v2_v := range v3 { index = append(index, v2_v) index_pos = append(index_pos, v2_k+1) } } } break } } } break } } } // u.Debug(index) //过滤重复及标准化! standIndex := []string{} standIndex_pos := []int{} oldIndex := []string{} brepeat := map[string]bool{} for k, v := range index { v = u.PackageNumberConvert(v) if !brepeat[v] { brepeat[v] = true standIndex = append(standIndex, v) standIndex_pos = append(standIndex_pos, index_pos[k]) oldIndex = append(oldIndex, v) } } index = standIndex //有一个以上的包,并且相同的key出现一次以上,认为这个key是属于包里面的 if len(commonKeyVals) > 0 { for k, v := range commonKeyVals { if len(index) > 1 && keyExistsCount[k] < 2 { continue } tn.SortKV.AddKey(k, v) } } // isGoonNext := false if val > 4 && len(brepeat) > 0 { b = true //多包解析 if b { tn.BPackage = true for nk, v := range index { if tn.BlockPackage.Map[v] == nil { bp := &u.BlockPackage{} bp.Index = v bp.Origin = oldIndex[nk] bp.TableKV = u.NewJobKv() tn.BlockPackage.AddKey(v, bp) } } if len(index) == 1 { //是一个的情况 if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { beq := true for _, v2 := range tn.SortKV.Map { if _, ok := v2.(string); !ok { beq = false break } } if beq { //统一处理为数组 td := tn.GetTdByRCNo(tn.RowNum-1, 0) if !td.BH && FindVal2_1.MatchString(td.Val) { for k2, v2 := range tn.SortKV.Map { tn.SortKV.Map[k2] = []string{v2.(string)} } } else { //没有处理成数组的情况下,继续调用正文查找分包的方法 isGoonNext = true } } } } for _, k1 := range tn.SortKV.Keys { v1 := tn.SortKV.Map[k1] if _, bvs := v1.(string); bvs && len(index) > 1 && !strings.HasSuffix(k1, 
"_") { v1_array := []string{v1.(string)} underline := "" for { underline += "_" if tn.SortKV.Map[k1+underline] == nil { break } else if v3, v2_ok := tn.SortKV.Map[k1+underline].(string); v2_ok && v3 != "" { v1_array = append(v1_array, v3) } } v1 = v1_array } if val, bvs := v1.([]string); bvs { if len(val) <= len(index) { for k, v := range val { tn.assemblePackage(k1, v, index[k]) } } else { for sk1, sv2 := range index { v := val[sk1] //处理http://www.hljcg.gov.cn/xwzs!queryOneXwxxqx.action?xwbh=8145b599-a11e-45cb-a76a-12157a715570 if v == "" && strings.Index(k1, "供应商") > -1 { if sk1 != len(index)-1 { //u.Debug(val[sk1+1], val[sk1+2]) if standIndex_pos[sk1+1]-standIndex_pos[sk1] > 1 { v = val[standIndex_pos[sk1]+1] } } else { if standIndex_pos[sk1] < len(val)-1 { v = val[standIndex_pos[sk1]+1] } } } tn.assemblePackage(k1, v, sv2) } } //删除子包的kv //u.Debug("----==1==-------", k1) k1tags := u.GetTags(k1) //if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") { // tn.SortKV.RemoveKey(k1) //} for _, vcgdw := range k1tags { if vcgdw.Value == "采购单位" { tn.SortKV.RemoveKey(k1) } } } else if val, bvs := v1.(string); bvs && len(index) == 1 { //删除子包的kv k1tags, _, _, _, _ := CommonDataAnaly(k1, "", "", val) if !(len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0])) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) { //log.Println("remove", k1, val) tn.assemblePackage(k1, val, index[0]) tn.SortKV.RemoveKey(k1) } //u.Debug("----==2==-------", k1) } } } } else { isGoonNext = true } if isGoonNext { blockPackage := map[string]*u.BlockPackage{} for _, k := range tn.SortKV.Keys { if excludeKey.MatchString(k) { continue } str := "" v := tn.SortKV.Map[k] nk := regReplAllSpace.ReplaceAllString(k, "") if vs, ok := v.([]string); ok { str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " ")) } else { str += fmt.Sprintf("%s:%s\n", nk, v) } b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false) if b && len(blockPackage) > 0 { tn.BPackage = 
true for mk, mv := range blockPackage { if tn.BlockPackage.Map[mk] == nil { tn.BlockPackage.AddKey(mk, mv) } else { bp := tn.BlockPackage.Map[mk].(*u.BlockPackage) if bp.TableKV == nil { bp.TableKV = u.NewJobKv() } for k2, v2 := range mv.ColonKV.Kv { if bp.TableKV.Kv[k2] == "" { bp.TableKV.Kv[k2] = v2 bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2] bp.Text += fmt.Sprintf("%v:%v\n", k2, v2) } } for k2, v2 := range mv.SpaceKV.Kv { if bp.TableKV.Kv[k2] == "" { bp.TableKV.Kv[k2] = v2 bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2] bp.Text += fmt.Sprintf("%v:%v\n", k2, v2) } } } } tn.BPackage = true tn.SortKV.RemoveKey(k) } } } //查找分包中的中标人排序 if tn.BlockPackage != nil && tn.BlockPackage.Map != nil && len(tn.BlockPackage.Map) > 0 { for _, v := range tn.BlockPackage.Map { vv := v.(*u.BlockPackage) if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 { vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2) } } } return }

// assemblePackage records one table key/value pair on the package block that
// is stored under key in tn.BlockPackage.Map.
//
// The entry must already exist and hold a *u.BlockPackage; the unchecked type
// assertion panics otherwise. A nil TableKV is replaced with u.NewJobKv().
//
// A non-empty v1 is run through CommonDataAnaly. When that call reports a
// match, each returned key is written only if it already has a KvTag entry
// and either its stored value is empty or the new weight is larger. When it
// reports no match, the raw pair k1/v1 is stored instead. Finally, after
// regReplAllSpace's matches are removed from k1, a "k1:v1" line is appended to
// the block text unless the cleaned key matches excludeKey.
func (tn *Table) assemblePackage(k1, v1, key string) {
	pkg := tn.BlockPackage.Map[key].(*u.BlockPackage)
	if pkg.TableKV == nil {
		pkg.TableKV = u.NewJobKv()
	}
	if v1 != "" {
		stdKeys, weights, stdVal, _, matched := CommonDataAnaly(k1, "中标情况", "", v1)
		if !matched {
			pkg.TableKV.Kv[k1] = qutil.ObjToString(v1)
		} else {
			for i, stdKey := range stdKeys {
				// NOTE(review): a key with no existing KvTag entry is skipped, so
				// this branch only ever overwrites tags that are already present.
				// That may be intentional — confirm against callers before changing.
				if pkg.TableKV.Kv == nil || pkg.TableKV.KvTag[stdKey] == nil {
					continue
				}
				if pkg.TableKV.Kv[stdKey] != "" && !(weights[i] > pkg.TableKV.KvTag[stdKey].Weight) {
					continue
				}
				pkg.TableKV.Kv[stdKey] = stdVal
				pkg.TableKV.KvTag[stdKey] = &u.Tag{Value: stdVal, Weight: weights[i]}
			}
		}
	}
	k1 = regReplAllSpace.ReplaceAllString(k1, "")
	// Append the pair to the block's text.
	if !excludeKey.MatchString(k1) {
		pkg.Text += fmt.Sprintf("%v:%v\n", k1, v1)
	}
	tn.BlockPackage.Map[key] = pkg
}

/** 之前爬虫过来的数据对table表格的抓取异常问题 查找并修正不规则表格的字符串,只对全文做处理,块内的表格不需要修正 **/ var thbf = regexp.MustCompile("(?i)") //需要保留thead var saveThead = regexp.MustCompile("(?is)(.+?)") func RepairCon(con string) string { res := saveThead.FindAllStringSubmatch(con, 1) th := "" if len(res) == 1 && len(res[0]) == 2 { th = u.TrimLeftSpace(res[0][1], "") } con = thbf.ReplaceAllString(con, "") con = 
u.TrimLeftSpace(con, "") itbody := strings.Index(con, " itbody { con = findpos(con, iLen, itbody) } } //保留第一个thead if th != "" { con = strings.Replace(con, th, ""+th+"", 1) } //u.Debug(con) return con } //修复表格 func findpos(con string, iLen, start int) (newcon string) { defer qutil.Catch() n := len(con) layer := 0 pos := 0 if start >= 0 { if iLen == 6 { for i := iLen + start; i < len(con); i++ { if con[i] == '<' && i+6 < n { str := con[i : i+6] if str == " 3 { pos = lasttr + 5 } else if pos > 0 { pos += 5 } if pos <= n && pos < len(con) && start < pos { newcon = con[:start] + "" + con[start:pos] + "
" + con[pos:] } } } if newcon == "" { newcon = con } return }

// isHasOnePkgAndNoKv reports whether the cell text v1 names exactly one
// package and carries no separate key/value pair.
//
// v1 qualifies when FindVal_1 matches exactly once and regDivision matches
// fewer than two times (presumably colon separators, going by the colonCount
// name — confirm against regDivision's definition), and additionally either
//   - the single regDivision match is the colon that directly follows the
//     package marker, or
//   - there is no regDivision match and no colon follows the marker.
//
// On success it returns true and the matched package marker; otherwise it
// returns false and v1 unchanged.
func isHasOnePkgAndNoKv(v1 string) (bool, string) {
	v1s := FindVal_1.FindAllString(v1, -1)
	colonCount := len(regDivision.FindAllString(v1, -1))
	if len(v1s) != 1 || colonCount >= 2 {
		return false, v1
	}
	// The marker is raw document text being embedded in a pattern, so it is
	// escaped first: an unescaped metacharacter would change the meaning of the
	// pattern or make MustCompile panic.
	ispkgcolon := regexp.MustCompile(regexp.QuoteMeta(v1s[0]) + "[::]").MatchString(v1)
	if (ispkgcolon && colonCount == 1) || (!ispkgcolon && colonCount == 0) {
		return true, v1s[0]
	}
	return false, v1
}

// replPkgConfusion removes from v1 everything matched by PreReg, PreReg1,
// PreCon and PreCon2, applied in that order, and returns the cleaned string.
func replPkgConfusion(v1 string) string {
	v1 = PreReg.ReplaceAllString(v1, "")
	v1 = PreReg1.ReplaceAllString(v1, "")
	v1 = PreCon.ReplaceAllString(v1, "")
	v1 = PreCon2.ReplaceAllString(v1, "")
	return v1
}

//对td中的值,进行再处理 func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) { //处理表格中的联系人信息 indexMap := contactFormat.IndexMap matchMap := contactFormat.MatchMap weightMap := map[string]map[string]interface{}{} //权重 mustMatchFirst := len(indexMap) > 0 //第一个必须匹配上 reCreate := false matchCount := 0 contactTypeTagMap := map[string]map[string][]interface{}{} //u.Debug(mustMatchFirst, indexMap, matchMap) notMatchTrCount := 0 allAscFind := true if len(indexMap) == 0 { isCanAddToIndexMap := false matchPrevFlag := false prevCanAddToIndexMap := false LS: for _, tr := range tn.TRs { for td_index, td := range tr.TDs { thisTdKvs := colonkvEntity.GetKvs(td.Text, "", 2) if len(thisTdKvs) == 0 { tdValue := regReplAllSpace.ReplaceAllString(td.Text, "") if tdValue != "" && len([]rune(tdValue)) < 10 { thisTdKvs = append(thisTdKvs, &u.Kv{ Key: tdValue, Value: "", }) } } if len(thisTdKvs) != 1 { continue } //采购人在联系人、电话后面的处理 td_k := FilterContactKey(thisTdKvs[0].Key) td_k_length := len([]rune(td_k)) if td_k_length < 2 || td_k_length > 15 { continue } isContinue := ContactInfoMustReg.MatchString(td_k) if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) { if !matchPrevFlag && len(indexMap) > 0 { indexMap = map[int]string{} break LS } isCanAddToIndexMap = true } if isContinue { continue } for 
_, k := range HasOrderContactType(td_k) { if !ContactType[k].MatchString(td_k) { continue } if len(indexMap) == 0 { if isCanAddToIndexMap || (prevCanAddToIndexMap && len(tr.TDs) == 1) { myPrevTdVal := "" if td_index-2 >= 0 { myPrevTdVal = tr.TDs[td_index-2].Val } if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) { matchPrevFlag = true } indexMap[0] = k break } } else { indexMap = map[int]string{} break LS } } } prevCanAddToIndexMap = isCanAddToIndexMap isCanAddToIndexMap = false } if len(indexMap) > 0 { allAscFind = false } } ////// L: for tr_index, tr := range tn.TRs { thisTrHasMatch := false jumpNextTd := false for td_index, td := range tr.TDs { //和|以?及|与|、多个词和在一起 if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) { thisTdVals := regSplit.Split(td.Text, -1) nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1) if len(thisTdVals) == len(nextTdVals) { isHandle := false for _, k := range HasOrderContactType(td.Text) { if ContactType[k].MatchString(td.Text) { for thisTdVals_k, thisTdVals_v := range thisTdVals { thisTdVals_v = strings.TrimSpace(thisTdVals_v) if ContactType[k].MatchString(thisTdVals_v) { thisTrHasMatch = true tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k]) continue } if !ContactInfoMustReg.MatchString(thisTdVals_v) { continue } jumpNextTd = true thisTrHasMatch = true tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k]) } break } } if !isHandle && len(indexMap) > 0 { _, onlyContactType := u.FirstKeyValueInMap(indexMap) if myContactType, _ := onlyContactType.(string); myContactType != "" { for thisTdVals_k, thisTdVals_v := range thisTdVals { thisTdVals_v = strings.TrimSpace(thisTdVals_v) if ContactInfoMustReg.MatchString(thisTdVals_v) { jumpNextTd = true thisTrHasMatch = true tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, 
nextTdVals[thisTdVals_k]) } } } } } } else { jumpNextTd = false } /////////////////////////////////////// thisTdKvs := kvAfterDivideBlock(td.Text, 3) if len(thisTdKvs) == 0 { thisTdKvs = colonkvEntity.GetKvs(td.Text, "", 2) } if len(thisTdKvs) == 0 { tdValue := regReplAllSpace.ReplaceAllString(td.Text, "") if tdValue != "" && len([]rune(tdValue)) < 15 { thisTdKvs = append(thisTdKvs, &u.Kv{ Key: tdValue, Value: "", }) } } tdAscFind := true if len(thisTdKvs) == 0 { continue } else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 { //采购人在联系人、电话后面的处理 isCanAddToIndexMap := false LL: for _, td_kv := range thisTdKvs { //u.Debug(td_kv.PrevLine) td_k := FilterContactKey(td_kv.Key) td_k_length := len([]rune(td_k)) if td_k_length < 2 || td_k_length > 15 { continue } isContinue := ContactInfoMustReg.MatchString(td_k) if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) { if len(indexMap) > 0 { indexMap = map[int]string{} break LL } isCanAddToIndexMap = true } if isContinue { continue } if len(indexMap) == 0 { for _, k := range HasOrderContactType(td_k) { if !ContactType[k].MatchString(td_k) { continue } if isCanAddToIndexMap && len(indexMap) == 0 { indexMap[0] = k break } } } } if len(indexMap) > 0 { tdAscFind = false } } prevKey := "" oldIndexMapLength := len(indexMap) thidTdIndex := td_index notmatchCount := 0 kvTitle := "" for _, td_kv := range thisTdKvs { //u.Debug(td_kv.Key, td_kv.Value, td_kv.Title) iscontinue := false td_v := td_kv.Value td_k := FilterContactKey(td_kv.Key) td_k_length := len([]rune(td_k)) // if allAscFind && tdAscFind { for _, k := range HasOrderContactType(td_k) { if td_k_length < 3 || td_k_length > 15 { continue } if !ContactType[k].MatchString(td_k) { matchCount++ continue } if weightMap[k] == nil { weightMap[k] = map[string]interface{}{} } if ContactInfoVagueReg.MatchString(td_k) { if matchMap[k] == nil { matchMap[k] = map[string]bool{} } isAddToMatchMap := true if !strings.HasSuffix(td_k, "方式") { 
_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3) if len(kTag) == 1 { tagVal, weightVal := u.FirstKeyValueInMap(kTag) if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) { isAddToMatchMap = false } if td.SortKV.Map[tagVal] != nil { if weightMap[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][tagVal].(int)) { weightMap[k][tagVal] = weightVal.(int) td.SortKV.AddKey(tagVal, td_v) thisTrHasMatch = true } } else { weightMap[k][tagVal] = weightVal.(int) } } } if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" { matchMap[k][ContactInfoVagueReg.FindString(td_k)] = true } } else if k == "采购单位" { //打标签,权重高的重新覆盖 _, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3) tagVal, weightVal := u.FirstKeyValueInMap(kTag) if tagVal == k { if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 { weightMap[k][k] = weightVal.(int) matchMap[k] = map[string]bool{} indexMap = map[int]string{} } } } if u.IsMapHasValue(k, indexMap) { thisTrHasMatch = true iscontinue = true continue } if reCreate { indexMap = map[int]string{} reCreate = false } indexMap[thidTdIndex] = k iscontinue = true thisTrHasMatch = true thidTdIndex++ break } if len(indexMap) == 0 { prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "") for k, v := range ContactType { if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) { indexMap[thidTdIndex] = k thisTrHasMatch = true thidTdIndex++ } } } if len(indexMap) == 0 { if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" { thidTdIndex = 0 matchMap = map[string]map[string]bool{} indexMap = map[int]string{1: titleMatchType} } } } if iscontinue { continue } //不在同一块中 if td_kv.Title != "" && kvTitle != td_kv.Title && len(indexMap) > 0 && !ContactInfoMustReg.MatchString(td_kv.Key) { thidTdIndex = 0 matchMap = map[string]map[string]bool{} indexMap = map[int]string{} } kvTitle = 
td_kv.Title //u.Debug(indexMap, td_k, td_v, matchMap) if len(indexMap) > 0 { if td_k_length < 2 || td_k_length > 10 { continue } modle := 0 if len(thisTdKvs) == 1 { if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" { modle = 1 } else { modle = 2 } } if !ContactInfoMustReg.MatchString(td_k) { notmatchCount++ if notmatchCount < len(indexMap)*2 && false { notmatchCount = 0 thidTdIndex = 0 indexMap = map[int]string{} matchMap = map[string]map[string]bool{} } if mustMatchFirst { break L } continue } reCreate = true index := td_index if oldIndexMapLength == 0 && len(indexMap) > 1 { if prevKey != td_k { prevKey = td_k index = td_index } else if prevKey == td_k { index++ } } if filterValue.MatchString(td_v) { thisTrHasMatch = true continue } //u.Debug(indexMap, td_k, td_v, matchMap, index, modle) myContactType := indexMap[index] if myContactType == "" && len(indexMap) == 1 { _, onlyContactType := u.FirstKeyValueInMap(indexMap) myContactType, _ = onlyContactType.(string) } if myContactType == "" { continue } matchCount++ if matchMap[myContactType] == nil { matchMap[myContactType] = map[string]bool{} } if IsContactKvHandle(ContactInfoMustReg.FindString(td_k), matchMap[myContactType]) { continue } matchMap[myContactType][ContactInfoMustReg.FindString(td_k)] = true if ContactType[myContactType].MatchString(td_k) { continue } thisTrHasMatch = true if modle == 1 { td.Text = myContactType + td_k td.Val = td.Text } else { // if !strings.HasSuffix(td_k, "方式") { _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3) if len(kTag) == 1 { tagVal, _ := u.FirstKeyValueInMap(kTag) if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) { continue } if contactTypeTagMap[myContactType] == nil { contactTypeTagMap[myContactType] = map[string][]interface{}{} } myOldKeyArray := contactTypeTagMap[myContactType][tagVal] if myOldKeyArray != nil { 
tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string)) } else { contactTypeTagMap[myContactType][tagVal] = make([]interface{}, 3) } if weightMap[myContactType] == nil { weightMap[myContactType] = map[string]interface{}{} } weightMap[myContactType][tagVal] = 1 contactTypeTagMap[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k} } } td.SortKV.AddKey(myContactType+td_k, td_v) } } } //u.Debug(td.SortKV.Map) } if allAscFind && !thisTrHasMatch { notMatchTrCount++ if notMatchTrCount >= 2 { notMatchTrCount = 0 indexMap = map[int]string{} } } } //u.Debug("end", matchCount, indexMap, matchMap) if matchCount == 0 { indexMap = map[int]string{} matchMap = map[string]map[string]bool{} } (*contactFormat).IndexMap = indexMap (*contactFormat).MatchMap = matchMap // for _, tr := range tn.TRs { // for _, td := range tr.TDs { // log.Println(td.SortKV.Map) // } // } } func (table *Table) analyBrand() { //5c2d8c05a5cb26b9b782572b //产品名称 品牌 规格 单价 单位 数量 小计 质保期 lineMapArr := make(map[string]*SortMap) lineMap := make(map[string]*SortMap) brandRule := u.BrandRules //将val为数组和string的分开 for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序 val := table.SortKV.Map[key] key = regReplAllSpace.ReplaceAllString(key, "") key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉 if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]} /* { "商品":["",""], "商品_"["",""], } */ valArr, allempty := filterVal(realTypeVal...) 
//过滤数据 if allempty { continue } realTypeVal = valArr line := underline.FindString(key) lineValMap1 := lineMapArr[line] // i := 1 // L: // for { //去除数组空数据 // last := realTypeVal[len(realTypeVal)-i] // if last == "" { // i++ // if i > len(realTypeVal) { // break // } // goto L // } else { // break // } // } // dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] //去除数组中空数据 if len(realTypeVal) > 0 { if lineValMap1 == nil { tmp := NewSortMap() tmp.AddKey(key, realTypeVal) lineMapArr[line] = tmp } else { lineValMap1.AddKey(key, realTypeVal) } } //qutil.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map) } else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"} /* { "商品:"",名称:"", "商品_:"",名称_:"", "商品__:"",名称__:"", } */ valArr, allempty := filterVal(realTypeVal) //过滤数据 if allempty { continue } realTypeVal = valArr[0] line := underline.FindString(key) lineValMap2 := lineMap[line] if lineValMap2 == nil { tmp := NewSortMap() tmp.AddKey(key, realTypeVal) lineMap[line] = tmp } else { lineValMap2.AddKey(key, realTypeVal) } //qutil.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map) } else { // "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409 //成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]] //qutil.Debug("err data:", key, val) } } //处理数组数据后,匹配必须title和替换要保存的title //qutil.Debug("lineMapArr----", len(lineMapArr)) if len(lineMapArr) > 0 { for _, aMap := range lineMapArr { maxNum := 0 arrcount1 := 0 //记录key是否存在必须title(数组数据) arrcount2 := 0 ka := make(map[string][]string) //最终存储数据 //qutil.Debug("aMap.Keys----", aMap.Keys) for _, k0 := range aMap.Keys { match := false //记录must是否匹配到 v0 := aMap.Map[k0].([]string) //匹配必须title for nameM, r := range brandRule["must"] { if convert(k0, r) { //匹配成功 v0tmp1 := v0 match = true if len(ka[nameM]) != 0 && strings.Contains(k0, "描述") { 
//防止k0匹配到多次 和特殊情况 物料名称 物料描述同时出现 continue } if nameM == "itemname" || nameM == "modal" { hasGoods(table, v0...) //判断itemname和modal中有没有商品 if nameM == "itemname" { v0tmp1 = filterItem(v0...) //过滤itemname } } if nameM == "brandname" || nameM == "modal" { if len(ka["brandname"]) == 0 { brand, allNull := hasBrand(table, v0...) if !allNull { ka["brandname"] = brand } } } //unitprice if nameM == "unitprice" { //处理金额 v0tmp1 = dealPrice(k0, v0...) } if nameM != "brandname" && len(ka[nameM]) == 0 { ka[nameM] = v0tmp1 } arrcount1++ } } //替换其它要保存字段 if !match { //must未匹配,匹配replace for nameR, r := range brandRule["replace"] { if convert(k0, r) { //匹配成功 v0tmp2 := v0 //totalprice if nameR == "totalprice" { //处理金额 v0tmp2 = dealPrice(k0, v0...) } //number if nameR == "number" { //处理数量 uname0 := []string{} v0tmp2, uname0 = dealNumber(v0...) if len(ka["unitname"]) == 0 && len(uname0) != 0 { ka["unitname"] = uname0 } } if len(v0tmp2) > 0 { ka[nameR] = v0tmp2 } arrcount2++ } } } } //找最终存储数据的最小len(arr) // for _, vf := range ka { // //找最短的数组 // lenVal := len(vf) // if minNum == 0 || minNum > lenVal { //maxNum = len(最短数组) // minNum = lenVal // } // } //找最终存储数据的最大len(arr),小的补空 for _, vf1 := range ka { lenVal := len(vf1) if lenVal > maxNum { maxNum = lenVal } } finishKa := make(map[string][]string) for vf2K, vf2 := range ka { if len(vf2) < maxNum { lenMv := maxNum - len(vf2) for i := 0; i < lenMv; i++ { vf2 = append(vf2, "") } } finishKa[vf2K] = vf2 } hasKey(table, arrcount1) //是否匹配到table中的标题 //qutil.Debug("finishKa----", finishKa) if arrcount1 >= 1 { if arrcount1+arrcount2 == 1 { //删除只匹配到一个价钱(总价) delete(finishKa, "unitprice") } finishData := dealArrData(maxNum, finishKa) table.BrandData = append(table.BrandData, finishData) } } } //处理string数据后,匹配必须title和替换要保存的title //qutil.Debug("lineMap----", len(lineMap)) if len(lineMap) > 0 { for _, sMap := range lineMap { strcount1 := 0 //记录key是否存在必须title(字符串数据) strcount2 := 0 endStrMap := make(map[string]string) //qutil.Debug(k, "aMap.Keys----", 
sMap.Keys) for _, k1 := range sMap.Keys { match := false //记录must是否匹配到 v1 := qutil.ObjToString(sMap.Map[k1]) // for k1, v1 := range sMap { //qutil.Debug(k1, "++++++++++", v1) if v1 == "" { continue } //匹配必须title for nameM, r := range brandRule["must"] { if convert(k1, r) { //匹配成功 v1tmp1 := v1 match = true if nameM == "itemname" || nameM == "modal" { //特殊处理itemname hasGoods(table, v1) if nameM == "itemname" { v1tmp1 = filterItem(v1)[0] //过滤itemname if v1tmp1 == "" { break } } } if nameM == "brandname" || nameM == "modal" { //特殊处理brandname if endStrMap["brandname"] == "" { brand, allNull := hasBrand(table, v1) if !allNull { endStrMap["brandname"] = brand[0] } } } //unitprice if nameM == "unitprice" { //处理金额 v1tmp1 = dealPrice(k1, v1)[0] } if nameM != "brandname" && endStrMap[nameM] == "" { endStrMap[nameM] = v1tmp1 } strcount1++ } } //替换其它要保存字段 if !match { for nameR, r := range brandRule["replace"] { if convert(k1, r) { //匹配成功 v1tmp2 := v1 //totalprice if nameR == "totalprice" { //处理金额 v1tmp2 = dealPrice(k1, v1)[0] } //number if nameR == "number" { //处理数量 varr1, uname1 := dealNumber(v1) v1tmp2 = varr1[0] //从number中获取到的单位 if endStrMap["unitname"] == "" && uname1[0] != "" { endStrMap["unitname"] = uname1[0] } } if v1tmp2 != "" { endStrMap[nameR] = v1tmp2 } strcount2++ } } } //} } //原始字符串数据处理 hasKey(table, strcount1) //是否匹配到table中的标题 //qutil.Debug("endStrMap----", endStrMap) if strcount1 >= 1 { if strcount1+strcount2 == 1 { //删除只匹配到一个价钱(总价) delete(endStrMap, "unitprice") } finishData := dealStrData(endStrMap) //处理数据 if len(finishData) > 0 { table.BrandData = append(table.BrandData, finishData) } } } } } func dealArrData(maxNum int, ka map[string][]string) []map[string]string { for k2, v2 := range ka { //处理数组长度不相等,使长度一致 if len(v2) > maxNum { ka[k2] = v2[:maxNum] } } finalData := assembleData(ka, 1) if len(finalData) > 0 { return finalData } return nil } func dealStrData(kv map[string]string) []map[string]string { finalData := []map[string]string{} if len(kv) > 0 { 
finalData = assembleData(kv, 2) } return finalData } //组装数据,每一行的数据为一数据集合 func assembleData(m interface{}, n int) []map[string]string { defer qutil.Catch() /* { "itemname":["计算机","打印机","机柜"], "number" :["1","12","4"] } */ datas := []map[string]string{} if n == 1 { //数组数据 realTypeM := m.(map[string][]string) //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr /* arr1 ["a1","b1","c1"] arr2 ["a2","b2","c2"] [ {"a1","a2"}, {"b1","b2"}, {"c1","c2"} ] */ //start for k3, v3 := range realTypeM { for _, val := range v3 { data := make(map[string]string) data[k3] = val datas = append(datas, data) } break } for i, data := range datas { for k4, v4 := range realTypeM { if i < len(v4) { //数组数据长度不一致 if v4[i] != " " { data[k4] = v4[i] } else { delete(data, k4) } } else { fmt.Println("err table") } } datas[i] = data } //end for _, fdv := range datas { //清除空数据和只含特殊符号的数据 for fmk, fmv := range fdv { if tabletdclear.ReplaceAllString(fmv, "") == "" { delete(fdv, fmk) } } } } else { //字符串数据 realTypeM := m.(map[string]string) datas = append(datas, realTypeM) } return datas } ////组装数据,每一行的数据为一数据集合 //func assembleData(m interface{}, n int) []map[string]string { // defer qutil.Catch() // /* // { // "itemname":["计算机","打印机","机柜"], // "number" :["1","12","4"] // } // */ // datas := []map[string]string{} // switch reflect.TypeOf(m).String() { // case "map[string][]string": //数组数据 // realTypeM := m.(map[string][]string) // //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr // /* // arr1 ["a1","b1","c1"] // arr2 ["a2","b2","c2"] // [ // {"a1","a2"}, // {"b1","b2"}, // {"c1","c2"} // ] // */ // //start // for k3, v3 := range realTypeM { // for _, val := range v3 { // data := make(map[string]string) // data[k3] = val // datas = append(datas, data) // } // break // } // for i, data := range datas { // for k4, v4 := range realTypeM { // if i < len(v4) { //数组数据长度不一致 // if v4[i] != " " { // data[k4] = v4[i] // } else { // delete(data, k4) // //continue // } // } else { // fmt.Println("err table") // 
//continue // } // } // datas[i] = data // } // //end // // for _, fdv := range datas { //清除空数据和只含特殊符号的数据 // // for fmk, fmv := range fdv { // // if tabletdclear.ReplaceAllString(fmv, "") == "" { // // delete(fdv, fmk) // // } // // } // // } // case "map[string]string": //字符串数据 // realTypeM := m.(map[string]string) // datas = append(datas, realTypeM) // default: // } // return datas //} func convert(key, r string) bool { defer qutil.Catch() flag := false key = tabletitleclear.ReplaceAllString(key, "") reg, err := regexp.Compile(r) if err != nil { fmt.Println("reg err:", err) return false } flag = reg.MatchString(key) return flag } func hasKey(table *Table, n int) { defer qutil.Catch() if table.TableResult.HasKey == 1 { return } if n >= 1 { table.TableResult.HasKey = 1 } } func hasGoods(table *Table, data ...string) { defer qutil.Catch() goodsArr := make([]string, len(data)) //fmt.Println("table.TableResult.HasGoods=====", table.TableResult.HasGoods) if table.TableResult.HasGoods == 1 { return } for i, d := range data { if d != "" { goods := u.GoodsGet.CheckSensitiveWord(d) //fmt.Println("goods======", goods) goodsArr[i] = goods if len(goods) > 0 { table.TableResult.HasGoods = 1 break } } } } //func hasBrand(table *Table, data ...string) { // defer qutil.Catch() // if table.TableResult.HasBrand == 1 { // return // } // for i, d := range data { // if d != "" { // brand := u.BrandGet.CheckSensitiveWord(d) // qutil.Debug(d, brand) // if brand != "" { // table.TableResult.HasBrand = 1 // break // } // } // } //} func hasBrand(table *Table, data ...string) ([]string, bool) { defer qutil.Catch() //fmt.Println("table.TableResult.HasBrand---------", table.TableResult.HasBrand) brandArr := make([]string, len(data)) // if table.TableResult.HasBrand == 1 { // return brandArr, 1 // } allNull := true for i, d := range data { //if d != "" { brand := u.BrandGet.CheckSensitiveWord(d) if brand != "" { allNull = false } //fmt.Println("brand======", brand) brandArr[i] = brand if 
len(brand) > 0 { table.TableResult.HasBrand = 1 } //} } return brandArr, allNull } //过滤td值 func filterVal(val ...string) ([]string, bool) { defer qutil.Catch() n := 0 //记录被过滤的个数 for i, v := range val { afterFilter := tabletdclear.ReplaceAllString(v, "") afterFilter = NullVal.ReplaceAllString(afterFilter, "") if afterFilter == "" { n++ } val[i] = afterFilter } allempty := false if n == len(val) { //所有都被过滤掉 allempty = true } return val, allempty } //过滤itemname全是数字 func filterItem(itemval ...string) []string { defer qutil.Catch() result := []string{} for _, v := range itemval { afterFilter := numclear.ReplaceAllString(v, "") if afterFilter != "" { result = append(result, v) } else { result = append(result, afterFilter) } } return result } //处理价格 func dealPrice(key string, val ...string) []string { defer qutil.Catch() iswan := strings.Contains(key, "万") //表格title中带有万 result := []string{} for _, v := range val { //1.00万元 1元 tmparr := strings.Split(v, ".") tmparr[0] = moneyNum.ReplaceAllString(tmparr[0], "") if iswan { result = append(result, tmparr[0]+"0000") } else { if strings.Contains(v, "万") { //价格中带有万 result = append(result, tmparr[0]+"0000") } else { result = append(result, tmparr[0]) } } } return result } //处理number func dealNumber(val ...string) ([]string, []string) { defer qutil.Catch() unitnameArr := []string{} result := []string{} for _, v := range val { //1个 1.00个 n := numclear.FindString(v) unitname := numclear.ReplaceAllString(v, "") //匹配个数后的单位 unitnameArr = append(unitnameArr, unitname) //val[i] = strings.Split(n, ".")[0] result = append(result, strings.Split(n, ".")[0]) } return result, unitnameArr }