|
@@ -4,8 +4,6 @@ import (
|
|
|
"fmt"
|
|
|
"jy/clear"
|
|
|
u "jy/util"
|
|
|
-
|
|
|
- //"log"
|
|
|
qutil "qfw/util"
|
|
|
"regexp"
|
|
|
"strings"
|
|
@@ -32,11 +30,11 @@ var (
|
|
|
moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
|
|
|
//根据表格的内容判断是不是表头,如果含有金额则不是表头
|
|
|
MoneyReg = regexp.MustCompile("^[\\s ::0-9.万元()()人民币¥$]+$")
|
|
|
+
|
|
|
//判断分包时
|
|
|
moneyNum = regexp.MustCompile("[元整¥万]")
|
|
|
//对隐藏表格的判断
|
|
|
display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
|
|
|
-
|
|
|
//---------------
|
|
|
//求是分包的概率
|
|
|
//根据表格的标签对分包进行打分
|
|
@@ -45,8 +43,8 @@ var (
|
|
|
//在判断分包打分前过滤表格key
|
|
|
FilterKey_2 = regexp.MustCompile("招标|投标|项目")
|
|
|
//根据表格的key进行分包打分
|
|
|
- FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数])")
|
|
|
- FindKey_3 = regexp.MustCompile("(标段编号)")
|
|
|
+ FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数]|包组)")
|
|
|
+ FindKey_3 = regexp.MustCompile("(标段编号|标包)")
|
|
|
//对值进行分包判断
|
|
|
FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[ \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
|
|
|
FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
|
|
@@ -60,6 +58,7 @@ var (
|
|
|
|
|
|
//清理表格标签正则
|
|
|
ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
|
|
|
+
|
|
|
//查找表格标签正则
|
|
|
ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
|
|
|
|
|
@@ -72,16 +71,16 @@ var (
|
|
|
|
|
|
//对表格kv的处理
|
|
|
//对不能标准化的key做标识
|
|
|
- filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)")
|
|
|
+ filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)?")
|
|
|
//中标金额
|
|
|
//包含以下字眼做标准化处理
|
|
|
filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
|
|
|
//简单判断金额
|
|
|
filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
|
|
|
//预算金额
|
|
|
- filter_ysje_jd = regexp.MustCompile("预算")
|
|
|
+ filter_ysje_jd = regexp.MustCompile("(预算|预控价|项目概.|项目信息)")
|
|
|
//且排除以下字眼的key
|
|
|
- filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|要求$")
|
|
|
+ filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|电话|要求|需求数量|发布规模$|第[2二3三4四5五]|地址|询价保证金|行号")
|
|
|
//且值包含以下字眼
|
|
|
filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
|
|
|
|
|
@@ -93,9 +92,8 @@ var (
|
|
|
//简单判断
|
|
|
filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
|
|
|
//且不包含以下字眼
|
|
|
- filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金")
|
|
|
- //且值包含以下字眼
|
|
|
- filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
|
|
|
+ filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金") //且值包含以下字眼
|
|
|
+ filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
|
|
|
//且值包含以下字眼
|
|
|
filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
|
|
|
|
|
@@ -111,15 +109,16 @@ var (
|
|
|
ContactType = map[string]*regexp.Regexp{
|
|
|
"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
|
|
|
"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
|
|
|
- "中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应)))[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
|
|
|
+ "中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应))((成交))?)[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
|
|
|
+ "监督部门": regexp.MustCompile("投诉受理部门"),
|
|
|
}
|
|
|
ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
|
|
|
MultipleValueSplitReg = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
|
|
|
BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
|
|
|
FilterSerial = regexp.MustCompile(".+[、..::,]")
|
|
|
underline = regexp.MustCompile("_+$")
|
|
|
- iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果)")
|
|
|
- nswinnertabletag = regexp.MustCompile("[评得分估]+|标的|班子成员")
|
|
|
+ iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果|磋商情况)")
|
|
|
+ nswinnertabletag = regexp.MustCompile("评得分估|标的|班子成员|人员")
|
|
|
jsonReg = regexp.MustCompile(`\{.+:[^}]*\} `) // \{".*\":\".+\"}
|
|
|
regHz = regexp.MustCompile("[\u4e00-\u9fa5]")
|
|
|
winnerOrderAndBidResult = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
|
|
@@ -176,7 +175,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
|
|
|
if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
|
|
|
kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
|
|
|
} else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" {
|
|
|
- kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 100})
|
|
|
+ kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 150})
|
|
|
} else {
|
|
|
kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
|
|
|
}
|
|
@@ -189,18 +188,15 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
|
|
|
if tabletag == "" {
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
- kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
|
|
|
+ kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100, IsInvalid: true})
|
|
|
} else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
|
|
|
filter_zbdw_v.MatchString(v1) {
|
|
|
- kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: k, Value: v1, Weight: -100})
|
|
|
+ kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: k, Value: v1, Weight: -100, IsInvalid: true})
|
|
|
if tabletag == "" {
|
|
|
returntag = "中标情况"
|
|
|
}
|
|
|
- } else {
|
|
|
+ } else if !filter_zbje_jd.MatchString(tabletag) && !filter_zbje_jd.MatchString(k) && utf8.RuneCountInString(v1) < 13 {
|
|
|
//对上一步没有取到标准化key的进一步处理
|
|
|
- if tabletag == "" {
|
|
|
-
|
|
|
- }
|
|
|
if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
|
|
|
//u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
|
|
|
if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
|
|
@@ -210,17 +206,16 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
|
|
|
kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
|
|
|
}
|
|
|
|
|
|
- } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
|
|
|
- k1 = append(k1, "中标单位")
|
|
|
- weight = append(weight, -100)
|
|
|
- b = true
|
|
|
- }*/
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
|
|
|
// Patterns used by (*Table).KVFilter while walking table.SortKV.Keys.
// They are compiled once at package scope instead of once per key.
var (
	// glRex matches keys that mention award, candidate or ranking wording.
	// When a key matches, KVFilter appends "成交结果," to the table description.
	glRex = regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序|中标候选人|名单及其排序|排序)")

	// djReg matches keys that start with "单价"; KVFilter skips such keys.
	djReg = regexp.MustCompile("^单价")
)
|
|
|
+
|
|
|
//对解析后的表格的kv进行过滤
|
|
|
func (table *Table) KVFilter(isSite bool, codeSite string) {
|
|
|
//1.标准化值查找
|
|
@@ -235,37 +230,22 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
|
|
|
//遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理
|
|
|
for _, k := range table.SortKV.Keys {
|
|
|
//表格描述处理,对成交结果的处理
|
|
|
- if regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序)").MatchString(k) {
|
|
|
+ if glRex.MatchString(k) {
|
|
|
table.Desc += "成交结果,"
|
|
|
}
|
|
|
- if regexp.MustCompile("^单价").MatchString(k) {
|
|
|
+ if djReg.MatchString(k) {
|
|
|
continue
|
|
|
}
|
|
|
v := table.SortKV.Map[k]
|
|
|
if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
|
|
|
k = pkgFilter.ReplaceAllString(k, "")
|
|
|
k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
|
|
|
- if k == "2、建设规模" {
|
|
|
- k = "预算"
|
|
|
- }
|
|
|
- if k == `中标价(万元)\费率(%)` {
|
|
|
- k = "中标价(万元)"
|
|
|
- }
|
|
|
kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite) //对key标准化处理,没有找到会走中标
|
|
|
- //qutil.Debug(k, v, k1, w1, v1, tag, b)
|
|
|
if tag != "" && table.Tag == "" {
|
|
|
table.Tag = tag
|
|
|
}
|
|
|
MergeKvTags(table.StandKV, kvTags)
|
|
|
} else {
|
|
|
- //u.Debug(k, v, "---------")
|
|
|
- if strings.Contains(k, "总价") {
|
|
|
- if vvvv, ok := v.([]string); ok && len(vvvv) > 0 {
|
|
|
- as.RemoveKey("报价")
|
|
|
- as.AddKey(k, vvvv[len(vvvv)-1])
|
|
|
- continue
|
|
|
- }
|
|
|
- }
|
|
|
as.AddKey(k, v)
|
|
|
}
|
|
|
}
|
|
@@ -372,10 +352,13 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
|
|
|
if !res {
|
|
|
kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""), isSite, codeSite)
|
|
|
if kt.Len() > 0 {
|
|
|
- kv = kt[0].Value
|
|
|
+ if kt[0].Value == "单品报价" && winnertag {
|
|
|
+ kv = "中标金额"
|
|
|
+ } else {
|
|
|
+ kv = kt[0].Value
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- //qutil.Debug(k, res, repl, kv, "--", vs)
|
|
|
if !res && kv == "" { //key未验证出,验证数组的val值
|
|
|
checkKey[kn+kn1] = true
|
|
|
if winnertag { //如果是中标信息 在根据val数组信息解析候选人
|
|
@@ -465,7 +448,6 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
|
|
|
//if hadSort { //有排序,再添加entname和price
|
|
|
if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" {
|
|
|
smap_v["entname"] = tmpEntname[n]
|
|
|
-
|
|
|
if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" {
|
|
|
smap_v["price"] = tmpPrice[n]
|
|
|
}
|
|
@@ -509,12 +491,6 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- //table.StandKV[kk] = append(table.StandKV[kk], vv...)
|
|
|
- // else if k2 == "中标金额" {
|
|
|
- // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
|
|
|
- // table.StandKV[k2] = v1
|
|
|
- // }
|
|
|
- // }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -825,6 +801,10 @@ func (table *Table) Analy(contactFormat *u.ContactFormat, isSite bool, codeSite
|
|
|
if trs.Size() == 0 {
|
|
|
trs = table.Goquery.ChildrenFiltered("tr")
|
|
|
}
|
|
|
+ ztb := table.Goquery.Find("table").Size()
|
|
|
+ if ztb >= 10 {
|
|
|
+ return []*Table{}
|
|
|
+ }
|
|
|
//遍历节点,初始化table 结构
|
|
|
table.createTabe(trs, isSite, codeSite)
|
|
|
if len(table.TRs) == 0 {
|
|
@@ -1455,23 +1435,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool, codeSite string) {
|
|
|
} else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 {
|
|
|
table.Tag = tr.TDs[0].Val
|
|
|
}
|
|
|
-
|
|
|
- // subVal := tdval_reg.FindAllStringSubmatch(tr.TDs[0].Val, -1)
|
|
|
- // //u.Debug(tr.TDs[0].Val, subVal)
|
|
|
- // if len(subVal) > 0 {
|
|
|
- // for _, subv1 := range subVal {
|
|
|
- // if len(subv1) == 3 {
|
|
|
- // table.SortKV.AddKey(subv1[1], subv1[2])
|
|
|
- // }
|
|
|
- // }
|
|
|
- // } else if k == 0 && len(tr.TDs[0].Val) > 11 {
|
|
|
- // table.Tag = tr.TDs[0].Val
|
|
|
- // }
|
|
|
-
|
|
|
}
|
|
|
- // for _, td := range tr.TDs {
|
|
|
- // u.Debug(td.BH, td.Val, "----")
|
|
|
- // }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -1489,6 +1453,7 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
|
|
|
bodirect := 0
|
|
|
//控制中标人排序数值
|
|
|
sort := 1
|
|
|
+
|
|
|
//开始抽取
|
|
|
for _, tr := range table.TRs {
|
|
|
bcon = trSingleColumn(tr, bcon, table) //tr单列,是否丢弃内容
|
|
@@ -1509,16 +1474,6 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
|
|
|
}
|
|
|
}
|
|
|
for _, td := range tr.TDs {
|
|
|
- /**
|
|
|
- rt := table.StartAndEndRation[fmtkey("r", td.StartCol, td.EndCol)]
|
|
|
- if rt != nil {
|
|
|
- r, t := rt.GetTDRation(td)
|
|
|
- u.Debug(td.BH, td.Val, r, t)
|
|
|
- }
|
|
|
- **/
|
|
|
- // if td.Val == "电视" || td.Val == "电话机" || td.Val == "传真机" || td.Val == "音响" {
|
|
|
- //qutil.Debug("----td.Valtype", td.Valtype, "td.BH:", td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect)
|
|
|
- // }
|
|
|
if !td.BH && td.KVDirect < 3 {
|
|
|
if !table.FindTdVal(td, direct, vdirect) { //table.FindTdVal()存储了table.SortKV
|
|
|
if !table.FindTdVal(td, vdirect, direct) {
|
|
@@ -1541,6 +1496,7 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
|
|
|
//fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect)
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
}
|
|
|
//qutil.Debug("FindKV", table.SortKV.Map)
|
|
|
} else if len(table.TRs) > 0 { //没有表头的表格处理,默认纵向吧
|
|
@@ -1779,19 +1735,41 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
|
return
|
|
|
}
|
|
|
near := table.FindNear(td, direct)
|
|
|
- // if near != nil {
|
|
|
- // fmt.Println("near----", near.Val, td.Val)
|
|
|
- // }
|
|
|
- // qutil.Debug(near != nil)
|
|
|
- // qutil.Debug(near.BH)
|
|
|
- // qutil.Debug(near.KeyDirect == vdirect, near.KeyDirect == 0)
|
|
|
- // qutil.Debug(near.KVDirect == direct, near.KVDirect == 0)
|
|
|
- // qutil.Debug(near.KVDirect < 3)
|
|
|
if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
|
|
|
near.KVDirect = direct
|
|
|
near.KeyDirect = vdirect
|
|
|
td.KVDirect = direct
|
|
|
key := repSpace.ReplaceAllString(near.Val, "")
|
|
|
+ if key == "名称" && near.StartCol == 0 && near.Rowspan > 0 {
|
|
|
+ for _, vn := range table.TRs[near.Rowspan-1].TDs {
|
|
|
+ if strings.Contains(vn.Val, "代理") {
|
|
|
+ key = "代理机构"
|
|
|
+ break
|
|
|
+ } else if strings.Contains(vn.Val, "招标") {
|
|
|
+ key = "采购单位"
|
|
|
+ break
|
|
|
+ } else if strings.Contains(vn.Val, "中标") {
|
|
|
+ key = "中标单位"
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else if strings.Contains(key, "中标候选人") && strings.Contains(td.Val, "公司") {
|
|
|
+ key = "中标单位"
|
|
|
+ } else if key == "单位名称" {
|
|
|
+ tmpnewnear := table.FindNear(near, 2)
|
|
|
+ if tmpnewnear != nil {
|
|
|
+ if tmpnewnear.MustBH || tmpnewnear.BH {
|
|
|
+ key = tmpnewnear.Val + near.Val
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ tmpnewnear = table.FindNear(near, 1)
|
|
|
+ if tmpnewnear != nil {
|
|
|
+ if tmpnewnear.MustBH || tmpnewnear.BH {
|
|
|
+ key = tmpnewnear.Val + near.Val
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
if near.Val == "" {
|
|
|
key = fmtkey("k", near.TR.RowPos, near.ColPos)
|
|
|
}
|
|
@@ -1802,11 +1780,19 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
|
curpos := table.SortKV.Index[key]
|
|
|
thistr := table.kTD[curpos]
|
|
|
if thistr != near {
|
|
|
- near.Val += "_"
|
|
|
- for table.SortKV.Map[near.Val] != nil {
|
|
|
- near.Val += "_"
|
|
|
+ if strings.TrimSpace(near.Val) == "名称" && near.TR != nil && len(near.TR.TDs) > 0 && near.ColPos-1 >= 0 {
|
|
|
+ rv := near.TR.TDs[near.ColPos-1].Val
|
|
|
+ if near.ColPos > 0 && (strings.Contains(rv, "招标") || strings.Contains(rv, "代理") || strings.Contains(rv, "采购") || strings.Contains(rv, "中标")) {
|
|
|
+ near = near.TR.TDs[near.ColPos-1]
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ //near.Val += "_"
|
|
|
+ //for table.SortKV.Map[near.Val] != nil {
|
|
|
+ // near.Val += "_"
|
|
|
+ //}
|
|
|
+ //key = near.Val //之前这个地方没有重置,导致把之前结果覆盖了
|
|
|
+ bthiskey = true
|
|
|
}
|
|
|
- key = near.Val //之前这个地方没有重置,导致把之前结果覆盖了
|
|
|
} else {
|
|
|
bthiskey = true
|
|
|
}
|
|
@@ -1862,6 +1848,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
|
}
|
|
|
if bvalfind && varrpos > -1 && len(vals) > varrpos {
|
|
|
vals = append(vals, td.Val) // 累加
|
|
|
+ val = vals
|
|
|
//vals[varrpos] = td.Val // += "__" + td.Val
|
|
|
} else {
|
|
|
//添加时候去除空值和nil
|
|
@@ -1911,7 +1898,6 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
|
table.SortKV.AddKey(key, val)
|
|
|
//if table.SortKV.Map[key] != nil {
|
|
|
pos := table.SortKV.Index[key]
|
|
|
- //qutil.Debug("=========", "key:", key, "val:", val, "pos:", pos)
|
|
|
if barr {
|
|
|
mval := table.kvscope[pos]
|
|
|
if mval != nil {
|
|
@@ -1932,7 +1918,6 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
|
|
|
}
|
|
|
table.kTD[pos] = near
|
|
|
}
|
|
|
- //}
|
|
|
}
|
|
|
b = true
|
|
|
}
|
|
@@ -2027,6 +2012,7 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
|
|
|
oldIndex := []string{} //存放包的原始值
|
|
|
brepeat := map[string]bool{}
|
|
|
for k, v := range index {
|
|
|
+
|
|
|
v = u.PackageNumberConvert(v)
|
|
|
if !brepeat[v] {
|
|
|
brepeat[v] = true
|
|
@@ -2052,6 +2038,7 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
|
|
|
//多包解析
|
|
|
if b {
|
|
|
tn.BPackage = true
|
|
|
+ //pnum := len(index)
|
|
|
//根据数组index分包长度添加table.BlockPackage子包数组
|
|
|
for nk, v := range index {
|
|
|
if tn.BlockPackage.Map[v] == nil {
|
|
@@ -2284,6 +2271,7 @@ func (tn *Table) isGoonNext(isSite bool, codeSite string) {
|
|
|
} else {
|
|
|
str += fmt.Sprintf("%s:%s\n", nk, v)
|
|
|
}
|
|
|
+
|
|
|
if excludeKey2.MatchString(str) {
|
|
|
continue
|
|
|
}
|
|
@@ -2682,6 +2670,7 @@ func isHasOnePkgAndNoKv(v1 string) (bool, string) {
|
|
|
|
|
|
//替换分包中混淆的词
|
|
|
func replPkgConfusion(v1 string) string {
|
|
|
+
|
|
|
v1 = PreReg.ReplaceAllString(v1, "")
|
|
|
v1 = PreReg1.ReplaceAllString(v1, "")
|
|
|
v1 = PreCon.ReplaceAllString(v1, "")
|
|
@@ -2731,6 +2720,7 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, co
|
|
|
for _, this := range thisTdKvs {
|
|
|
if str := ContactInfoVagueReg.FindString(this.Key); str != "" {
|
|
|
td.SortKV.AddKey(tdType+str, this.Value)
|
|
|
+
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -3920,11 +3910,19 @@ func hasBrand(table *Table, data ...string) ([]string, bool) {
|
|
|
return brandArr, allNull
|
|
|
}
|
|
|
|
|
|
// clearnn matches an optional leading number (digits and dots, capture group 1),
// optional whitespace or newlines, and then a parenthesised number written with
// either ASCII or full-width parentheses. filterVal replaces a cell value with
// capture group 1 when that group is non-empty.
var clearnn = regexp.MustCompile("([\\d.]*)[\\n\\s]*[\\((][\\d.]+[)\\)]")
|
|
|
+
|
|
|
//过滤td值
|
|
|
func filterVal(val ...string) ([]string, bool) {
|
|
|
defer qutil.Catch()
|
|
|
n := 0 //记录被过滤的个数
|
|
|
for i, v := range val {
|
|
|
+ if len(clearnn.FindStringSubmatch(v)) > 0 {
|
|
|
+ tmpv := clearnn.FindStringSubmatch(v)[1]
|
|
|
+ if tmpv != "" {
|
|
|
+ v = tmpv
|
|
|
+ }
|
|
|
+ }
|
|
|
afterFilter := tabletdclear.ReplaceAllString(v, "")
|
|
|
afterFilter = NullVal.ReplaceAllString(afterFilter, "")
|
|
|
if afterFilter == "" {
|