// Package pretreated holds the text/table pre-processing step of the extraction
// pipeline: the regular expressions used to classify table keys and values, the
// AnalyStart entry point, and helpers that normalise table SortKV data.
package pretreated

import (
	"fmt"
	"regexp"
	"strings"

	"github.com/shopspring/decimal"
	"jy/clear"
	u "jy/util"
	qu "qfw/util"
)

/*
 * Package-level variables, mostly the regular expressions used for the
 * various classification decisions.
 */
var (
	// Date units that may appear in a key.
	dateReg *regexp.Regexp = regexp.MustCompile(`[年|月|日|天]`)
	// Strips leading numbering (digits / Chinese numerals) from item names.
	numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
	num1 = regexp.MustCompile("(\\d)")
	// Strips unwanted content from table titles.
	tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、.,.。_/((人民币万元件个公斤户))]")
	tabletitleclear2 = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕]*")
	// Strips whitespace, numbering and bracketed text contained in table keys.
	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
	// Short phrases matched by the previous kv stage that must be cleaned.
	tablekeyclear2 = regexp.MustCompile("(供应商信用融资|供应商公章|主要标的名称|中标人推荐理由|成交供应商推荐理由)")
	// Strips symbols from table td values.
	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
	// Decides whether a key denotes money (used for the "万元" handling).
	moneyReg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
	// Special text that is treated as a table header.
	specHeadReg = regexp.MustCompile("(成交供应商|中选人)")
	// Keys that must not be cleaned, e.g. discount / rate.
	noClearKeyReg = regexp.MustCompile(`[((](费率|年|月|日|天|日历天|历天)[))]`)
	// Decides from the cell content whether it is a header: content made only
	// of money characters is not a header.
	// NOTE(review): the whitespace characters after \\s are reproduced as seen;
	// confirm against version control that no special space was lost.
	MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
	// Special values that can never be a header.
	noStartHeadReg = regexp.MustCompile("^(\\d标段)$")
	GSReg = regexp.MustCompile(".*公司.*")
	// Used when judging packages.
	moneyNum = regexp.MustCompile("[元整¥万]")
	// Detects hidden tables (display:none).
	display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
	// ---------------
	// Probability that a table describes multiple packages.
	// Scores the table tag for package detection.
	TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)")
	TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)")
	// Filters table keys before package scoring.
	FilterKey_2 = regexp.MustCompile("招标|投标|项目")
	// Scores table keys for package detection.
	FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数]|包[组件])")
	FindKey_3 = regexp.MustCompile("(标段编号|标包|包件|包号)")
	// Package detection on values.
	// NOTE(review): the character class before \u3000 contained a line break in
	// the reviewed text; it is reproduced here as two spaces — confirm that no
	// other whitespace character (e.g. a tab) was intended.
	FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|标的[一二三四五六七八九十1-9A-Za-z]+|((子|合同|分|施工|监理)?(包|包件|标)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
	// Keys excluded before package detection (package item number?).
	excludeKey = regexp.MustCompile("(标识|数量|分包个数|标段代码|涉及包号|分包数量|项目标号|规格|型号|招标范围|业绩|废标|标段选择要求)|(^编号$)|([^包段标]编号)") // 编号|划分
	excludeKey2 = regexp.MustCompile("包/[0-9]{0,4}[箱纸张]")
	excludeKey3 = regexp.MustCompile("(分包个数|每包[0-9]*元|标线|国标|享受一包服务)")
	// -------------
	cut = u.NewCut()
	// Strips table tags.
	ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
	// Finds the table tag (label text preceding a table).
	ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
	// Probability threshold for deciding that a row is a header.
	checkval = float32(0.6)
	//tdval_reg = regexp.MustCompile(`([\p{Han}][\p{Han}\s、()\\(\\)]{1,9})[::]([^::\\n。]{5,60})(?:[;;,,.。\\n\\t\\s])?`)
	// Whitespace replacement.
	repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+")
	// Table kv handling.
	// Marks keys that cannot be standardised.
	filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)?")
	// Bid amount.
	// Keys containing these words are standardised.
	filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
	// Simple amount check.
	filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
	// Budget amount.
	filter_ysje_jd = regexp.MustCompile("(预算|预控价|项目概.|项目信息)")
	// ...and keys containing these words are excluded.
	filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|电话|要求|需求数量|发布规模$|第[2二3三4四5五]|地址|询价保证金|行号")
	// ...and the value must contain these characters.
	filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
	// Winning bidder handling.
	// Keys containing these words are standardised.
	filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$")
	// Recognises information related to the winning bidder.
	filter_zbdw_info = regexp.MustCompile("(中标|成交|中选|供(货|应))[^候选]{0,}")
	// Simple check.
	filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
	// ...and must not contain these words.
	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金")
	// ...and the value must contain these words.
	filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
	// ...and the value must contain these words.
	filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
	//Tg = map[string]interface{}{}
	// Some tables have an empty header and ranked values; those are mapped to
	// NullTxtBid.
	NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
	NullTxtBid = "成交供应商排名"
	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
	MhSpilt = regexp.MustCompile("[::]")
	// Lowers the weight of the colon
	// for specific fields whose value is a time.
	UnTimeSpiltKey = regexp.MustCompile("(招标文件获取截止时间|招标文件获取开始时间|报名截止时间|报名开始时间|投标文件递交开始时间|开工日期|竣工日期)")
	UnTimeSpiltValue = regexp.MustCompile("\\d{1,2}[::]\\d{1,2}")
	// Recognises buyer contact / phone and agency contact / phone keys.
	// The "名称" (name) alternative is known to be imprecise.
	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//及]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
	ContactInfoExcluReg = regexp.MustCompile("[商]名称$")
	ContactInfoMustReg = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
	ContactType = map[string]*regexp.Regexp{
		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|招标|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
		"中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应))((成交))?)[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
		"监督部门": regexp.MustCompile("投诉受理部门"),
	}
	ContactHeadReg = regexp.MustCompile("^(招标人|采购人)$")
	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
	MultipleValueSplitReg = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
	BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
	FilterSerial = regexp.MustCompile(".+[、..::,]")
	underline = regexp.MustCompile("_+$")
	iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果|磋商情况)")
	nswinnertabletag = regexp.MustCompile("评得分估|标的信息|班子成员")
	// NOTE(review): the trailing space inside the raw string is reproduced as
	// seen; confirm it is intentional.
	jsonReg = regexp.MustCompile(`\{.+:[^}]*\} `) // \{".*\":\".+\"}
	regHz = regexp.MustCompile("[\u4e00-\u9fa5]")
	winnerOrderAndBidResult = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
	WinnerOrderStr = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|[大中小]+学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
	DoubtReg = regexp.MustCompile("(我中心|有(疑问|质疑|异议|意见)|(书面)?提出|不再受理|投诉|质疑|书面形式|监督|公示期(限)?)")
	// Added: package / table / sortKV.
	budgetSortKVReg = regexp.MustCompile("(预算)")
	bidamountSortKVReg = regexp.MustCompile("(成交结果[((]万元[))]|成交金额|履约金额|中[标选]金额)")
	winnerSortKVReg = regexp.MustCompile("(投标人[((]供应商[))]名称)|供应商名称|中标候选人|中[标选]人|中[标选]单位")
)

var fblbReg *regexp.Regexp = regexp.MustCompile("(废标|流标|否决依据|未中标情况说明|负责人资格|负责人业绩|相关业绩|类似项目情况表|技术评分明细表|否决投标人投标的原因|开标记录|附件[:0-9]|越南盾|技术分[^公]|填报项目业绩|未通过.*原因)")

// Example input: 59.992664,33.495715,20.001306
var clearnum *regexp.Regexp = regexp.MustCompile("(([0-9.]{1,6}[,,]+){4,}|(\\d{6}[,,]\\d{2}.){2,})")
var glRex *regexp.Regexp = regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序|中标候选人|名单及其排序|排序)")
var djReg *regexp.Regexp = regexp.MustCompile("^单价")
var hxrRex *regexp.Regexp = regexp.MustCompile("((成交|中标|中选)?候选人[弟|第][1-5一二三四五]名|[弟|第][1-5一二三四五][名]?(成交|中标|中选)?候选人)")
var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标报价|评审价|投标总价|含税总价[((]元[))]|总金额)")
var winNoMoneyReg *regexp.Regexp = regexp.MustCompile("(得分|时间|序号|分)")
var cleardwReg *regexp.Regexp = regexp.MustCompile("[((]{1}\\d*[人元件个公斤户]/[人元件个公斤户][))]")
var zbhxrReg *regexp.Regexp = regexp.MustCompile("(中标候选人|投标单位名称|候选人姓名|候选人名称)")
var zbhxrSortReg_1 *regexp.Regexp = regexp.MustCompile("^[第|弟][12345一二三四五]名$")
var zbhxrSortReg_2 *regexp.Regexp = regexp.MustCompile("^([12345一二三四五])$")
var zbhxrSortReg_3 *regexp.Regexp = regexp.MustCompile("^([12345一二三四五])")
var zbhxrSortNameReg *regexp.Regexp = regexp.MustCompile("(中标候选人[第|弟][123一二三]名)|[第|弟][123一二三]中标候选人")
var zbhxrSecondReg *regexp.Regexp = regexp.MustCompile("(中标候选人[第|弟][2二]名)|[第|弟][2二]中标候选人")
var clearnn *regexp.Regexp = regexp.MustCompile("([\\d.]*)[\\n\\s]*[\\((][\\d.]+[)\\)]")

// Keywords that indicate a package section.
var pkgValidReg1 *regexp.Regexp = regexp.MustCompile("(中标单位|中标金额)[::]")
var tableClearTextReg *regexp.Regexp = regexp.MustCompile("业绩[::].*")

// dealWithSpecStructToSpiderCode handles a spider-specific text structure: it
// rewrites text with formattext50 into "<a>&&<b>", parses both halves as
// floats and, when both are positive, returns a synthetic
// "\n合同金额:<a*b>\n" line. It returns "" in every other case.
func dealWithSpecStructToSpiderCode(text string) string {
	text = formattext50.ReplaceAllString(text, "$1&&$2")
	arr := strings.Split(text, "&&")
	if len(arr) != 2 {
		return ""
	}
	one := qu.Float64All(arr[0])
	two := qu.Float64All(arr[1])
	if one > 0 && two > 0 {
		return fmt.Sprintf("\n合同金额:%f\n", one*two)
	}
	return ""
}

// thanWinnerOrderEffective compares a previously extracted candidate list with
// a newly extracted one and reports whether the new one should be used.
//
// Only the first entry of each list is inspected:
//   - an empty new list is never used; an empty old list is always replaced;
//   - if the old list does not start at rank 1 the new list wins;
//   - if the new list does not start at rank 1 the old list is kept;
//   - otherwise the old list is kept only when it has a price and the new one
//     does not; in all remaining cases the new list wins.
func thanWinnerOrderEffective(old_order []map[string]interface{}, new_order []map[string]interface{}) bool {
	if len(new_order) == 0 {
		return false
	}
	if len(old_order) == 0 {
		return true
	}
	old_info, new_info := old_order[0], new_order[0]
	if qu.IntAll(old_info["sort"]) > 1 {
		return true
	}
	// Rank comparison.
	if qu.IntAll(new_info["sort"]) > 1 {
		return false
	}
	// Price comparison: a price counts when it is a positive float64 or a
	// non-empty string.
	hasPrice := func(info map[string]interface{}) bool {
		switch p := info["price"].(type) {
		case float64:
			return p > 0.0
		case string:
			return p != ""
		}
		return false
	}
	if hasPrice(old_info) && !hasPrice(new_info) {
		return false
	}
	// Both usable (or only the new one is) - prefer the new value.
	return true
}

// onlyExistsWinEntName reports whether winorder has at most three entries and
// none of them carries a positive float64 "price", i.e. only company names
// were recognised. Lists longer than three entries always yield false.
func onlyExistsWinEntName(winorder []map[string]interface{}) bool {
	if len(winorder) > 3 {
		return false
	}
	for _, v := range winorder {
		if vf, ok := v["price"].(float64); ok && vf > 0.0 {
			return false
		}
	}
	return true
}

// thanExistsNewWinOrder reports whether new_winorder can replace winorder:
// both lists must have the same length and every position must have the same
// "entname" with a positive float64 "price" in the new list.
func thanExistsNewWinOrder(winorder []map[string]interface{}, new_winorder []map[string]interface{}) bool {
	if len(winorder) != len(new_winorder) {
		return false
	}
	isok := 0
	for k, v := range winorder {
		if qu.ObjToString(v["entname"]) != qu.ObjToString(new_winorder[k]["entname"]) {
			continue
		}
		if new_price, ok := new_winorder[k]["price"].(float64); ok && new_price > 0.0 {
			isok++
		}
	}
	return isok == len(winorder)
}

// dealWithMultiSuppliersText rebuilds a multi-supplier text section as package
// text. The section is the text between the first MultiStartReg match and the
// first MultiEndReg match. When SupplyInfoReg1 (or, failing that,
// SupplyInfoReg2) yields more than one supplier row, every occurrence of the
// section in con is replaced by the rebuilt text and (true, newCon) is
// returned; otherwise (false, "").
func dealWithMultiSuppliersText(con string) (bool, string) {
	startIndex := MultiStartReg.FindAllStringIndex(con, 1)
	endIndex := MultiEndReg.FindAllStringIndex(con, 1)
	if len(startIndex) != 1 || len(endIndex) != 1 {
		return false, ""
	}
	if len(startIndex[0]) <= 1 || len(endIndex[0]) <= 1 {
		return false, ""
	}
	// The section runs from the end of the start marker to the beginning of
	// the end marker.
	t_start, t_end := startIndex[0][1], endIndex[0][0]
	if t_end <= t_start {
		return false, ""
	}
	text := con[t_start:t_end]
	arr1 := SupplyInfoReg1.FindAllStringSubmatch(text, -1)
	if text1 := supplyInfoMethod(arr1, 2, 4); text1 != "" {
		return true, strings.ReplaceAll(con, text, text1)
	}
	arr2 := SupplyInfoReg2.FindAllStringSubmatch(text, -1)
	if text2 := supplyInfoMethod(arr2, 2, 4); text2 != "" {
		return true, strings.ReplaceAll(con, text, text2)
	}
	return false, ""
}

// supplyInfoMethod turns regexp submatch rows into package text of the form
//
//	包<n>
//	中标单位:<row[w_index]>
//	中标金额:<row[b_index]>
//
// It returns "" when there are fewer than two rows, or when w_index / b_index
// is not a valid index into a row (previously this case panicked).
func supplyInfoMethod(arr [][]string, w_index int, b_index int) string {
	if len(arr) <= 1 {
		return ""
	}
	var sb strings.Builder
	for k, v := range arr {
		if w_index < 0 || b_index < 0 || w_index >= len(v) || b_index >= len(v) {
			return ""
		}
		sb.WriteString(fmt.Sprintf("包%d", k+1))
		sb.WriteString("\n中标单位:")
		sb.WriteString(v[w_index])
		sb.WriteString("\n中标金额:")
		sb.WriteString(v[b_index])
		sb.WriteString("\n")
	}
	return sb.String()
}

// AnalyStart is the analysis entry point. It normalises job.Content with a
// fixed sequence of regexp rewrites, stores the result back on the job, splits
// the content into blocks and fills job.Block, job.BlockPackage,
// job.Winnerorder and related flags.
//
// The order of the rewrite steps is significant: each step operates on the
// output of the previous one.
func AnalyStart(job *u.Job, isSite bool, codeSite string) {
	con := job.Content
	// Repair tables across the whole text.
	con = RepairCon(con)
	// Normalise the body text.
	//con = preConReg1.ReplaceAllString(con, "${1}${2}")
	hisReg1_str := hisReg1.FindString(con)
	if hisReg1_str != "" && !strings.Contains(hisReg1_str, "中标候选人得分") {
		con = hisReg1.ReplaceAllString(con, "${4}")
	}
	hisReg2_str := hisReg2.FindString(con)
	if hisReg2_str != "" && !strings.Contains(hisReg2_str, "中标候选人得分") {
		con = hisReg2.ReplaceAllString(con, "${6}")
	}
	con = formattext.ReplaceAllString(con, "${1}:${2}")
	con = formattext2.ReplaceAllString(con, "${1}")
	con = formattext3.ReplaceAllString(con, "")
	con = formattext4.ReplaceAllString(con, "\n${1}:${2}\n")
	// Special formats that disturb package / candidate extraction - replace.
	con = formattext5.ReplaceAllString(con, "中标金额:${2}\n")
	con = formattext6.ReplaceAllString(con, "$1$2")
	con = formattext7.ReplaceAllString(con, "$1$2")
	// Rewrite special structures.
	con = formattext10.ReplaceAllString(con, "\n分包$3\n中标单位:$5 中标金额:$6\n")
	con = formattext11.ReplaceAllString(con, "${1}\n${2}\n预算金额:${4}\n${5}\n预算金额:${7}\n${8}\n")
	con = formattext12.ReplaceAllString(con, "\n${1}:${3}万元\n")
	con = formattext13.ReplaceAllString(con, "\n包一\n中标单位:${1}\n中标金额:${3}\n"+"包二\n中标单位:${2}\n中标金额:${4}\n")
	con = formattext14.ReplaceAllString(con, "\n包一\n中标单位:${1}\n中标金额:${2}\n"+"包二\n中标单位:${3}\n中标金额:${4}\n")
	// Multi-supplier text structure - rebuild.
	if m_b, m_c := dealWithMultiSuppliersText(con); m_b {
		con = m_c
	}
	// Project-experience descriptions disturb extraction.
	con = formattext20.ReplaceAllString(con, "\n")
	con = formattext21.ReplaceAllString(con, "")
	// Specific spider - special structure - computed extraction.
	if codeSite == "a_zgzfcgw_zfcghtgg_new" {
		str := formattext50.FindString(con)
		if str != "" {
			new_str := dealWithSpecStructToSpiderCode(str)
			if new_str != "" {
				con = new_str + con
			}
		}
	}
	con = formatText(con, "all")
	// ContentClean is derived from the original content, before job.Content
	// is overwritten with the normalised text.
	job.ContentClean = HtmlToText(job.Content)
	job.Content = con
	// Compute the table ratio; returns the table list and the ratio.
	tabs, _ := ComputeConRatio(con, 1)
	/*if len(tabs) > 0 {
		newcon, newtabs, newration := FindBigText(con, ration, tabs)
		if newcon != "" {
			con = newcon
			con = formatText(con, "all")
			tabs = newtabs
			ration = newration
		}
	}*/
	job.BlockPackage = map[string]*u.BlockPackage{}
	// Split into blocks and process the kv of each block.
	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock, isSite, codeSite)
	if len(blockArrays) > 0 { // blocks were found
		// Look for packages inside the blocks (text).
		if !job.IsFile {
			job.BlockPackage = FindPackageFromBlocks(&blockArrays, isSite, codeSite) // packages from blocks
		}
		for _, bl := range blockArrays {
			if len([]rune(bl.Text)) > 80 {
				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock, isSite, codeSite)
				for _, bl_bl := range bl.Block {
					processTableInBlock(bl_bl, job, isSite, codeSite)
				}
			}
			FindProjectCode(bl.Text, job) // match the project code
			// Package detection from tables.
			isUnRulesTab := processTableInBlock(bl, job, isSite, codeSite) // process tables
			if isUnRulesTab { // irregular table?
				job.IsUnRulesTab = isUnRulesTab
			}
			// Filter project-experience content from the block text.
			bl.Text = tableClearTextReg.ReplaceAllString(bl.Text, "")
			// No candidates parsed from tables (or implausibly many): parse
			// them from the block text instead.
			if len(job.Winnerorder) == 0 || len(job.Winnerorder) > 8 { // plain-text matching
				tmp_text := HtmlToText(bl.Text)
				bl.Winnerorder = winnerOrderEntity.Find(tmp_text, true, 1, isSite, codeSite)
				if thanWinnerOrderEffective(job.Winnerorder, bl.Winnerorder) {
					job.Winnerorder = bl.Winnerorder
				}
			}
			// No package yet and the job is an attachment: use formatted text.
			if len(job.BlockPackage) == 0 && job.IsFile {
				tmp_text := HtmlToText(bl.Text)
				job.BlockPackage = FindPackageFromText(job.Title, tmp_text, isSite, codeSite)
			}
			job.Block = append(job.Block, bl)
		}
	} else { // no blocks: create a single block
		//log.Println(con)
		bl := &u.Block{}
		newCon := con
		//log.Println(con)
		if len(tabs) > 0 { // table parsing logic
			job.HasTable = 1 // flag: the text contains a table
			newCon = TextAfterRemoveTable(con)
			//log.Println(newCon)
			if newCon != "" {
				job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
			}
			for i := 0; i < len(tabs); i++ {
				blockTag := ""
				if len(tabs[i].Nodes) > 0 {
					if tabs[i].Nodes[0].PrevSibling != nil {
						blockTag = tabs[i].Nodes[0].PrevSibling.Data
					}
				}
				// blockTag - block label.
				// Process the table; returns the aggregated table result.
				tabres := AnalyTableV2(tabs[i], job.Category, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite)
				// NOTE(review): this overwrites the flag on every table, so
				// the last table wins; the block branch above only ever sets
				// it to true. Confirm which behaviour is intended.
				job.IsUnRulesTab = tabres.isUnRulesTab
				processTableResult(tabres, bl, job, isSite, codeSite)
			}
		} else {
			// Look for packages in the body text.
			job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
		}
		bl.Text = HtmlToText(con)
		FindProjectCode(bl.Text, job) // match the project code, then clean useless text
		if blTextReg.MatchString(bl.Text) && !unblTextReg.MatchString(bl.Text) {
			if strings.Index(bl.Text, "业绩") > 1 {
				// Buyer information, if any, is moved to the front.
				before_arr := []string{}
				if beforeTextReg.MatchString(bl.Text) {
					before_arr = beforeTextReg.FindAllString(bl.Text, -1)
				}
				// Drop everything from the first "业绩" onwards.
				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
				if len(before_arr) > 0 {
					bl.Text = strings.Join(before_arr, "\n") + bl.Text
				}
			}
		}
		// Special handling: structure conversion via formattext100.
		if formattext100.MatchString(bl.Text) {
			new_str := formattext100.FindString(bl.Text)
			new_str = formattext100.ReplaceAllString(new_str, "$1")
			bl.Text = fmt.Sprintf("中标金额:%s万元\n", new_str) + bl.Text
		}
		// Run the kv parsers on the detail text.
		bl.Text = formatText(bl.Text, "all")
		// Colon-separated kv.
		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1, isSite, codeSite)
		// Space-separated kv.
		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil, isSite, codeSite)
		// No candidates parsed from tables (or implausibly many): parse them
		// from the body text.
		if len(job.Winnerorder) == 0 || len(job.Winnerorder) > 8 {
			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1, isSite, codeSite)
			if thanWinnerOrderEffective(job.Winnerorder, bl.Winnerorder) {
				job.Winnerorder = bl.Winnerorder
			}
		} else {
			// Tables yielded candidate names but no amounts.
			if onlyExistsWinEntName(job.Winnerorder) {
				new_winorder := winnerOrderEntity.Find(bl.Text, true, 1, isSite, codeSite)
				if thanExistsNewWinOrder(job.Winnerorder, new_winorder) {
					job.Winnerorder = new_winorder
				}
			}
		}
		// Tables produced packages without any useful value: search the body
		// text again.
		if len(tabs) > 0 && job.BlockPackage != nil {
			if !isUsefulPackage(job.BlockPackage) {
				text_pkg := FindPackageFromText(job.Title, bl.Text, isSite, codeSite)
				if len(text_pkg) > 0 {
					job.BlockPackage = text_pkg
				}
			}
		}
		job.Block = append(job.Block, bl)
	}
}

// isUsefulPackage reports whether at least one package in pkg has a winner, a
// positive budget or a positive bid amount. Nil entries are skipped.
func isUsefulPackage(pkg map[string]*u.BlockPackage) bool {
	for _, v := range pkg {
		if v == nil {
			continue
		}
		if v.Winner != "" || v.Budget > float64(0) || v.Bidamount > float64(0) {
			return true
		}
	}
	return false
}

// verifyPackageWinnerOrder checks that a candidate list is plausible: the
// "sort" values must be 1, 2, 3, ... in order and no non-empty "entname" may
// appear twice.
func verifyPackageWinnerOrder(wins []map[string]interface{}) bool {
	seen := map[string]struct{}{}
	for k, v := range wins {
		if qu.IntAll(v["sort"]) != k+1 {
			return false
		}
		entname := qu.ObjToString(v["entname"])
		if entname == "" {
			// Empty names are never treated as duplicates.
			continue
		}
		if _, dup := seen[entname]; dup {
			return false
		}
		seen[entname] = struct{}{}
	}
	return true
}

// isRepeatArrString reports whether every element of arr1 equals the element
// of arr2 at the same index. It returns false (instead of panicking) when
// arr2 is shorter than arr1; extra trailing elements in arr2 are ignored.
func isRepeatArrString(arr1, arr2 []string) bool {
	for k, v := range arr1 {
		if k >= len(arr2) || v != arr2[k] {
			return false
		}
	}
	return true
}

// isResetUnitAmountSortKV appends "万元" to the "中标金额" value when the table
// has a "单位" key equal to "万元" and the "中标金额" value matches MoneyReg
// without already containing "万".
func isResetUnitAmountSortKV(table *Table) {
	isUnitAmount := 0
	for _, k := range table.SortKV.Keys {
		v := table.SortKV.Map[k]
		new_v, ok := v.(string)
		if !ok {
			continue
		}
		switch k {
		case "单位":
			if new_v == "万元" {
				isUnitAmount++
			}
		case "中标金额":
			if MoneyReg.MatchString(new_v) && !strings.Contains(new_v, "万") {
				isUnitAmount++
			}
		}
	}
	if isUnitAmount > 1 {
		table.SortKV.Map["中标金额"] = qu.ObjToString(table.SortKV.Map["中标金额"]) + "万元"
	}
}

// isResetUnitPriceSortKV adds a "总价(元)" entry equal to "单价" when the
// table has string values for "序号", "数量" and "单价", no "总价(元)" yet, and
// both "序号" and "数量" are "1".
func isResetUnitPriceSortKV(table *Table) {
	keyArr := []string{"序号", "数量", "单价"}
	for _, v := range keyArr {
		if _, ok := table.SortKV.Map[v].(string); !ok {
			return
		}
	}
	if table.SortKV.Map["总价(元)"] != nil {
		return
	}
	if qu.ObjToString(table.SortKV.Map["序号"]) == "1" && qu.ObjToString(table.SortKV.Map["数量"]) == "1" {
		table.SortKV.Map["总价(元)"] = table.SortKV.Map["单价"]
		table.SortKV.Keys = append(table.SortKV.Keys, "总价(元)")
	}
}

// isResetAmountAggregateSortKV aggregates per-row amounts into one total.
//
// Key groups are tried in order; the first group whose columns all have the
// same length (> 1) is used and ends the search:
//   - three-key groups: the last column is summed and, when positive, the sum
//     replaces that column;
//   - the four-key group: unit price * quantity is summed and, when positive,
//     stored under "总价(元)" (the key is appended if it was missing).
func isResetAmountAggregateSortKV(table *Table) {
	keyGroup := [][]string{}
	keyGroup = append(keyGroup, []string{"序号", "标项名称", "总价(元)"})
	keyGroup = append(keyGroup, []string{"序号", "名称", "总价(元)"})
	keyGroup = append(keyGroup, []string{"序号", "服务内容", "验收金额(元)"})
	keyGroup = append(keyGroup, []string{"序号", "标项名称", "单价(元)", "数量"})
	for _, v := range keyGroup {
		if len(v) == 3 {
			arr1 := u.ConvertInterface(table.SortKV.Map[v[0]])
			arr2 := u.ConvertInterface(table.SortKV.Map[v[1]])
			arr3 := u.ConvertInterface(table.SortKV.Map[v[2]])
			if len(arr1) > 1 && len(arr1) == len(arr2) && len(arr1) == len(arr3) {
				amount := float64(0)
				for _, nv := range arr3 {
					amount = precisionFloat(amount, qu.Float64All(nv))
				}
				if amount > float64(0) {
					table.SortKV.Map[v[2]] = fmt.Sprintf("%f", amount)
				}
				break
			}
		}
		if len(v) == 4 {
			arr1 := u.ConvertInterface(table.SortKV.Map[v[0]])
			arr2 := u.ConvertInterface(table.SortKV.Map[v[1]])
			arr3 := u.ConvertInterface(table.SortKV.Map[v[2]])
			arr4 := u.ConvertInterface(table.SortKV.Map[v[3]])
			if len(arr1) > 1 && len(arr1) == len(arr2) && len(arr1) == len(arr3) && len(arr1) == len(arr4) {
				amount := float64(0)
				for kv, nv := range arr3 {
					amount = precisionFloat(amount, qu.Float64All(nv)*qu.Float64All(arr4[kv]))
				}
				if amount > float64(0) {
					if table.SortKV.Map["总价(元)"] == nil {
						table.SortKV.Map["总价(元)"] = fmt.Sprintf("%f", amount)
						table.SortKV.Keys = append(table.SortKV.Keys, "总价(元)")
					} else {
						table.SortKV.Map["总价(元)"] = fmt.Sprintf("%f", amount)
					}
				}
				break
			}
		}
	}
}

// isReseterialNumberSortKV truncates the "序号" column to its first three
// entries when it has more than five.
func isReseterialNumberSortKV(table *Table) {
	arr := u.ConvertInterface(table.SortKV.Map["序号"])
	if len(arr) > 5 {
		table.SortKV.Map["序号"] = arr[:3]
	}
}

// isResetWinnerRankingSortKV adds a synthetic "排名" column ("1", "2") when
// the table consists of exactly the two keys "中标人" and "中标价格" and
// "中标人" holds two or three entries.
// NOTE(review): the ranking always has two entries even when "中标人" has
// three - confirm this is intended.
func isResetWinnerRankingSortKV(table *Table) {
	if len(table.SortKV.Map) == 2 && table.SortKV.Map["中标人"] != nil && table.SortKV.Map["中标价格"] != nil {
		arr := u.ConvertInterface(table.SortKV.Map["中标人"])
		if len(arr) > 1 && len(arr) <= 3 {
			table.SortKV.Map["排名"] = []string{"1", "2"}
			table.SortKV.Keys = append(table.SortKV.Keys, "排名")
		}
	}
}

// precisionFloat adds two float64 values through decimal arithmetic to avoid
// the precision loss of a plain float addition.
func precisionFloat(tmp1, tmp2 float64) float64 {
	n1 := decimal.NewFromFloat(tmp1)
	n2 := decimal.NewFromFloat(tmp2)
	decimalValue := n2.Add(n1)
	// The exactness flag is deliberately ignored: the nearest float64 is
	// what callers want.
	res, _ := decimalValue.Float64()
	return res
}

// resetAsKeysBidamount replaces the "投标报价(元)" column of as with the
// "经评审的投标价(元)" column when both are non-empty []string of equal length
// and the first reviewed price cleans to a non-empty string shorter than 30
// bytes.
func resetAsKeysBidamount(as *SortMap) {
	keys, values_data := as.Keys, as.Map
	if len(keys) == 0 {
		return
	}
	k1, k2 := "投标报价(元)", "经评审的投标价(元)"
	value1, ok := values_data[k1].([]string)
	if !ok || len(value1) == 0 {
		return
	}
	value2, ok := values_data[k2].([]string)
	if !ok || len(value2) == 0 {
		return
	}
	if len(value1) != len(value2) {
		return
	}
	tmp_value := value2[0]
	price := winnerOrderEntity.clear("中标金额", tmp_value+GetMoneyUnit(k1, tmp_value))
	if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
		as.Map[k1] = as.Map[k2]
	}
}

// judgmentWinnerOrderHeaderInfo reports whether TRs looks like the special
// candidate table: at least three rows, at least two later rows with the same
// number of cells as the first row, and a first row containing all three
// header texts of one of the known header sets.
func judgmentWinnerOrderHeaderInfo(TRs []*TR) bool {
	if len(TRs) < 3 {
		return false
	}
	// Does the header contain the required keywords?
	TR_0 := TRs[0]
	isLen := 0
	for k, v := range TRs {
		if k > 0 {
			if len(v.TDs) == len(TR_0.TDs) {
				isLen++
			}
			if isLen >= 2 {
				break
			}
		}
	}
	if isLen < 2 {
		return false
	}
	textArr := [][]string{}
	textArr = append(textArr, []string{"投标人", "中标候选人排序", "投标报价(万元)"})
	textArr = append(textArr, []string{"投标人", "中标候选人排序", "投标总报价(万元)"})
	for _, arr := range textArr {
		isok := 0
		for _, v := range arr {
			for _, v1 := range TR_0.TDs {
				if v1.Val == v {
					isok++
					break
				}
			}
		}
		if isok == 3 {
			return true
		}
	}
	return false
}

// isUnRealBudgetBp reports whether two budget tags are "项目总投资" and
// "项目投资" (in either order) with different values, i.e. the pair does not
// necessarily describe separate packages.
func isUnRealBudgetBp(tnv []*u.Tag) bool {
	if len(tnv) != 2 {
		return false
	}
	key_1, key_2 := tnv[0].Key, tnv[1].Key
	value_1, value_2 := tnv[0].Value, tnv[1].Value
	if value_1 == value_2 {
		return false
	}
	if strings.Contains(key_1, "项目总投资") && strings.Contains(key_2, "项目投资") {
		return true
	}
	if strings.Contains(key_2, "项目总投资") && strings.Contains(key_1, "项目投资") {
		return true
	}
	return false
}

// initLineMapLineMapArr groups table.SortKV entries by the trailing run of
// underscores in their key. Entries whose value is a []string go into
// lineMapArr, entries whose value is a string go into lineMap; values of any
// other type are ignored, as are entries whose values are all filtered away by
// filterVal.
func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
	lineMapArr = make(map[string]*SortMap)
	lineMap = make(map[string]*SortMap)
	// Iterate over Keys rather than Map to keep the table header order.
	for _, key := range table.SortKV.Keys {
		val := table.SortKV.Map[key]
		key = regReplAllSpace.ReplaceAllString(key, "")
		// Handles a special character left over after the upstream whitespace
		// cleanup.
		// NOTE(review): as written both arguments are empty strings, which
		// makes this call a no-op; the original probably contained an
		// invisible character - confirm against version control.
		key = strings.Replace(key, "", "", -1)
		//qu.Debug(key, "---------------------------", val)
		if realTypeVal, ok := val.([]string); ok { // val is a slice: {"数量":["1","2","3"]}
			/*
				{
					"商品":["",""],
					"商品_"["",""],
				}
			*/
			valArr, allempty := filterVal(realTypeVal...) // filter the data
			if allempty {
				continue
			}
			realTypeVal = valArr
			line := underline.FindString(key)
			lineValMap1 := lineMapArr[line]
			// i := 1
			// L:
			// for { // strip empty trailing entries
			// 	last := realTypeVal[len(realTypeVal)-i]
			// 	if last == "" {
			// 		i++
			// 		if i > len(realTypeVal) {
			// 			break
			// 		}
			// 		goto L
			// 	} else {
			// 		break
			// 	}
			// }
			// dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] // strip empty entries
			if len(realTypeVal) > 0 {
				if lineValMap1 == nil {
					tmp := NewSortMap()
					tmp.AddKey(key, realTypeVal)
					lineMapArr[line] = tmp
				} else {
					lineValMap1.AddKey(key, realTypeVal)
				}
			}
			//qu.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
		} else if realTypeVal, b := val.(string); b { // val is a string: {"数量":"1"}
			/*
				{
					"商品:"",名称:"",
					"商品_:"",名称_:"",
					"商品__:"",名称__:"",
				}
			*/
			valArr, allempty := filterVal(realTypeVal) // filter the data
			if allempty {
				continue
			}
			realTypeVal = valArr[0]
			line := underline.FindString(key)
			lineValMap2 := lineMap[line]
			if lineValMap2 == nil {
				tmp := NewSortMap()
				tmp.AddKey(key, realTypeVal)
				lineMap[line] = tmp
			} else {
				lineValMap2.AddKey(key, realTypeVal)
			}
			//qu.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
		} else {
			// Other value types are ignored. Sample ids / data seen here:
			// "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
			// 成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
			//qu.Debug("err data:", key, val)
		}
	}
	return lineMapArr, lineMap
}

// dealArrData truncates every column of ka to at most maxNum entries (in
// place) and assembles the columns into rows. It returns nil when no row was
// produced.
func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
	for k2, v2 := range ka { // make over-long columns the same length
		if len(v2) > maxNum {
			ka[k2] = v2[:maxNum]
		}
	}
	finalData := assembleData(ka, 1)
	if len(finalData) > 0 {
		return finalData
	}
	return nil
}

// dealStrData wraps a non-empty string map into a one-row result; an empty
// map yields an empty (non-nil) slice.
func dealStrData(kv map[string]string) []map[string]string {
	finalData := []map[string]string{}
	if len(kv) > 0 {
		finalData = assembleData(kv, 2)
	}
	return finalData
}

// assembleData turns column data into rows, one map per row.
//
// With n == 1, m must be a map[string][]string such as
//
//	{"itemname": ["计算机","打印机","机柜"], "number": ["1","12","4"]}
//
// and entries with the same index are combined into one row. With any other
// n, m must be a map[string]string and is returned as the single row.
//
// NOTE(review): the number of rows is taken from whichever column the map
// iteration yields first, so it is not deterministic when columns differ in
// length.
func assembleData(m interface{}, n int) []map[string]string {
	defer qu.Catch()
	datas := []map[string]string{}
	if n == 1 { // slice data
		realTypeM := m.(map[string][]string)
		/*
			arr1 ["a1","b1","c1"]
			arr2 ["a2","b2","c2"]
			=> [ {"a1","a2"}, {"b1","b2"}, {"c1","c2"} ]
		*/
		// Seed one row per entry of the first column encountered.
		for k3, v3 := range realTypeM {
			for _, val := range v3 {
				data := make(map[string]string)
				data[k3] = val
				datas = append(datas, data)
			}
			break
		}
		for i, data := range datas {
			for k4, v4 := range realTypeM {
				if i < len(v4) { // columns may differ in length
					if v4[i] != " " {
						data[k4] = v4[i]
					} else {
						delete(data, k4)
					}
				} else {
					fmt.Println("err table")
				}
			}
			datas[i] = data
		}
		// Drop empty values and values consisting only of special symbols.
		for _, fdv := range datas {
			for fmk, fmv := range fdv {
				if tabletdclear.ReplaceAllString(fmv, "") == "" {
					delete(fdv, fmk)
				}
			}
		}
	} else { // string data
		realTypeM := m.(map[string]string)
		datas = append(datas, realTypeM)
	}
	return datas
}

// convert cleans key with tabletitleclear and reports whether the regular
// expression r matches the result. An invalid r is logged and yields false.
func convert(key, r string) bool {
	defer qu.Catch()
	key = tabletitleclear.ReplaceAllString(key, "")
	reg, err := regexp.Compile(r)
	if err != nil {
		fmt.Println("reg err:", err)
		return false
	}
	return reg.MatchString(key)
}

// hasKey sets table.TableResult.HasKey to 1 when n >= 1; an already set flag
// is left untouched.
func hasKey(table *Table, n int) {
	defer qu.Catch()
	if table.TableResult.HasKey == 1 {
		return
	}
	if n >= 1 {
		table.TableResult.HasKey = 1
	}
}

// hasGoods sets table.TableResult.HasGoods to 1 as soon as
// u.GoodsGet.CheckSensitiveWord returns a non-empty result for one of the
// non-empty data values. An already set flag is left untouched.
func hasGoods(table *Table, data ...string) {
	defer qu.Catch()
	if table.TableResult.HasGoods == 1 {
		return
	}
	for _, d := range data {
		if d == "" {
			continue
		}
		goods := u.GoodsGet.CheckSensitiveWord(d)
		if len(goods) > 0 {
			table.TableResult.HasGoods = 1
			break
		}
	}
}

// hasBrand runs u.BrandGet.CheckSensitiveWord on every data value and returns
// the results (index-aligned with data) together with a flag that is true
// when no value produced a brand. table.TableResult.HasBrand is set to 1 when
// any brand is found.
func hasBrand(table *Table, data ...string) ([]string, bool) {
	defer qu.Catch()
	brandArr := make([]string, len(data))
	// if table.TableResult.HasBrand == 1 {
	// 	return brandArr, 1
	// }
	allNull := true
	for i, d := range data {
		brand := u.BrandGet.CheckSensitiveWord(d)
		if brand != "" {
			allNull = false
		}
		brandArr[i] = brand
		if len(brand) > 0 {
			table.TableResult.HasBrand = 1
		}
	}
	return brandArr, allNull
}

// filterVal cleans td values. For each value, a clearnn match with a
// non-empty first group reduces the value to that group; then tabletdclear
// and NullVal matches are removed. The cleaned values are written back into
// val (the caller's slice is modified) and returned together with a flag that
// is true when every value ended up empty.
func filterVal(val ...string) ([]string, bool) {
	defer qu.Catch()
	n := 0 // number of values filtered to empty
	for i, v := range val {
		if m := clearnn.FindStringSubmatch(v); len(m) > 1 && m[1] != "" {
			v = m[1]
		}
		afterFilter := tabletdclear.ReplaceAllString(v, "")
		afterFilter = NullVal.ReplaceAllString(afterFilter, "")
		if afterFilter == "" {
			n++
		}
		val[i] = afterFilter
	}
	// True when everything was filtered away.
	return val, n == len(val)
}

// filterItem blanks item names that consist only of leading numbering: a
// value is kept unchanged when something remains after numclear removes its
// numeric prefix, otherwise it is replaced by "".
func filterItem(itemval ...string) []string {
	defer qu.Catch()
	result := []string{}
	for _, v := range itemval {
		if numclear.ReplaceAllString(v, "") != "" {
			result = append(result, v)
		} else {
			result = append(result, "")
		}
	}
	return result
}

// dealPriceInterface converts price strings with clear.ObjToMoney. Values
// without any digit become "". When the key contains "万" and the value does
// not, "万" is appended to the value before conversion.
func dealPriceInterface(key string, val ...string) (result []interface{}) {
	defer qu.Catch()
	for _, v := range val {
		if !num1.MatchString(v) { // no digit
			result = append(result, "")
			continue
		}
		if !strings.Contains(v, "万") && strings.Contains(key, "万") {
			v = v + "万"
		}
		data := []interface{}{v, ""}
		money := clear.ObjToMoney(data)[0]
		result = append(result, money)
	}
	return
}

// dealNumberInterface extracts the leading number of each value ("1个",
// "1.00个"): no numeric prefix yields "", a prefix found in clear.NumChar
// (Chinese numerals) yields the mapped value, anything else yields the
// integer part of the prefix.
func dealNumberInterface(val ...string) (result []interface{}) {
	defer qu.Catch()
	for _, v := range val {
		n := numclear.FindString(v)
		if n == "" {
			result = append(result, "")
		} else if tmp := clear.NumChar[n]; tmp != nil { // 一二三...
			result = append(result, tmp)
		} else { // digits
			result = append(result, qu.IntAll(strings.Split(n, ".")[0]))
		}
	}
	return
}

// dealPrice converts each price value with clear.ObjToMoney (passing key as
// the second element) and formats the result with %v.
func dealPrice(key string, val ...string) []string {
	defer qu.Catch()
	result := []string{}
	for _, v := range val {
		data := []interface{}{v, key}
		money := clear.ObjToMoney(data)[0]
		result = append(result, fmt.Sprintf("%v", money))
	}
	// Previous implementation, kept for reference:
	// result := []string{}
	// for _, v := range val { //1.00万元 1元 2.25元/斤
	// 	tmparr := strings.Split(v, ".")
	// 	tmparr[0] = moneyNum.ReplaceAllString(tmparr[0], "")
	// 	if iswan {
	// 		result = append(result, tmparr[0]+"0000")
	// 	} else { // td value contains 万
	// 		if strings.Contains(v, "万") {
	// 			result = append(result, tmparr[0]+"0000")
	// 		} else {
	// 			result = append(result, tmparr[0])
	// 		}
	// 	}
	// }
	return result
}

// dealNumber splits each value ("1个", "1.00个") into the integer part of its
// leading number and the remaining unit text. Both result slices are
// index-aligned with val.
func dealNumber(val ...string) ([]string, []string) {
	defer qu.Catch()
	unitnameArr := []string{}
	result := []string{}
	for _, v := range val {
		n := numclear.FindString(v)
		unitname := numclear.ReplaceAllString(v, "") // unit following the count
		unitnameArr = append(unitnameArr, unitname)
		result = append(result, strings.Split(n, ".")[0])
	}
	return result, unitnameArr
}

// isPkgRegexArr reports whether con matches the expected structure: regs[0]
// (start) and regs[1] (end) must each match exactly once. It returns false
// when fewer than two regular expressions are supplied.
func isPkgRegexArr(regs []*regexp.Regexp, con string) bool {
	if len(regs) < 2 {
		return false
	}
	S_Index := regs[0].FindAllStringIndex(con, -1)
	E_Index := regs[1].FindAllStringIndex(con, -1)
	return len(S_Index) == len(E_Index) && len(S_Index) == 1
}