analymethod.go 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119
  1. package pretreated
  2. import (
  3. "fmt"
  4. "github.com/shopspring/decimal"
  5. "jy/clear"
  6. u "jy/util"
  7. qu "qfw/util"
  8. "regexp"
  9. "strings"
  10. )
  /*
   * Package-level state: mostly the regular expressions that drive the
   * table / key / value heuristics of this package.
   */
  var (
  	// Date units that may appear in a key.
  	// NOTE(review): inside [...] the '|' is a literal, so this class also matches '|' — confirm that is intended.
  	dateReg *regexp.Regexp = regexp.MustCompile(`[年|月|日|天]`)
  	// Strips the leading numbering (digits, Chinese numerals, dots) from an item name.
  	numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
  	num1 = regexp.MustCompile("(\\d)")
  	// Characters to strip from a table title.
  	tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、.,.。_/((人民币万元件个公斤户))]")
  	tabletitleclear2 = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕]*")
  	// Strips whitespace, leading numbering and bracketed text from a table key.
  	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
  	// Short phrases matched by the previous key/value stage that should be cleaned.
  	tablekeyclear2 = regexp.MustCompile("(供应商信用融资|供应商公章|主要标的名称|中标人推荐理由|成交供应商推荐理由)")
  	// Symbols to strip from a table cell (td).
  	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
  	// Detects that a key is a money field (used when handling the 万元 unit).
  	moneyReg = regexp.MustCompile("(预算|概算|费|价|额|规模|投资)")
  	// Special texts that are treated as a table header.
  	specHeadReg = regexp.MustCompile("(成交供应商|中选人)")
  	// Keys that must not be cleaned, e.g. discount or rate keys.
  	noClearKeyReg = regexp.MustCompile(`[((](费率|年|月|日|天|日历天|历天)[))]`)
  	// Decides from the cell content whether it is a header: content that is only a money amount is not a header.
  	// NOTE(review): the two blank characters after \\s may be non-ASCII spaces in the real file — keep the bytes as they are.
  	MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
  	// Special values that can never be a header.
  	noStartHeadReg = regexp.MustCompile("^(\\d标段)$")
  	GSReg = regexp.MustCompile(".*公司.*")
  	// Used when detecting sub-packages.
  	moneyNum = regexp.MustCompile("[元整¥万]")
  	// Detects hidden tables (display:none).
  	display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
  	// ---------------
  	// Sub-package probability.
  	// Scores the sub-package likelihood from the table's tag.
  	TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)")
  	TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)")
  	// Filters table keys before sub-package scoring.
  	FilterKey_2 = regexp.MustCompile("招标|投标|项目")
  	// Scores the sub-package likelihood from the table's keys.
  	FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数]|包[组件])")
  	FindKey_3 = regexp.MustCompile("(标段编号|标包|包件|包号)")
  	// Sub-package detection on values.
  	// NOTE(review): the two blank characters before \u3000 may be non-ASCII spaces in the real file — keep the bytes as they are.
  	FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|标的[一二三四五六七八九十1-9A-Za-z]+|((子|合同|分|施工|监理)?(包|包件|标)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
  	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
  	// Keys to exclude before sub-package detection.
  	excludeKey = regexp.MustCompile("(标识|数量|分包个数|标段代码|涉及包号|分包数量|项目标号|规格|型号|招标范围|业绩|废标|标段选择要求)|(^编号$)|([^包段标]编号)") // original author's note: 编号|划分
  	excludeKey2 = regexp.MustCompile("包/[0-9]{0,4}[箱纸张]")
  	excludeKey3 = regexp.MustCompile("(分包个数|每包[0-9]*元|标线|国标|享受一包服务)")
  	// -------------
  	cut = u.NewCut()
  	// Strips markup tags and trailing whitespace from a table tag.
  	ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
  	// Finds the table's tag text.
  	ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
  	// Probability threshold for deciding that a row is a table header.
  	checkval = float32(0.6)
  	//tdval_reg = regexp.MustCompile(`([\p{Han}][\p{Han}\s、()\\(\\)]{1,9})[::]([^::\\n。]{5,60})(?:[;;,,.。\\n\\t\\s])?`)
  	// Whitespace (and colon) replacement.
  	repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+")
  	// Table key/value handling.
  	// Marks keys that cannot be standardised.
  	filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)?")
  	// Winning-bid amount.
  	// Keys containing these words are standardised.
  	filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
  	// Simple amount check.
  	// NOTE(review): [^(售|保证)] is a negated character class over single characters, not an alternation of words — confirm the intent.
  	filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
  	// Budget amount.
  	filter_ysje_jd = regexp.MustCompile("(预算|预控价|项目概.|项目信息)")
  	// ...and keys containing these words are excluded.
  	filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|电话|要求|需求数量|发布规模$|第[2二3三4四5五]|地址|询价保证金|行号")
  	// ...and the value must consist of these characters.
  	filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
  	// Winning bidder handling.
  	// Keys containing these words are standardised.
  	filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$")
  	// Recognises information related to the winning bidder.
  	filter_zbdw_info = regexp.MustCompile("(中标|成交|中选|供(货|应))[^候选]{0,}")
  	// Simple check.
  	filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
  	// ...and the key must not contain these words.
  	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金")
  	// ...and the value must contain these words.
  	filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
  	// ...and the value must end with one of these words.
  	filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
  	//Tg = map[string]interface{}{}
  	// Some tables have an empty header; cells that are rankings are matched here and correspond to NullTxtBid.
  	NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
  	NullTxtBid = "成交供应商排名"
  	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
  	MhSpilt = regexp.MustCompile("[::]") // lowers the weight of colons
  	// Specific fields whose values are in a time format.
  	UnTimeSpiltKey = regexp.MustCompile("(招标文件获取截止时间|招标文件获取开始时间|报名截止时间|报名开始时间|投标文件递交开始时间|开工日期|竣工日期)")
  	UnTimeSpiltValue = regexp.MustCompile("\\d{1,2}[::]\\d{1,2}")
  	// Recognises buyer contact person / phone and agency contact person / phone.
  	// The original author notes that the "名称" (name) alternative behaves abnormally.
  	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//及]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
  	ContactInfoExcluReg = regexp.MustCompile("[商]名称$")
  	// Anchored (whole-string) variant of ContactInfoVagueReg.
  	ContactInfoMustReg = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
  	// Maps a contact category label to the pattern that recognises it.
  	ContactType = map[string]*regexp.Regexp{
  		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|招标|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
  		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
  		"中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应))((成交))?)[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
  		"监督部门": regexp.MustCompile("投诉受理部门"),
  	}
  	ContactHeadReg = regexp.MustCompile("^(招标人|采购人)$")
  	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
  	MultipleValueSplitReg = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
  	BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
  	FilterSerial = regexp.MustCompile(".+[、..::,]")
  	// Trailing underscores of a key.
  	underline = regexp.MustCompile("_+$")
  	iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果|磋商情况)")
  	nswinnertabletag = regexp.MustCompile("评得分估|标的信息|班子成员")
  	jsonReg = regexp.MustCompile(`\{.+:[^}]*\} `) // \{".*\":\".+\"}
  	// A single CJK ideograph in the range U+4E00..U+9FA5.
  	regHz = regexp.MustCompile("[\u4e00-\u9fa5]")
  	winnerOrderAndBidResult = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
  	WinnerOrderStr = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|[大中小]+学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
  	DoubtReg = regexp.MustCompile("(我中心|有(疑问|质疑|异议|意见)|(书面)?提出|不再受理|投诉|质疑|书面形式|监督|公示期(限)?)")
  	// Added later: sub-package / table / sortKV patterns.
  	budgetSortKVReg = regexp.MustCompile("(预算)")
  	bidamountSortKVReg = regexp.MustCompile("(成交结果[((]万元[))]|成交金额|履约金额|中[标选]金额)")
  	winnerSortKVReg = regexp.MustCompile("(投标人[((]供应商[))]名称)|供应商名称|中标候选人|中[标选]人|中[标选]单位")
  )
  // Standalone patterns used by the candidate / winner table heuristics.
  //
  // The ranking patterns previously used classes such as [弟|第]; inside a
  // character class '|' is a literal, so text like "|1名" was accepted as a
  // ranking. The classes are now [弟第] / [第弟], which match only the two
  // intended characters (弟 is kept as the common misspelling of 第).
  var fblbReg *regexp.Regexp = regexp.MustCompile("(废标|流标|否决依据|未中标情况说明|负责人资格|负责人业绩|相关业绩|类似项目情况表|技术评分明细表|否决投标人投标的原因|开标记录|附件[:0-9]|越南盾|技术分[^公]|填报项目业绩|未通过.*原因)")

  // Runs of comma separated numbers, e.g. 59.992664,33.495715,20.001306
  var clearnum *regexp.Regexp = regexp.MustCompile("(([0-9.]{1,6}[,,]+){4,}|(\\d{6}[,,]\\d{2}.){2,})")
  var glRex *regexp.Regexp = regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序|中标候选人|名单及其排序|排序)")

  // Text starting with "单价".
  var djReg *regexp.Regexp = regexp.MustCompile("^单价")

  // "candidate ranked N" in either word order, N in 1..5.
  var hxrRex *regexp.Regexp = regexp.MustCompile("((成交|中标|中选)?候选人[弟第][1-5一二三四五]名|[弟第][1-5一二三四五][名]?(成交|中标|中选)?候选人)")
  var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标报价|评审价|投标总价|含税总价[((]元[))]|总金额)")
  var winNoMoneyReg *regexp.Regexp = regexp.MustCompile("(得分|时间|序号|分)")
  var cleardwReg *regexp.Regexp = regexp.MustCompile("[((]{1}\\d*[人元件个公斤户]/[人元件个公斤户][))]")
  var zbhxrReg *regexp.Regexp = regexp.MustCompile("(中标候选人|投标单位名称|候选人姓名|候选人名称)")

  // Whole-string ranking such as "第1名".
  var zbhxrSortReg_1 *regexp.Regexp = regexp.MustCompile("^[第弟][12345一二三四五]名$")

  // Whole-string bare rank 1..5.
  var zbhxrSortReg_2 *regexp.Regexp = regexp.MustCompile("^([12345一二三四五])$")

  // Leading bare rank 1..5.
  var zbhxrSortReg_3 *regexp.Regexp = regexp.MustCompile("^([12345一二三四五])")
  var zbhxrSortNameReg *regexp.Regexp = regexp.MustCompile("(中标候选人[第弟][123一二三]名)|[第弟][123一二三]中标候选人")
  var zbhxrSecondReg *regexp.Regexp = regexp.MustCompile("(中标候选人[第弟][2二]名)|[第弟][2二]中标候选人")
  var clearnn *regexp.Regexp = regexp.MustCompile("([\\d.]*)[\\n\\s]*[\\((][\\d.]+[)\\)]")

  // Keywords that indicate a sub-package.
  var pkgValidReg1 *regexp.Regexp = regexp.MustCompile("(中标单位|中标金额)[::]")

  // "业绩:" and everything after it on the line.
  var tableClearTextReg *regexp.Regexp = regexp.MustCompile("业绩[::].*")
  157. // 特殊-爬虫文本-抽取单价数量-并计算
  158. func dealWithSpecStructToSpiderCode(text string) string {
  159. text = formattext50.ReplaceAllString(text, "$1&&$2")
  160. arr := strings.Split(text, "&&")
  161. if len(arr) == 2 {
  162. one := qu.Float64All(arr[0])
  163. two := qu.Float64All(arr[1])
  164. if one > 0 && two > 0 {
  165. return fmt.Sprintf("\n合同金额:%f\n", one*two)
  166. }
  167. }
  168. return ""
  169. }
  170. // 对比前后候选人的有效性-true -为新
  171. func thanWinnerOrderEffective(old_order []map[string]interface{}, new_order []map[string]interface{}) bool {
  172. if len(new_order) == 0 || new_order == nil {
  173. return false
  174. }
  175. if len(old_order) == 0 || old_order == nil {
  176. return true
  177. }
  178. old_info, new_info := old_order[0], new_order[0]
  179. if qu.IntAll(old_info["sort"]) > 1 {
  180. return true
  181. } //排序比对
  182. if qu.IntAll(new_info["sort"]) > 1 {
  183. return false
  184. }
  185. //金额比对 -
  186. isuse_1, isuse_2 := false, false
  187. if old_vf, ok := old_info["price"].(float64); ok && old_vf > 0.0 {
  188. isuse_1 = true
  189. } else {
  190. if old_vs, ok := old_info["price"].(string); ok && old_vs != "" {
  191. isuse_1 = true
  192. }
  193. }
  194. if new_vf, ok := new_info["price"].(float64); ok && new_vf > 0.0 {
  195. isuse_2 = true
  196. } else {
  197. if new_vs, ok := new_info["price"].(string); ok && new_vs != "" {
  198. isuse_2 = true
  199. }
  200. }
  201. if isuse_1 && !isuse_2 {
  202. return false
  203. }
  204. if !isuse_1 && isuse_2 {
  205. return true
  206. }
  207. //均正常-优先取新值
  208. return true
  209. }
  // onlyExistsWinEntName reports whether a candidate list carries names only,
  // i.e. it has at most three entries and none of them has a positive float64
  // "price". Prices stored as any other type (for example a string) are not
  // looked at. An empty list yields true.
  func onlyExistsWinEntName(winorder []map[string]interface{}) bool {
  	if len(winorder) > 3 {
  		return false
  	}
  	for i := range winorder {
  		price, isFloat := winorder[i]["price"].(float64)
  		if isFloat && price > 0.0 {
  			return false
  		}
  	}
  	return true
  }
  223. func thanExistsNewWinOrder(winorder []map[string]interface{}, new_winorder []map[string]interface{}) bool {
  224. if len(winorder) != len(new_winorder) {
  225. return false
  226. }
  227. isok := 0
  228. for k, v := range winorder {
  229. if qu.ObjToString(v["entname"]) == qu.ObjToString(new_winorder[k]["entname"]) {
  230. if new_price, ok := new_winorder[k]["price"].(float64); ok && new_price > 0.0 {
  231. isok++
  232. }
  233. }
  234. }
  235. if isok == len(winorder) {
  236. return true
  237. }
  238. return false
  239. }
  240. // 多供应商文本构建分包
  241. func dealWithMultiSuppliersText(con string) (bool, string) {
  242. startIndex := MultiStartReg.FindAllStringIndex(con, 1)
  243. endIndex := MultiEndReg.FindAllStringIndex(con, 1)
  244. if len(startIndex) == 1 && len(endIndex) == 1 {
  245. if len(startIndex[0]) > 1 && len(endIndex[0]) > 1 {
  246. t_start, t_end := startIndex[0][1], endIndex[0][0]
  247. if t_end > t_start {
  248. text := con[t_start:t_end]
  249. arr1 := SupplyInfoReg1.FindAllStringSubmatch(text, -1)
  250. if text1 := supplyInfoMethod(arr1, 2, 4); text1 != "" {
  251. return true, strings.ReplaceAll(con, text, text1)
  252. }
  253. arr2 := SupplyInfoReg2.FindAllStringSubmatch(text, -1)
  254. if text2 := supplyInfoMethod(arr2, 2, 4); text2 != "" {
  255. return true, strings.ReplaceAll(con, text, text2)
  256. }
  257. }
  258. }
  259. }
  260. return false, ""
  261. }
  262. // 结果类-供应商信息
  263. func dealWithSpecResultInfoText(con string) (bool, string) {
  264. startIndex := ResultStartReg.FindAllStringIndex(con, 1)
  265. middleIndex := ResultMiddleReg.FindAllStringIndex(con, 1)
  266. endIndex := ResultEndReg.FindAllStringIndex(con, 1)
  267. if len(startIndex) == 1 && len(middleIndex) == 1 && len(endIndex) == 1 {
  268. if len(startIndex[0]) > 1 && len(middleIndex[0]) > 1 && len(endIndex[0]) > 1 {
  269. s1, e1 := startIndex[0][1], middleIndex[0][0]
  270. s2, e2 := middleIndex[0][1], endIndex[0][0]
  271. if e1 > s1 && e2 > s2 {
  272. if t1, t2 := con[s1:e1], con[s2:e2]; t1 != "" && t2 != "" {
  273. con = strings.ReplaceAll(con, t1, "\n")
  274. con = strings.ReplaceAll(con, t2, "\n")
  275. return true, con
  276. }
  277. }
  278. }
  279. }
  280. return false, ""
  281. }
  // supplyInfoMethod rebuilds regexp submatch rows into per-package text.
  //
  // arr holds one row per match, w_index is the column of the winning bidder
  // and b_index the column of the bid amount. For two or more rows it returns
  //
  //	包<n>\n中标单位:<winner>\n中标金额:<amount>\n
  //
  // for every row, numbered from 1. It returns "" when there are fewer than
  // two rows, or when an index is negative or outside any row (the latter
  // used to panic).
  func supplyInfoMethod(arr [][]string, w_index int, b_index int) string {
  	if len(arr) <= 1 || w_index < 0 || b_index < 0 {
  		return ""
  	}
  	for _, row := range arr {
  		if w_index >= len(row) || b_index >= len(row) {
  			return ""
  		}
  	}
  	var sb strings.Builder
  	for i, row := range arr {
  		fmt.Fprintf(&sb, "包%d\n中标单位:%s\n中标金额:%s\n", i+1, row[w_index], row[b_index])
  	}
  	return sb.String()
  }
  // AnalyStart is the analysis entry point for one job.
  //
  // It normalises job.Content through a fixed sequence of regexp rewrites,
  // stores the plain text of the original content in job.ContentClean and the
  // rewritten content in job.Content, then either processes the blocks returned
  // by DivideBlock or, when there are none, builds a single block from the whole
  // text. Along the way it fills job.BlockPackage, job.Winnerorder, job.Block,
  // job.HasTable and job.IsUnRulesTab. isSite and codeSite are passed through to
  // the helpers; codeSite additionally enables one crawler-specific rewrite.
  // The order of the rewrites matters: each one works on the previous output.
  func AnalyStart(job *u.Job, isSite bool, codeSite string) {
  	con := job.Content
  	// Repair the tables in the full text.
  	con = RepairCon(con)
  	// Normalise the body text.
  	//con = preConReg1.ReplaceAllString(con, "${1}${2}")
  	hisReg1_str := hisReg1.FindString(con)
  	if hisReg1_str != "" && !strings.Contains(hisReg1_str, "中标候选人得分") {
  		con = hisReg1.ReplaceAllString(con, "${4}")
  	}
  	hisReg2_str := hisReg2.FindString(con)
  	if hisReg2_str != "" && !strings.Contains(hisReg2_str, "中标候选人得分") {
  		con = hisReg2.ReplaceAllString(con, "${6}")
  	}
  	con = formattext.ReplaceAllString(con, "${1}:${2}")
  	con = formattext2.ReplaceAllString(con, "${1}")
  	con = formattext3.ReplaceAllString(con, "")
  	con = formattext4.ReplaceAllString(con, "\n${1}:${2}\n")
  	// Special formats that disturb sub-package / candidate extraction are rewritten.
  	con = formattext5.ReplaceAllString(con, "中标金额:${2}\n")
  	con = formattext6.ReplaceAllString(con, "$1$2")
  	con = formattext7.ReplaceAllString(con, "$1$2")
  	// Reshape special structures.
  	con = formattext10.ReplaceAllString(con, "\n分包$3\n中标单位:$5 中标金额:$6\n")
  	con = formattext11.ReplaceAllString(con, "${1}\n${2}\n预算金额:${4}\n${5}\n预算金额:${7}\n${8}\n")
  	con = formattext12.ReplaceAllString(con, "\n${1}:${3}万元\n")
  	con = formattext13.ReplaceAllString(con, "\n包一\n中标单位:${1}\n中标金额:${3}\n"+"包二\n中标单位:${2}\n中标金额:${4}\n")
  	con = formattext14.ReplaceAllString(con, "\n包一\n中标单位:${1}\n中标金额:${2}\n"+"包二\n中标单位:${3}\n中标金额:${4}\n")
  	// Multi-supplier text structure: rebuild.
  	if m_b, m_c := dealWithMultiSuppliersText(con); m_b {
  		con = m_c
  	}
  	// Result-type announcements: rebuild the special structure.
  	if m_b, m_c := dealWithSpecResultInfoText(con); m_b {
  		con = m_c
  	}
  	// Descriptions of past project performance disturb extraction.
  	con = formattext20.ReplaceAllString(con, "\n")
  	con = formattext21.ReplaceAllString(con, "")
  	// Special structure conversion.
  	con = formattext30.ReplaceAllString(con, "${1}")
  	// Extension point left by the original author: not needed yet, too little data to justify a lookup table.
  	// NOTE(review): this branch is empty and has no effect.
  	if strings.Contains(job.Title, "项目监理") {
  	}
  	// One specific crawler: compute an amount from a special structure and prepend it.
  	if codeSite == "a_zgzfcgw_zfcghtgg_new" {
  		str := formattext50.FindString(con)
  		if str != "" {
  			new_str := dealWithSpecStructToSpiderCode(str)
  			if new_str != "" {
  				con = new_str + con
  			}
  		}
  	}
  	con = formatText(con, "all")
  	// ContentClean is derived from the original content, before job.Content is overwritten below.
  	job.ContentClean = HtmlToText(job.Content)
  	job.Content = con
  	// Compute the table ratio; returns the table list and the ratio (the ratio is discarded here).
  	tabs, _ := ComputeConRatio(con, 1)
  	/*if len(tabs) > 0 {
  		newcon, newtabs, newration := FindBigText(con, ration, tabs)
  		if newcon != "" {
  			con = newcon
  			con = formatText(con, "all")
  			tabs = newtabs
  			ration = newration
  		}
  	}*/
  	job.BlockPackage = map[string]*u.BlockPackage{}
  	// Split into blocks and process the key/values of every block.
  	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock, isSite, codeSite)
  	if len(blockArrays) > 0 { // blocks were found
  		// Look for sub-packages in the block text (skipped for attachments).
  		if !job.IsFile {
  			job.BlockPackage = FindPackageFromBlocks(&blockArrays, isSite, codeSite) // sub-packages from blocks
  		}
  		for _, bl := range blockArrays {
  			// Blocks longer than 80 runes are split once more and their sub-blocks are processed first.
  			if len([]rune(bl.Text)) > 80 {
  				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock, isSite, codeSite)
  				for _, bl_bl := range bl.Block {
  					processTableInBlock(bl_bl, job, isSite, codeSite)
  				}
  			}
  			FindProjectCode(bl.Text, job) // match the project code
  			// Table handling, including sub-package lookup in tables.
  			isUnRulesTab := processTableInBlock(bl, job, isSite, codeSite) // process tables
  			if isUnRulesTab { // irregular table found
  				job.IsUnRulesTab = isUnRulesTab
  			}
  			// Remove performance-record ("业绩") text from the block.
  			bl.Text = tableClearTextReg.ReplaceAllString(bl.Text, "")
  			// When no candidates were parsed from tables (or more than 8 were), match the block text once.
  			if (job.Winnerorder == nil || len(job.Winnerorder) == 0) || len(job.Winnerorder) > 8 {
  				// Plain-text matching when the table was not split.
  				tmp_text := HtmlToText(bl.Text)
  				bl.Winnerorder = winnerOrderEntity.Find(tmp_text, true, 1, isSite, codeSite)
  				if thanWinnerOrderEffective(job.Winnerorder, bl.Winnerorder) {
  					job.Winnerorder = bl.Winnerorder
  				}
  			}
  			// Attachment without sub-packages: look for them in the formatted text.
  			if (job.BlockPackage == nil || len(job.BlockPackage) == 0) && job.IsFile {
  				tmp_text := HtmlToText(bl.Text)
  				job.BlockPackage = FindPackageFromText(job.Title, tmp_text, isSite, codeSite)
  			}
  			job.Block = append(job.Block, bl)
  		}
  	} else { // no blocks: create a single block
  		//log.Println(con)
  		bl := &u.Block{}
  		newCon := con
  		//log.Println(con)
  		if len(tabs) > 0 { // table parsing
  			job.HasTable = 1 // flag: the text contains a table
  			newCon = TextAfterRemoveTable(con)
  			//log.Println(newCon)
  			if newCon != "" {
  				job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
  			}
  			for i := 0; i < len(tabs); i++ {
  				blockTag := ""
  				if len(tabs[i].Nodes) > 0 {
  					if tabs[i].Nodes[0].PrevSibling != nil {
  						blockTag = tabs[i].Nodes[0].PrevSibling.Data
  					}
  				}
  				// blockTag is the block tag: the data of the node preceding the table, when there is one.
  				// Process the table.
  				tabres := AnalyTableV2(tabs[i], job.Category, job.Category_Old, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) // table parsing entry point; returns the aggregated table object
  				// NOTE(review): overwritten for every table, so only the last table's flag survives;
  				// the block branch above only ever sets the flag to true — confirm which is intended.
  				job.IsUnRulesTab = tabres.isUnRulesTab
  				processTableResult(tabres, bl, job, isSite, codeSite)
  			}
  		} else {
  			// Look for sub-packages in the body text.
  			job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
  		}
  		bl.Text = HtmlToText(con)
  		FindProjectCode(bl.Text, job) // match the project code, then drop irrelevant text
  		if blTextReg.MatchString(bl.Text) && !unblTextReg.MatchString(bl.Text) {
  			if strings.Index(bl.Text, "业绩") > 1 {
  				// Buyer information found by beforeTextReg is kept and moved to the front.
  				before_arr := []string{}
  				if beforeTextReg.MatchString(bl.Text) {
  					before_arr = beforeTextReg.FindAllString(bl.Text, -1)
  				}
  				// Everything from the first "业绩" onwards is cut off.
  				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
  				if len(before_arr) > 0 {
  					bl.Text = strings.Join(before_arr, "\n") + bl.Text
  				}
  			}
  		}
  		// Special structure matched by formattext100: prepend a synthetic amount line built from group 1.
  		if formattext100.MatchString(bl.Text) {
  			new_str := formattext100.FindString(bl.Text)
  			new_str = formattext100.ReplaceAllString(new_str, "$1")
  			bl.Text = fmt.Sprintf("中标金额:%s万元\n", new_str) + bl.Text
  		}
  		// Run the key/value parsers on the detail text.
  		bl.Text = formatText(bl.Text, "all")
  		// Colon-separated key/values.
  		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1, isSite, codeSite)
  		// Space-separated key/values.
  		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil, isSite, codeSite)
  		// When no candidates were parsed from tables (or more than 8 were), parse them from the text.
  		if job.Winnerorder == nil || len(job.Winnerorder) == 0 || len(job.Winnerorder) > 8 {
  			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1, isSite, codeSite)
  			if thanWinnerOrderEffective(job.Winnerorder, bl.Winnerorder) {
  				job.Winnerorder = bl.Winnerorder
  			}
  		} else { // the table produced candidate names but no amounts
  			if onlyExistsWinEntName(job.Winnerorder) {
  				new_winorder := winnerOrderEntity.Find(bl.Text, true, 1, isSite, codeSite)
  				if thanExistsNewWinOrder(job.Winnerorder, new_winorder) {
  					job.Winnerorder = new_winorder
  				}
  			}
  		}
  		// Tables yielded sub-packages without any useful value: search the text again.
  		if len(tabs) > 0 && job.BlockPackage != nil {
  			if !isUsefulPackage(job.BlockPackage) { // no useful sub-package from the tables
  				text_pkg := FindPackageFromText(job.Title, bl.Text, isSite, codeSite)
  				if len(text_pkg) > 0 {
  					job.BlockPackage = text_pkg
  				}
  			}
  		}
  		job.Block = append(job.Block, bl)
  	}
  }
  484. // 是否有效分包
  485. func isUsefulPackage(pkg map[string]*u.BlockPackage) bool {
  486. if pkg == nil || len(pkg) == 0 {
  487. return false
  488. }
  489. for _, v := range pkg {
  490. p_winner := v.Winner
  491. p_budget := v.Budget
  492. p_bidamout := v.Bidamount
  493. if p_winner != "" || p_budget > float64(0) || p_bidamout > float64(0) {
  494. return true
  495. }
  496. }
  497. return false
  498. }
  499. // 核查候选人字段是否合理
  500. func verifyPackageWinnerOrder(wins []map[string]interface{}) bool {
  501. temp := map[string]string{}
  502. for k, v := range wins {
  503. if qu.IntAll(v["sort"]) != k+1 {
  504. return false
  505. }
  506. entname := qu.ObjToString(v["entname"])
  507. if temp[entname] == "" {
  508. temp[entname] = entname
  509. } else {
  510. return false
  511. }
  512. }
  513. return true
  514. }
  // isRepeatArrString reports whether every element of arr1 equals the element
  // of arr2 at the same position.
  //
  // When arr2 is shorter than arr1 the result is false; this case used to
  // panic with an index out of range once the common prefix matched. Extra
  // trailing elements of arr2 are ignored, as before, so an empty arr1 yields
  // true.
  func isRepeatArrString(arr1, arr2 []string) bool {
  	if len(arr2) < len(arr1) {
  		return false
  	}
  	for i, s := range arr1 {
  		if s != arr2[i] {
  			return false
  		}
  	}
  	return true
  }
  526. // 对sortkv重构
  527. func isResetUnitAmountSortKV(table *Table) {
  528. isUnitAmount := 0
  529. for _, k := range table.SortKV.Keys {
  530. v := table.SortKV.Map[k]
  531. if new_v, ok := v.(string); ok && (k == "中标金额" || k == "单位") {
  532. if k == "单位" && new_v == "万元" {
  533. isUnitAmount++
  534. }
  535. if k == "中标金额" && MoneyReg.MatchString(new_v) && !strings.Contains(new_v, "万") {
  536. isUnitAmount++
  537. }
  538. }
  539. }
  540. if isUnitAmount > 1 {
  541. table.SortKV.Map["中标金额"] = qu.ObjToString(table.SortKV.Map["中标金额"]) + "万元"
  542. }
  543. }
  544. func isResetUnitPriceSortKV(table *Table) {
  545. keyArr := []string{"序号", "数量", "单价"}
  546. isMatch := true
  547. for _, v := range keyArr {
  548. if _, ok := table.SortKV.Map[v].(string); !ok {
  549. isMatch = false
  550. break
  551. }
  552. }
  553. if isMatch && table.SortKV.Map["总价(元)"] == nil {
  554. if qu.ObjToString(table.SortKV.Map["序号"]) == "1" &&
  555. qu.ObjToString(table.SortKV.Map["数量"]) == "1" {
  556. table.SortKV.Map["总价(元)"] = table.SortKV.Map["单价"]
  557. table.SortKV.Keys = append(table.SortKV.Keys, "总价(元)")
  558. }
  559. }
  560. }
  561. func isResetAmountAggregateSortKV(table *Table) {
  562. keyGroup := [][]string{}
  563. keyGroup = append(keyGroup, []string{"序号", "标项名称", "总价(元)"})
  564. keyGroup = append(keyGroup, []string{"序号", "名称", "总价(元)"})
  565. keyGroup = append(keyGroup, []string{"序号", "服务内容", "验收金额(元)"})
  566. keyGroup = append(keyGroup, []string{"序号", "标项名称", "单价(元)", "数量"})
  567. for _, v := range keyGroup {
  568. if len(v) == 3 {
  569. arr1 := u.ConvertInterface(table.SortKV.Map[v[0]])
  570. arr2 := u.ConvertInterface(table.SortKV.Map[v[1]])
  571. arr3 := u.ConvertInterface(table.SortKV.Map[v[2]])
  572. if len(arr1) > 1 && len(arr1) == len(arr2) && len(arr1) == len(arr3) {
  573. amount := float64(0)
  574. for _, nv := range arr3 {
  575. amount = precisionFloat(amount, qu.Float64All(nv))
  576. }
  577. if amount > float64(0) {
  578. table.SortKV.Map[v[2]] = fmt.Sprintf("%f", amount)
  579. }
  580. break
  581. }
  582. }
  583. if len(v) == 4 {
  584. arr1 := u.ConvertInterface(table.SortKV.Map[v[0]])
  585. arr2 := u.ConvertInterface(table.SortKV.Map[v[1]])
  586. arr3 := u.ConvertInterface(table.SortKV.Map[v[2]])
  587. arr4 := u.ConvertInterface(table.SortKV.Map[v[3]])
  588. if len(arr1) > 1 && len(arr1) == len(arr2) && len(arr1) == len(arr3) && len(arr1) == len(arr4) {
  589. amount := float64(0)
  590. for kv, nv := range arr3 {
  591. amount = precisionFloat(amount, qu.Float64All(nv)*qu.Float64All(arr4[kv]))
  592. }
  593. if amount > float64(0) {
  594. if table.SortKV.Map["总价(元)"] == nil {
  595. table.SortKV.Map["总价(元)"] = fmt.Sprintf("%f", amount)
  596. table.SortKV.Keys = append(table.SortKV.Keys, "总价(元)")
  597. } else {
  598. table.SortKV.Map["总价(元)"] = fmt.Sprintf("%f", amount)
  599. }
  600. }
  601. break
  602. }
  603. }
  604. }
  605. }
  606. func isReseterialNumberSortKV(table *Table) {
  607. arr := u.ConvertInterface(table.SortKV.Map["序号"])
  608. if len(arr) > 5 {
  609. table.SortKV.Map["序号"] = arr[:3]
  610. }
  611. }
  612. func isResetWinnerRankingSortKV(table *Table) {
  613. if len(table.SortKV.Map) == 2 && table.SortKV.Map["中标人"] != nil && table.SortKV.Map["中标价格"] != nil {
  614. arr := u.ConvertInterface(table.SortKV.Map["中标人"])
  615. if len(arr) > 1 && len(arr) <= 3 {
  616. table.SortKV.Map["排名"] = []string{"1", "2"}
  617. table.SortKV.Keys = append(table.SortKV.Keys, "排名")
  618. }
  619. }
  620. }
  621. // 精度丢失-相加
  622. func precisionFloat(tmp1, tmp2 float64) float64 {
  623. n1 := decimal.NewFromFloat(tmp1)
  624. n2 := decimal.NewFromFloat(tmp2)
  625. decimalValue := n2.Add(n1)
  626. res, _ := decimalValue.Float64()
  627. return res
  628. }
  629. // 重置as~keys
  630. func resetAsKeysBidamount(as *SortMap) {
  631. keys, values_data := as.Keys, as.Map
  632. if len(keys) == 0 {
  633. return
  634. }
  635. k1, k2 := "投标报价(元)", "经评审的投标价(元)"
  636. value1, value2 := make([]string, 0), make([]string, 0)
  637. is_del := false
  638. if arr, ok := values_data[k1].([]string); ok && len(arr) > 0 {
  639. value1 = arr
  640. } else {
  641. return
  642. }
  643. if arr, ok := values_data[k2].([]string); ok && len(arr) > 0 {
  644. value2 = arr
  645. } else {
  646. return
  647. }
  648. if len(value1) == len(value2) && len(value1) > 0 {
  649. tmp_value := value2[0]
  650. price := winnerOrderEntity.clear("中标金额", tmp_value+GetMoneyUnit(k1, tmp_value))
  651. if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
  652. is_del = true
  653. }
  654. }
  655. if is_del {
  656. as.Map[k1] = as.Map[k2]
  657. }
  658. }
  659. // 判断是否特殊候选人结构表格
  660. func judgmentWinnerOrderHeaderInfo(TRs []*TR) bool {
  661. if len(TRs) < 3 {
  662. return false
  663. }
  664. //是否含有指定关键词
  665. TR_0 := TRs[0]
  666. isLen := 0
  667. for k, v := range TRs {
  668. if k > 0 {
  669. if len(v.TDs) == len(TR_0.TDs) {
  670. isLen++
  671. }
  672. if isLen >= 2 {
  673. break
  674. }
  675. }
  676. }
  677. if isLen < 2 {
  678. return false
  679. }
  680. textArr := [][]string{}
  681. textArr = append(textArr, []string{"投标人", "中标候选人排序", "投标报价(万元)"})
  682. textArr = append(textArr, []string{"投标人", "中标候选人排序", "投标总报价(万元)"})
  683. for _, arr := range textArr {
  684. isok := 0
  685. for _, v := range arr {
  686. for _, v1 := range TR_0.TDs {
  687. if v1.Val == v {
  688. isok++
  689. break
  690. }
  691. }
  692. }
  693. if isok == 3 {
  694. return true
  695. }
  696. }
  697. return false
  698. }
  699. // 预算标签-不一定为分包
  700. func isUnRealBudgetBp(tnv []*u.Tag) bool {
  701. if len(tnv) != 2 {
  702. return false
  703. }
  704. key_1, key_2 := tnv[0].Key, tnv[1].Key
  705. value_1, value_2 := tnv[0].Value, tnv[1].Value
  706. if value_1 != value_2 {
  707. if strings.Contains(key_1, "项目总投资") && strings.Contains(key_2, "项目投资") {
  708. return true
  709. }
  710. if strings.Contains(key_2, "项目总投资") && strings.Contains(key_1, "项目投资") {
  711. return true
  712. }
  713. }
  714. return false
  715. }
  716. // 初始化lineMapArr,lineMap
  717. func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
  718. lineMapArr = make(map[string]*SortMap)
  719. lineMap = make(map[string]*SortMap)
  720. for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
  721. val := table.SortKV.Map[key]
  722. key = regReplAllSpace.ReplaceAllString(key, "")
  723. key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
  724. //qu.Debug(key, "---------------------------", val)
  725. if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
  726. /*
  727. {
  728. "商品":["",""],
  729. "商品_"["",""],
  730. }
  731. */
  732. valArr, allempty := filterVal(realTypeVal...) //过滤数据
  733. if allempty {
  734. continue
  735. }
  736. realTypeVal = valArr
  737. line := underline.FindString(key)
  738. lineValMap1 := lineMapArr[line]
  739. // i := 1
  740. // L:
  741. // for { //去除数组空数据
  742. // last := realTypeVal[len(realTypeVal)-i]
  743. // if last == "" {
  744. // i++
  745. // if i > len(realTypeVal) {
  746. // break
  747. // }
  748. // goto L
  749. // } else {
  750. // break
  751. // }
  752. // }
  753. // dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] //去除数组中空数据
  754. if len(realTypeVal) > 0 {
  755. if lineValMap1 == nil {
  756. tmp := NewSortMap()
  757. tmp.AddKey(key, realTypeVal)
  758. lineMapArr[line] = tmp
  759. } else {
  760. lineValMap1.AddKey(key, realTypeVal)
  761. }
  762. }
  763. //qu.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
  764. } else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"}
  765. /*
  766. {
  767. "商品:"",名称:"",
  768. "商品_:"",名称_:"",
  769. "商品__:"",名称__:"",
  770. }
  771. */
  772. valArr, allempty := filterVal(realTypeVal) //过滤数据
  773. if allempty {
  774. continue
  775. }
  776. realTypeVal = valArr[0]
  777. line := underline.FindString(key)
  778. lineValMap2 := lineMap[line]
  779. if lineValMap2 == nil {
  780. tmp := NewSortMap()
  781. tmp.AddKey(key, realTypeVal)
  782. lineMap[line] = tmp
  783. } else {
  784. lineValMap2.AddKey(key, realTypeVal)
  785. }
  786. //qu.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
  787. } else {
  788. // "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
  789. //成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
  790. //qu.Debug("err data:", key, val)
  791. }
  792. }
  793. return lineMapArr, lineMap
  794. }
  795. func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
  796. for k2, v2 := range ka {
  797. //处理数组长度不相等,使长度一致
  798. if len(v2) > maxNum {
  799. ka[k2] = v2[:maxNum]
  800. }
  801. }
  802. finalData := assembleData(ka, 1)
  803. if len(finalData) > 0 {
  804. return finalData
  805. }
  806. return nil
  807. }
  808. func dealStrData(kv map[string]string) []map[string]string {
  809. finalData := []map[string]string{}
  810. if len(kv) > 0 {
  811. finalData = assembleData(kv, 2)
  812. }
  813. return finalData
  814. }
  815. // 组装数据,每一行的数据为一数据集合
  816. func assembleData(m interface{}, n int) []map[string]string {
  817. defer qu.Catch()
  818. /*
  819. {
  820. "itemname":["计算机","打印机","机柜"],
  821. "number" :["1","12","4"]
  822. }
  823. */
  824. datas := []map[string]string{}
  825. if n == 1 { //数组数据
  826. realTypeM := m.(map[string][]string)
  827. //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  828. /*
  829. arr1 ["a1","b1","c1"]
  830. arr2 ["a2","b2","c2"]
  831. [
  832. {"a1","a2"},
  833. {"b1","b2"},
  834. {"c1","c2"}
  835. ]
  836. */
  837. //start
  838. for k3, v3 := range realTypeM {
  839. for _, val := range v3 {
  840. data := make(map[string]string)
  841. data[k3] = val
  842. datas = append(datas, data)
  843. }
  844. break
  845. }
  846. for i, data := range datas {
  847. for k4, v4 := range realTypeM {
  848. if i < len(v4) { //数组数据长度不一致
  849. if v4[i] != " " {
  850. data[k4] = v4[i]
  851. } else {
  852. delete(data, k4)
  853. }
  854. } else {
  855. fmt.Println("err table")
  856. }
  857. }
  858. datas[i] = data
  859. }
  860. //end
  861. for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  862. for fmk, fmv := range fdv {
  863. if tabletdclear.ReplaceAllString(fmv, "") == "" {
  864. delete(fdv, fmk)
  865. }
  866. }
  867. }
  868. } else { //字符串数据
  869. realTypeM := m.(map[string]string)
  870. datas = append(datas, realTypeM)
  871. }
  872. return datas
  873. }
  874. func convert(key, r string) bool {
  875. defer qu.Catch()
  876. flag := false
  877. key = tabletitleclear.ReplaceAllString(key, "")
  878. reg, err := regexp.Compile(r)
  879. if err != nil {
  880. fmt.Println("reg err:", err)
  881. return false
  882. }
  883. flag = reg.MatchString(key)
  884. return flag
  885. }
  886. func hasKey(table *Table, n int) {
  887. defer qu.Catch()
  888. if table.TableResult.HasKey == 1 {
  889. return
  890. }
  891. if n >= 1 {
  892. table.TableResult.HasKey = 1
  893. }
  894. }
  895. func hasGoods(table *Table, data ...string) {
  896. defer qu.Catch()
  897. goodsArr := make([]string, len(data))
  898. //fmt.Println("table.TableResult.HasGoods=====", table.TableResult.HasGoods)
  899. if table.TableResult.HasGoods == 1 {
  900. return
  901. }
  902. for i, d := range data {
  903. if d != "" {
  904. goods := u.GoodsGet.CheckSensitiveWord(d)
  905. //fmt.Println("goods======", goods)
  906. goodsArr[i] = goods
  907. if len(goods) > 0 {
  908. table.TableResult.HasGoods = 1
  909. break
  910. }
  911. }
  912. }
  913. }
  914. func hasBrand(table *Table, data ...string) ([]string, bool) {
  915. defer qu.Catch()
  916. //fmt.Println("table.TableResult.HasBrand---------", table.TableResult.HasBrand)
  917. brandArr := make([]string, len(data))
  918. // if table.TableResult.HasBrand == 1 {
  919. // return brandArr, 1
  920. // }
  921. allNull := true
  922. for i, d := range data {
  923. //if d != "" {
  924. brand := u.BrandGet.CheckSensitiveWord(d)
  925. if brand != "" {
  926. allNull = false
  927. }
  928. //fmt.Println("brand======", brand)
  929. brandArr[i] = brand
  930. if len(brand) > 0 {
  931. table.TableResult.HasBrand = 1
  932. }
  933. //}
  934. }
  935. return brandArr, allNull
  936. }
  937. // 过滤td值
  938. func filterVal(val ...string) ([]string, bool) {
  939. defer qu.Catch()
  940. n := 0 //记录被过滤的个数
  941. for i, v := range val {
  942. if len(clearnn.FindStringSubmatch(v)) > 0 {
  943. tmpv := clearnn.FindStringSubmatch(v)[1]
  944. if tmpv != "" {
  945. v = tmpv
  946. }
  947. }
  948. afterFilter := tabletdclear.ReplaceAllString(v, "")
  949. afterFilter = NullVal.ReplaceAllString(afterFilter, "")
  950. if afterFilter == "" {
  951. n++
  952. }
  953. val[i] = afterFilter
  954. }
  955. allempty := false
  956. if n == len(val) { //所有都被过滤掉
  957. allempty = true
  958. }
  959. return val, allempty
  960. }
  961. // 过滤itemname全是数字
  962. func filterItem(itemval ...string) []string {
  963. defer qu.Catch()
  964. result := []string{}
  965. for _, v := range itemval {
  966. afterFilter := numclear.ReplaceAllString(v, "")
  967. if afterFilter != "" {
  968. result = append(result, v)
  969. } else {
  970. result = append(result, afterFilter)
  971. }
  972. }
  973. return result
  974. }
  975. // 处理价格
  976. func dealPriceInterface(key string, val ...string) (result []interface{}) {
  977. defer qu.Catch()
  978. for _, v := range val {
  979. if num1.MatchString(v) { //含数字
  980. tdIsWan := strings.Contains(v, "万")
  981. if !tdIsWan {
  982. if strings.Contains(key, "万") {
  983. v = v + "万"
  984. }
  985. }
  986. data := []interface{}{v, ""}
  987. money := clear.ObjToMoney(data)[0]
  988. result = append(result, money)
  989. } else {
  990. result = append(result, "")
  991. }
  992. }
  993. return
  994. }
  995. // 处理number
  996. func dealNumberInterface(val ...string) (result []interface{}) {
  997. defer qu.Catch()
  998. for _, v := range val { //1个 1.00个
  999. n := numclear.FindString(v)
  1000. if n == "" {
  1001. result = append(result, "")
  1002. } else if tmp := clear.NumChar[n]; tmp != nil { //一二三...
  1003. result = append(result, tmp)
  1004. } else { //数字
  1005. result = append(result, qu.IntAll(strings.Split(n, ".")[0]))
  1006. }
  1007. }
  1008. return
  1009. }
  1010. // 处理价格
  1011. func dealPrice(key string, val ...string) []string {
  1012. defer qu.Catch()
  1013. result := []string{}
  1014. for _, v := range val {
  1015. data := []interface{}{v, key}
  1016. money := clear.ObjToMoney(data)[0]
  1017. result = append(result, fmt.Sprintf("%v", money))
  1018. }
  1019. // result := []string{}
  1020. // for _, v := range val { //1.00万元 1元 2.25元/斤
  1021. // tmparr := strings.Split(v, ".")
  1022. // tmparr[0] = moneyNum.ReplaceAllString(tmparr[0], "")
  1023. // if iswan {
  1024. // result = append(result, tmparr[0]+"0000")
  1025. // } else { //td val值带万
  1026. // if strings.Contains(v, "万") { //价格中带有万
  1027. // result = append(result, tmparr[0]+"0000")
  1028. // } else {
  1029. // result = append(result, tmparr[0])
  1030. // }
  1031. // }
  1032. // }
  1033. return result
  1034. }
  1035. // 处理number
  1036. func dealNumber(val ...string) ([]string, []string) {
  1037. defer qu.Catch()
  1038. unitnameArr := []string{}
  1039. result := []string{}
  1040. for _, v := range val { //1个 1.00个
  1041. n := numclear.FindString(v)
  1042. unitname := numclear.ReplaceAllString(v, "") //匹配个数后的单位
  1043. unitnameArr = append(unitnameArr, unitname)
  1044. //val[i] = strings.Split(n, ".")[0]
  1045. result = append(result, strings.Split(n, ".")[0])
  1046. }
  1047. return result, unitnameArr
  1048. }
  1049. // 是否符合指定结构
  1050. func isPkgRegexArr(regs []*regexp.Regexp, con string) bool {
  1051. S_Index := regs[0].FindAllStringIndex(con, -1)
  1052. E_Index := regs[1].FindAllStringIndex(con, -1)
  1053. if len(S_Index) == len(E_Index) && len(S_Index) == 1 {
  1054. return true
  1055. }
  1056. return false
  1057. }