analytable.go 107 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535
  1. package pretreated
  2. import (
  3. "fmt"
  4. u "jy/util"
  5. qutil "qfw/util"
  6. "regexp"
  7. "strings"
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10. /**
  11. Package-level variables, mostly the regular expressions used for classification.
  12. **/
  13. var (
  14. // Matches leading numbering (digits, Chinese numerals, dots); original note: cleans numbers from item names.
  15. numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
  16. // Characters to remove from table titles.
  17. tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/((人民币万元件个公斤))]")
  18. // Noise to remove from table keys: whitespace/punctuation runs, a leading ordinal, and bracketed text.
  19. tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
  20. // Symbols to strip from table cell (td) text.
  21. tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、,。、_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
  22. // Detects keys that denote a monetary amount; used when handling the 万元 unit.
  23. moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
  24. // Used to judge from cell content whether it is a header: content that is only an amount is not a header.
  25. MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
  26. // Used when detecting sub-packages.
  27. moneyNum = regexp.MustCompile("[元整¥万]")
  28. // Detects hidden tables (a display:none style).
  29. display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
  30. //---------------
  31. // Estimating the likelihood that a table describes multiple packages.
  32. // Scoring by table tag. NOTE(review): the _4/_2 suffixes presumably are the score awarded — confirm at the call site.
  33. TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)")
  34. TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)")
  35. // Filter applied to table keys before package scoring.
  36. FilterKey_2 = regexp.MustCompile("招标|投标|项目")
  37. // Package scoring by table key.
  38. FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数])")
  39. // Package detection on values.
  40. FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
  41. FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
  42. // Keys excluded before package detection.
  43. excludeKey = regexp.MustCompile("(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分
  44. //-------------
  45. cut = u.NewCut()
  46. // Regex for cleaning the table tag: HTML tags and trailing whitespace.
  47. ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
  48. // Regex for locating the table tag text.
  49. ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
  50. // Original note: probability for judging that a table part is a header. NOTE(review): presumably a threshold — confirm at the call site.
  51. checkval = float32(0.6)
  52. //tdval_reg = regexp.MustCompile(`([\p{Han}][\p{Han}\s、()\\(\\)]{1,9})[::]([^::\\n。]{5,60})(?:[;;,,.。\\n\\t\\s])?`)
  53. // Whitespace replacement.
  54. repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+")
  55. // Table key/value handling.
  56. // Identification for keys that cannot be standardized: table tags that indicate a winning-bid table.
  57. filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)")
  58. // Winning bid amount.
  59. // Keys containing these words are standardized.
  60. filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
  61. // Simple amount check.
  62. filter_zbje_jd = regexp.MustCompile("^[^售]{0,4}(价|额).{0,4}$")
  63. // ...and keys containing these words are excluded.
  64. filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分$")
  65. // ...and the value must contain these characters.
  66. filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
  67. // Winning bidder handling.
  68. // Keys containing these words are standardized.
  69. filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$")
  70. // Simple check.
  71. filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
  72. // ...and the key must not contain these words.
  73. filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址")
  74. // ...and the value must contain these words.
  75. filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
  76. // ...and the value must contain these words (suffix-only variant).
  77. filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
  78. //Tg = map[string]interface{}{}
  79. // Some tables have an empty header; values that are rankings are handled under the key NullTxtBid.
  80. NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
  81. NullTxtBid = "成交供应商排名"
  82. projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
  83. MhSpilt = regexp.MustCompile("[::]")
  84. // Recognizes the buyer's contact person and phone, and the agency's contact person and phone.
  85. ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|(征求意见|报名审核购买)?((联系人?|办公)?((电话([//]传真)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|(项目)?经办)人)|采购方代表")
  86. ContactInfoMustReg = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
  87. ContactType = map[string]*regexp.Regexp{
  88. "采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|发布人?|甲|招标(服务)?|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人"),
  89. "代理机构": regexp.MustCompile("(代理|受托).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
  90. }
  91. ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
  92. MultipleValueSplitReg = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
  93. BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
  94. FilterSerial = regexp.MustCompile(".+[、..::,]")
  95. filterTableWror = regexp.MustCompile("班子成员")
  96. underline = regexp.MustCompile("_+$")
  97. iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果)")
  98. nswinnertabletag = regexp.MustCompile("[评得分估]+")
  99. projectcodeReg = regexp.MustCompile(`((|\(|\[){1}(编号|项目编号|标段编号){1}(:|:)(.){4,30}()|\)|\])`)
  100. projectcodeReg2 = regexp.MustCompile(`(编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9]`)
  101. jsonReg = regexp.MustCompile(`\{".*\":\".+\"}`)
  102. )
  103. //在解析时,判断表格元素是否隐藏
  104. func IsHide(g *goquery.Selection) (b bool) {
  105. style, exists := g.Attr("style")
  106. if exists {
  107. b = display.MatchString(style)
  108. }
  109. return
  110. }
  111. //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
  112. //待扩展,暂不支持正则标签库
  113. func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (k1 []string, weight []int, v1, returntag string, b bool) {
  114. k1 = []string{}
  115. weight = []int{}
  116. tk := k
  117. if sv, sok := v.(string); sok { //取KV
  118. v1 = sv
  119. } else if sv, sok := v.([]string); sok { //是数组先默认取第一个
  120. v1 = sv[0]
  121. }
  122. //对值单位的处理 (预算|费|价|额|规模|投资)
  123. if moneyreg.MatchString(tk) {
  124. v1 += GetMoneyUnit(tk, v1)
  125. }
  126. //先清理key
  127. //u.Debug(1, k, v1)
  128. k = ClearKey(k, 2)
  129. //u.Debug(2, k)
  130. //取标准key
  131. res := u.GetTags(k)
  132. if len(res) == 0 && tk != k {
  133. res = u.GetTags(tk)
  134. }
  135. //log.Println(k, res)
  136. // if len(res) == 0 {
  137. // go u.AddtoNoMatchMap(tk)
  138. // }
  139. //当取到标准化值时,放入数组
  140. if len(res) > 0 {
  141. b = true
  142. for _, t1 := range res {
  143. k1 = append(k1, t1.Value)
  144. weight = append(weight, t1.Weight)
  145. }
  146. //k1 = res[0].Value
  147. }
  148. //没有取到标准化key时,对中标金额和中标单位的逻辑处理
  149. if !b {
  150. if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
  151. if tabletag == "" {
  152. returntag = "中标情况"
  153. }
  154. k1 = append(k1, "中标金额")
  155. weight = append(weight, -100)
  156. b = true
  157. } else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
  158. filter_zbdw_v.MatchString(v1) {
  159. k1 = append(k1, "中标单位")
  160. weight = append(weight, -100)
  161. if tabletag == "" {
  162. returntag = "中标情况"
  163. }
  164. b = true
  165. }
  166. }
  167. //对上一步没有取到标准化key的进一步处理
  168. if !b {
  169. if tabletag == "" {
  170. }
  171. if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
  172. //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
  173. if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
  174. k1 = append(k1, "中标金额")
  175. weight = append(weight, -100)
  176. b = true
  177. } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
  178. k1 = append(k1, "中标单位")
  179. weight = append(weight, -100)
  180. b = true
  181. }*/
  182. }
  183. }
  184. return
  185. }
  186. //对解析后的表格的kv进行过滤
  187. func (table *Table) KVFilter() {
  188. //1.标准化值查找
  189. //2.对数组的处理
  190. //3.对分包的处理
  191. //4.对KV的处理
  192. //判断表格是否有用,调用abandontable正则数组进行判断
  193. //遍历每一行
  194. winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
  195. if !winnertag {
  196. winnertag = iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
  197. }
  198. table.analyTdKV() //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
  199. as := NewSortMap()
  200. //遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理
  201. for _, k := range table.SortKV.Keys {
  202. //表格描述处理,对成交结果的处理
  203. if regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序)").MatchString(k) {
  204. table.Desc += "成交结果,"
  205. }
  206. if regexp.MustCompile("^单价").MatchString(k) {
  207. continue
  208. }
  209. v := table.SortKV.Map[k]
  210. if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
  211. k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
  212. k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
  213. //qutil.Debug(k, v, k1, w1, v1, tag, b)
  214. if b {
  215. //降低冒号值的权重
  216. if MhSpilt.MatchString(v1) {
  217. for pos, _ := range k1 {
  218. w1[pos] -= 50
  219. }
  220. }
  221. if tag != "" && table.Tag == "" {
  222. table.Tag = tag
  223. }
  224. for pos, k2 := range k1 { //根据关键词,过滤table.SortKV到table.StandKV和table.StandKVWeight
  225. if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
  226. table.StandKV[k2] = v1 //本节点
  227. table.StandKVWeight[k2] = w1[pos]
  228. }
  229. // else if k2 == "中标金额" {
  230. // // u.Debug(qutil.Float64All(v1), qutil.Float64All(table.StandKV[k2]))
  231. // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
  232. // table.StandKV[k2] = v1
  233. // }
  234. // }
  235. }
  236. }
  237. } else {
  238. //u.Debug(k, v, "---------")
  239. as.AddKey(k, v)
  240. }
  241. }
  242. //处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
  243. table.sortKVArr(as, winnertag)
  244. //
  245. if filterTableWror.MatchString(table.Tag) {
  246. table.WinnerOrder = nil
  247. }
  248. //
  249. if len(table.WinnerOrder) > 0 || !table.BPackage {
  250. winnerOrder := []map[string]interface{}{}
  251. maxSort := 0
  252. //调整顺序
  253. for i := 0; i < 2; i++ {
  254. for _, v := range table.WinnerOrder {
  255. sortstr, _ := v["sortstr"].(string)
  256. if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") {
  257. continue
  258. }
  259. sort, _ := v["sort"].(int)
  260. if i == 0 {
  261. if maxSort == 0 || sort > maxSort {
  262. maxSort = sort
  263. }
  264. } else {
  265. maxSort++
  266. v["sort"] = maxSort
  267. }
  268. winnerOrder = append(winnerOrder, v)
  269. }
  270. if len(winnerOrder) == len(table.WinnerOrder) {
  271. break
  272. }
  273. }
  274. table.WinnerOrder = winnerOrder
  275. winnerOrder = []map[string]interface{}{}
  276. L: //遍历每个td,查询中标人
  277. for _, tr := range table.TRs {
  278. for _, td := range tr.TDs {
  279. winnerOrder = winnerOrderEntity.Find(td.Val, true, 3)
  280. if len(winnerOrder) > 0 {
  281. break L
  282. }
  283. }
  284. }
  285. if len(table.WinnerOrder) > 0 {
  286. //中标候选人合并
  287. winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
  288. if table.StandKV["中标单位"] == "" {
  289. ent := table.WinnerOrder[0]["entname"]
  290. if ent != nil {
  291. table.StandKV["中标单位"], _ = ent.(string)
  292. table.StandKVWeight["中标单位"] = -25
  293. }
  294. }
  295. } else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
  296. if len(winnerOrder) > 1 {
  297. table.WinnerOrder = winnerOrder
  298. }
  299. }
  300. }
  301. //对中标候选人进行排序
  302. winnerOrderEntity.Order(table.WinnerOrder)
  303. //该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面
  304. if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 {
  305. if table.BlockPackage.Map != nil {
  306. onePkgKey := table.BlockPackage.Keys[0]
  307. onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
  308. if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 {
  309. onePkg.WinnerOrder = table.WinnerOrder
  310. table.BlockPackage.Map[onePkgKey] = onePkg
  311. }
  312. }
  313. }
  314. }
  315. // sortKVArr handles the entries whose table.SortKV value is an array (collected in as).
  // For each key in as.Keys:
  //   - a []map[string]interface{} value stored under NullTxtBid is appended to table.WinnerOrder;
  //   - otherwise, while table.WinnerOrder is still nil and the key index was not already consumed
  //     (checkKey), a []string value starts a scan over this and the following keys to assemble
  //     candidate entries (sortstr/sort, entname, price); entries with more than two fields
  //     become table.WinnerOrder;
  //   - in that second case the key is also standardized through CommonDataAnaly and written to
  //     table.StandKV / table.StandKVWeight when the slot is empty or the new weight is higher.
  // winnertag enables parsing candidates from the array values when the key itself is not recognized.
  316. func (table *Table) sortKVArr(as *SortMap, winnertag bool) {
  317. checkKey := map[int]bool{}
  318. for kn, k := range as.Keys { // iterate over the keys whose table.SortKV value is an array
  319. v := as.Map[k]
  320. if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
  321. if table.WinnerOrder == nil {
  322. table.WinnerOrder = []map[string]interface{}{}
  323. }
  324. table.WinnerOrder = append(table.WinnerOrder, vm...)
  325. } else {
  326. // candidate-ranking logic
  327. if table.WinnerOrder == nil && !checkKey[kn] {
  328. if vs1, ok := v.([]string); ok {
  329. smap := make([]map[string]interface{}, len(vs1))
  330. for n1, _ := range vs1 {
  331. smap[n1] = map[string]interface{}{}
  332. }
  333. //hadSort := false
  334. tmpEntname := make([]string, len(vs1))
  335. tmpPrice := make([]string, len(vs1))
  // Scan from the current key onward; k and v are shadowed inside this loop only.
  336. for kn1, k := range as.Keys[kn:] {
  337. v := as.Map[k]
  338. if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) {
  339. continue
  340. }
  341. // Currently the key of the array data is checked, but some cases may not satisfy this,
  342. // e.g. 载明内容:[第一中标候选人 第二中标候选人] id:5d00587da5cb26b9b75e367b
  343. if vs, ok := v.([]string); ok && len(vs) == len(vs1) { // same number of array values
  344. res, _, _, _, repl := CheckCommon(k, "bidorder")
  345. kv := ""
  346. if !res {
  347. kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""))
  348. if kt.Len() > 0 {
  349. kv = kt[0].Value
  350. }
  351. }
  352. //qutil.Debug(k, res, repl, kv, "--", vs)
  353. if !res && kv == "" { // key not recognized: validate the array values instead
  354. checkKey[kn+kn1] = true
  355. if winnertag { // for winning-bid tables, parse the candidates from the value array
  356. for vsk, vsv := range vs {
  357. if NullTdReg.MatchString(vsv) { // first check whether the value is a ranking label
  358. //hadSort = true
  359. smap[vsk]["sortstr"] = vsv
  360. smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
  361. } else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" { // then check whether the value is a candidate
  362. entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  363. if entname != "" {
  364. tmpEntname[vsk] = entname
  365. }
  366. } else { // a value that is neither a ranking nor a candidate name stops the scan of this array (original note: the array data is judged invalid)
  367. break
  368. }
  369. }
  370. }
  371. }
  372. if res || kv != "" { // keep looking through the following keys
  373. checkKey[kn+kn1] = true
  // SORT is re-entered via goto below when a candidate-name column turns out to hold ranking labels.
  374. SORT:
  375. if repl == "sort" {
  376. //hadSort = true
  377. for vsk, vsv := range vs {
  378. smap[vsk]["sortstr"] = vsv
  379. smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
  380. }
  381. } else if repl == "entname" || kv == "中标单位" {
  382. for vsk, vsv := range vs {
  383. if winnerReg6.MatchString(vsv) { // e.g. k:中标候选人 v:["第一名","第二名"]
  384. repl = "sort"
  385. goto SORT
  386. }
  387. // if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 {
  388. // break
  389. // }
  390. // entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  391. // if entname != "" {
  392. // smap[vsk]["entname"] = entname
  393. //
  394. if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 { // exclude e.g. 单位:["台","个","套"]
  395. break
  396. }
  397. entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  398. if entname != "" {
  399. tmpEntname[vsk] = entname
  400. }
  401. }
  402. } else if kv == "中标金额" {
  403. for vsk, vsv := range vs {
  404. // filter the price, e.g. 2348273.432元(万元)-->2348273.432
  405. //tmp1, _ := smap[vsk]["price"].(string)
  406. tmp1 := tmpPrice[vsk]
  407. p1num := numberReg2.FindString(tmp1)
  408. p2num := numberReg2.FindString(vsv)
  409. p1 := qutil.Float64All(p1num)
  410. p2 := qutil.Float64All(p2num)
  // Only a numerically larger price replaces the one already recorded for this position.
  411. if p2 > p1 {
  412. //smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
  413. price := winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
  414. if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
  415. tmpPrice[vsk] = pricestr
  416. }
  417. }
  418. }
  419. }
  420. }
  421. } else {
  422. //break
  423. }
  424. }
  425. newSmap := []map[string]interface{}{}
  426. //qutil.Debug("smap=======", smap)
  427. //qutil.Debug("tmpEntname--", len(tmpEntname), tmpEntname)
  428. //qutil.Debug("tmpPrice--", len(tmpPrice), tmpPrice)
  429. for n, smap_v := range smap {
  430. //if hadSort { // ranking present; then add entname and price
  // A price is attached only when an entname was found for the same position.
  431. if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" {
  432. smap_v["entname"] = tmpEntname[n]
  433. if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" {
  434. smap_v["price"] = tmpPrice[n]
  435. }
  436. }
  437. //} else if len(tmpEntname) > 0 {
  438. //fmt.Println("table winnerorder only has entname", tmpEntname)
  439. //}
  440. if len(smap_v) > 2 { // keep only entries with more than two fields, i.e. more than just the ranking info (sort and sortstr)
  441. newSmap = append(newSmap, smap_v)
  442. }
  443. }
  444. if len(newSmap) > 0 {
  445. table.WinnerOrder = newSmap
  446. }
  447. }
  448. }
  // Standardize the key itself; for a []string value CommonDataAnaly uses the first element.
  449. k1, w1, v1, tag, b := CommonDataAnaly(k, table.Tag, table.Desc, v)
  450. if b {
  451. if tag != "" && table.Tag == "" {
  452. table.Tag = tag
  453. }
  454. for pos, k2 := range k1 {
  455. if table.StandKV[k2] == "" || w1[pos] > table.StandKVWeight[k2] {
  456. table.StandKV[k2] = v1 // this node
  457. table.StandKVWeight[k2] = w1[pos]
  458. }
  459. // else if k2 == "中标金额" {
  460. // if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
  461. // table.StandKV[k2] = v1
  462. // }
  463. // }
  464. }
  465. }
  466. }
  467. }
  468. }
  469. //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
  470. func (table *Table) analyTdKV() {
  471. //遍历每一行
  472. for _, tr := range table.TRs {
  473. for _, td := range tr.TDs {
  474. //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map)
  475. bc := false
  476. if !td.BH {
  477. //表头是否是无用内容
  478. if td.HeadTd != nil {
  479. bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable")
  480. }
  481. }
  482. if !bc {
  483. //td元素有内嵌kv,遍历放入table的Kv中
  484. if len(td.SortKV.Keys) > 0 {
  485. for _, k3 := range td.SortKV.Keys {
  486. _val := td.SortKV.Map[k3]
  487. //thisFlag := false
  488. if td.HeadTd != nil && len([]rune(k3)) < 4 {
  489. k3 = td.HeadTd.Val + k3
  490. }
  491. if table.SortKV.Map[k3] == nil && _val != nil && _val != "" {
  492. //u.Debug(k3, _val)
  493. //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) {
  494. table.SortKV.AddKey(k3, _val)
  495. }
  496. }
  497. }
  498. }
  499. //td有子表格的处理
  500. //u.Debug(td.BH, td.Val, td.SonTableResult)
  501. if td.SonTableResult != nil {
  502. //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
  503. for _, k3 := range td.SonTableResult.SortKV.Keys {
  504. if table.StandKV[k3] == "" || td.SonTableResult.SortKVWeight[k3] > table.StandKVWeight[k3] {
  505. table.StandKV[k3] = qutil.ObjToString(td.SonTableResult.SortKV.Map[k3])
  506. table.StandKVWeight[k3] = td.SonTableResult.SortKVWeight[k3]
  507. }
  508. }
  509. //中标候选人排序
  510. if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
  511. table.WinnerOrder = td.SonTableResult.WinnerOrder
  512. } else {
  513. winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder)
  514. }
  515. }
  516. }
  517. }
  518. }
  519. //表格结果合并到父表格集中
  520. func (table *Table) MergerToTableresult() {
  521. //对多包表格的多包值的合并处理
  522. if table.BPackage {
  523. table.TableResult.IsMultiPackage = true
  524. for k, v := range table.BlockPackage.Map {
  525. package1 := table.TableResult.PackageMap.Map[k]
  526. if package1 == nil {
  527. table.TableResult.PackageMap.AddKey(k, v)
  528. } else {
  529. bp := package1.(*u.BlockPackage)
  530. if bp.TableKV == nil {
  531. bp.TableKV = u.NewJobKv()
  532. }
  533. v1 := v.(*u.BlockPackage)
  534. if v1.TableKV != nil && v1.TableKV.Kv != nil {
  535. for k2, v2 := range v1.TableKV.Kv {
  536. if bp.TableKV == nil {
  537. bp.TableKV = u.NewJobKv()
  538. }
  539. if bp.TableKV.Kv[k2] == "" || (v1.TableKV.KvTag[k2] != nil && bp.TableKV.KvTag[k2] != nil && v1.TableKV.KvTag[k2].Weight > bp.TableKV.KvTag[k2].Weight) {
  540. //可能会报错 assignment to entry in nil map
  541. bp.TableKV.Kv[k2] = v2
  542. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  543. }
  544. }
  545. }
  546. bp.WinnerOrder = v1.WinnerOrder
  547. //table.TableResult.PackageMap.AddKey(k, v)
  548. }
  549. }
  550. // str := ""
  551. // for _, k := range table.TableResult.PackageMap.Keys {
  552. // v := table.TableResult.PackageMap.Map[k].(*u.BlockPackage)
  553. // str += fmt.Sprintf("包号:%s,中标人:%s,中标价:%s,预算:%s,文本:%s,排名:%v ---\t", v.Index, v.TableKV["中标单位"]+v.ColonKV["中标单位"], v.TableKV["中标金额"]+v.ColonKV["中标金额"], v.TableKV["预算"]+v.ColonKV["预算"], v.Text, v.WinnerOrder)
  554. // }
  555. // u.Debug(table, table.TableResult, str)
  556. }
  557. //遍历标准key到tableresult.sortkv中
  558. for k, v := range table.StandKV {
  559. if table.TableResult.SortKV.Map[k] == nil || table.StandKVWeight[k] > table.TableResult.SortKVWeight[k] || strings.Contains(table.Tag, "变更") {
  560. v = strings.Replace(v, "__", "", -1)
  561. if table.TableResult.SortKV.Map[k] == nil {
  562. table.TableResult.SortKV.AddKey(k, v) //父集
  563. } else {
  564. table.TableResult.SortKV.ReplaceKey(k, v, k)
  565. }
  566. table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
  567. } else if table.TableResult.SortKV.Map[k] != nil {
  568. //u.Debug(k, v, table.TableResult.SortKV.Map[k], "..............")
  569. }
  570. }
  571. //表格的块标签
  572. if table.TableResult.BlockTag == "" && table.Tag != "" {
  573. table.TableResult.BlockTag = table.Tag
  574. }
  575. //中标候选人(多个table,现在默认取第一个table的信息,考虑需不需要多个table分析合并数据?)
  576. if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 {
  577. table.TableResult.WinnerOrder = table.WinnerOrder
  578. }
  579. //增加brand 并列table
  580. if len(table.BrandData) > 0 {
  581. for _, v := range table.BrandData {
  582. if len(v) > 0 {
  583. table.TableResult.BrandData = append(table.TableResult.BrandData, v)
  584. }
  585. }
  586. }
  587. }
  588. /**
  589. 解析表格入口
  590. 返回:汇总表格对象
  591. **/
  592. func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock) (tabres *TableResult) {
  593. defer qutil.Catch()
  594. //u.Debug(con)
  595. if itype == 1 {
  596. //修复表格
  597. con = RepairCon(con)
  598. }
  599. //生成tableresult对象
  600. tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
  601. //可以有多个table
  602. for _, table := range tabs {
  603. //隐藏表格跳过
  604. if IsHide(table) {
  605. continue
  606. }
  607. tabres.GoqueryTabs = append(tabres.GoqueryTabs, table)
  608. }
  609. //解析表格集
  610. tabres.Analy()
  611. return
  612. }
  613. //开始解析表格集
  614. func (ts *TableResult) Analy() {
  615. tabs := []*Table{}
  616. contactFormat := &u.ContactFormat{
  617. IndexMap: map[int]string{},
  618. MatchMap: map[string]map[string]bool{},
  619. }
  620. for _, table := range ts.GoqueryTabs {
  621. tn := NewTable(ts.Html, ts, table)
  622. //核心模块
  623. ts := tn.Analy(contactFormat)
  624. for _, tab := range ts {
  625. tabs = append(tabs, tab)
  626. //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
  627. }
  628. //tn.SonTables = append(tn.SonTables, tn)
  629. }
  630. //统一合并,考虑统一多表格是多包的情况---新增
  631. if len(tabs) > 1 {
  632. pns := map[string]string{}
  633. pnarr := []string{}
  634. for _, table := range tabs {
  635. pn := table.StandKV["项目名称"]
  636. if pn != "" && TitleReg.MatchString(pn) {
  637. pnarr = append(pnarr, pn)
  638. matchres := TitleReg.FindAllStringSubmatch(pn, -1)
  639. if len(matchres) == 1 && len(matchres[0]) > 0 {
  640. v1 := u.PackageNumberConvert(matchres[0][0])
  641. pns[v1] = matchres[0][0]
  642. bp := &u.BlockPackage{}
  643. bp.Index = v1
  644. bp.Origin = matchres[0][0]
  645. bp.TableKV = u.NewJobKv()
  646. for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
  647. bp.TableKV.Kv[k] = table.StandKV[k]
  648. }
  649. bp.WinnerOrder = table.WinnerOrder
  650. if table.BlockPackage.Map[v1] == nil {
  651. table.BPackage = true
  652. table.BlockPackage.AddKey(v1, bp)
  653. }
  654. }
  655. }
  656. }
  657. if len(tabs) == len(pns) {
  658. //多个表格,每个表格都是一个分包 http://www.cxzwfw.gov.cn/info/1009/6963.htm
  659. //项目名称、项目编号、采购单位、招标机构、预算
  660. pname := projectnameReg.ReplaceAllString(pnarr[0], "")
  661. btrue := true
  662. for _, pn := range pnarr[1:] {
  663. pn = projectnameReg.ReplaceAllString(pn, "")
  664. //u.Debug(pn, pname)
  665. if pn != pname {
  666. //项目名称不一致
  667. btrue = false
  668. break
  669. }
  670. }
  671. if btrue {
  672. ts.SortKV.AddKey("项目名称", pname)
  673. ts.SortKVWeight["项目名称"] = 100
  674. for _, table := range tabs {
  675. table.BPackage = true
  676. //预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态
  677. if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 1 {
  678. bp := table.BlockPackage.Map[table.BlockPackage.Keys[0]].(*u.BlockPackage)
  679. if table.TableResult.WinnerOrder != nil {
  680. bp.WinnerOrder = table.WinnerOrder
  681. }
  682. if bp != nil && table.StandKV != nil {
  683. if bp.TableKV == nil {
  684. bp.TableKV = u.NewJobKv()
  685. }
  686. for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
  687. bp.TableKV.Kv[k] = table.StandKV[k]
  688. if nk < 4 {
  689. delete(table.StandKV, k)
  690. }
  691. }
  692. }
  693. }
  694. }
  695. }
  696. }
  697. }
  698. for _, table := range tabs {
  699. table.MergerToTableresult()
  700. // for k, v := range table.TableResult.SortKV.Map {
  701. // qutil.Debug(k, "=====", v)
  702. // }
  703. }
  704. }
  705. //解析表格
  706. func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
  707. //查找表体中的tr对象
  708. trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
  709. if trs.Size() == 0 {
  710. trs = table.Goquery.ChildrenFiltered("tr")
  711. }
  712. //遍历节点,初始化table 结构
  713. table.createTabe(trs)
  714. //重置行列
  715. table.ComputeRowColSpan()
  716. //对table结构体进行整体解析处理
  717. ts := table.AnalyTables(contactFormat)
  718. return ts
  719. }
  720. //遍历节点,初始化table 结构体
  721. func (table *Table) createTabe(trs *goquery.Selection) {
  722. trs.Each(func(n int, sel *goquery.Selection) {
  723. //隐藏行不处理
  724. if IsHide(sel) {
  725. return
  726. }
  727. //遍历每行的td
  728. tds := sel.ChildrenFiltered("td,th")
  729. TR := NewTR(table)
  730. tdTextIsNull := false
  731. var empty int
  732. tds.Each(func(m int, selm *goquery.Selection) {
  733. //对隐藏列不处理!!!
  734. if IsHide(selm) {
  735. return
  736. }
  737. //进入每一个单元格
  738. td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
  739. //num++
  740. TR.AddTD(td)
  741. if td.Val == "" && td.SonTableResult == nil { //删除一个tr,tr中所有td是空值的
  742. empty++
  743. if tds.Size() == empty {
  744. tdTextIsNull = true
  745. }
  746. }
  747. })
  748. //向table添加每行不为空的tr
  749. if !tdTextIsNull {
  750. table.AddTR(TR)
  751. }
  752. })
  753. }
  754. //对table进行整体解析处理
  755. func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
  756. ts := tn.tableSubDemolitionTable() //分包,拆表
  757. for n, table := range ts {
  758. //处理每个table
  759. if len(table.TRs) > 0 {
  760. //删除尾部空白行
  761. table.deleteTrimTr()
  762. //table.Print()
  763. //校对表格
  764. table.Adjust()
  765. //查找表格的标签,table.Tag字段
  766. table.FindTag()
  767. //log.Println(table.TableResult.Id, table.Html)
  768. //分割表格
  769. table.bSplit(n, ts)
  770. //对没有表头表格的处理
  771. if table.Tag != "" {
  772. _, _, b := CheckMultiPackage(table.Tag, "")
  773. if b {
  774. table.StandKV["项目名称"] = table.Tag
  775. table.StandKVWeight["项目名称"] = -100
  776. }
  777. }
  778. table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
  779. //开始查找kv,核心模块,table.SortKV
  780. table.FindKV()
  781. //table中抽取品牌,table.BrandData
  782. if u.IsBrandGoods {
  783. table.analyBrand()
  784. }
  785. //判断是否是多包,并处理分包的//遍历td分块
  786. table.CheckMultiPackageByTable()
  787. //str := "\n"
  788. //for k, v := range table.StandKV {
  789. // str += fmt.Sprintf("_==___%s:%v\n", k, v)
  790. // if table.TableResult.SortKV.Map[k] == nil {
  791. // table.TableResult.SortKV.AddKey(k, v)
  792. // table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
  793. // }
  794. //}
  795. res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
  796. if !res {
  797. //过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
  798. table.KVFilter()
  799. }
  800. for k, v := range table.StandKV { //过滤后的标准化kv
  801. if table.TableResult.SortKV.Map[k] == nil {
  802. table.TableResult.SortKV.AddKey(k, v)
  803. table.TableResult.SortKVWeight[k] = table.StandKVWeight[k]
  804. }
  805. }
  806. //u.Debug(str)
  807. }
  808. }
  809. return ts
  810. }
  811. //分包,拆表
  812. func (table *Table) tableSubDemolitionTable() []*Table {
  813. tm := []map[string]interface{}{}
  814. tmk := map[string]bool{}
  815. tmn := map[int]map[string]interface{}{}
  816. for rownum, tr := range table.TRs {
  817. if len(tr.TDs) == 1 && table.ColNum > 1 { //tr里面有一列,table里面有多列
  818. td := tr.TDs[0] //取每行第一个td
  819. //td开始列等于0 && td结束列+1等于table列数 && td长度大于1小于50
  820. if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 1 && len([]rune(td.Val)) < 50 {
  821. con, m1, b := CheckMultiPackage(td.Val, "") //判断分包
  822. if b {
  823. for k, _ := range m1 {
  824. numstr := u.PackageNumberConvert(k)
  825. m2 := map[string]interface{}{
  826. "tag": con,
  827. //"num": numstr,
  828. //"numtxt": v[0],
  829. "startrow": rownum,
  830. }
  831. tmk[numstr] = true
  832. tmn[rownum] = m2
  833. tm = append(tm, m2)
  834. break
  835. }
  836. }
  837. }
  838. }
  839. }
  840. //拆表
  841. ts := []*Table{}
  842. if len(tmk) > 1 && len(tmk) == len(tm) {
  843. var tab1 *Table
  844. for rownum, tr := range table.TRs {
  845. if tab1 == nil {
  846. tab1 = NewTable("", table.TableResult, table.Goquery)
  847. tab1.BSplit = true
  848. if tmn[rownum] != nil {
  849. tab1.StandKV["项目名称"] = tmn[rownum]["tag"].(string)
  850. tab1.StandKVWeight["项目名称"] = -100
  851. }
  852. ts = append(ts, tab1)
  853. }
  854. if tmn[rownum] != nil {
  855. tab1.Tag = tmn[rownum]["tag"].(string)
  856. } else {
  857. tab1.AddTR(tr)
  858. }
  859. if tmn[rownum+1] != nil {
  860. tab1 = nil
  861. }
  862. }
  863. } else {
  864. ts = append(ts, table)
  865. }
  866. return ts
  867. }
  868. //分割表格
  869. func (table *Table) bSplit(n int, ts []*Table) {
  870. if table.BSplit {
  871. if !table.BHeader && n > 0 {
  872. for i := n - 1; i > -1; i-- {
  873. if ts[i].BHeader {
  874. if ts[i].BFirstRow {
  875. //取第一行插入到
  876. table.InsertTR(ts[i].TRs[0])
  877. table.Adjust()
  878. }
  879. break
  880. }
  881. }
  882. }
  883. }
  884. }
  885. //删除尾部空白行
  886. func (table *Table) deleteTrimTr() {
  887. for len(table.TRs) > 0 {
  888. npos := len(table.TRs)
  889. tailTR := table.TRs[npos-1] //最后一个tr,取最后一行
  890. bspace := true
  891. for _, v := range tailTR.TDs {
  892. if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 {
  893. bspace = false
  894. break
  895. }
  896. }
  897. //删除尾部空行,是空行的话就删除
  898. if bspace {
  899. table.TRs = table.TRs[:npos-1]
  900. } else {
  901. break
  902. }
  903. }
  904. }
  905. //校对表格
  906. func (table *Table) Adjust() {
  907. //计算行列起止位置,跨行跨列处理
  908. table.ComputeRowColSpan()
  909. // for k1, tr := range table.TRs {
  910. // for k2, td := range tr.TDs {
  911. // qutil.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
  912. // }
  913. // }
  914. //大概计算每个起止行列的概率
  915. table.GetKeyRation()
  916. /*
  917. for k, v := range table.StartAndEndRation {
  918. for k1, v1 := range v.Poss {
  919. bs, _ := json.Marshal(v1)
  920. str := ""
  921. for _, td := range v.Tdmap[v1] {
  922. str += "__" + td.Val + fmt.Sprintf("%d_%d_%d_%d", td.StartRow, td.EndRow, td.StartCol, td.EndCol)
  923. }
  924. qutil.Debug(k, k1, string(bs), v.Rationmap[v1], str)
  925. }
  926. }
  927. */
  928. //u.Debug("tdnum:", num, table.RowNum, table.ColNum)
  929. //是否是规则的表格,单元各个数=行数*列数
  930. table.Brule = table.TDNum == table.RowNum*table.ColNum
  931. count := 0
  932. for _, trs := range table.TRs {
  933. for _, td := range trs.TDs {
  934. if td.BH {
  935. count++
  936. }
  937. }
  938. }
  939. if float32(count)/float32(table.TDNum) < 0.85 {
  940. //精确计算起止行列是表头的概率
  941. table.ComputeRowColIsKeyRation()
  942. bhead := false
  943. L:
  944. for i, tr := range table.TRs {
  945. for _, td := range tr.TDs {
  946. if td.BH {
  947. //qutil.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1)
  948. if i == len(table.TRs)-1 && len(table.TRs[len(table.TRs)-1].TDs) == 2 {
  949. res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
  950. if res {
  951. //删除此行
  952. table.TRs = table.TRs[:len(table.TRs)-1]
  953. table.Adjust()
  954. return
  955. }
  956. }
  957. bhead = true
  958. break L
  959. }
  960. }
  961. }
  962. table.BHeader = bhead
  963. }
  964. }
  965. //计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数
  966. func (table *Table) ComputeRowColSpan() {
  967. n := 0 //td总个数
  968. mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值
  969. for k, v := range table.TRs {
  970. n += len(v.TDs) //每行的td总数相加
  971. nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
  972. ball := true
  973. rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
  974. for _, v1 := range v.TDs {
  975. if v1.Rowspan != rowspans {
  976. ball = false
  977. break
  978. }
  979. }
  980. for _, v1 := range v.TDs {
  981. if ball {
  982. v1.Rowspan = 1
  983. }
  984. mc := mapRC[k]
  985. for {
  986. if mc != nil && mc[nk] > 0 {
  987. nk += mc[nk]
  988. } else {
  989. break
  990. }
  991. }
  992. v1.StartCol = nk
  993. nk += v1.Colspan - 1
  994. v1.EndCol = nk
  995. if nk >= table.ColNum {
  996. table.ColNum = nk + 1
  997. }
  998. nk++
  999. v1.StartRow = k
  1000. v1.EndRow = k + v1.Rowspan - 1
  1001. ck := fmtkey("c", v1.StartCol, v1.EndCol)
  1002. tdcs := table.StartAndEndRation[ck]
  1003. if tdcs == nil {
  1004. tdcs = NewTDRationScope(ck)
  1005. table.StartAndEndRation[ck] = tdcs
  1006. table.StartAndEndRationKSort.AddKey(ck, 1)
  1007. }
  1008. tdcs.Addtd(v1)
  1009. rk := fmtkey("r", v1.StartRow, v1.EndRow)
  1010. tdrs := table.StartAndEndRation[rk]
  1011. if tdrs == nil {
  1012. tdrs = NewTDRationScope(rk)
  1013. table.StartAndEndRation[rk] = tdrs
  1014. table.StartAndEndRationKSort.AddKey(rk, 1)
  1015. }
  1016. tdrs.Addtd(v1)
  1017. if v1.Rowspan > 1 {
  1018. for i := 1; i < v1.Rowspan; i++ {
  1019. r := k + i
  1020. if r < len(table.TRs) {
  1021. mc := mapRC[r]
  1022. if mc == nil {
  1023. mc = map[int]int{}
  1024. }
  1025. mc[v1.StartCol] = v1.Colspan
  1026. mapRC[r] = mc
  1027. }
  1028. }
  1029. }
  1030. }
  1031. }
  1032. table.TDNum = n //td总个数
  1033. table.RowNum = len(table.TRs) //tr总行数
  1034. }
  1035. func fmtkey(t string, start, end int) string {
  1036. return fmt.Sprintf("%s_%d_%d", t, start, end)
  1037. }
  1038. //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
  1039. func (table *Table) FindTag() {
  1040. //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
  1041. if table.Tag != "" {
  1042. return
  1043. }
  1044. t1, _ := table.Goquery.OuterHtml()
  1045. html := table.Html
  1046. pos := strings.Index(html, t1)
  1047. if pos <= 0 {
  1048. doc, _ := goquery.NewDocumentFromReader(strings.NewReader(table.Html))
  1049. html, _ = doc.Html()
  1050. pos = strings.Index(html, t1)
  1051. }
  1052. //u.Debug("--------", t1, "====\n\n\n\n=====", html)
  1053. if pos > 0 {
  1054. tcon := html[:pos]
  1055. tcon = cut.ClearHtml(tcon)
  1056. tcon = ClearTagReg.ReplaceAllString(tcon, "")
  1057. //u.Debug(pos, "-----------", tcon)
  1058. strs := ttagreg.FindStringSubmatch(tcon)
  1059. if len(strs) > 0 {
  1060. table.Tag = strs[0]
  1061. //u.Debug(table.Tag)
  1062. }
  1063. }
  1064. if table.Tag == "" {
  1065. table.Tag = table.TableResult.BlockTag
  1066. }
  1067. //u.Debug(table.Tag)
  1068. }
  1069. //计算r/c_start_end的概率
  1070. func (table *Table) GetKeyRation() {
  1071. for _, vn := range table.StartAndEndRationKSort.Keys {
  1072. v := table.StartAndEndRation[vn]
  1073. for _, v1 := range v.Poss {
  1074. count := 0
  1075. n := 0
  1076. for _, td := range v.Tdmap[v1] {
  1077. n++
  1078. if td.BH {
  1079. count++
  1080. }
  1081. }
  1082. v.Rationmap[v1] = float32(count) / float32(n)
  1083. }
  1084. }
  1085. }
// ComputeRowColIsKeyRation refines which cells are headers and in which
// direction their values lie, based on the header ratios stored in
// table.StartAndEndRation. It ends by recomputing the ratios (GetKeyRation),
// setting table.BFirstRow and handling full-width single-cell rows.
//
// The KeyDirect/KVDirect pairs written here are 1/2 for cells of a header
// row and 2/1 for cells of a header column.
func (table *Table) ComputeRowColIsKeyRation() {
	// Limits the correction applied for row spans.
	bkeyfirstrow := false // the first row acts as the key row
	bkeyfirstcol := false // the first column acts as the key column
	if table.Brule { // regular table: no row/column spans
		// Column keys that have already been processed.
		checkCompute := map[string]bool{}
		for k, tr := range table.TRs {
			rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow)
			if k == 0 { // ratios of the first row
				ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol)
				// ration1: header ratio of the first cell's row range;
				// ration2: header ratio of its column range.
				ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
				ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0])
				if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { // the first row is the key row
					bkeyfirstrow = true
					ball := true
					// A money-like value in the row cancels the decision.
					for _, td := range tr.TDs {
						if MoneyReg.MatchString(td.Val) {
							bkeyfirstrow = false
							ball = false
							td.BH = false
							break
						}
					}
					for _, td := range tr.TDs {
						if ball {
							// BH itself is left unchanged here.
							td.KeyDirect = 1
							td.KVDirect = 2
						}
					}
				} else if ration2 > 0.55 { // first column
					bkeyfirstcol = true
					if !checkCompute[ck] {
						checkCompute[ck] = true
						// Reset the first column; BH itself is left unchanged here.
						for _, tr1 := range table.TRs {
							for _, td1 := range tr1.TDs {
								if td1.StartCol == 0 {
									if !MoneyReg.MatchString(td1.Val) {
										td1.KeyDirect = 2
										td1.KVDirect = 1
									}
								}
							}
						}
					}
				}
				// Fallback when neither rule above decided.
				if !bkeyfirstrow && !bkeyfirstcol {
					if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 {
						bkeyfirstrow = true
						for _, td := range tr.TDs {
							if !MoneyReg.MatchString(td.Val) {
								// BH itself is left unchanged here.
								td.KeyDirect = 1
								td.KVDirect = 2
							}
						}
					} else if tr.Table.ColNum > 1 && ration2 > 0.5 {
						bkeyfirstcol = true
						if !checkCompute[ck] {
							checkCompute[ck] = true
							// Reset the first column; unlike above, BH is set too.
							for _, tr1 := range table.TRs {
								for _, td1 := range tr1.TDs {
									if td1.StartCol == 0 {
										if !MoneyReg.MatchString(td1.Val) {
											td1.BH = true
											td1.KeyDirect = 2
											td1.KVDirect = 1
										}
									}
								}
							}
						}
					}
				}
			} else {
				if bkeyfirstrow {
					// Header ratio of this row's range.
					ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
					// The second row, and any row below the threshold, is
					// demoted to value cells unless a cell is marked MustBH.
					if k == 1 || ration1 < checkval {
						for _, td := range tr.TDs {
							if !td.MustBH {
								td.BH = false
								td.KeyDirect = 0
								td.KVDirect = 0
							}
						}
					}
				} else {
					// Columns decide.
					if bkeyfirstcol {
						for _, td := range tr.TDs {
							ck := fmtkey("c", td.StartCol, td.EndCol)
							ration1, _ := table.StartAndEndRation[ck].GetTDRation(td)
							// Each column range is handled once.
							if !checkCompute[ck] {
								checkCompute[ck] = true
								if ration1 >= checkval && td.ColPos != 1 {
									// Promote the whole column to header cells.
									for _, tr1 := range table.TRs {
										for _, td1 := range tr1.TDs {
											if td1.StartCol == td.StartCol {
												if !MoneyReg.MatchString(td1.Val) {
													td1.BH = true
													td1.KeyDirect = 2
													td1.KVDirect = 1
												}
											}
										}
									}
								} else {
									// Demote the column, skipping the first row
									// and each row's first cell.
									for _, tr1 := range table.TRs[1:] {
										for _, td1 := range tr1.TDs[1:] {
											if td1.StartCol == td.StartCol && !td1.MustBH {
												td1.BH = false
												td1.KeyDirect = 0
												td1.KVDirect = 0
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}
	if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) {
		// Broken rows: cells share a row or column but are interrupted by a
		// span, so the table direction is adjusted per range.
		for _, k := range table.StartAndEndRationKSort.Keys {
			v := table.StartAndEndRation[k]
			// Ranges are visited in the sorted key order so the result does
			// not depend on map iteration order. k1 is "r" or "c".
			k1 := k[:1]
			for _, v2 := range v.Poss {
				lentds := len(v.Tdmap[v2])
				if v.Rationmap[v2] > checkval {
					// Mostly-header range: give undecided cells a direction.
					for _, td := range v.Tdmap[v2] {
						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) {
							if k1 == "r" {
								// Compare against the crossing column range.
								ck := fmtkey("c", td.StartCol, td.EndCol)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
								}
								if lentds > 1 {
									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
										td.KeyDirect = 1
										td.KVDirect = 2
									}
								}
							} else {
								// Compare against the crossing row range.
								ck := fmtkey("r", td.StartRow, td.EndRow)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
								}
								if lentds > 1 {
									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
										td.KeyDirect = 2
										td.KVDirect = 1
									}
								}
							}
						} else {
							// Stop at the first cell that is already decided
							// or looks like money.
							break
						}
					}
				} else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 {
					// Mostly-value range with more than three cells: clear
					// stray header flags.
					for _, td := range v.Tdmap[v2] {
						if td.KeyDirect == 0 && td.BH && !td.MustBH {
							if k1 == "r" {
								ck := fmtkey("c", td.StartCol, td.EndCol)
								rt := table.StartAndEndRation[ck]
								clen := 0
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									clen = len(tdn)
								}
								if lentds >= clen && lentds > 1 {
									if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
										td.BH = false
									}
								}
							} else {
								ck := fmtkey("r", td.StartRow, td.EndRow)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								clen := 0
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									clen = len(tdn)
								}
								if lentds >= clen && lentds > 1 {
									if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
										td.BH = false
									}
								}
							}
						} else {
							break
						}
					}
				}
			}
		}
	}
	// Refresh the ratios after the BH changes above.
	table.GetKeyRation()
	if len(table.TRs) > 0 && len(table.TRs[0].TDs) > 0 {
		t0 := table.TRs[0].TDs[0]
		key := fmtkey("r", t0.StartRow, t0.EndRow)
		r, t := table.StartAndEndRation[key].GetTDRation(t0)
		// The first row counts as the header row when more than 90% of its
		// cells are headers and it has more than one cell.
		if r > 0.9 && len(t) > 1 {
			table.BFirstRow = true
		}
		for k, tr := range table.TRs {
			// A single cell spanning the full table width is never a header;
			// its text is mined for key/values instead.
			if len(tr.TDs) == 1 && tr.TDs[0].StartCol == 0 && tr.TDs[0].EndCol+1 == table.ColNum {
				tr.TDs[0].BH = false
				tr.TDs[0].KVDirect = 0
				sv := FindKv(tr.TDs[0].Val, "", 2)
				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2)
				for k, v := range resm {
					sv.AddKey(k, v)
				}
				if len(sv.Keys) > 0 {
					// Add the pairs the cell itself does not already carry.
					for k1, v1 := range sv.Map {
						if tr.TDs[0].SortKV.Map[k1] == nil {
							table.SortKV.AddKey(k1, v1)
						}
					}
				} else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 {
					// A first-row full-width cell longer than 11 bytes without
					// key/values becomes the table tag.
					table.Tag = tr.TDs[0].Val
				}
			}
		}
	}
}
// FindKV extracts the table's key/values into table.SortKV.
//
// Tables with a header (BHeader) are processed cell by cell through
// FindTdVal, trying the preferred direction first and the opposite one
// second. When both fail, GetBidOrder is tried and the cell's own embedded
// key/values are copied. Tables without a header are read column-wise via
// initLongitudinalData.
func (table *Table) FindKV() {
	// Tables that consist only of keys are not searched any further.
	if table.BHeader { // true as soon as one cell is a key
		direct := If(table.BFirstRow, 2, 1).(int) // lookup direction; 2 = look upwards
		vdirect := If(direct == 2, 1, 2).(int)    // the opposite direction
		// Carries the "discard following content" state across rows.
		bcon := false
		// Direction used for winner ordering, as returned by GetBidOrder.
		bodirect := 0
		// Running rank for winner ordering.
		sort := 1
		// Start extracting.
		for _, tr := range table.TRs {
			bcon = trSingleColumn(tr, bcon, table) // single-cell row: whether to discard content
			if bcon {
				continue
			}
			// Below the first row the direction is re-evaluated per row:
			// rows where at most half of the cells are headers are read
			// with direct=1, all others with direct=2.
			if tr.TDs[0].StartRow > 0 {
				numbh := 0
				for _, td := range tr.TDs {
					if td.BH {
						numbh++
					}
				}
				if numbh > 0 && numbh <= len(tr.TDs)/2 {
					direct, vdirect = 1, 2
				} else {
					direct, vdirect = 2, 1
				}
			}
			for _, td := range tr.TDs {
				if !td.BH && td.KVDirect < 3 {
					if !table.FindTdVal(td, direct, vdirect) { // FindTdVal stores into table.SortKV
						if !table.FindTdVal(td, vdirect, direct) {
							// Neither direction matched: handle first/second
							// winner candidates.
							bo, res := GetBidOrder(td, bodirect, sort)
							if res {
								sort++
								bodirect = bo
							}
							if len(td.SortKV.Map) > 0 {
								for tdk, tdv := range td.SortKV.Map {
									if tdv == nil || tdv == "" { // empty values are not added to table.SortKV
										continue
									}
									table.SortKV.AddKey(tdk, tdv)
								}
							}
						}
					}
				}
			}
		}
	} else if len(table.TRs) > 0 { // table without a header: treated as vertical by default
		res := initLongitudinalData(table) // one slice of cell values per first-row cell
		// Split values further, e.g. the second column of
		// http://www.ggzy.hi.gov.cn/cgzbgg/16553.jhtml holds several values.
		nmapkeys := []int{}
		nmap := map[int][]*u.Kv{}
	L:
		for _, r1 := range res {
			for n, r := range r1 {
				if len([]rune(r)) < 60 { // only values shorter than 60 runes are split
					res1, _ := colonkvEntity.entrance(r, "", nil, 2)
					if res1 != nil {
						nmap[n] = res1
						nmapkeys = append(nmapkeys, n)
					} else if nmap[n] != nil {
						// Pad with an empty entry.
						nmap[n] = append(nmap[n], &u.Kv{})
					}
				} else {
					// A long value disables the splitting altogether.
					nmap = nil
					nmapkeys = nil
					break L
				}
			}
		}
		// Regroup the split pairs by key.
		if len(nmap) > 0 {
			kmapkeys := []string{}
			kmap := map[string][]string{}
			for _, mk := range nmapkeys {
				for pos, m1 := range nmap[mk] {
					k, v := m1.Key, m1.Value
					kv := kmap[k]
					if kv == nil {
						kv = []string{}
					}
					kv = append(kv, v)
					kmap[k] = kv
					kmapkeys = append(kmapkeys, k)
					// Pad value lists that are shorter than the current
					// position. NOTE(review): kmapkeys can contain duplicates,
					// and entries appended here are not visited by this range
					// loop — confirm this is intended.
					for _, k := range kmapkeys {
						arr := kmap[k]
						if len(arr) < pos {
							arr = append(arr, "")
							kmap[k] = arr
							kmapkeys = append(kmapkeys, k)
						}
					}
				}
			}
			if len(kmap) > 0 {
				for _, k := range kmapkeys {
					table.SortKV.AddKey(k, kmap[k])
				}
			}
		}
		// Classify each slice by its first value and store the whole slice
		// under "中标单位" (type ENT) or "排名" (type BO).
		for _, arr := range res {
			if len(arr) > 0 {
				v1 := arr[0]
				_, _, _, _, repl := CheckCommon(v1, "con")
				if repl == "ENT" {
					table.SortKV.AddKey("中标单位", arr)
					continue
				} else if repl == "BO" {
					table.SortKV.AddKey("排名", arr)
					continue
				}
			}
		}
	}
}
  1503. //初始化组装纵向数据
  1504. func initLongitudinalData(table *Table) [][]string {
  1505. res := make([][]string, len(table.TRs[0].TDs)) //创建table第一行的列数长度
  1506. for n, _ := range res {
  1507. res[n] = []string{}
  1508. }
  1509. for _, tr := range table.TRs {
  1510. for n, td := range table.TRs[0].TDs { //第一行的所有td
  1511. td1 := table.GetTdByRCNo(tr.TDs[0].StartRow, td.StartCol) //根据行号列号获取td对象
  1512. if td1 != nil {
  1513. res[n] = append(res[n], td1.Val)
  1514. } else {
  1515. res[n] = append(res[n], "")
  1516. }
  1517. }
  1518. }
  1519. return res
  1520. }
  1521. //tr单列,是否丢弃内容
  1522. func trSingleColumn(tr *TR, bcon bool, table *Table) bool {
  1523. if len(tr.TDs) == 1 {
  1524. bcon = false
  1525. td := tr.TDs[0]
  1526. if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 4 && len([]rune(td.Val)) < 50 {
  1527. res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
  1528. if res { //以下内容丢弃
  1529. bcon = true
  1530. }
  1531. }
  1532. }
  1533. return bcon
  1534. }
  1535. //获取中标人顺序
  1536. //direct 0默认 1横向 2纵向
  1537. func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
  1538. if td.Valtype != "BO" {
  1539. return
  1540. }
  1541. if td.Rowspan > 1 {
  1542. for i := 0; i < td.Rowspan; i++ {
  1543. nextcol := 1
  1544. L1:
  1545. for {
  1546. vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.EndCol+nextcol)
  1547. if vtd == nil {
  1548. break L1
  1549. }
  1550. nextcol += vtd.Colspan
  1551. if filter_zbdw_v2.MatchString(vtd.Val) {
  1552. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1553. if arrbo == nil {
  1554. arrbo = []map[string]interface{}{}
  1555. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1556. }
  1557. a1 := arrbo.([]map[string]interface{})
  1558. a1 = append(a1, map[string]interface{}{
  1559. "entname": vtd.Val,
  1560. "sortstr": td.Val,
  1561. "sort": GetBidSort(td.Val, n),
  1562. })
  1563. res = true
  1564. td.TR.Table.SortKV.Map[NullTxtBid] = a1
  1565. }
  1566. }
  1567. }
  1568. } else if td.Colspan > 1 {
  1569. for i := 1; i < td.Colspan; i++ {
  1570. nextcol := 0
  1571. L2:
  1572. for {
  1573. vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.StartCol+nextcol)
  1574. if vtd == nil || vtd.Colspan >= td.Colspan {
  1575. break L2
  1576. }
  1577. nextcol += vtd.Colspan
  1578. if filter_zbdw_v2.MatchString(vtd.Val) {
  1579. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1580. if arrbo == nil {
  1581. arrbo = []map[string]interface{}{}
  1582. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1583. }
  1584. a1 := arrbo.([]map[string]interface{})
  1585. a1 = append(a1, map[string]interface{}{
  1586. "entname": vtd.Val,
  1587. "sortstr": td.Val,
  1588. "sort": GetBidSort(td.Val, n),
  1589. })
  1590. res = true
  1591. td.TR.Table.SortKV.Map[NullTxtBid] = a1
  1592. }
  1593. }
  1594. }
  1595. } else {
  1596. rtd := td.TR.Table.GetTdByRCNo(td.StartRow, td.EndCol+1)
  1597. btd := td.TR.Table.GetTdByRCNo(td.EndRow+1, td.StartCol)
  1598. //if ((rtd != nil && !rtd.BH && rtd.Valtype == "BO") || direct == 1) && btd != nil && filter_zbdw_v.MatchString(btd.Val) {
  1599. if ((rtd != nil && !rtd.BH) || direct == 1) && btd != nil && filter_zbdw_v2.MatchString(btd.Val) {
  1600. d = 1
  1601. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1602. if arrbo == nil {
  1603. arrbo = []map[string]interface{}{}
  1604. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1605. }
  1606. a1 := arrbo.([]map[string]interface{})
  1607. a1 = append(a1, map[string]interface{}{
  1608. "entname": btd.Val,
  1609. "sortstr": td.Val,
  1610. "sort": GetBidSort(td.Val, n),
  1611. })
  1612. res = true
  1613. td.TR.Table.SortKV.Map[NullTxtBid] = a1
  1614. //} else if ((btd != nil && !btd.BH && btd.Valtype == "BO") || direct == 2) && rtd != nil && filter_zbdw_v.MatchString(rtd.Val) {
  1615. } else if ((btd != nil && !btd.BH) || direct == 2) && rtd != nil && filter_zbdw_v2.MatchString(rtd.Val) {
  1616. d = 2
  1617. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1618. if arrbo == nil {
  1619. arrbo = []map[string]interface{}{}
  1620. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1621. }
  1622. a1 := arrbo.([]map[string]interface{})
  1623. a1 = append(a1, map[string]interface{}{
  1624. "entname": rtd.Val,
  1625. "sortstr": td.Val,
  1626. "sort": GetBidSort(td.Val, n),
  1627. })
  1628. res = true
  1629. td.TR.Table.SortKV.Map[NullTxtBid] = a1
  1630. }
  1631. }
  1632. return
  1633. }
  1634. func GetBidSort(str string, n int) int {
  1635. val := n
  1636. if strings.Index(str, "首选") > -1 {
  1637. val = 1
  1638. } else {
  1639. val = winnerOrderEntity.toNumber(str, n)
  1640. }
  1641. return val
  1642. }
  1643. //查找每一个单元格的表头,调用FindNear
  1644. func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
  1645. near := table.FindNear(td, direct)
  1646. // if near != nil {
  1647. // fmt.Println("near----", near.Val, td.Val)
  1648. // }
  1649. // qutil.Debug(near != nil)
  1650. // qutil.Debug(near.BH)
  1651. // qutil.Debug(near.KeyDirect == vdirect, near.KeyDirect == 0)
  1652. // qutil.Debug(near.KVDirect == direct, near.KVDirect == 0)
  1653. // qutil.Debug(near.KVDirect < 3)
  1654. if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
  1655. near.KVDirect = direct
  1656. near.KeyDirect = vdirect
  1657. td.KVDirect = direct
  1658. key := near.Val
  1659. if near.Val == "" {
  1660. key = fmtkey("k", near.TR.RowPos, near.ColPos)
  1661. }
  1662. val := table.SortKV.Map[key]
  1663. //qutil.Debug("====================", "key:", key, "val:", val)
  1664. bthiskey := false
  1665. if val != nil {
  1666. curpos := table.SortKV.Index[key]
  1667. thistr := table.kTD[curpos]
  1668. if thistr != near {
  1669. near.Val += "_"
  1670. for table.SortKV.Map[near.Val] != nil {
  1671. near.Val += "_"
  1672. }
  1673. key = near.Val //之前这个地方没有重置,导致把之前结果覆盖了
  1674. } else {
  1675. bthiskey = true
  1676. }
  1677. }
  1678. bfind := false
  1679. barr := false
  1680. varrpos := -1
  1681. if bthiskey {
  1682. //处理是数组值,且有合并行或合并列的情况 kvscope,对数组值的处理
  1683. pos := table.SortKV.Index[key]
  1684. mval := table.kvscope[pos]
  1685. bvalfind := false
  1686. if direct == 1 { //kv是横向
  1687. L1:
  1688. for k3, v3 := range mval {
  1689. for _, v4 := range v3 {
  1690. if v4.EndRow+1 == td.StartRow && v4.EndCol == td.EndCol {
  1691. varrpos = k3
  1692. bvalfind = true
  1693. break L1
  1694. }
  1695. }
  1696. }
  1697. } else { //kv是纵向
  1698. L2:
  1699. for k3, v3 := range mval {
  1700. for _, v4 := range v3 {
  1701. if v4.EndCol+1 == td.StartCol && v4.EndRow == td.EndRow {
  1702. varrpos = k3
  1703. bvalfind = true
  1704. break L2
  1705. }
  1706. }
  1707. }
  1708. }
  1709. if vals, ok := val.([]string); ok {
  1710. if near.Val == "" {
  1711. bn := false
  1712. for _, vs := range vals {
  1713. if vs != "" && NullTdReg.MatchString(vs) {
  1714. bn = true
  1715. } else {
  1716. bn = false
  1717. break
  1718. }
  1719. }
  1720. if bn {
  1721. near.Val = NullTxtBid
  1722. key = NullTxtBid
  1723. bfind = true
  1724. }
  1725. }
  1726. if bvalfind && varrpos > -1 && len(vals) > varrpos {
  1727. vals[varrpos] = td.Val // += "__" + td.Val
  1728. } else {
  1729. //添加时候去除空值和nil
  1730. newVals := []string{}
  1731. for _, isval := range vals {
  1732. if isval == "" {
  1733. continue
  1734. }
  1735. newVals = append(newVals, isval)
  1736. }
  1737. //vals = append(vals, td.Val)
  1738. if td.Val != "" {
  1739. newVals = append(newVals, td.Val)
  1740. }
  1741. val = newVals
  1742. varrpos = len(vals) - 1
  1743. }
  1744. } else if vals, ok := val.(string); ok && vals != "" && td.Val != "" {
  1745. if bvalfind {
  1746. val = td.Val //vals + "__" + td.Val
  1747. } else {
  1748. tval := []string{vals}
  1749. tval = append(tval, td.Val)
  1750. val = tval
  1751. varrpos = 1
  1752. }
  1753. }
  1754. barr = true
  1755. } else {
  1756. if td.Val != "" {
  1757. val = td.Val
  1758. } else if len(near.SortKV.Map) == 1 && near.SortKV.Map[near.Val] != "" {
  1759. val = near.SortKV.Map[near.Val]
  1760. }
  1761. }
  1762. td.HeadTd = near
  1763. if bfind {
  1764. tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
  1765. table.SortKV.ReplaceKey(key, val, tkey)
  1766. } else {
  1767. if val == nil || val == "" || key == "采购项目预算金额" {
  1768. return
  1769. }
  1770. table.SortKV.AddKey(key, val)
  1771. //if table.SortKV.Map[key] != nil {
  1772. pos := table.SortKV.Index[key]
  1773. //qutil.Debug("=========", "key:", key, "val:", val, "pos:", pos)
  1774. if barr {
  1775. mval := table.kvscope[pos]
  1776. if mval != nil {
  1777. tds := mval[varrpos]
  1778. if tds != nil {
  1779. tds = append(tds, td)
  1780. } else {
  1781. tds = []*TD{td}
  1782. }
  1783. if varrpos > -1 {
  1784. mval[varrpos] = tds
  1785. table.kvscope[pos] = mval
  1786. }
  1787. }
  1788. } else {
  1789. table.kvscope[pos] = map[int][]*TD{
  1790. 0: []*TD{td},
  1791. }
  1792. table.kTD[pos] = near
  1793. }
  1794. //}
  1795. }
  1796. b = true
  1797. }
  1798. return
  1799. }
  1800. //查找单元格的表头时,横向或纵向
  1801. func (table *Table) FindNear(td *TD, direct int) *TD {
  1802. if direct == 1 && td.StartCol > 0 { //左临
  1803. tr := table.TRs[:td.TR.RowPos+1]
  1804. for i := len(tr) - 1; i > -1; i-- {
  1805. tds := tr[i].TDs
  1806. for _, td1 := range tds {
  1807. if td1.StartRow <= td.StartRow && td1.EndRow >= td.EndRow && td1.EndCol+1 == td.StartCol {
  1808. //找到左临节点
  1809. if td1.BH {
  1810. return td1
  1811. } else {
  1812. if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
  1813. return td1.HeadTd
  1814. }
  1815. }
  1816. }
  1817. }
  1818. }
  1819. } else if direct == 2 && td.StartRow > 0 { //上临
  1820. tr := table.TRs[:td.TR.RowPos]
  1821. for i := len(tr) - 1; i > -1; i-- {
  1822. tds := tr[i].TDs
  1823. for _, td1 := range tds {
  1824. if td1.StartCol <= td.StartCol && td1.EndCol >= td.EndCol && td1.EndRow+1 == td.StartRow {
  1825. //找到左临节点
  1826. if td1.BH {
  1827. return td1
  1828. } else {
  1829. if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
  1830. return td1.HeadTd
  1831. }
  1832. }
  1833. }
  1834. }
  1835. }
  1836. }
  1837. return nil
  1838. }
  1839. //根据行号列号获取td对象
  1840. func (tn *Table) GetTdByRCNo(row, col int) *TD {
  1841. for _, tr := range tn.TRs {
  1842. for _, td := range tr.TDs {
  1843. if td.StartCol <= col && td.EndCol >= col && td.StartRow <= row && td.EndRow >= row {
  1844. return td
  1845. }
  1846. }
  1847. }
  1848. return nil
  1849. }
  1850. //判断表格是否是分包
  1851. func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
  1852. pac := 0 //包的数量
  1853. val := 0 //分值
  1854. index = []string{} //存储分包,使用tbale.SortKV的key和value使用正则等处理对值进行判断
  1855. index_pos := []int{} //下标
  1856. //是数组且能找到标段之类的提示
  1857. //arr_count := 0 //计数table.SortKV的value是数组的数量,后面没用
  1858. key_index := -1
  1859. hasPkgTd := map[string]bool{}
  1860. //初始化CheckMultiPackageByTable方法需要的数据
  1861. key_index, index, index_pos, val, pac, hasPkgTd = initCheckMultiPackageByTable(tn, key_index, index, index_pos, val, pac, hasPkgTd)
  1862. //key是分包的情况
  1863. //记录key对应的值
  1864. commonKeyVals := map[string][]string{}
  1865. //记录key出现的次数
  1866. keyExistsCount := map[string]int{}
  1867. if pac > 1 {
  1868. val = 10
  1869. } else {
  1870. //查找标签
  1871. if TableMultiPackageReg_4.MatchString(tn.Tag) {
  1872. val += 4
  1873. } else if TableMultiPackageReg_2.MatchString(tn.Tag) {
  1874. val += 4
  1875. }
  1876. //根据table.SortKV的key判断是否分包,如果没有再根据value判断
  1877. val, index, index_pos = foundPacBySortKV(tn, val, index, index_pos, &keyExistsCount, &commonKeyVals, key_index, hasPkgTd)
  1878. }
  1879. // u.Debug(index)
  1880. //过滤重复及标准化!
  1881. standIndex := []string{}
  1882. standIndex_pos := []int{}
  1883. oldIndex := []string{} //存放包的原始值
  1884. brepeat := map[string]bool{}
  1885. for k, v := range index {
  1886. v = u.PackageNumberConvert(v)
  1887. if !brepeat[v] {
  1888. brepeat[v] = true
  1889. standIndex = append(standIndex, v)
  1890. standIndex_pos = append(standIndex_pos, index_pos[k])
  1891. oldIndex = append(oldIndex, index[k])
  1892. }
  1893. }
  1894. index = standIndex
  1895. //有一个以上的包,并且相同的key出现一次以上,认为这个key是属于包里面的
  1896. if len(commonKeyVals) > 0 {
  1897. for k, v := range commonKeyVals {
  1898. if len(index) > 1 && keyExistsCount[k] < 2 {
  1899. continue
  1900. }
  1901. tn.SortKV.AddKey(k, v)
  1902. }
  1903. }
  1904. //
  1905. isGoonNext := false
  1906. if val > 4 && len(brepeat) > 0 {
  1907. b = true
  1908. //多包解析
  1909. if b {
  1910. tn.BPackage = true
  1911. //根据数组index分包长度添加table.BlockPackage子包数组
  1912. for nk, v := range index {
  1913. if tn.BlockPackage.Map[v] == nil {
  1914. bp := &u.BlockPackage{}
  1915. bp.Index = v //序号 (转换后编号,只有数字或字母)
  1916. bp.Origin = oldIndex[nk] //包的原始值
  1917. bp.TableKV = u.NewJobKv() //table kv (分出的对应的KV值)
  1918. tn.BlockPackage.AddKey(v, bp) //table子包数组
  1919. }
  1920. }
  1921. isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos) //多包处理,处理不同情况下的分包
  1922. }
  1923. } else {
  1924. isGoonNext = true
  1925. }
  1926. if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
  1927. tn.isGoonNext()
  1928. }
  1929. //查找分包中的中标人排序
  1930. if tn.BlockPackage != nil && tn.BlockPackage.Map != nil && len(tn.BlockPackage.Map) > 0 {
  1931. for _, v := range tn.BlockPackage.Map {
  1932. vv := v.(*u.BlockPackage)
  1933. if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
  1934. vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
  1935. }
  1936. }
  1937. }
  1938. return
  1939. }
  1940. //多包处理,处理不同情况下的分包
  1941. func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int) (isGoonNext bool) {
  1942. if len(index) == 1 { //是一个的情况
  1943. if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
  1944. beq := true
  1945. for _, v2 := range tn.SortKV.Map {
  1946. if _, ok := v2.(string); !ok {
  1947. beq = false
  1948. break
  1949. }
  1950. }
  1951. if beq { //统一处理为数组
  1952. td := tn.GetTdByRCNo(tn.RowNum-1, 0)
  1953. if !td.BH && FindVal2_1.MatchString(td.Val) {
  1954. for k2, v2 := range tn.SortKV.Map {
  1955. tn.SortKV.Map[k2] = []string{v2.(string)}
  1956. }
  1957. } else {
  1958. //没有处理成数组的情况下,继续调用正文查找分包的方法
  1959. isGoonNext = true
  1960. }
  1961. }
  1962. }
  1963. }
  1964. for _, k1 := range tn.SortKV.Keys {
  1965. v1 := tn.SortKV.Map[k1]
  1966. if _, bvs := v1.(string); bvs && len(index) > 1 && !strings.HasSuffix(k1, "_") { //table.SortKV.Map.value为字符串并且index有分包而且table.SortKV.Map.key没有_
  1967. v1_array := []string{v1.(string)}
  1968. underline := ""
  1969. for {
  1970. underline += "_"
  1971. if tn.SortKV.Map[k1+underline] == nil {
  1972. break
  1973. } else if v3, v2_ok := tn.SortKV.Map[k1+underline].(string); v2_ok && v3 != "" {
  1974. v1_array = append(v1_array, v3)
  1975. }
  1976. }
  1977. v1 = v1_array
  1978. }
  1979. if val, bvs := v1.([]string); bvs {
  1980. if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
  1981. for k, v := range val {
  1982. tn.assemblePackage(k1, v, index[k]) //组装解析到的分包
  1983. }
  1984. } else {
  1985. for sk1, sv2 := range index {
  1986. v := val[sk1]
  1987. //处理http://www.hljcg.gov.cn/xwzs!queryOneXwxxqx.action?xwbh=8145b599-a11e-45cb-a76a-12157a715570
  1988. if v == "" && strings.Index(k1, "供应商") > -1 {
  1989. if sk1 != len(index)-1 {
  1990. //u.Debug(val[sk1+1], val[sk1+2])
  1991. if standIndex_pos[sk1+1]-standIndex_pos[sk1] > 1 {
  1992. v = val[standIndex_pos[sk1]+1]
  1993. }
  1994. } else {
  1995. if standIndex_pos[sk1] < len(val)-1 {
  1996. v = val[standIndex_pos[sk1]+1]
  1997. }
  1998. }
  1999. }
  2000. tn.assemblePackage(k1, v, sv2)
  2001. }
  2002. }
  2003. //删除子包的kv
  2004. //u.Debug("----==1==-------", k1)
  2005. k1tags := u.GetTags(k1) //取得匹配
  2006. //if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
  2007. // tn.SortKV.RemoveKey(k1)
  2008. //}
  2009. for _, vcgdw := range k1tags {
  2010. if vcgdw.Value == "采购单位" {
  2011. tn.SortKV.RemoveKey(k1)
  2012. }
  2013. }
  2014. } else if val, bvs := v1.(string); bvs && len(index) == 1 {
  2015. //删除子包的kv
  2016. k1tags, _, _, _, _ := CommonDataAnaly(k1, "", "", val)
  2017. if len(k1tags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(k1tags[0]) { //(k1tags[0].Value == "采购单位" || k1tags[0].Value == "项目编号")) {
  2018. //log.Println("remove", k1, val)
  2019. tn.SortKV.RemoveKey(k1)
  2020. tn.assemblePackage(k1, val, index[0])
  2021. }
  2022. //u.Debug("----==2==-------", k1)
  2023. }
  2024. }
  2025. return isGoonNext
  2026. }
  2027. //没有处理成数组的情况下,继续调用正文查找分包的方法
  2028. func (tn *Table) isGoonNext() {
  2029. blockPackage := map[string]*u.BlockPackage{}
  2030. for _, k := range tn.SortKV.Keys {
  2031. if excludeKey.MatchString(k) {
  2032. continue
  2033. }
  2034. str := "" //拼装为冒号kv
  2035. v := tn.SortKV.Map[k]
  2036. nk := regReplAllSpace.ReplaceAllString(k, "")
  2037. if vs, ok := v.([]string); ok {
  2038. str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " "))
  2039. } else {
  2040. str += fmt.Sprintf("%s:%s\n", nk, v)
  2041. }
  2042. b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false) //分块之后分包
  2043. if b && len(blockPackage) > 0 {
  2044. tn.BPackage = true
  2045. for mk, mv := range blockPackage {
  2046. if tn.BlockPackage.Map[mk] == nil {
  2047. tn.BlockPackage.AddKey(mk, mv)
  2048. } else {
  2049. bp := tn.BlockPackage.Map[mk].(*u.BlockPackage)
  2050. if bp.TableKV == nil {
  2051. bp.TableKV = u.NewJobKv()
  2052. }
  2053. for k2, v2 := range mv.ColonKV.Kv {
  2054. if bp.TableKV.Kv[k2] == "" {
  2055. bp.TableKV.Kv[k2] = v2
  2056. bp.TableKV.KvTag[k2] = mv.ColonKV.KvTag[k2]
  2057. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  2058. }
  2059. }
  2060. for k2, v2 := range mv.SpaceKV.Kv {
  2061. if bp.TableKV.Kv[k2] == "" {
  2062. bp.TableKV.Kv[k2] = v2
  2063. bp.TableKV.KvTag[k2] = mv.SpaceKV.KvTag[k2]
  2064. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  2065. }
  2066. }
  2067. }
  2068. }
  2069. tn.BPackage = true
  2070. tn.SortKV.RemoveKey(k)
  2071. }
  2072. }
  2073. }
  2074. //根据table.SortKV的key判断是否分包,如果没有再根据value判断
  2075. func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
  2076. keyIsPkg := false
  2077. for in, k := range tn.SortKV.Keys {
  2078. if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) { //判断分包前排除
  2079. continue
  2080. }
  2081. v := tn.SortKV.Map[k]
  2082. //key是分包的情况
  2083. if ismatch := FindVal_1.MatchString(k); keyIsPkg || ismatch {
  2084. if ismatch {
  2085. keyIsPkg = true
  2086. val += 4
  2087. pkgFlag := FindVal_1.FindString(k) //对值进行分包判断
  2088. k = strings.Replace(k, pkgFlag, "", -1)
  2089. index = append(index, pkgFlag)
  2090. index_pos = append(index_pos, len(index))
  2091. val += 1
  2092. //pac++
  2093. } else {
  2094. k = strings.TrimRight(k, "_")
  2095. }
  2096. (*keyExistsCount)[k] = (*keyExistsCount)[k] + 1
  2097. (*commonKeyVals)[k] = append((*commonKeyVals)[k], qutil.ObjToString(v))
  2098. } else if k1 := FilterKey_2.ReplaceAllString(k, ""); FindKey_2.MatchString(k1) {
  2099. val += 4
  2100. //value数组分包
  2101. if vs, bvs1 := v.([]string); bvs1 {
  2102. L:
  2103. for in2, v1 := range vs {
  2104. if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
  2105. for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
  2106. if serial.MatchString(v1) {
  2107. break L
  2108. }
  2109. }
  2110. if key_index == -1 {
  2111. key_index = in
  2112. } else if key_index != in {
  2113. break
  2114. }
  2115. index = append(index, v1)
  2116. index_pos = append(index_pos, in2)
  2117. val += 1
  2118. //pac++
  2119. }
  2120. }
  2121. } else if v1, ok := v.(string); ok && !hasPkgTd[k] {
  2122. //value字符串分包
  2123. v1 = replPkgConfusion(v1) //替换分包中混淆的词
  2124. for _, v2 := range strings.Split(v1, "/") {
  2125. if len([]rune(v2)) < 20 && !moneyNum.MatchString(v2) && FindVal2_1.MatchString(v2) {
  2126. key_index = in
  2127. index = append(index, v1)
  2128. index_pos = append(index_pos, 0)
  2129. val += 1
  2130. //pac++
  2131. underline := ""
  2132. for {
  2133. underline += "_"
  2134. if tn.SortKV.Map[k+underline] == nil {
  2135. break
  2136. } else if v3, v2_ok := tn.SortKV.Map[k+underline].(string); v2_ok && v3 != "" {
  2137. index = append(index, v3)
  2138. index_pos = append(index_pos, 1)
  2139. } else if v3, v2_ok := tn.SortKV.Map[k+underline].([]string); v2_ok {
  2140. for v2_k, v2_v := range v3 {
  2141. index = append(index, v2_v)
  2142. index_pos = append(index_pos, v2_k+1)
  2143. }
  2144. }
  2145. }
  2146. break
  2147. }
  2148. }
  2149. }
  2150. break
  2151. }
  2152. }
  2153. return val, index, index_pos
  2154. }
  2155. //初始化CheckMultiPackageByTable方法需要的数据
  2156. func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
  2157. for in, k := range tn.SortKV.Keys {
  2158. //涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)就跳过
  2159. if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) {
  2160. continue
  2161. }
  2162. v := tn.SortKV.Map[k]
  2163. if vs, bvs := v.([]string); bvs {
  2164. //arr_count++
  2165. haspkgs := []string{}
  2166. for in2, v1 := range vs {
  2167. v1 = replPkgConfusion(v1) //替换分包中混淆的词
  2168. if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
  2169. if key_index == -1 {
  2170. key_index = in
  2171. } else if key_index != in {
  2172. break
  2173. }
  2174. index = append(index, FindVal_1.FindString(v1))
  2175. index_pos = append(index_pos, in2)
  2176. val += 1
  2177. pac++
  2178. } else {
  2179. if ok, v1new := isHasOnePkgAndNoKv(v1); ok { //td的值里面有一个包,并且没有冒号kv
  2180. haspkgs = append(haspkgs, v1new)
  2181. }
  2182. }
  2183. }
  2184. /*处理这种情况:
  2185. <tr><td>包一:xxxxxxxxx</td></tr>
  2186. <tr><td>包二:xxxxxxxxx</td></tr>
  2187. */
  2188. if len(index) == 0 && len(haspkgs) > 0 && len(haspkgs) == len(vs) {
  2189. for in2, v1 := range haspkgs {
  2190. if key_index == -1 {
  2191. key_index = in
  2192. } else if key_index != in {
  2193. break
  2194. }
  2195. index = append(index, v1)
  2196. index_pos = append(index_pos, in2)
  2197. val += 1
  2198. pac++
  2199. }
  2200. }
  2201. } else if v1, ok := v.(string); ok {
  2202. v1 = replPkgConfusion(v1) //替换分包中混淆的词
  2203. if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
  2204. key_index = in
  2205. index = append(index, FindVal_1.FindString(v1))
  2206. index_pos = append(index_pos, 0)
  2207. val += 1
  2208. pac++
  2209. } else if getTd := tn.GetTdByRCNo(0, tn.SortKV.Index[k]); getTd != nil && getTd.KVDirect == 2 { //纵向
  2210. /*处理这种情况:
  2211. <tr><td>包一:xxxxxxxxx</td></tr>
  2212. */
  2213. if ok, v1new := isHasOnePkgAndNoKv(v1); ok {
  2214. hasPkgTd[k] = true
  2215. key_index = in
  2216. index = append(index, v1new)
  2217. index_pos = append(index_pos, 0)
  2218. val += 1
  2219. pac++
  2220. }
  2221. }
  2222. }
  2223. }
  2224. return key_index, index, index_pos, val, pac, hasPkgTd
  2225. }
  2226. //组装解析到的分包,//key如果匹配到抽取关键词就添加到table.SortKV
  2227. func (tn *Table) assemblePackage(k1, v1, key string) {
  2228. bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
  2229. if bp.TableKV == nil {
  2230. bp.TableKV = u.NewJobKv()
  2231. }
  2232. if v1 != "" {
  2233. k2, w1, v2, _, bf := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
  2234. if bf {
  2235. for pos, k3 := range k2 {
  2236. if bp.TableKV.Kv != nil && bp.TableKV.KvTag[k3] != nil && (bp.TableKV.Kv[k3] == "" || w1[pos] > bp.TableKV.KvTag[k3].Weight) {
  2237. bp.TableKV.Kv[k3] = v2
  2238. bp.TableKV.KvTag[k3] = &u.Tag{Value: v2, Weight: w1[pos]}
  2239. } else {
  2240. bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
  2241. if tn.SortKV.Map[k3] == nil {
  2242. tn.SortKV.AddKey(k3, v2) //添加匹配到抽取关键词的key,value
  2243. }
  2244. }
  2245. }
  2246. } else {
  2247. bp.TableKV.Kv[k1] = qutil.ObjToString(v1)
  2248. }
  2249. }
  2250. k1 = regReplAllSpace.ReplaceAllString(k1, "")
  2251. //拼接内容
  2252. if !excludeKey.MatchString(k1) {
  2253. bp.Text += fmt.Sprintf("%v:%v\n", k1, v1)
  2254. }
  2255. tn.BlockPackage.Map[key] = bp
  2256. }
  // Patterns used by RepairCon to repair irregular table markup in crawled
  // content. Only the full text is repaired; tables inside blocks are left
  // alone.
  var (
  	// thbf matches bare opening and closing thead/tbody/tfoot tags.
  	thbf = regexp.MustCompile("(?i)</?t(head|body|foot)>")
  	// saveThead captures the content of a thead element so it can be kept.
  	saveThead = regexp.MustCompile("(?is)<thead>(.+?)</thead>")
  )
  2264. func RepairCon(con string) string {
  2265. res := saveThead.FindAllStringSubmatch(con, 1)
  2266. th := ""
  2267. if len(res) == 1 && len(res[0]) == 2 {
  2268. th = u.TrimLeftSpace(res[0][1], "")
  2269. }
  2270. con = thbf.ReplaceAllString(con, "")
  2271. con = u.TrimLeftSpace(con, "")
  2272. itbody := strings.Index(con, "<tr")
  2273. iLen := 3
  2274. if itbody == 0 {
  2275. con = findpos(con, iLen, itbody)
  2276. } else {
  2277. itable := strings.Index(con, "<table")
  2278. if itable == -1 || itable > itbody {
  2279. con = findpos(con, iLen, itbody)
  2280. }
  2281. }
  2282. //保留第一个thead
  2283. if th != "" {
  2284. con = strings.Replace(con, th, "<thead>"+th+"</thead>", 1)
  2285. }
  2286. //u.Debug(con)
  2287. return con
  2288. }
  2289. //修复表格
  2290. func findpos(con string, iLen, start int) (newcon string) {
  2291. defer qutil.Catch()
  2292. n := len(con)
  2293. layer := 0
  2294. pos := 0
  2295. if start >= 0 {
  2296. if iLen == 6 {
  2297. for i := iLen + start; i < len(con); i++ {
  2298. if con[i] == '<' && i+6 < n {
  2299. str := con[i : i+6]
  2300. if str == "</tbod" {
  2301. if layer == 0 {
  2302. pos = i
  2303. break
  2304. } else {
  2305. layer--
  2306. }
  2307. i += 6
  2308. } else if str == "<tbody" {
  2309. layer++
  2310. i += 6
  2311. }
  2312. }
  2313. }
  2314. if pos+7 <= n && start+6 < pos {
  2315. newcon = con[:start] + "<table" + con[start+6:pos] + "</table" + con[pos+7:]
  2316. }
  2317. } else {
  2318. layer++
  2319. nq := 0
  2320. lasttr := 0
  2321. for i := iLen + start; i < len(con); i++ {
  2322. if con[i] == '<' && i+4 < n {
  2323. if nq == 0 {
  2324. str := con[i : i+4]
  2325. if str == "</tr" {
  2326. if layer <= 0 {
  2327. pos = i //正常情况不会存在此类情况
  2328. break
  2329. } else {
  2330. layer--
  2331. lasttr = i
  2332. }
  2333. i += 4
  2334. } else if str[:3] == "<tr" {
  2335. layer++
  2336. i += 4
  2337. } else if str == "<tab" && i+6 < n && con[i+4:i+6] == "le" {
  2338. if layer == 0 {
  2339. break
  2340. } else {
  2341. //内嵌的表格
  2342. nq++
  2343. }
  2344. }
  2345. } else {
  2346. if i+6 < n {
  2347. str := con[i : i+6]
  2348. if str == "</tabl" {
  2349. nq--
  2350. } else if str == "<table" {
  2351. nq++
  2352. }
  2353. } else {
  2354. break
  2355. }
  2356. }
  2357. }
  2358. }
  2359. if pos == 0 && lasttr > 3 {
  2360. pos = lasttr + 5
  2361. } else if pos > 0 {
  2362. pos += 5
  2363. }
  2364. if pos <= n && pos < len(con) && start < pos {
  2365. newcon = con[:start] + "<table>" + con[start:pos] + "</table>" + con[pos:]
  2366. }
  2367. }
  2368. }
  2369. if newcon == "" {
  2370. newcon = con
  2371. }
  2372. return
  2373. }
  2374. //td的值里面有一个包,并且没有冒号kv
  2375. func isHasOnePkgAndNoKv(v1 string) (bool, string) {
  2376. v1s := FindVal_1.FindAllString(v1, -1)
  2377. colonCount := len(regDivision.FindAllString(v1, -1))
  2378. if len(v1s) == 1 && colonCount < 2 {
  2379. ispkgcolon := regexp.MustCompile(v1s[0] + "[::]").MatchString(v1)
  2380. if (ispkgcolon && colonCount == 1) || (!ispkgcolon && colonCount == 0) {
  2381. return true, v1s[0]
  2382. }
  2383. }
  2384. return false, v1
  2385. }
  2386. //替换分包中混淆的词
  2387. func replPkgConfusion(v1 string) string {
  2388. v1 = PreReg.ReplaceAllString(v1, "")
  2389. v1 = PreReg1.ReplaceAllString(v1, "")
  2390. v1 = PreCon.ReplaceAllString(v1, "")
  2391. v1 = PreCon2.ReplaceAllString(v1, "")
  2392. return v1
  2393. }
  2394. //对td中的值,进行再处理
  2395. func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
  2396. //处理表格中的联系人信息
  2397. indexMap := contactFormat.IndexMap
  2398. matchMap := contactFormat.MatchMap
  2399. weightMap := map[string]map[string]interface{}{} //权重
  2400. mustMatchFirst := len(indexMap) > 0 //第一个必须匹配上
  2401. reCreate := false
  2402. matchCount := 0
  2403. contactTypeTagMap := map[string]map[string][]interface{}{}
  2404. //u.Debug(mustMatchFirst, indexMap, matchMap)
  2405. notMatchTrCount := 0
  2406. allAscFind := true //开启正序查询
  2407. //涉及变量allAscFind,indexMap
  2408. if len(indexMap) == 0 {
  2409. isCanAddToIndexMap := false
  2410. matchPrevFlag := false
  2411. prevCanAddToIndexMap := false
  2412. LS:
  2413. for _, tr := range tn.TRs {
  2414. for td_index, td := range tr.TDs {
  2415. thisTdKvs := tn.tdkv(td) //获取td冒号kv
  2416. if len(thisTdKvs) != 1 {
  2417. continue
  2418. }
  2419. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2420. goOnFunc, isContinue, td_k := tn.tdKV(thisTdKvs[0].Key, &matchPrevFlag, &isCanAddToIndexMap, &indexMap, "LS")
  2421. if !goOnFunc {
  2422. break LS
  2423. }
  2424. if isContinue {
  2425. continue
  2426. }
  2427. //采购单位,代理机构
  2428. for _, k := range HasOrderContactType(td_k) {
  2429. if !ContactType[k].MatchString(td_k) { //不是采购单位,代理机构跳过
  2430. continue
  2431. }
  2432. if len(indexMap) == 0 {
  2433. if isCanAddToIndexMap || (prevCanAddToIndexMap && len(tr.TDs) == 1) {
  2434. myPrevTdVal := ""
  2435. if td_index-2 >= 0 {
  2436. myPrevTdVal = tr.TDs[td_index-2].Val
  2437. if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
  2438. matchPrevFlag = true
  2439. }
  2440. }
  2441. indexMap[0] = k
  2442. break
  2443. }
  2444. } else {
  2445. indexMap = map[int]string{}
  2446. break LS
  2447. }
  2448. }
  2449. }
  2450. prevCanAddToIndexMap = isCanAddToIndexMap
  2451. isCanAddToIndexMap = false
  2452. }
  2453. if len(indexMap) > 0 {
  2454. allAscFind = false
  2455. }
  2456. }
  2457. //////
  2458. L:
  2459. for tr_index, tr := range tn.TRs {
  2460. thisTrHasMatch := false
  2461. jumpNextTd := false
  2462. for td_index, td := range tr.TDs {
  2463. //和|以?及|与|、多个词和在一起
  2464. jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
  2465. //分块之后的kv
  2466. thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock)
  2467. if len(thisTdKvs) == 0 {
  2468. thisTdKvs = tn.tdkv(td) //获取冒号kv
  2469. }
  2470. tdAscFind := true //开启td正序查询
  2471. if len(thisTdKvs) == 0 {
  2472. continue
  2473. } else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
  2474. //采购人在联系人、电话后面的处理
  2475. tdAscFind = tn.hasIndexMap(thisTdKvs, &indexMap, tdAscFind)
  2476. }
  2477. prevKey := ""
  2478. oldIndexMapLength := len(indexMap)
  2479. thidTdIndex := td_index
  2480. //notmatchCount := 0
  2481. kvTitle := ""
  2482. for _, td_kv := range thisTdKvs {
  2483. //u.Debug(td_kv.Key, td_kv.Value, td_kv.Title)
  2484. iscontinue := false
  2485. td_v := td_kv.Value
  2486. td_k := FilterContactKey(td_kv.Key) //带括号()[]的采购单位,代理机构处理
  2487. td_k_length := len([]rune(td_k))
  2488. if td_k_length < 3 || td_k_length > 15 {
  2489. continue
  2490. }
  2491. //都为正序查询
  2492. if allAscFind && tdAscFind {
  2493. //都为正序查询处理
  2494. matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex)
  2495. }
  2496. if iscontinue {
  2497. continue
  2498. }
  2499. //不在同一块中
  2500. if td_kv.Title != "" && kvTitle != td_kv.Title && len(indexMap) > 0 && !ContactInfoMustReg.MatchString(td_kv.Key) {
  2501. thidTdIndex = 0
  2502. matchMap = map[string]map[string]bool{}
  2503. indexMap = map[int]string{}
  2504. }
  2505. kvTitle = td_kv.Title
  2506. //u.Debug(indexMap, td_k, td_v, matchMap)
  2507. if td_k_length < 2 || td_k_length > 10 {
  2508. continue
  2509. }
  2510. if len(indexMap) > 0 {
  2511. //没有识别到采购单位联系人、联系电话、代理机构联系人、联系电话
  2512. if !ContactInfoMustReg.MatchString(td_k) {
  2513. //notmatchCount++
  2514. //if notmatchCount < len(indexMap)*2 && false {//false???????
  2515. // notmatchCount = 0
  2516. // thidTdIndex = 0
  2517. // indexMap = map[int]string{}
  2518. // matchMap = map[string]map[string]bool{}
  2519. //}
  2520. if mustMatchFirst { //indexMap初始值大于0
  2521. break L
  2522. }
  2523. continue
  2524. }
  2525. reCreate = true
  2526. index := td_index
  2527. //oldIndexMapLength原来的indexMap等于0 ,现在的indexMap大于1
  2528. if oldIndexMapLength == 0 && len(indexMap) > 1 {
  2529. if prevKey != td_k {
  2530. prevKey = td_k
  2531. index = td_index
  2532. } else if prevKey == td_k {
  2533. index++
  2534. }
  2535. }
  2536. //kv.value为空
  2537. if filterValue.MatchString(td_v) {
  2538. thisTrHasMatch = true
  2539. continue
  2540. }
  2541. //u.Debug(indexMap, td_k, td_v, matchMap, index, modle)
  2542. //myContactType
  2543. myContactType := indexMap[index]
  2544. if myContactType == "" && len(indexMap) == 1 {
  2545. _, onlyContactType := u.FirstKeyValueInMap(indexMap)
  2546. myContactType, _ = onlyContactType.(string)
  2547. }
  2548. if myContactType == "" {
  2549. continue
  2550. }
  2551. matchCount++
  2552. if matchMap[myContactType] == nil {
  2553. matchMap[myContactType] = map[string]bool{}
  2554. }
  2555. if IsContactKvHandle(ContactInfoMustReg.FindString(td_k), matchMap[myContactType]) {
  2556. continue
  2557. }
  2558. matchMap[myContactType][ContactInfoMustReg.FindString(td_k)] = true
  2559. if ContactType[myContactType].MatchString(td_k) {
  2560. continue
  2561. }
  2562. thisTrHasMatch = true
  2563. //modle
  2564. modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index)
  2565. }
  2566. }
  2567. //u.Debug(td.SortKV.Map)
  2568. }
  2569. if allAscFind && !thisTrHasMatch {
  2570. notMatchTrCount++
  2571. if notMatchTrCount >= 2 {
  2572. notMatchTrCount = 0
  2573. indexMap = map[int]string{}
  2574. }
  2575. }
  2576. }
  2577. //u.Debug("end", matchCount, indexMap, matchMap)
  2578. if matchCount == 0 {
  2579. indexMap = map[int]string{}
  2580. matchMap = map[string]map[string]bool{}
  2581. }
  2582. (*contactFormat).IndexMap = indexMap
  2583. (*contactFormat).MatchMap = matchMap
  2584. // for _, tr := range tn.TRs {
  2585. // for _, td := range tr.TDs {
  2586. // log.Println(td.SortKV.Map)
  2587. // }
  2588. // }
  2589. }
  2590. //modle
  2591. func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int) {
  2592. modle := 0
  2593. if len(thisTdKvs) == 1 {
  2594. if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
  2595. modle = 1
  2596. } else {
  2597. modle = 2
  2598. }
  2599. }
  2600. if modle == 1 {
  2601. td.Text = myContactType + td_k
  2602. td.Val = td.Text
  2603. } else {
  2604. //
  2605. if !strings.HasSuffix(td_k, "方式") {
  2606. _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
  2607. if len(kTag) == 1 {
  2608. tagVal, _ := u.FirstKeyValueInMap(kTag)
  2609. if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
  2610. return
  2611. }
  2612. if (*contactTypeTagMap)[myContactType] == nil {
  2613. (*contactTypeTagMap)[myContactType] = map[string][]interface{}{}
  2614. }
  2615. myOldKeyArray := (*contactTypeTagMap)[myContactType][tagVal]
  2616. if myOldKeyArray != nil {
  2617. tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
  2618. } else {
  2619. (*contactTypeTagMap)[myContactType][tagVal] = make([]interface{}, 3)
  2620. }
  2621. if (*weightMap)[myContactType] == nil {
  2622. (*weightMap)[myContactType] = map[string]interface{}{}
  2623. }
  2624. (*weightMap)[myContactType][tagVal] = 1
  2625. (*contactTypeTagMap)[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
  2626. }
  2627. }
  2628. td.SortKV.AddKey(myContactType+td_k, td_v)
  2629. }
  2630. }
  2631. //都为正序查询
  2632. func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
  2633. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  2634. if !ContactType[k].MatchString(td_k) { //没有匹配到采购单位,代理机构
  2635. continue
  2636. }
  2637. matchCount++
  2638. if weightMap[k] == nil {
  2639. weightMap[k] = map[string]interface{}{}
  2640. }
  2641. //匹配到进行处理
  2642. if ContactInfoVagueReg.MatchString(td_k) {
  2643. thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
  2644. } else if k == "采购单位" { //打标签,权重高的重新覆盖
  2645. _, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
  2646. tagVal, weightVal := u.FirstKeyValueInMap(kTag)
  2647. if tagVal == k {
  2648. if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
  2649. weightMap[k][k] = weightVal.(int)
  2650. matchMap[k] = map[string]bool{}
  2651. indexMap = map[int]string{}
  2652. }
  2653. }
  2654. }
  2655. if u.IsMapHasValue(k, indexMap) { //map中是否存在value
  2656. thisTrHasMatch = true
  2657. iscontinue = true
  2658. continue
  2659. }
  2660. if reCreate {
  2661. indexMap = map[int]string{}
  2662. reCreate = false
  2663. }
  2664. indexMap[thidTdIndex] = k
  2665. iscontinue = true
  2666. thisTrHasMatch = true
  2667. thidTdIndex++
  2668. break
  2669. }
  2670. if len(indexMap) == 0 && td_kv.PrevLine != "" {
  2671. //td_kv.PrevLine
  2672. prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
  2673. for k, v := range ContactType { //采购单位,代理机构正则
  2674. if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
  2675. indexMap[thidTdIndex] = k
  2676. thisTrHasMatch = true
  2677. thidTdIndex++
  2678. }
  2679. }
  2680. }
  2681. if len(indexMap) == 0 && td_kv.Title != "" {
  2682. //td_kv.Title
  2683. if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
  2684. thidTdIndex = 0
  2685. matchMap = map[string]map[string]bool{}
  2686. indexMap = map[int]string{1: titleMatchType}
  2687. }
  2688. }
  2689. return matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex
  2690. }
  2691. //匹配到进行处理
  2692. func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool) bool {
  2693. if (*matchMap)[k] == nil {
  2694. (*matchMap)[k] = map[string]bool{}
  2695. }
  2696. isAddToMatchMap := true
  2697. if !strings.HasSuffix(td_k, "方式") {
  2698. _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
  2699. if len(kTag) == 1 {
  2700. tagVal, weightVal := u.FirstKeyValueInMap(kTag)
  2701. if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
  2702. isAddToMatchMap = false
  2703. }
  2704. if td.SortKV.Map[tagVal] != nil {
  2705. if (*weightMap)[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= (*weightMap)[k][tagVal].(int)) {
  2706. (*weightMap)[k][tagVal] = weightVal.(int)
  2707. td.SortKV.AddKey(tagVal, td_v)
  2708. thisTrHasMatch = true
  2709. }
  2710. } else {
  2711. (*weightMap)[k][tagVal] = weightVal.(int)
  2712. }
  2713. }
  2714. }
  2715. if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
  2716. (*matchMap)[k][ContactInfoVagueReg.FindString(td_k)] = true
  2717. }
  2718. return thisTrHasMatch
  2719. }
  2720. //采购人在联系人、电话后面的处理
  2721. func (tn *Table) hasIndexMap(thisTdKvs []*u.Kv, indexMap *map[int]string, tdAscFind bool) bool {
  2722. //采购人在联系人、电话后面的处理
  2723. isCanAddToIndexMap := false
  2724. LL:
  2725. for _, td_kv := range thisTdKvs {
  2726. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2727. goOnFunc, isContinue, td_k := tn.tdKV(td_kv.Key, nil, &isCanAddToIndexMap, indexMap, "LL")
  2728. if !goOnFunc {
  2729. break LL
  2730. }
  2731. if isContinue {
  2732. continue
  2733. }
  2734. if len(*indexMap) == 0 {
  2735. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  2736. if !ContactType[k].MatchString(td_k) {
  2737. continue
  2738. }
  2739. if isCanAddToIndexMap && len(*indexMap) == 0 {
  2740. (*indexMap)[0] = k
  2741. break
  2742. }
  2743. }
  2744. }
  2745. }
  2746. if len(*indexMap) > 0 {
  2747. tdAscFind = false
  2748. }
  2749. return tdAscFind
  2750. }
  2751. //和|以?及|与|、多个词和在一起
  2752. func (tn *Table) tdsMultipleWords(jumpNextTd bool, td *TD, td_index int, tr *TR, thisTrHasMatch bool, indexMap map[int]string) (jump, thisTr bool) {
  2753. if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
  2754. thisTdVals := regSplit.Split(td.Text, -1)
  2755. nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
  2756. if len(thisTdVals) == len(nextTdVals) { //本次和下个长度相等
  2757. for _, k := range HasOrderContactType(td.Text) { //采购单位,代理机构
  2758. if ContactType[k].MatchString(td.Text) { //采购单位,代理机构
  2759. for thisTdVals_k, thisTdVals_v := range thisTdVals {
  2760. thisTdVals_v = strings.TrimSpace(thisTdVals_v)
  2761. if ContactType[k].MatchString(thisTdVals_v) { //采购单位,代理机构
  2762. thisTrHasMatch = true
  2763. tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
  2764. continue
  2765. }
  2766. if !ContactInfoMustReg.MatchString(thisTdVals_v) {
  2767. continue
  2768. }
  2769. jumpNextTd = true
  2770. thisTrHasMatch = true
  2771. tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
  2772. }
  2773. break
  2774. }
  2775. }
  2776. if len(indexMap) > 0 {
  2777. _, onlyContactType := u.FirstKeyValueInMap(indexMap)
  2778. if myContactType, _ := onlyContactType.(string); myContactType != "" {
  2779. for thisTdVals_k, thisTdVals_v := range thisTdVals {
  2780. thisTdVals_v = strings.TrimSpace(thisTdVals_v)
  2781. if ContactInfoMustReg.MatchString(thisTdVals_v) {
  2782. jumpNextTd = true
  2783. thisTrHasMatch = true
  2784. tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
  2785. }
  2786. }
  2787. }
  2788. }
  2789. }
  2790. } else {
  2791. jumpNextTd = false
  2792. }
  2793. return jumpNextTd, thisTrHasMatch
  2794. }
  2795. //采购单位,代理机构
  2796. func (tn *Table) tdHasOrderContactType(td_k string, indexMap *map[int]string, tr *TR, prevCanAddToIndexMap, isCanAddToIndexMap, matchPrevFlag *bool, td_index int) (gotoFunc bool) {
  2797. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  2798. if !ContactType[k].MatchString(td_k) {
  2799. continue
  2800. }
  2801. if len(*indexMap) == 0 {
  2802. if (*isCanAddToIndexMap) || (*prevCanAddToIndexMap && len(tr.TDs) == 1) {
  2803. myPrevTdVal := ""
  2804. if td_index-2 >= 0 {
  2805. myPrevTdVal = tr.TDs[td_index-2].Val
  2806. }
  2807. if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
  2808. (*matchPrevFlag) = true
  2809. }
  2810. (*indexMap)[0] = k
  2811. break
  2812. }
  2813. } else {
  2814. (*indexMap) = map[int]string{}
  2815. return false
  2816. }
  2817. }
  2818. return true
  2819. }
  2820. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2821. func (tn *Table) tdKV(key string, matchPrevFlag, isCanAddToIndexMap *bool, indexMap *map[int]string, gotoName string) (goOnFunc, isContinue bool, td_k string) {
  2822. td_k = FilterContactKey(key) //带括号的()[]【】采购单位,代理机构处理
  2823. td_k_length := len([]rune(td_k))
  2824. if td_k_length < 2 || td_k_length > 15 {
  2825. return true, true, "" //继续执行,跳过当前循环
  2826. }
  2827. isContinue = ContactInfoMustReg.MatchString(td_k) //识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2828. if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
  2829. if gotoName == "LS" && !(*matchPrevFlag) && len(*indexMap) > 0 {
  2830. (*indexMap) = map[int]string{}
  2831. return false, false, "" //中断外层循环
  2832. }
  2833. if gotoName == "LL" && len(*indexMap) > 0 {
  2834. (*indexMap) = map[int]string{}
  2835. return false, false, ""
  2836. }
  2837. (*isCanAddToIndexMap) = true
  2838. }
  2839. return true, false, td_k //继续执行,不结束当前循环,返回处理后的值
  2840. }
  2841. //获取td冒号kv
  2842. func (tn *Table) tdkv(td *TD) []*u.Kv {
  2843. thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取冒号kv
  2844. //获取冒号
  2845. if len(thisTdKvs) == 0 {
  2846. tdValue := regReplAllSpace.ReplaceAllString(td.Text, "") //去除空格换行
  2847. if tdValue != "" && len([]rune(tdValue)) < 10 {
  2848. thisTdKvs = append(thisTdKvs, &u.Kv{
  2849. Key: tdValue,
  2850. Value: "",
  2851. })
  2852. }
  2853. }
  2854. return thisTdKvs
  2855. }
  2856. //table中抽取品牌,table.BrandData
  2857. func (table *Table) analyBrand() {
  2858. //5c2d8c05a5cb26b9b782572b
  2859. //产品名称 品牌 规格 单价 单位 数量 小计 质保期
  2860. lineMapArr := make(map[string]*SortMap)
  2861. lineMap := make(map[string]*SortMap)
  2862. brandRule := u.BrandRules
  2863. //初始化lineMapArr,lineMap;
  2864. lineMapArr, lineMap = initLineMapLineMapArr(table) //处理数组数据后,匹配必须title和替换要保存的title
  2865. //qutil.Debug("lineMapArr----", len(lineMapArr))
  2866. if len(lineMapArr) > 0 {
  2867. for _, aMap := range lineMapArr {
  2868. maxNum := 0 //记录最大长度
  2869. arrcount1 := 0 //记录key是否存在必须title(数组数据)
  2870. arrcount2 := 0
  2871. ka := make(map[string][]string) //最终存储数据
  2872. //qutil.Debug("aMap.Keys----", aMap.Keys)
  2873. //匹配商品规则
  2874. arrcount1, arrcount2, ka = table.matchMapArrBrandRule(aMap, brandRule, ka, arrcount1, arrcount2)
  2875. //找最终存储数据的最小len(arr)
  2876. // for _, vf := range ka {
  2877. // //找最短的数组
  2878. // lenVal := len(vf)
  2879. // if minNum == 0 || minNum > lenVal { //maxNum = len(最短数组)
  2880. // minNum = lenVal
  2881. // }
  2882. // }
  2883. //找最终存储数据的最大len(arr),小的补空
  2884. for _, vf1 := range ka {
  2885. lenVal := len(vf1)
  2886. if lenVal > maxNum {
  2887. maxNum = lenVal
  2888. }
  2889. }
  2890. //table.BrandData商品存储
  2891. finishKa := make(map[string][]string)
  2892. for vf2K, vf2 := range ka {
  2893. if len(vf2) < maxNum {
  2894. if vf2K == "unitprice" { //价格的当前总数比最大的总数小就跳过,可能是总价格而不是单个的价格
  2895. continue
  2896. }
  2897. lenMv := maxNum - len(vf2)
  2898. for i := 0; i < lenMv; i++ {
  2899. vf2 = append(vf2, "")
  2900. }
  2901. }
  2902. finishKa[vf2K] = vf2
  2903. }
  2904. hasKey(table, arrcount1) //是否匹配到table中的标题
  2905. //qutil.Debug("finishKa----", finishKa)
  2906. if arrcount1 >= 1 {
  2907. if arrcount1+arrcount2 == 1 { //删除只匹配到一个价钱(总价)
  2908. delete(finishKa, "unitprice")
  2909. }
  2910. finishData := dealArrData(maxNum, finishKa)
  2911. table.BrandData = append(table.BrandData, finishData) //存储了table.BrandData
  2912. }
  2913. }
  2914. }
  2915. //处理string数据后,匹配必须title和替换要保存的title
  2916. //qutil.Debug("lineMap----", len(lineMap))
  2917. if len(lineMap) > 0 {
  2918. for _, sMap := range lineMap {
  2919. strcount1 := 0 //记录key是否存在必须title(字符串数据)
  2920. strcount2 := 0
  2921. endStrMap := make(map[string]string)
  2922. //qutil.Debug(k, "aMap.Keys----", sMap.Keys)
  2923. //匹配商品规则
  2924. strcount1, strcount2, endStrMap = table.matchMapBrandRule(sMap, brandRule, endStrMap, strcount1, strcount2)
  2925. //原始字符串数据处理
  2926. hasKey(table, strcount1) //是否匹配到table中的标题
  2927. //qutil.Debug("endStrMap----", endStrMap)
  2928. if strcount1 >= 1 {
  2929. if strcount1+strcount2 == 1 { //删除只匹配到一个价钱(总价)
  2930. delete(endStrMap, "unitprice")
  2931. }
  2932. finishData := dealStrData(endStrMap) //处理数据
  2933. if len(finishData) > 0 {
  2934. table.BrandData = append(table.BrandData, finishData)
  2935. }
  2936. }
  2937. }
  2938. }
  2939. }
  2940. //字符串匹配商品规则
  2941. func (table *Table) matchMapBrandRule(sMap *SortMap, brandRule map[string]map[string]string, endStrMap map[string]string, strcount1, strcount2 int) (int, int, map[string]string) {
  2942. for _, k1 := range sMap.Keys {
  2943. match := false //记录must是否匹配到
  2944. v1 := qutil.ObjToString(sMap.Map[k1])
  2945. // for k1, v1 := range sMap {
  2946. //qutil.Debug(k1, "++++++++++", v1)
  2947. if v1 == "" {
  2948. continue
  2949. }
  2950. //匹配必须title
  2951. for nameM, r := range brandRule["must"] {
  2952. if convert(k1, r) { //匹配成功
  2953. v1tmp1 := v1
  2954. match = true
  2955. if nameM == "itemname" || nameM == "modal" { //特殊处理itemname
  2956. hasGoods(table, v1)
  2957. if nameM == "itemname" {
  2958. v1tmp1 = filterItem(v1)[0] //过滤itemname
  2959. if v1tmp1 == "" {
  2960. break
  2961. }
  2962. }
  2963. }
  2964. if nameM == "brandname" || nameM == "modal" { //特殊处理brandname
  2965. if endStrMap["brandname"] == "" {
  2966. brand, allNull := hasBrand(table, v1)
  2967. if !allNull {
  2968. endStrMap["brandname"] = brand[0]
  2969. }
  2970. }
  2971. }
  2972. //unitprice
  2973. if nameM == "unitprice" { //处理金额
  2974. v1tmp1 = dealPrice(k1, v1)[0]
  2975. }
  2976. if nameM != "brandname" && endStrMap[nameM] == "" {
  2977. endStrMap[nameM] = v1tmp1
  2978. }
  2979. strcount1++
  2980. }
  2981. }
  2982. //替换其它要保存字段
  2983. if !match {
  2984. for nameR, r := range brandRule["replace"] {
  2985. if convert(k1, r) { //匹配成功
  2986. v1tmp2 := v1
  2987. //totalprice
  2988. if nameR == "totalprice" { //处理金额
  2989. v1tmp2 = dealPrice(k1, v1)[0]
  2990. }
  2991. //number
  2992. if nameR == "number" { //处理数量
  2993. varr1, uname1 := dealNumber(v1)
  2994. v1tmp2 = varr1[0]
  2995. //从number中获取到的单位
  2996. if endStrMap["unitname"] == "" && uname1[0] != "" {
  2997. endStrMap["unitname"] = uname1[0]
  2998. }
  2999. }
  3000. if v1tmp2 != "" {
  3001. endStrMap[nameR] = v1tmp2
  3002. }
  3003. strcount2++
  3004. }
  3005. }
  3006. }
  3007. //}
  3008. }
  3009. return strcount1, strcount2, endStrMap
  3010. }
  3011. //数组匹配商品规则
  3012. func (table *Table) matchMapArrBrandRule(aMap *SortMap, brandRule map[string]map[string]string, ka map[string][]string, arrcount1, arrcount2 int) (int, int, map[string][]string) {
  3013. for _, k0 := range aMap.Keys {
  3014. match := false //记录must是否匹配到
  3015. v0 := aMap.Map[k0].([]string)
  3016. //匹配必须title
  3017. for nameM, r := range brandRule["must"] {
  3018. if convert(k0, r) { //匹配成功
  3019. v0tmp1 := v0
  3020. match = true
  3021. if len(ka[nameM]) != 0 && strings.Contains(k0, "描述") { //防止k0匹配到多次 和特殊情况 物料名称 物料描述同时出现
  3022. continue
  3023. }
  3024. if nameM == "itemname" || nameM == "modal" {
  3025. hasGoods(table, v0...) //判断itemname和modal中有没有商品
  3026. if nameM == "itemname" {
  3027. v0tmp1 = filterItem(v0...) //过滤itemname
  3028. }
  3029. }
  3030. if nameM == "brandname" || nameM == "modal" {
  3031. if len(ka["brandname"]) == 0 {
  3032. brand, allNull := hasBrand(table, v0...)
  3033. if !allNull {
  3034. ka["brandname"] = brand
  3035. }
  3036. }
  3037. }
  3038. //unitprice
  3039. if nameM == "unitprice" { //处理金额
  3040. v0tmp1 = dealPrice(k0, v0...)
  3041. }
  3042. if nameM != "brandname" && len(ka[nameM]) == 0 {
  3043. ka[nameM] = v0tmp1
  3044. }
  3045. arrcount1++
  3046. }
  3047. }
  3048. //替换其它要保存字段
  3049. if !match { //must未匹配,匹配replace
  3050. for nameR, r := range brandRule["replace"] {
  3051. if convert(k0, r) { //匹配成功
  3052. v0tmp2 := v0
  3053. //totalprice
  3054. if nameR == "totalprice" { //处理金额
  3055. v0tmp2 = dealPrice(k0, v0...)
  3056. }
  3057. //number
  3058. if nameR == "number" { //处理数量
  3059. uname0 := []string{}
  3060. v0tmp2, uname0 = dealNumber(v0...)
  3061. if len(ka["unitname"]) == 0 && len(uname0) != 0 {
  3062. ka["unitname"] = uname0
  3063. }
  3064. }
  3065. if len(v0tmp2) > 0 {
  3066. ka[nameR] = v0tmp2
  3067. }
  3068. arrcount2++
  3069. }
  3070. }
  3071. }
  3072. }
  3073. return arrcount1, arrcount2, ka
  3074. }
  3075. //初始化lineMapArr,lineMap
  3076. func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
  3077. lineMapArr = make(map[string]*SortMap)
  3078. lineMap = make(map[string]*SortMap)
  3079. for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
  3080. val := table.SortKV.Map[key]
  3081. key = regReplAllSpace.ReplaceAllString(key, "")
  3082. key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
  3083. if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
  3084. /*
  3085. {
  3086. "商品":["",""],
  3087. "商品_"["",""],
  3088. }
  3089. */
  3090. valArr, allempty := filterVal(realTypeVal...) //过滤数据
  3091. if allempty {
  3092. continue
  3093. }
  3094. realTypeVal = valArr
  3095. line := underline.FindString(key)
  3096. lineValMap1 := lineMapArr[line]
  3097. // i := 1
  3098. // L:
  3099. // for { //去除数组空数据
  3100. // last := realTypeVal[len(realTypeVal)-i]
  3101. // if last == "" {
  3102. // i++
  3103. // if i > len(realTypeVal) {
  3104. // break
  3105. // }
  3106. // goto L
  3107. // } else {
  3108. // break
  3109. // }
  3110. // }
  3111. // dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] //去除数组中空数据
  3112. if len(realTypeVal) > 0 {
  3113. if lineValMap1 == nil {
  3114. tmp := NewSortMap()
  3115. tmp.AddKey(key, realTypeVal)
  3116. lineMapArr[line] = tmp
  3117. } else {
  3118. lineValMap1.AddKey(key, realTypeVal)
  3119. }
  3120. }
  3121. //qutil.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
  3122. } else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"}
  3123. /*
  3124. {
  3125. "商品:"",名称:"",
  3126. "商品_:"",名称_:"",
  3127. "商品__:"",名称__:"",
  3128. }
  3129. */
  3130. valArr, allempty := filterVal(realTypeVal) //过滤数据
  3131. if allempty {
  3132. continue
  3133. }
  3134. realTypeVal = valArr[0]
  3135. line := underline.FindString(key)
  3136. lineValMap2 := lineMap[line]
  3137. if lineValMap2 == nil {
  3138. tmp := NewSortMap()
  3139. tmp.AddKey(key, realTypeVal)
  3140. lineMap[line] = tmp
  3141. } else {
  3142. lineValMap2.AddKey(key, realTypeVal)
  3143. }
  3144. //qutil.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
  3145. } else {
  3146. // "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
  3147. //成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
  3148. //qutil.Debug("err data:", key, val)
  3149. }
  3150. }
  3151. return lineMapArr, lineMap
  3152. }
  3153. func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
  3154. for k2, v2 := range ka {
  3155. //处理数组长度不相等,使长度一致
  3156. if len(v2) > maxNum {
  3157. ka[k2] = v2[:maxNum]
  3158. }
  3159. }
  3160. finalData := assembleData(ka, 1)
  3161. if len(finalData) > 0 {
  3162. return finalData
  3163. }
  3164. return nil
  3165. }
  3166. func dealStrData(kv map[string]string) []map[string]string {
  3167. finalData := []map[string]string{}
  3168. if len(kv) > 0 {
  3169. finalData = assembleData(kv, 2)
  3170. }
  3171. return finalData
  3172. }
  3173. //组装数据,每一行的数据为一数据集合
  3174. func assembleData(m interface{}, n int) []map[string]string {
  3175. defer qutil.Catch()
  3176. /*
  3177. {
  3178. "itemname":["计算机","打印机","机柜"],
  3179. "number" :["1","12","4"]
  3180. }
  3181. */
  3182. datas := []map[string]string{}
  3183. if n == 1 { //数组数据
  3184. realTypeM := m.(map[string][]string)
  3185. //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  3186. /*
  3187. arr1 ["a1","b1","c1"]
  3188. arr2 ["a2","b2","c2"]
  3189. [
  3190. {"a1","a2"},
  3191. {"b1","b2"},
  3192. {"c1","c2"}
  3193. ]
  3194. */
  3195. //start
  3196. for k3, v3 := range realTypeM {
  3197. for _, val := range v3 {
  3198. data := make(map[string]string)
  3199. data[k3] = val
  3200. datas = append(datas, data)
  3201. }
  3202. break
  3203. }
  3204. for i, data := range datas {
  3205. for k4, v4 := range realTypeM {
  3206. if i < len(v4) { //数组数据长度不一致
  3207. if v4[i] != " " {
  3208. data[k4] = v4[i]
  3209. } else {
  3210. delete(data, k4)
  3211. }
  3212. } else {
  3213. fmt.Println("err table")
  3214. }
  3215. }
  3216. datas[i] = data
  3217. }
  3218. //end
  3219. for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  3220. for fmk, fmv := range fdv {
  3221. if tabletdclear.ReplaceAllString(fmv, "") == "" {
  3222. delete(fdv, fmk)
  3223. }
  3224. }
  3225. }
  3226. } else { //字符串数据
  3227. realTypeM := m.(map[string]string)
  3228. datas = append(datas, realTypeM)
  3229. }
  3230. return datas
  3231. }
  3232. ////组装数据,每一行的数据为一数据集合
  3233. //func assembleData(m interface{}, n int) []map[string]string {
  3234. // defer qutil.Catch()
  3235. // /*
  3236. // {
  3237. // "itemname":["计算机","打印机","机柜"],
  3238. // "number" :["1","12","4"]
  3239. // }
  3240. // */
  3241. // datas := []map[string]string{}
  3242. // switch reflect.TypeOf(m).String() {
  3243. // case "map[string][]string": //数组数据
  3244. // realTypeM := m.(map[string][]string)
  3245. // //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  3246. // /*
  3247. // arr1 ["a1","b1","c1"]
  3248. // arr2 ["a2","b2","c2"]
  3249. // [
  3250. // {"a1","a2"},
  3251. // {"b1","b2"},
  3252. // {"c1","c2"}
  3253. // ]
  3254. // */
  3255. // //start
  3256. // for k3, v3 := range realTypeM {
  3257. // for _, val := range v3 {
  3258. // data := make(map[string]string)
  3259. // data[k3] = val
  3260. // datas = append(datas, data)
  3261. // }
  3262. // break
  3263. // }
  3264. // for i, data := range datas {
  3265. // for k4, v4 := range realTypeM {
  3266. // if i < len(v4) { //数组数据长度不一致
  3267. // if v4[i] != " " {
  3268. // data[k4] = v4[i]
  3269. // } else {
  3270. // delete(data, k4)
  3271. // //continue
  3272. // }
  3273. // } else {
  3274. // fmt.Println("err table")
  3275. // //continue
  3276. // }
  3277. // }
  3278. // datas[i] = data
  3279. // }
  3280. // //end
  3281. // // for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  3282. // // for fmk, fmv := range fdv {
  3283. // // if tabletdclear.ReplaceAllString(fmv, "") == "" {
  3284. // // delete(fdv, fmk)
  3285. // // }
  3286. // // }
  3287. // // }
  3288. // case "map[string]string": //字符串数据
  3289. // realTypeM := m.(map[string]string)
  3290. // datas = append(datas, realTypeM)
  3291. // default:
  3292. // }
  3293. // return datas
  3294. //}
  3295. func convert(key, r string) bool {
  3296. defer qutil.Catch()
  3297. flag := false
  3298. key = tabletitleclear.ReplaceAllString(key, "")
  3299. reg, err := regexp.Compile(r)
  3300. if err != nil {
  3301. fmt.Println("reg err:", err)
  3302. return false
  3303. }
  3304. flag = reg.MatchString(key)
  3305. return flag
  3306. }
  3307. func hasKey(table *Table, n int) {
  3308. defer qutil.Catch()
  3309. if table.TableResult.HasKey == 1 {
  3310. return
  3311. }
  3312. if n >= 1 {
  3313. table.TableResult.HasKey = 1
  3314. }
  3315. }
  3316. func hasGoods(table *Table, data ...string) {
  3317. defer qutil.Catch()
  3318. goodsArr := make([]string, len(data))
  3319. //fmt.Println("table.TableResult.HasGoods=====", table.TableResult.HasGoods)
  3320. if table.TableResult.HasGoods == 1 {
  3321. return
  3322. }
  3323. for i, d := range data {
  3324. if d != "" {
  3325. goods := u.GoodsGet.CheckSensitiveWord(d)
  3326. //fmt.Println("goods======", goods)
  3327. goodsArr[i] = goods
  3328. if len(goods) > 0 {
  3329. table.TableResult.HasGoods = 1
  3330. break
  3331. }
  3332. }
  3333. }
  3334. }
  3335. //func hasBrand(table *Table, data ...string) {
  3336. // defer qutil.Catch()
  3337. // if table.TableResult.HasBrand == 1 {
  3338. // return
  3339. // }
  3340. // for i, d := range data {
  3341. // if d != "" {
  3342. // brand := u.BrandGet.CheckSensitiveWord(d)
  3343. // qutil.Debug(d, brand)
  3344. // if brand != "" {
  3345. // table.TableResult.HasBrand = 1
  3346. // break
  3347. // }
  3348. // }
  3349. // }
  3350. //}
  3351. func hasBrand(table *Table, data ...string) ([]string, bool) {
  3352. defer qutil.Catch()
  3353. //fmt.Println("table.TableResult.HasBrand---------", table.TableResult.HasBrand)
  3354. brandArr := make([]string, len(data))
  3355. // if table.TableResult.HasBrand == 1 {
  3356. // return brandArr, 1
  3357. // }
  3358. allNull := true
  3359. for i, d := range data {
  3360. //if d != "" {
  3361. brand := u.BrandGet.CheckSensitiveWord(d)
  3362. if brand != "" {
  3363. allNull = false
  3364. }
  3365. //fmt.Println("brand======", brand)
  3366. brandArr[i] = brand
  3367. if len(brand) > 0 {
  3368. table.TableResult.HasBrand = 1
  3369. }
  3370. //}
  3371. }
  3372. return brandArr, allNull
  3373. }
  3374. //过滤td值
  3375. func filterVal(val ...string) ([]string, bool) {
  3376. defer qutil.Catch()
  3377. n := 0 //记录被过滤的个数
  3378. for i, v := range val {
  3379. afterFilter := tabletdclear.ReplaceAllString(v, "")
  3380. afterFilter = NullVal.ReplaceAllString(afterFilter, "")
  3381. if afterFilter == "" {
  3382. n++
  3383. }
  3384. val[i] = afterFilter
  3385. }
  3386. allempty := false
  3387. if n == len(val) { //所有都被过滤掉
  3388. allempty = true
  3389. }
  3390. return val, allempty
  3391. }
  3392. //过滤itemname全是数字
  3393. func filterItem(itemval ...string) []string {
  3394. defer qutil.Catch()
  3395. result := []string{}
  3396. for _, v := range itemval {
  3397. afterFilter := numclear.ReplaceAllString(v, "")
  3398. if afterFilter != "" {
  3399. result = append(result, v)
  3400. } else {
  3401. result = append(result, afterFilter)
  3402. }
  3403. }
  3404. return result
  3405. }
  3406. //处理价格
  3407. func dealPrice(key string, val ...string) []string {
  3408. defer qutil.Catch()
  3409. iswan := strings.Contains(key, "万") //表格title中带有万
  3410. result := []string{}
  3411. for _, v := range val { //1.00万元 1元
  3412. tmparr := strings.Split(v, ".")
  3413. tmparr[0] = moneyNum.ReplaceAllString(tmparr[0], "")
  3414. if iswan {
  3415. result = append(result, tmparr[0]+"0000")
  3416. } else {
  3417. if strings.Contains(v, "万") { //价格中带有万
  3418. result = append(result, tmparr[0]+"0000")
  3419. } else {
  3420. result = append(result, tmparr[0])
  3421. }
  3422. }
  3423. }
  3424. return result
  3425. }
  3426. //处理number
  3427. func dealNumber(val ...string) ([]string, []string) {
  3428. defer qutil.Catch()
  3429. unitnameArr := []string{}
  3430. result := []string{}
  3431. for _, v := range val { //1个 1.00个
  3432. n := numclear.FindString(v)
  3433. unitname := numclear.ReplaceAllString(v, "") //匹配个数后的单位
  3434. unitnameArr = append(unitnameArr, unitname)
  3435. //val[i] = strings.Split(n, ".")[0]
  3436. result = append(result, strings.Split(n, ".")[0])
  3437. }
  3438. return result, unitnameArr
  3439. }
  // analyProNameAndItemNumber currently has an empty body and does nothing.
  func (tn *Table) analyProNameAndItemNumber() {
  }