analytable.go 107 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547
  1. package pretreated
  2. import (
  3. "fmt"
  4. u "jy/util"
  5. qutil "qfw/util"
  6. "regexp"
  7. "strings"
  8. "github.com/PuerkitoBio/goquery"
  9. )
// Package-level state: mostly the regular expressions used to clean and
// classify table cells, plus a few shared constants and helpers.
var (
	// numclear strips digits (Arabic or Chinese numerals, dots) from the start of an item name.
	numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
	// tabletitleclear strips unwanted characters from a table title.
	tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/((人民币万元件个公斤))]")
	// tablekeyclear strips whitespace, leading numbering and bracketed text contained in a table key.
	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
	// tabletdclear strips symbols from a table td.
	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、,。、_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
	// moneyreg decides that a key denotes a monetary amount; used when handling the 万元 unit.
	moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
	// MoneyReg judges from a cell's content whether it is a header: a cell made up only of amount characters is not a header.
	MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
	// moneyNum is used when judging sub-packages.
	moneyNum = regexp.MustCompile("[元整¥万]")
	// display detects a hidden table (an inline style containing "display: none", case-insensitive).
	display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
	//---------------
	// Estimating the probability that a table describes multiple packages.
	// The two patterns below score a table by its tag.
	TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)")
	TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)")
	// FilterKey_2 filters table keys before the multi-package scoring.
	FilterKey_2 = regexp.MustCompile("招标|投标|项目")
	// FindKey_2 scores a table for multi-package by its keys.
	FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数])")
	// FindVal_1 and FindVal2_1 judge multi-package from cell values.
	FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
	// excludeKey lists keys that are excluded before the multi-package judgement.
	excludeKey = regexp.MustCompile("(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") // alternatives noted by the original author: 编号|划分
	//-------------
	cut = u.NewCut()
	// ClearTagReg cleans a table tag: it matches markup tags and trailing whitespace.
	ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
	// ttagreg finds the table tag in the text that ends just before a table.
	ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
	// checkval is the probability used when judging whether a row/column is a header.
	checkval = float32(0.6)
	//tdval_reg = regexp.MustCompile(`([\p{Han}][\p{Han}\s、()\\(\\)]{1,9})[::]([^::\\n。]{5,60})(?:[;;,,.。\\n\\t\\s])?`)
	// repSpace is used for whitespace replacement (whitespace, colons and literal "\t" sequences).
	repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+")
	// Handling of table key/value pairs.
	// filter_tag_zb marks tags for keys that cannot be standardized (winning-bid result tables).
	filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)")
	// Winning bid amount.
	// filter_zbje_k: keys containing these words are standardized as the winning bid amount.
	filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
	// filter_zbje_jd is the simple amount check.
	// NOTE(review): "[^(售|保证)]" is a negated character class (it excludes the
	// single characters inside the brackets), not a negated group — confirm intent.
	filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
	// filter_zbje_kn: ...and keys containing these words are excluded.
	filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|要求$")
	// filter_zbje_v: ...and the value must contain these characters.
	filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
	// Winning bidder.
	// filter_zbdw_ky: keys containing these words are standardized as the winning bidder.
	filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$")
	// filter_zbdw_jd is the simple winning-bidder check.
	filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
	// filter_zbdw_kn: ...and keys containing these words are excluded.
	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址")
	// filter_zbdw_v: ...and the value must contain these words.
	filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
	// filter_zbdw_v2: ...and the value must end with one of these words.
	filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
	//Tg = map[string]interface{}{}
	// Some tables have an empty header; NullTdReg handles values that are
	// themselves rankings. It goes together with NullTxtBid.
	NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
	NullTxtBid = "成交供应商排名"
	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
	// MhSpilt matches a colon.
	MhSpilt = regexp.MustCompile("[::]")
	// Recognition of the buyer's contact person and phone, and the agency's contact person and phone.
	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?((电话([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
	// ContactInfoMustReg is ContactInfoVagueReg anchored to the whole string.
	ContactInfoMustReg = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
	// ContactType maps a contact owner (buyer / agency) to the pattern that recognizes it.
	ContactType = map[string]*regexp.Regexp{
		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人|采购部"),
		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
	}
	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
	// MultipleValueSplitReg matches the separators between multiple values in one cell.
	MultipleValueSplitReg = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
	BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
	FilterSerial = regexp.MustCompile(".+[、..::,]")
	// underline matches trailing underscores.
	underline = regexp.MustCompile("_+$")
	// iswinnertabletag / nswinnertabletag: a tag counts as a winner table tag when
	// it matches the first pattern and does not match the second.
	iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果)")
	nswinnertabletag = regexp.MustCompile("[评得分估]+|标的|班子成员")
	jsonReg = regexp.MustCompile(`\{.+:[^}]*\} `) // \{".*\":\".+\"}
	// regHz matches a single Chinese character in the range U+4E00..U+9FA5.
	regHz = regexp.MustCompile("[\u4e00-\u9fa5]")
	winnerOrderAndBidResult = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
)
  102. //在解析时,判断表格元素是否隐藏
  103. func IsHide(g *goquery.Selection) (b bool) {
  104. style, exists := g.Attr("style")
  105. if exists {
  106. b = display.MatchString(style)
  107. }
  108. return
  109. }
  110. //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
  111. //待扩展,暂不支持正则标签库
  112. func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[string][]*u.Tag, returntag string) {
  113. kvTags = map[string][]*u.Tag{}
  114. v1 := ""
  115. if sv, sok := v.(string); sok { //取KV
  116. v1 = sv
  117. } else if sv, sok := v.([]string); sok { //是数组先默认取第一个
  118. if len(sv) >= 1 {
  119. v1 = sv[0]
  120. }
  121. }
  122. //对值单位的处理 (预算|费|价|额|规模|投资)
  123. if moneyreg.MatchString(k) {
  124. v1 += GetMoneyUnit(k, v1)
  125. }
  126. //先清理key
  127. //u.Debug(1, k, v1)
  128. k1 := ClearKey(k, 2)
  129. //u.Debug(2, k)
  130. //取标准key
  131. res := u.GetTags(k1)
  132. if len(res) == 0 && k1 != k {
  133. res = u.GetTags(k)
  134. k1 = k
  135. }
  136. //log.Println(k, res)
  137. // if len(res) == 0 {
  138. // go u.AddtoNoMatchMap(tk)
  139. // }
  140. //当取到标准化值时,放入数组
  141. if len(res) > 0 {
  142. for _, t1 := range res {
  143. //降低冒号值的权重
  144. if MhSpilt.MatchString(v1) {
  145. t1.Weight -= 50
  146. }
  147. if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
  148. kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
  149. } else {
  150. kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
  151. }
  152. }
  153. //k1 = res[0].Value
  154. } else {
  155. kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
  156. //没有取到标准化key时,对中标金额和中标单位的逻辑处理
  157. if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
  158. if tabletag == "" {
  159. returntag = "中标情况"
  160. }
  161. kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
  162. } else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
  163. filter_zbdw_v.MatchString(v1) {
  164. kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: k, Value: v1, Weight: -100})
  165. if tabletag == "" {
  166. returntag = "中标情况"
  167. }
  168. } else {
  169. //对上一步没有取到标准化key的进一步处理
  170. if tabletag == "" {
  171. }
  172. if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
  173. //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
  174. if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
  175. kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
  176. } /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
  177. k1 = append(k1, "中标单位")
  178. weight = append(weight, -100)
  179. b = true
  180. }*/
  181. }
  182. }
  183. }
  184. return
  185. }
  186. //对解析后的表格的kv进行过滤
  187. func (table *Table) KVFilter() {
  188. //1.标准化值查找
  189. //2.对数组的处理
  190. //3.对分包的处理
  191. //4.对KV的处理
  192. //判断表格是否有用,调用abandontable正则数组进行判断
  193. //遍历每一行
  194. table.analyTdKV() //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
  195. as := NewSortMap()
  196. //遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理
  197. for _, k := range table.SortKV.Keys {
  198. //表格描述处理,对成交结果的处理
  199. if regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序)").MatchString(k) {
  200. table.Desc += "成交结果,"
  201. }
  202. if regexp.MustCompile("^单价").MatchString(k) {
  203. continue
  204. }
  205. v := table.SortKV.Map[k]
  206. if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
  207. k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
  208. kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
  209. //qutil.Debug(k, v, k1, w1, v1, tag, b)
  210. if tag != "" && table.Tag == "" {
  211. table.Tag = tag
  212. }
  213. MergeKvTags(table.StandKV, kvTags)
  214. } else {
  215. //u.Debug(k, v, "---------")
  216. as.AddKey(k, v)
  217. }
  218. }
  219. //处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
  220. table.sortKVArr(as)
  221. //
  222. if len(table.WinnerOrder) > 0 || !table.BPackage {
  223. winnerOrder := []map[string]interface{}{}
  224. maxSort := 0
  225. //调整顺序
  226. for i := 0; i < 2; i++ {
  227. for _, v := range table.WinnerOrder {
  228. sortstr, _ := v["sortstr"].(string)
  229. if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") {
  230. continue
  231. }
  232. sort, _ := v["sort"].(int)
  233. if i == 0 {
  234. if maxSort == 0 || sort > maxSort {
  235. maxSort = sort
  236. }
  237. } else {
  238. maxSort++
  239. v["sort"] = maxSort
  240. }
  241. winnerOrder = append(winnerOrder, v)
  242. }
  243. if len(winnerOrder) == len(table.WinnerOrder) {
  244. break
  245. }
  246. }
  247. table.WinnerOrder = winnerOrder
  248. winnerOrder = []map[string]interface{}{}
  249. L: //遍历每个td,查询中标人
  250. for _, tr := range table.TRs {
  251. for _, td := range tr.TDs {
  252. winnerOrder = winnerOrderEntity.Find(td.Val, true, 3)
  253. if len(winnerOrder) > 0 {
  254. break L
  255. }
  256. }
  257. }
  258. if len(table.WinnerOrder) > 0 {
  259. //中标候选人合并
  260. winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
  261. } else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
  262. if len(winnerOrder) > 1 {
  263. table.WinnerOrder = winnerOrder
  264. }
  265. }
  266. }
  267. //对中标候选人进行排序
  268. winnerOrderEntity.Order(table.WinnerOrder)
  269. //该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面
  270. if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 {
  271. if table.BlockPackage.Map != nil {
  272. onePkgKey := table.BlockPackage.Keys[0]
  273. onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
  274. if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 {
  275. onePkg.WinnerOrder = table.WinnerOrder
  276. table.BlockPackage.AddKey(onePkgKey, onePkg)
  277. }
  278. }
  279. }
  280. }
// sortKVArr handles the entries of table.SortKV whose value is an array
// (collected into as by KVFilter). For each key it either appends a ready-made
// candidate list to table.WinnerOrder, or tries to assemble the candidate
// list from parallel []string columns, and then standardizes the key/value
// through CommonDataAnaly into table.StandKV.
func (table *Table) sortKVArr(as *SortMap) {
	// winnertag: the table tag looks like a winner table...
	winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) // table tag
	if !winnertag {
		// ...or, failing that, the block tag does.
		winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) // block tag
	}
	// checkKey records the indexes (into as.Keys) already consumed by the
	// candidate-assembly pass below.
	checkKey := map[int]bool{}
	for kn, k := range as.Keys { // iterate over the keys whose value is an array
		v := as.Map[k]
		if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
			// Ready-made candidate list stored under NullTxtBid.
			if table.WinnerOrder == nil {
				table.WinnerOrder = []map[string]interface{}{}
			}
			table.WinnerOrder = append(table.WinnerOrder, vm...)
		} else {
			// Candidate ranking logic: only while no candidates exist yet and
			// this key was not already consumed.
			if table.WinnerOrder == nil && !checkKey[kn] {
				if vs1, ok := v.([]string); ok {
					// One result map per array element.
					smap := make([]map[string]interface{}, len(vs1))
					for n1, _ := range vs1 {
						smap[n1] = map[string]interface{}{}
					}
					//hadSort := false
					// Company names and prices are collected separately and
					// merged into smap at the end.
					tmpEntname := make([]string, len(vs1))
					tmpPrice := make([]string, len(vs1))
					// Walk this key and the following ones (k and v are
					// shadowed inside this loop).
					for kn1, k := range as.Keys[kn:] {
						v := as.Map[k]
						// Skip buyer / agency contact columns.
						if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) {
							continue
						}
						// Currently the decision is made on the key of the array data, but some cases do not satisfy this,
						// e.g. 载明内容:[第一中标候选人 第二中标候选人] id:5d00587da5cb26b9b75e367b
						if vs, ok := v.([]string); ok && len(vs) == len(vs1) { // arrays with the same number of values
							res, _, _, _, repl := CheckCommon(k, "bidorder")
							kv := ""
							if !res {
								// Not recognized by CheckCommon: try the standardized tag of the cleaned key.
								kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""))
								if kt.Len() > 0 {
									kv = kt[0].Value
								}
							}
							//qutil.Debug(k, res, repl, kv, "--", vs)
							if !res && kv == "" { // key not recognized: inspect the array values instead
								checkKey[kn+kn1] = true
								if winnertag { // winner table: derive the candidates from the array values
									for vsk, vsv := range vs {
										if NullTdReg.MatchString(vsv) { // first check whether the value is a ranking
											//hadSort = true
											smap[vsk]["sortstr"] = vsv
											smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
										} else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" { // then whether the value is a candidate
											entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
											if entname != "" {
												tmpEntname[vsk] = entname
											}
										} else { // a value that is neither a ranking nor a candidate: treat the array as invalid and stop scanning it
											break
										}
									}
								}
							}
							if res || kv != "" { // key recognized: keep consuming the following keys
								checkKey[kn+kn1] = true
							SORT:
								if repl == "sort" {
									//hadSort = true
									for vsk, vsv := range vs {
										smap[vsk]["sortstr"] = vsv
										smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
									}
								} else if repl == "entname" || kv == "中标单位" {
									for vsk, vsv := range vs {
										if winnerReg6.MatchString(vsv) { // e.g. k:中标候选人 v:["第一名","第二名"] — the values are rankings, so redo this column as "sort"
											repl = "sort"
											goto SORT
										}
										// if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 {
										// break
										// }
										// entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
										// if entname != "" {
										// smap[vsk]["entname"] = entname
										//
										if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 { // excludes e.g. 单位:["台","个","套"]
											break
										}
										entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
										if entname != "" {
											tmpEntname[vsk] = entname
										}
									}
								} else if kv == "中标金额" {
									for vsk, vsv := range vs {
										// Filter the price, e.g. 2348273.432元(万元)-->2348273.432
										//tmp1, _ := smap[vsk]["price"].(string)
										tmp1 := tmpPrice[vsk]
										p1num := numberReg2.FindString(tmp1)
										p2num := numberReg2.FindString(vsv)
										p1 := qutil.Float64All(p1num)
										p2 := qutil.Float64All(p2num)
										// Keep the new price only when its number is larger than the one already stored.
										if p2 > p1 {
											//smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
											price := winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
											if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
												tmpPrice[vsk] = pricestr
											}
										}
									}
								}
							}
						} else {
							//break
						}
					}
					newSmap := []map[string]interface{}{}
					//qutil.Debug("smap=======", smap)
					//qutil.Debug("tmpEntname--", len(tmpEntname), tmpEntname)
					//qutil.Debug("tmpPrice--", len(tmpPrice), tmpPrice)
					for n, smap_v := range smap {
						//if hadSort { // with a ranking, then add entname and price
						// A price is only attached together with a company name.
						if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" {
							smap_v["entname"] = tmpEntname[n]
							if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" {
								smap_v["price"] = tmpPrice[n]
							}
						}
						//} else if len(tmpEntname) > 0 {
						//fmt.Println("table winnerorder only has entname", tmpEntname)
						//}
						// Entries holding only the ranking information (sort and sortstr) are dropped.
						if len(smap_v) > 2 {
							newSmap = append(newSmap, smap_v)
						}
					}
					if len(newSmap) > 0 {
						table.WinnerOrder = newSmap
					}
				}
			}
			// Standardize the key/value itself (CommonDataAnaly uses the first element of a []string value).
			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v)
			if tag != "" && table.Tag == "" {
				table.Tag = tag
			}
			for kk, vv := range kvTags {
				table.StandKV[kk] = append(table.StandKV[kk], vv...)
				// else if k2 == "中标金额" {
				// if qutil.Float64All(v1) > qutil.Float64All(table.StandKV[k2]) {
				// table.StandKV[k2] = v1
				// }
				// }
			}
		}
	}
}
  434. //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
  435. func (table *Table) analyTdKV() {
  436. //遍历每一行
  437. for _, tr := range table.TRs {
  438. for _, td := range tr.TDs {
  439. //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map)
  440. bc := false
  441. if !td.BH {
  442. //表头是否是无用内容
  443. if td.HeadTd != nil {
  444. bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable")
  445. }
  446. }
  447. if !bc {
  448. //td元素有内嵌kv,遍历放入table的Kv中
  449. if len(td.SortKV.Keys) > 0 {
  450. for _, k3 := range td.SortKV.Keys {
  451. _val := td.SortKV.Map[k3]
  452. //thisFlag := false
  453. if td.HeadTd != nil && len([]rune(k3)) < 4 {
  454. k3 = td.HeadTd.Val + k3
  455. }
  456. if table.SortKV.Map[k3] == nil && _val != nil && _val != "" {
  457. //u.Debug(k3, _val)
  458. //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) {
  459. table.SortKV.AddKey(k3, _val)
  460. }
  461. }
  462. }
  463. }
  464. //td有子表格的处理
  465. //u.Debug(td.BH, td.Val, td.SonTableResult)
  466. if td.SonTableResult != nil {
  467. //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
  468. for k3, v3 := range td.SonTableResult.KvTags {
  469. table.StandKV[k3] = append(table.StandKV[k3], v3...)
  470. }
  471. //中标候选人排序
  472. if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
  473. table.WinnerOrder = td.SonTableResult.WinnerOrder
  474. } else {
  475. winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder)
  476. }
  477. }
  478. }
  479. }
  480. }
  481. //表格结果合并到父表格集中
  482. func (table *Table) MergerToTableresult() {
  483. //对多包表格的多包值的合并处理
  484. if table.BPackage {
  485. table.TableResult.IsMultiPackage = true
  486. for _, v2 := range table.BlockPackage.Keys {
  487. package1 := table.TableResult.PackageMap.Map[v2]
  488. if package1 == nil {
  489. table.TableResult.PackageMap.AddKey(v2, table.BlockPackage.Map[v2])
  490. if vvv, ok := table.BlockPackage.Map[v2].(*u.BlockPackage); ok {
  491. if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 {
  492. MergeKvTags(table.TableResult.KvTags, vvv.TableKV.KvTags)
  493. }
  494. }
  495. } else {
  496. bp := package1.(*u.BlockPackage)
  497. if bp.TableKV == nil {
  498. bp.TableKV = u.NewJobKv()
  499. }
  500. v1 := table.BlockPackage.Map[v2].(*u.BlockPackage)
  501. if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 {
  502. for k2, v2 := range v1.TableKV.KvTags {
  503. if k2 == "" {
  504. continue
  505. }
  506. isExists := false
  507. for _, v2v := range v2 {
  508. if v2v.Value == "" {
  509. continue
  510. }
  511. for _, v2vv := range bp.TableKV.KvTags[k2] {
  512. if v2v.Value == v2vv.Value {
  513. isExists = true
  514. break
  515. }
  516. }
  517. if !isExists {
  518. bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
  519. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  520. }
  521. }
  522. }
  523. }
  524. if len(v1.WinnerOrder) > 0 && len(bp.WinnerOrder) == 0 {
  525. bp.WinnerOrder = v1.WinnerOrder
  526. }
  527. //table.TableResult.PackageMap.AddKey(k, v)
  528. }
  529. }
  530. // str := ""
  531. // for _, k := range table.TableResult.PackageMap.Keys {
  532. // v := table.TableResult.PackageMap.Map[k].(*u.BlockPackage)
  533. // str += fmt.Sprintf("包号:%s,中标人:%s,中标价:%s,预算:%s,文本:%s,排名:%v ---\t", v.Index, v.TableKV["中标单位"]+v.ColonKV["中标单位"], v.TableKV["中标金额"]+v.ColonKV["中标金额"], v.TableKV["预算"]+v.ColonKV["预算"], v.Text, v.WinnerOrder)
  534. // }
  535. // u.Debug(table, table.TableResult, str)
  536. }
  537. //遍历标准key到tableresult.sortkv中
  538. for _, v := range table.StandKV {
  539. for _, vv := range v {
  540. vv.Value = strings.Replace(vv.Value, "__", "", -1)
  541. }
  542. }
  543. MergeKvTags(table.TableResult.KvTags, table.StandKV)
  544. //表格的块标签
  545. if table.TableResult.BlockTag == "" && table.Tag != "" {
  546. table.TableResult.BlockTag = table.Tag
  547. }
  548. //中标候选人(多个table,现在默认取第一个table的信息,考虑需不需要多个table分析合并数据?)
  549. if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 {
  550. table.TableResult.WinnerOrder = table.WinnerOrder
  551. }
  552. //增加brand 并列table
  553. if len(table.BrandData) > 0 {
  554. for _, v := range table.BrandData {
  555. if len(v) > 0 {
  556. table.TableResult.BrandData = append(table.TableResult.BrandData, v)
  557. }
  558. }
  559. }
  560. if table.BlockPackage != nil && len(table.BlockPackage.Keys) > 0 {
  561. for _, v := range table.BlockPackage.Keys {
  562. if table.BlockPackage.Map[v] != nil {
  563. if vvv, ok := table.BlockPackage.Map[v].((*u.BlockPackage)); ok {
  564. if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 {
  565. for kk, vv := range vvv.TableKV.KvTags {
  566. if kk == "" {
  567. continue
  568. }
  569. if len(table.TableResult.KvTags[kk]) == 0 {
  570. table.TableResult.KvTags[kk] = vv
  571. }
  572. }
  573. }
  574. }
  575. }
  576. }
  577. }
  578. }
  579. /**
  580. 解析表格入口
  581. 返回:汇总表格对象
  582. **/
  583. func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock) (tabres *TableResult) {
  584. defer qutil.Catch()
  585. //u.Debug(con)
  586. if itype == 1 {
  587. //修复表格
  588. con = RepairCon(con)
  589. }
  590. //生成tableresult对象
  591. tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
  592. //可以有多个table
  593. //for _, table := range tabs {
  594. //隐藏表格跳过
  595. if IsHide(tabs) {
  596. return
  597. }
  598. tabres.GoqueryTabs = tabs
  599. //}
  600. //解析表格集
  601. tabres.Analy()
  602. return
  603. }
  604. //开始解析表格集
  605. func (ts *TableResult) Analy() {
  606. tabs := []*Table{}
  607. contactFormat := &u.ContactFormat{
  608. IndexMap: map[int]string{},
  609. MatchMap: map[string]map[string]bool{},
  610. }
  611. //for _, table := range ts.GoqueryTabs {
  612. tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
  613. //核心模块
  614. tsw := tn.Analy(contactFormat)
  615. for _, tab := range tsw {
  616. if len(tab.TRs) > 0 {
  617. tabs = append(tabs, tab)
  618. }
  619. //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
  620. }
  621. //tn.SonTables = append(tn.SonTables, tn)
  622. //}
  623. //统一合并,考虑统一多表格是多包的情况---新增
  624. if len(tabs) > 1 {
  625. pns := map[string]string{}
  626. pnarr := []string{}
  627. for _, table := range tabs {
  628. if len(table.StandKV["项目名称"]) == 0 {
  629. continue
  630. }
  631. pn := table.StandKV["项目名称"][0]
  632. if pn != nil && pn.Value != "" && TitleReg.MatchString(pn.Value) {
  633. pnarr = append(pnarr, pn.Value)
  634. matchres := TitleReg.FindAllStringSubmatch(pn.Value, -1)
  635. if len(matchres) == 1 && len(matchres[0]) > 0 {
  636. v1 := u.PackageNumberConvert(matchres[0][0])
  637. pns[v1] = matchres[0][0]
  638. bp := &u.BlockPackage{}
  639. bp.Index = v1
  640. bp.Origin = matchres[0][0]
  641. bp.TableKV = u.NewJobKv()
  642. for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
  643. if len(table.StandKV[k]) > 0 {
  644. bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
  645. }
  646. }
  647. bp.WinnerOrder = table.WinnerOrder
  648. if table.BlockPackage.Map[v1] == nil {
  649. table.BPackage = true
  650. table.BlockPackage.AddKey(v1, bp)
  651. }
  652. }
  653. }
  654. }
  655. if len(tabs) == len(pns) {
  656. //多个表格,每个表格都是一个分包 http://www.cxzwfw.gov.cn/info/1009/6963.htm
  657. //项目名称、项目编号、采购单位、招标机构、预算
  658. pname := projectnameReg.ReplaceAllString(pnarr[0], "")
  659. btrue := true
  660. for _, pn := range pnarr[1:] {
  661. pn = projectnameReg.ReplaceAllString(pn, "")
  662. //u.Debug(pn, pname)
  663. if pn != pname {
  664. //项目名称不一致
  665. btrue = false
  666. break
  667. }
  668. }
  669. if btrue {
  670. ts.KvTags["项目名称"] = append(ts.KvTags["项目名称"], &u.Tag{Key: "项目名称", Value: pname, Weight: 100})
  671. for _, table := range tabs {
  672. table.BPackage = true
  673. //预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态
  674. if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 1 {
  675. bp := table.BlockPackage.Map[table.BlockPackage.Keys[0]].(*u.BlockPackage)
  676. if table.TableResult.WinnerOrder != nil {
  677. bp.WinnerOrder = table.WinnerOrder
  678. }
  679. if bp != nil && table.StandKV != nil {
  680. if bp.TableKV == nil {
  681. bp.TableKV = u.NewJobKv()
  682. }
  683. for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
  684. if len(table.StandKV[k]) > 0 {
  685. bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
  686. }
  687. if nk < 4 {
  688. delete(table.StandKV, k)
  689. }
  690. }
  691. }
  692. }
  693. }
  694. }
  695. }
  696. }
  697. for _, table := range tabs {
  698. table.MergerToTableresult()
  699. // for k, v := range table.TableResult.SortKV.Map {
  700. // qutil.Debug(k, "=====", v)
  701. // }
  702. MergeKvTags(ts.KvTags, table.TableResult.KvTags)
  703. }
  704. }
  705. //解析表格
  706. func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
  707. //查找表体中的tr对象
  708. trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
  709. if trs.Size() == 0 {
  710. trs = table.Goquery.ChildrenFiltered("tr")
  711. }
  712. //遍历节点,初始化table 结构
  713. table.createTabe(trs)
  714. //重置行列
  715. table.ComputeRowColSpan()
  716. //对table结构体进行整体解析处理
  717. ts := table.AnalyTables(contactFormat)
  718. return ts
  719. }
  720. //遍历节点,初始化table 结构体
  721. func (table *Table) createTabe(trs *goquery.Selection) {
  722. trs.Each(func(n int, sel *goquery.Selection) {
  723. //隐藏行不处理
  724. if IsHide(sel) {
  725. return
  726. }
  727. //遍历每行的td
  728. tds := sel.ChildrenFiltered("td,th")
  729. TR := NewTR(table)
  730. tdTextIsNull := false
  731. var empty int
  732. tds.Each(func(m int, selm *goquery.Selection) {
  733. //对隐藏列不处理!!!
  734. if IsHide(selm) {
  735. return
  736. }
  737. //进入每一个单元格
  738. td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
  739. //num++
  740. TR.AddTD(td)
  741. if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
  742. empty++
  743. if tds.Size() == empty {
  744. tdTextIsNull = true
  745. }
  746. }
  747. })
  748. //向table添加每行不为空的tr
  749. if !tdTextIsNull {
  750. table.AddTR(TR)
  751. }
  752. })
  753. }
  754. //对table进行整体解析处理
  755. func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
  756. ts := tn.tableSubDemolitionTable() //分包,拆表
  757. for n, table := range ts {
  758. //处理每个table
  759. if len(table.TRs) > 0 {
  760. //删除尾部空白行
  761. table.deleteTrimTr()
  762. //table.Print()
  763. //校对表格
  764. table.Adjust()
  765. //查找表格的标签,table.Tag字段
  766. table.FindTag()
  767. //log.Println(table.TableResult.Id, table.Html)
  768. //分割表格
  769. table.bSplit(n, ts)
  770. //对没有表头表格的处理
  771. if table.Tag != "" {
  772. _, _, b := CheckMultiPackage(table.Tag, "")
  773. if b {
  774. table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -100})
  775. }
  776. }
  777. table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
  778. //开始查找kv,核心模块,table.SortKV
  779. table.FindKV()
  780. //table中抽取品牌,table.BrandData
  781. if u.IsBrandGoods {
  782. table.analyBrand()
  783. }
  784. //判断是否是多包,并处理分包的//遍历td分块
  785. table.CheckMultiPackageByTable()
  786. res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
  787. if !res {
  788. //过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
  789. table.KVFilter()
  790. }
  791. //MergeKvTags(table.TableResult.KvTags, table.StandKV)
  792. }
  793. }
  794. return ts
  795. }
  796. //分包,拆表
  797. func (table *Table) tableSubDemolitionTable() []*Table {
  798. tm := []map[string]interface{}{}
  799. tmk := map[string]bool{}
  800. tmn := map[int]map[string]interface{}{}
  801. for rownum, tr := range table.TRs {
  802. if len(tr.TDs) == 1 && table.ColNum > 1 { //tr里面有一列,table里面有多列
  803. td := tr.TDs[0] //取每行第一个td
  804. //td开始列等于0 && td结束列+1等于table列数 && td长度大于1小于50
  805. if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 1 && len([]rune(td.Val)) < 50 {
  806. con, m1, b := CheckMultiPackage(td.Val, "") //判断分包
  807. if b {
  808. for k, _ := range m1 {
  809. numstr := u.PackageNumberConvert(k)
  810. m2 := map[string]interface{}{
  811. "tag": con,
  812. //"num": numstr,
  813. //"numtxt": v[0],
  814. "startrow": rownum,
  815. }
  816. tmk[numstr] = true
  817. tmn[rownum] = m2
  818. tm = append(tm, m2)
  819. break
  820. }
  821. }
  822. }
  823. }
  824. }
  825. //拆表
  826. ts := []*Table{}
  827. if len(tmk) > 1 && len(tmk) == len(tm) {
  828. var tab1 *Table
  829. for rownum, tr := range table.TRs {
  830. if tab1 == nil {
  831. tab1 = NewTable("", table.TableResult, table.Goquery)
  832. tab1.BSplit = true
  833. if tmn[rownum] != nil {
  834. tab1.StandKV["项目名称"] = append(tab1.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: tmn[rownum]["tag"].(string), Weight: -100})
  835. }
  836. ts = append(ts, tab1)
  837. }
  838. if tmn[rownum] != nil {
  839. tab1.Tag = tmn[rownum]["tag"].(string)
  840. } else {
  841. tab1.AddTR(tr)
  842. }
  843. if tmn[rownum+1] != nil {
  844. tab1 = nil
  845. }
  846. }
  847. } else {
  848. ts = append(ts, table)
  849. }
  850. return ts
  851. }
  852. //分割表格
  853. func (table *Table) bSplit(n int, ts []*Table) {
  854. if table.BSplit {
  855. if !table.BHeader && n > 0 {
  856. for i := n - 1; i > -1; i-- {
  857. if ts[i].BHeader {
  858. if ts[i].BFirstRow {
  859. //取第一行插入到
  860. table.InsertTR(ts[i].TRs[0])
  861. table.Adjust()
  862. }
  863. break
  864. }
  865. }
  866. }
  867. }
  868. }
  869. //删除尾部空白行
  870. func (table *Table) deleteTrimTr() {
  871. for len(table.TRs) > 0 {
  872. npos := len(table.TRs)
  873. tailTR := table.TRs[npos-1] //最后一个tr,取最后一行
  874. bspace := true
  875. for _, v := range tailTR.TDs {
  876. if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 {
  877. bspace = false
  878. break
  879. }
  880. }
  881. //删除尾部空行,是空行的话就删除
  882. if bspace {
  883. table.TRs = table.TRs[:npos-1]
  884. } else {
  885. break
  886. }
  887. }
  888. }
  889. //校对表格
  890. func (table *Table) Adjust() {
  891. //计算行列起止位置,跨行跨列处理
  892. table.ComputeRowColSpan()
  893. // for k1, tr := range table.TRs {
  894. // for k2, td := range tr.TDs {
  895. // qutil.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
  896. // }
  897. // }
  898. //大概计算每个起止行列的概率
  899. table.GetKeyRation()
  900. /*
  901. for k, v := range table.StartAndEndRation {
  902. for k1, v1 := range v.Poss {
  903. bs, _ := json.Marshal(v1)
  904. str := ""
  905. for _, td := range v.Tdmap[v1] {
  906. str += "__" + td.Val + fmt.Sprintf("%d_%d_%d_%d", td.StartRow, td.EndRow, td.StartCol, td.EndCol)
  907. }
  908. qutil.Debug(k, k1, string(bs), v.Rationmap[v1], str)
  909. }
  910. }
  911. */
  912. //u.Debug("tdnum:", num, table.RowNum, table.ColNum)
  913. //是否是规则的表格,单元各个数=行数*列数
  914. table.Brule = table.TDNum == table.RowNum*table.ColNum
  915. count := 0
  916. for _, trs := range table.TRs {
  917. for _, td := range trs.TDs {
  918. if td.BH {
  919. count++
  920. }
  921. }
  922. }
  923. if float32(count)/float32(table.TDNum) < 0.85 {
  924. //精确计算起止行列是表头的概率
  925. table.ComputeRowColIsKeyRation()
  926. bhead := false
  927. L:
  928. for i, tr := range table.TRs {
  929. for _, td := range tr.TDs {
  930. if td.BH {
  931. //qutil.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1)
  932. if i == len(table.TRs)-1 && len(table.TRs[len(table.TRs)-1].TDs) == 2 {
  933. res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
  934. if res {
  935. //删除此行
  936. table.TRs = table.TRs[:len(table.TRs)-1]
  937. table.Adjust()
  938. return
  939. }
  940. }
  941. bhead = true
  942. break L
  943. }
  944. }
  945. }
  946. table.BHeader = bhead
  947. }
  948. }
  949. //计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数
  950. func (table *Table) ComputeRowColSpan() {
  951. n := 0 //td总个数
  952. mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值
  953. for k, v := range table.TRs {
  954. n += len(v.TDs) //每行的td总数相加
  955. nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
  956. ball := true
  957. rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
  958. for _, v1 := range v.TDs {
  959. if v1.Rowspan != rowspans {
  960. ball = false
  961. break
  962. }
  963. }
  964. for _, v1 := range v.TDs {
  965. if ball {
  966. v1.Rowspan = 1
  967. }
  968. mc := mapRC[k]
  969. for {
  970. if mc != nil && mc[nk] > 0 {
  971. nk += mc[nk]
  972. } else {
  973. break
  974. }
  975. }
  976. v1.StartCol = nk
  977. nk += v1.Colspan - 1
  978. v1.EndCol = nk
  979. if nk >= table.ColNum {
  980. table.ColNum = nk + 1
  981. }
  982. nk++
  983. v1.StartRow = k
  984. v1.EndRow = k + v1.Rowspan - 1
  985. ck := fmtkey("c", v1.StartCol, v1.EndCol)
  986. tdcs := table.StartAndEndRation[ck]
  987. if tdcs == nil {
  988. tdcs = NewTDRationScope(ck)
  989. table.StartAndEndRation[ck] = tdcs
  990. table.StartAndEndRationKSort.AddKey(ck, 1)
  991. }
  992. tdcs.Addtd(v1)
  993. rk := fmtkey("r", v1.StartRow, v1.EndRow)
  994. tdrs := table.StartAndEndRation[rk]
  995. if tdrs == nil {
  996. tdrs = NewTDRationScope(rk)
  997. table.StartAndEndRation[rk] = tdrs
  998. table.StartAndEndRationKSort.AddKey(rk, 1)
  999. }
  1000. tdrs.Addtd(v1)
  1001. if v1.Rowspan > 1 {
  1002. for i := 1; i < v1.Rowspan; i++ {
  1003. r := k + i
  1004. if r < len(table.TRs) {
  1005. mc := mapRC[r]
  1006. if mc == nil {
  1007. mc = map[int]int{}
  1008. }
  1009. mc[v1.StartCol] = v1.Colspan
  1010. mapRC[r] = mc
  1011. }
  1012. }
  1013. }
  1014. }
  1015. }
  1016. table.TDNum = n //td总个数
  1017. table.RowNum = len(table.TRs) //tr总行数
  1018. }
  // fmtkey builds a scope key of the form "<t>_<start>_<end>", e.g. "c_0_1".
  func fmtkey(t string, start, end int) string {
  	suffix := fmt.Sprintf("_%d_%d", start, end)
  	return t + suffix
  }
  1022. //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
  1023. func (table *Table) FindTag() {
  1024. //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
  1025. if table.Tag != "" {
  1026. return
  1027. }
  1028. t1, _ := goquery.OuterHtml(table.Goquery)
  1029. //t1, _ := table.Goquery.OuterHtml()
  1030. html := table.Html
  1031. pos := strings.Index(html, t1)
  1032. if pos <= 0 {
  1033. doc, _ := goquery.NewDocumentFromReader(strings.NewReader(table.Html))
  1034. html, _ = doc.Html()
  1035. pos = strings.Index(html, t1)
  1036. }
  1037. //u.Debug("--------", t1, "====\n\n\n\n=====", html)
  1038. if pos > 0 {
  1039. tcon := html[:pos]
  1040. tcon = cut.ClearHtml(tcon)
  1041. tcon = ClearTagReg.ReplaceAllString(tcon, "")
  1042. //u.Debug(pos, "-----------", tcon)
  1043. strs := ttagreg.FindStringSubmatch(tcon)
  1044. if len(strs) > 0 {
  1045. table.Tag = strs[0]
  1046. //u.Debug(table.Tag)
  1047. }
  1048. }
  1049. if table.Tag == "" {
  1050. table.Tag = table.TableResult.BlockTag
  1051. }
  1052. //u.Debug(table.Tag)
  1053. }
  1054. //计算r/c_start_end的概率
  1055. func (table *Table) GetKeyRation() {
  1056. for _, vn := range table.StartAndEndRationKSort.Keys {
  1057. v := table.StartAndEndRation[vn]
  1058. for _, v1 := range v.Poss {
  1059. count := 0
  1060. n := 0
  1061. for _, td := range v.Tdmap[v1] {
  1062. n++
  1063. if td.BH {
  1064. count++
  1065. }
  1066. }
  1067. v.Rationmap[v1] = float32(count) / float32(n)
  1068. }
  1069. }
  1070. }
  1071. //计算行列是表头的概率调用GetKeyRation
  1072. func (table *Table) ComputeRowColIsKeyRation() {
  1073. //增加对跨行校正限止
  1074. // u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum)
  1075. bkeyfirstrow := false
  1076. bkeyfirstcol := false
  1077. if table.Brule { //不存在跨行跨列的情况,规则表格
  1078. checkCompute := map[string]bool{}
  1079. for k, tr := range table.TRs {
  1080. rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow)
  1081. if k == 0 { //第1行的概率
  1082. ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol)
  1083. //u.Debug(table.BFirstRow, "--", table.StartAndEndRation[rk], table.StartAndEndRation[ck])
  1084. ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
  1085. ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0])
  1086. if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { //第一行为key
  1087. bkeyfirstrow = true
  1088. ball := true
  1089. for _, td := range tr.TDs {
  1090. if MoneyReg.MatchString(td.Val) {
  1091. bkeyfirstrow = false
  1092. ball = false
  1093. td.BH = false
  1094. break
  1095. }
  1096. }
  1097. for _, td := range tr.TDs {
  1098. if ball {
  1099. //td.BH = true
  1100. td.KeyDirect = 1
  1101. td.KVDirect = 2
  1102. }
  1103. }
  1104. } else if ration2 > 0.55 { //第1列
  1105. bkeyfirstcol = true
  1106. if !checkCompute[ck] {
  1107. checkCompute[ck] = true
  1108. //重置第1列
  1109. for _, tr1 := range table.TRs {
  1110. for _, td1 := range tr1.TDs {
  1111. if td1.StartCol == 0 {
  1112. if !MoneyReg.MatchString(td1.Val) {
  1113. //td1.BH = true
  1114. td1.KeyDirect = 2
  1115. td1.KVDirect = 1
  1116. }
  1117. }
  1118. }
  1119. }
  1120. }
  1121. }
  1122. if !bkeyfirstrow && !bkeyfirstcol {
  1123. if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 {
  1124. bkeyfirstrow = true
  1125. for _, td := range tr.TDs {
  1126. if !MoneyReg.MatchString(td.Val) {
  1127. //td.BH = true
  1128. td.KeyDirect = 1
  1129. td.KVDirect = 2
  1130. }
  1131. }
  1132. } else if tr.Table.ColNum > 1 && ration2 > 0.5 {
  1133. bkeyfirstcol = true
  1134. if !checkCompute[ck] {
  1135. checkCompute[ck] = true
  1136. //重置第1列
  1137. for _, tr1 := range table.TRs {
  1138. for _, td1 := range tr1.TDs {
  1139. if td1.StartCol == 0 {
  1140. if !MoneyReg.MatchString(td1.Val) {
  1141. td1.BH = true
  1142. td1.KeyDirect = 2
  1143. td1.KVDirect = 1
  1144. }
  1145. }
  1146. }
  1147. }
  1148. }
  1149. }
  1150. }
  1151. } else {
  1152. if bkeyfirstrow {
  1153. //第一列的概率
  1154. ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
  1155. if k == 1 || ration1 < checkval {
  1156. for _, td := range tr.TDs {
  1157. if !td.MustBH {
  1158. td.BH = false
  1159. td.KeyDirect = 0
  1160. td.KVDirect = 0
  1161. }
  1162. }
  1163. } //else {for _, td := range tr.TDs {}}
  1164. } else {
  1165. //列在起作用
  1166. if bkeyfirstcol {
  1167. for _, td := range tr.TDs {
  1168. ck := fmtkey("c", td.StartCol, td.EndCol)
  1169. ration1, _ := table.StartAndEndRation[ck].GetTDRation(td)
  1170. if !checkCompute[ck] {
  1171. checkCompute[ck] = true
  1172. if ration1 >= checkval && td.ColPos != 1 {
  1173. for _, tr1 := range table.TRs {
  1174. for _, td1 := range tr1.TDs {
  1175. if td1.StartCol == td.StartCol {
  1176. if !MoneyReg.MatchString(td1.Val) {
  1177. td1.BH = true
  1178. td1.KeyDirect = 2
  1179. td1.KVDirect = 1
  1180. }
  1181. }
  1182. }
  1183. }
  1184. } else {
  1185. for _, tr1 := range table.TRs[1:] {
  1186. for _, td1 := range tr1.TDs[1:] {
  1187. if td1.StartCol == td.StartCol && !td1.MustBH {
  1188. td1.BH = false
  1189. td1.KeyDirect = 0
  1190. td1.KVDirect = 0
  1191. }
  1192. }
  1193. }
  1194. }
  1195. }
  1196. }
  1197. }
  1198. }
  1199. }
  1200. }
  1201. }
  1202. //qutil.Debug("table.Brule", table.Brule, !bkeyfirstcol && !bkeyfirstrow)
  1203. if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) {
  1204. //断行问题,虽然同列或同行,但中间被跨行截断,表格方向调整
  1205. for _, k := range table.StartAndEndRationKSort.Keys {
  1206. v := table.StartAndEndRation[k]
  1207. //横向判断,要判断最多的方向,否则会出现不定的情况(map遍历问题)
  1208. k1 := k[:1]
  1209. for _, v2 := range v.Poss {
  1210. lentds := len(v.Tdmap[v2])
  1211. if v.Rationmap[v2] > checkval {
  1212. for _, td := range v.Tdmap[v2] {
  1213. if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) {
  1214. if k1 == "r" {
  1215. ck := fmtkey("c", td.StartCol, td.EndCol)
  1216. rt := table.StartAndEndRation[ck]
  1217. //clen := 0
  1218. var fv float32
  1219. var tdn []*TD
  1220. if rt != nil {
  1221. fv, tdn = rt.GetTDRation(td)
  1222. //clen = len(tdn)
  1223. }
  1224. if lentds > 1 {
  1225. if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
  1226. td.KeyDirect = 1
  1227. td.KVDirect = 2
  1228. //td.BH = true
  1229. }
  1230. }
  1231. } else {
  1232. ck := fmtkey("r", td.StartRow, td.EndRow)
  1233. rt := table.StartAndEndRation[ck]
  1234. var fv float32
  1235. var tdn []*TD
  1236. //clen := 0
  1237. if rt != nil {
  1238. fv, tdn = rt.GetTDRation(td)
  1239. //clen = len(tdn)
  1240. }
  1241. if lentds > 1 {
  1242. if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
  1243. td.KeyDirect = 2
  1244. td.KVDirect = 1
  1245. td.BH = true
  1246. }
  1247. }
  1248. }
  1249. } else {
  1250. break
  1251. }
  1252. }
  1253. } else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 {
  1254. for _, td := range v.Tdmap[v2] {
  1255. // u.Debug(td.Val, "-----", td.BH)
  1256. if td.KeyDirect == 0 && td.BH && !td.MustBH {
  1257. if k1 == "r" {
  1258. ck := fmtkey("c", td.StartCol, td.EndCol)
  1259. rt := table.StartAndEndRation[ck]
  1260. clen := 0
  1261. var fv float32
  1262. var tdn []*TD
  1263. if rt != nil {
  1264. fv, tdn = rt.GetTDRation(td)
  1265. clen = len(tdn)
  1266. }
  1267. if lentds >= clen && lentds > 1 {
  1268. if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
  1269. td.BH = false
  1270. }
  1271. }
  1272. } else {
  1273. ck := fmtkey("r", td.StartRow, td.EndRow)
  1274. rt := table.StartAndEndRation[ck]
  1275. var fv float32
  1276. var tdn []*TD
  1277. clen := 0
  1278. if rt != nil {
  1279. fv, tdn = rt.GetTDRation(td)
  1280. clen = len(tdn)
  1281. }
  1282. if lentds >= clen && lentds > 1 {
  1283. if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
  1284. td.BH = false
  1285. }
  1286. }
  1287. }
  1288. } else {
  1289. break
  1290. }
  1291. }
  1292. }
  1293. }
  1294. }
  1295. }
  1296. table.GetKeyRation()
  1297. if len(table.TRs) > 0 && len(table.TRs[0].TDs) > 0 {
  1298. t0 := table.TRs[0].TDs[0]
  1299. key := fmtkey("r", t0.StartRow, t0.EndRow)
  1300. r, t := table.StartAndEndRation[key].GetTDRation(t0)
  1301. if r > 0.9 && len(t) > 1 {
  1302. table.BFirstRow = true
  1303. }
  1304. for k, tr := range table.TRs {
  1305. if len(tr.TDs) == 1 && tr.TDs[0].StartCol == 0 && tr.TDs[0].EndCol+1 == table.ColNum {
  1306. tr.TDs[0].BH = false
  1307. tr.TDs[0].KVDirect = 0
  1308. sv := FindKv(tr.TDs[0].Val, "", 2)
  1309. _, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2)
  1310. for k, v := range resm {
  1311. sv.AddKey(k, v)
  1312. }
  1313. if len(sv.Keys) > 0 {
  1314. for _, v1 := range sv.Keys {
  1315. if tr.TDs[0].SortKV.Map[v1] == nil {
  1316. table.SortKV.AddKey(v1, sv.Map[v1])
  1317. }
  1318. }
  1319. } else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 {
  1320. table.Tag = tr.TDs[0].Val
  1321. }
  1322. // subVal := tdval_reg.FindAllStringSubmatch(tr.TDs[0].Val, -1)
  1323. // //u.Debug(tr.TDs[0].Val, subVal)
  1324. // if len(subVal) > 0 {
  1325. // for _, subv1 := range subVal {
  1326. // if len(subv1) == 3 {
  1327. // table.SortKV.AddKey(subv1[1], subv1[2])
  1328. // }
  1329. // }
  1330. // } else if k == 0 && len(tr.TDs[0].Val) > 11 {
  1331. // table.Tag = tr.TDs[0].Val
  1332. // }
  1333. }
  1334. // for _, td := range tr.TDs {
  1335. // u.Debug(td.BH, td.Val, "----")
  1336. // }
  1337. }
  1338. }
  1339. }
  1340. //查找表格的kv,调用FindTdVal
  1341. func (table *Table) FindKV() {
  1342. //判断全是key的表格不再查找
  1343. if table.BHeader { //只要一个是key即为true
  1344. direct := If(table.BFirstRow, 2, 1).(int) //kv,2查找方向,向上查找
  1345. vdirect := If(direct == 2, 1, 2).(int)
  1346. //控制跨行表格
  1347. bcon := false
  1348. //增加表格切块判断,只判断切块分包
  1349. //控制中标人排序方向
  1350. bodirect := 0
  1351. //控制中标人排序数值
  1352. sort := 1
  1353. //开始抽取
  1354. for _, tr := range table.TRs {
  1355. bcon = trSingleColumn(tr, bcon, table) //tr单列,是否丢弃内容
  1356. if bcon {
  1357. continue
  1358. }
  1359. if tr.TDs[0].StartRow > 0 {
  1360. numbh := 0
  1361. for _, td := range tr.TDs {
  1362. if td.BH {
  1363. numbh++
  1364. }
  1365. }
  1366. if numbh > 0 && numbh <= len(tr.TDs)/2 {
  1367. direct, vdirect = 1, 2
  1368. } else {
  1369. direct, vdirect = 2, 1
  1370. }
  1371. }
  1372. for _, td := range tr.TDs {
  1373. /**
  1374. rt := table.StartAndEndRation[fmtkey("r", td.StartCol, td.EndCol)]
  1375. if rt != nil {
  1376. r, t := rt.GetTDRation(td)
  1377. u.Debug(td.BH, td.Val, r, t)
  1378. }
  1379. **/
  1380. // if td.Val == "电视" || td.Val == "电话机" || td.Val == "传真机" || td.Val == "音响" {
  1381. //qutil.Debug("----td.Valtype", td.Valtype, "td.BH:", td.BH, "KVDirect:", td.KVDirect, "Val:", td.Val, "direct:", direct, "vdirect:", vdirect)
  1382. // }
  1383. if !td.BH && td.KVDirect < 3 {
  1384. if !table.FindTdVal(td, direct, vdirect) { //table.FindTdVal()存储了table.SortKV
  1385. if !table.FindTdVal(td, vdirect, direct) {
  1386. //都识别不到时,对第一、二中标候选人的处理
  1387. bo, res := GetBidOrder(td, bodirect, sort)
  1388. if res {
  1389. sort++
  1390. bodirect = bo
  1391. }
  1392. if len(td.SortKV.Map) > 0 {
  1393. for _, tdv := range td.SortKV.Keys {
  1394. if tdv == "" || td.SortKV.Map[tdv] == "" { //value为空或者null不再添加到table.SortKV
  1395. continue
  1396. }
  1397. table.SortKV.AddKey(tdv, td.SortKV.Map[tdv])
  1398. }
  1399. }
  1400. }
  1401. }
  1402. //fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect)
  1403. }
  1404. }
  1405. }
  1406. //qutil.Debug("FindKV", table.SortKV.Map)
  1407. } else if len(table.TRs) > 0 { //没有表头的表格处理,默认纵向吧
  1408. res := initLongitudinalData(table) //拼装纵向数组
  1409. //再拆值,类似http://www.ggzy.hi.gov.cn/cgzbgg/16553.jhtml第二列,有多个值
  1410. nmapkeys := []int{}
  1411. nmap := map[int][]*u.Kv{}
  1412. L:
  1413. for _, r1 := range res {
  1414. for n, r := range r1 {
  1415. if len([]rune(r)) < 60 { // 长度小于60才去分
  1416. //res1, _ := GetKVAll(r, "", nil)
  1417. res1, _ := colonkvEntity.entrance(r, "", nil, 2)
  1418. if res1 != nil {
  1419. nmap[n] = res1
  1420. nmapkeys = append(nmapkeys, n)
  1421. /**
  1422. //截取串
  1423. for _k1, _ := range res1 {
  1424. r = regexp.MustCompile(_k1+".*").ReplaceAllString(r, "")
  1425. }
  1426. r1[n] = r
  1427. res[pos] = r1
  1428. **/
  1429. } else if nmap[n] != nil {
  1430. //放空值
  1431. nmap[n] = append(nmap[n], &u.Kv{})
  1432. }
  1433. } else {
  1434. nmap = nil
  1435. nmapkeys = nil
  1436. break L
  1437. }
  1438. }
  1439. }
  1440. //调整
  1441. if len(nmap) > 0 {
  1442. kmapkeys := []string{}
  1443. kmap := map[string][]string{}
  1444. for _, mk := range nmapkeys { //同是第n列
  1445. for pos, m1 := range nmap[mk] {
  1446. k, v := m1.Key, m1.Value
  1447. kv := kmap[k]
  1448. if kv == nil {
  1449. kv = []string{}
  1450. }
  1451. kv = append(kv, v)
  1452. kmap[k] = kv
  1453. kmapkeys = append(kmapkeys, k)
  1454. for _, k := range kmapkeys {
  1455. arr := kmap[k]
  1456. if len(arr) < pos {
  1457. arr = append(arr, "")
  1458. kmap[k] = arr
  1459. kmapkeys = append(kmapkeys, k)
  1460. }
  1461. }
  1462. }
  1463. }
  1464. if len(kmap) > 0 {
  1465. for _, k := range kmapkeys {
  1466. if len(kmap[k]) == 1 {
  1467. table.SortKV.AddKey(k, kmap[k][0])
  1468. } else if len(kmap[k]) > 1 {
  1469. table.SortKV.AddKey(k, kmap[k])
  1470. }
  1471. }
  1472. }
  1473. }
  1474. //=================
  1475. //解析值放到map中
  1476. for _, arr := range res {
  1477. if len(arr) > 0 {
  1478. v1 := arr[0]
  1479. _, _, _, _, repl := CheckCommon(v1, "con")
  1480. if repl == "ENT" {
  1481. table.SortKV.AddKey("中标单位", arr)
  1482. continue
  1483. } else if repl == "BO" {
  1484. table.SortKV.AddKey("排名", arr)
  1485. continue
  1486. }
  1487. }
  1488. }
  1489. }
  1490. //qutil.Debug("FindKV", table.SortKV.Map)
  1491. }
  1492. //初始化组装纵向数据
  1493. func initLongitudinalData(table *Table) [][]string {
  1494. res := make([][]string, len(table.TRs[0].TDs)) //创建table第一行的列数长度
  1495. for n, _ := range res {
  1496. res[n] = []string{}
  1497. }
  1498. for _, tr := range table.TRs {
  1499. for n, td := range table.TRs[0].TDs { //第一行的所有td
  1500. td1 := table.GetTdByRCNo(tr.TDs[0].StartRow, td.StartCol) //根据行号列号获取td对象
  1501. if td1 != nil {
  1502. res[n] = append(res[n], td1.Val)
  1503. } else {
  1504. res[n] = append(res[n], "")
  1505. }
  1506. }
  1507. }
  1508. return res
  1509. }
  1510. //tr单列,是否丢弃内容
  1511. func trSingleColumn(tr *TR, bcon bool, table *Table) bool {
  1512. if len(tr.TDs) == 1 {
  1513. bcon = false
  1514. td := tr.TDs[0]
  1515. if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 4 && len([]rune(td.Val)) < 50 {
  1516. res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
  1517. if res { //以下内容丢弃
  1518. bcon = true
  1519. }
  1520. }
  1521. }
  1522. return bcon
  1523. }
  1524. //获取中标人顺序
  1525. //direct 0默认 1横向 2纵向
  1526. func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
  1527. if td.Valtype != "BO" {
  1528. return
  1529. }
  1530. if td.Rowspan > 1 {
  1531. for i := 0; i < td.Rowspan; i++ {
  1532. nextcol := 1
  1533. L1:
  1534. for {
  1535. vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.EndCol+nextcol)
  1536. if vtd == nil {
  1537. break L1
  1538. }
  1539. nextcol += vtd.Colspan
  1540. if filter_zbdw_v2.MatchString(vtd.Val) {
  1541. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1542. if arrbo == nil {
  1543. arrbo = []map[string]interface{}{}
  1544. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1545. }
  1546. a1 := arrbo.([]map[string]interface{})
  1547. a1 = append(a1, map[string]interface{}{
  1548. "entname": vtd.Val,
  1549. "sortstr": td.Val,
  1550. "sort": GetBidSort(td.Val, n),
  1551. })
  1552. res = true
  1553. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1554. }
  1555. }
  1556. }
  1557. } else if td.Colspan > 1 {
  1558. for i := 1; i < td.Colspan; i++ {
  1559. nextcol := 0
  1560. L2:
  1561. for {
  1562. vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.StartCol+nextcol)
  1563. if vtd == nil || vtd.Colspan >= td.Colspan {
  1564. break L2
  1565. }
  1566. nextcol += vtd.Colspan
  1567. if filter_zbdw_v2.MatchString(vtd.Val) {
  1568. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1569. if arrbo == nil {
  1570. arrbo = []map[string]interface{}{}
  1571. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1572. }
  1573. a1 := arrbo.([]map[string]interface{})
  1574. a1 = append(a1, map[string]interface{}{
  1575. "entname": vtd.Val,
  1576. "sortstr": td.Val,
  1577. "sort": GetBidSort(td.Val, n),
  1578. })
  1579. res = true
  1580. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1581. }
  1582. }
  1583. }
  1584. } else {
  1585. rtd := td.TR.Table.GetTdByRCNo(td.StartRow, td.EndCol+1)
  1586. btd := td.TR.Table.GetTdByRCNo(td.EndRow+1, td.StartCol)
  1587. //if ((rtd != nil && !rtd.BH && rtd.Valtype == "BO") || direct == 1) && btd != nil && filter_zbdw_v.MatchString(btd.Val) {
  1588. if ((rtd != nil && !rtd.BH) || direct == 1) && btd != nil && filter_zbdw_v2.MatchString(btd.Val) {
  1589. d = 1
  1590. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1591. if arrbo == nil {
  1592. arrbo = []map[string]interface{}{}
  1593. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1594. }
  1595. a1 := arrbo.([]map[string]interface{})
  1596. a1 = append(a1, map[string]interface{}{
  1597. "entname": btd.Val,
  1598. "sortstr": td.Val,
  1599. "sort": GetBidSort(td.Val, n),
  1600. })
  1601. res = true
  1602. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1603. //} else if ((btd != nil && !btd.BH && btd.Valtype == "BO") || direct == 2) && rtd != nil && filter_zbdw_v.MatchString(rtd.Val) {
  1604. } else if ((btd != nil && !btd.BH) || direct == 2) && rtd != nil && filter_zbdw_v2.MatchString(rtd.Val) {
  1605. d = 2
  1606. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1607. if arrbo == nil {
  1608. arrbo = []map[string]interface{}{}
  1609. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1610. }
  1611. a1 := arrbo.([]map[string]interface{})
  1612. a1 = append(a1, map[string]interface{}{
  1613. "entname": rtd.Val,
  1614. "sortstr": td.Val,
  1615. "sort": GetBidSort(td.Val, n),
  1616. })
  1617. res = true
  1618. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1619. }
  1620. }
  1621. return
  1622. }
  1623. func GetBidSort(str string, n int) int {
  1624. val := n
  1625. if strings.Index(str, "首选") > -1 {
  1626. val = 1
  1627. } else {
  1628. val = winnerOrderEntity.toNumber(str, n)
  1629. }
  1630. return val
  1631. }
  1632. // FindTdVal looks up the header cell of td through FindNear and, when that
  // header is usable, records td's value under the header's key in
  // table.SortKV.
  //
  // direct is forwarded to FindNear (1 searches to the left, 2 searches
  // above). vdirect is the key direction the header must already have or is
  // free to adopt. b is true only when the value was stored with AddKey or an
  // existing key was replaced with ReplaceKey.
  1633. func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
  1634. near := table.FindNear(td, direct)
  1635. // if near != nil {
  1636. // fmt.Println("near----", near.Val, td.Val)
  1637. // }
  1638. // qutil.Debug(near != nil)
  1639. // qutil.Debug(near.BH)
  1640. // qutil.Debug(near.KeyDirect == vdirect, near.KeyDirect == 0)
  1641. // qutil.Debug(near.KVDirect == direct, near.KVDirect == 0)
  1642. // qutil.Debug(near.KVDirect < 3)
  // The header is accepted only if it is flagged BH, its key direction and
  // key/value direction are either unset (0) or equal to the requested ones,
  // and its KVDirect is below 3.
  1643. if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
  1644. near.KVDirect = direct
  1645. near.KeyDirect = vdirect
  1646. td.KVDirect = direct
  1647. key := near.Val
  // A header without text gets a synthetic key built from its position.
  1648. if near.Val == "" {
  1649. key = fmtkey("k", near.TR.RowPos, near.ColPos)
  1650. }
  1651. val := table.SortKV.Map[key]
  1652. //qutil.Debug("====================", "key:", key, "val:", val)
  1653. bthiskey := false
  1654. if val != nil {
  1655. curpos := table.SortKV.Index[key]
  1656. thistr := table.kTD[curpos]
  // The key already exists. If it was registered by a different header
  // cell, make this header's text unique by appending "_" until the key is
  // free; otherwise this cell adds another value to the same header.
  1657. if thistr != near {
  1658. near.Val += "_"
  1659. for table.SortKV.Map[near.Val] != nil {
  1660. near.Val += "_"
  1661. }
  1662. key = near.Val // key used not to be reset here, which overwrote earlier results
  1663. } else {
  1664. bthiskey = true
  1665. }
  1666. }
  1667. bfind := false
  1668. barr := false
  1669. varrpos := -1
  1670. if bthiskey {
  1671. // Array-valued key with merged rows or merged columns: use kvscope to find which array slot td belongs to.
  1672. pos := table.SortKV.Index[key]
  1673. mval := table.kvscope[pos]
  1674. bvalfind := false
  1675. if direct == 1 { // key/value laid out horizontally
  // Look for a recorded cell that ends in the same column and sits directly above td.
  1676. L1:
  1677. for k3, v3 := range mval {
  1678. for _, v4 := range v3 {
  1679. if v4.EndRow+1 == td.StartRow && v4.EndCol == td.EndCol {
  1680. varrpos = k3
  1681. bvalfind = true
  1682. break L1
  1683. }
  1684. }
  1685. }
  1686. } else { // key/value laid out vertically
  // Look for a recorded cell that ends in the same row and sits directly left of td.
  1687. L2:
  1688. for k3, v3 := range mval {
  1689. for _, v4 := range v3 {
  1690. if v4.EndCol+1 == td.StartCol && v4.EndRow == td.EndRow {
  1691. varrpos = k3
  1692. bvalfind = true
  1693. break L2
  1694. }
  1695. }
  1696. }
  1697. }
  1698. if vals, ok := val.([]string); ok {
  // For a header without text: when every existing value is non-empty and
  // matches NullTdReg, the header is renamed to NullTxtBid and the key is
  // replaced further down (bfind).
  1699. if near.Val == "" {
  1700. bn := false
  1701. for _, vs := range vals {
  1702. if vs != "" && NullTdReg.MatchString(vs) {
  1703. bn = true
  1704. } else {
  1705. bn = false
  1706. break
  1707. }
  1708. }
  1709. if bn {
  1710. near.Val = NullTxtBid
  1711. key = NullTxtBid
  1712. bfind = true
  1713. }
  1714. }
  1715. if bvalfind && varrpos > -1 && len(vals) > varrpos {
  1716. vals[varrpos] = td.Val // += "__" + td.Val
  1717. } else {
  1718. // Drop empty values while appending.
  1719. newVals := []string{}
  1720. for _, isval := range vals {
  1721. if isval == "" {
  1722. continue
  1723. }
  1724. newVals = append(newVals, isval)
  1725. }
  1726. //vals = append(vals, td.Val)
  1727. if td.Val != "" {
  1728. newVals = append(newVals, td.Val)
  1729. }
  1730. val = newVals
  // NOTE(review): varrpos is derived from the old slice (vals), not from
  // newVals, so it may not be the index td.Val ends up at - confirm.
  1731. varrpos = len(vals) - 1
  1732. }
  1733. } else if vals, ok := val.(string); ok && vals != "" && td.Val != "" {
  // A single string value: either overwrite it (td continues a recorded
  // cell) or promote it to a two-element array.
  1734. if bvalfind {
  1735. val = td.Val //vals + "__" + td.Val
  1736. } else {
  1737. tval := []string{vals}
  1738. tval = append(tval, td.Val)
  1739. val = tval
  1740. varrpos = 1
  1741. }
  1742. }
  1743. barr = true
  1744. } else {
  // First value for this key: use td's text, or fall back to the header's
  // own single SortKV entry when td is empty.
  1745. if td.Val != "" {
  1746. val = td.Val
  1747. } else if len(near.SortKV.Map) == 1 && near.SortKV.Map[near.Val] != "" {
  1748. val = near.SortKV.Map[near.Val]
  1749. }
  1750. }
  1751. td.HeadTd = near
  1752. if bfind {
  1753. tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
  1754. table.SortKV.ReplaceKey(key, val, tkey)
  1755. } else {
  // Nothing to store, or the key is explicitly excluded: return with b == false.
  1756. if val == nil || val == "" || key == "采购项目预算金额" {
  1757. return
  1758. }
  1759. table.SortKV.AddKey(key, val)
  1760. //if table.SortKV.Map[key] != nil {
  1761. pos := table.SortKV.Index[key]
  1762. //qutil.Debug("=========", "key:", key, "val:", val, "pos:", pos)
  1763. if barr {
  // Remember td under its array slot so later cells can be matched against it.
  1764. mval := table.kvscope[pos]
  1765. if mval != nil {
  1766. tds := mval[varrpos]
  1767. if tds != nil {
  1768. tds = append(tds, td)
  1769. } else {
  1770. tds = []*TD{td}
  1771. }
  1772. if varrpos > -1 {
  1773. mval[varrpos] = tds
  1774. table.kvscope[pos] = mval
  1775. }
  1776. }
  1777. } else {
  // First value: start the scope map at slot 0 and register the header cell for this key position.
  1778. table.kvscope[pos] = map[int][]*TD{
  1779. 0: []*TD{td},
  1780. }
  1781. table.kTD[pos] = near
  1782. }
  1783. //}
  1784. }
  1785. b = true
  1786. }
  1787. return
  1788. }
  1789. //查找单元格的表头时,横向或纵向
  1790. func (table *Table) FindNear(td *TD, direct int) *TD {
  1791. if direct == 1 && td.StartCol > 0 { //左临
  1792. tr := table.TRs[:td.TR.RowPos+1]
  1793. for i := len(tr) - 1; i > -1; i-- {
  1794. tds := tr[i].TDs
  1795. for _, td1 := range tds {
  1796. if td1.StartRow <= td.StartRow && td1.EndRow >= td.EndRow && td1.EndCol+1 == td.StartCol {
  1797. //找到左临节点
  1798. if td1.BH {
  1799. return td1
  1800. } else {
  1801. if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
  1802. return td1.HeadTd
  1803. }
  1804. }
  1805. }
  1806. }
  1807. }
  1808. } else if direct == 2 && td.StartRow > 0 { //上临
  1809. tr := table.TRs[:td.TR.RowPos]
  1810. for i := len(tr) - 1; i > -1; i-- {
  1811. tds := tr[i].TDs
  1812. for _, td1 := range tds {
  1813. if td1.StartCol <= td.StartCol && td1.EndCol >= td.EndCol && td1.EndRow+1 == td.StartRow {
  1814. //找到左临节点
  1815. if td1.BH {
  1816. return td1
  1817. } else {
  1818. if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
  1819. return td1.HeadTd
  1820. }
  1821. }
  1822. }
  1823. }
  1824. }
  1825. }
  1826. return nil
  1827. }
  1828. //根据行号列号获取td对象
  1829. func (tn *Table) GetTdByRCNo(row, col int) *TD {
  1830. for _, tr := range tn.TRs {
  1831. for _, td := range tr.TDs {
  1832. if td.StartCol <= col && td.EndCol >= col && td.StartRow <= row && td.EndRow >= row {
  1833. return td
  1834. }
  1835. }
  1836. }
  1837. return nil
  1838. }
  1839. //判断表格是否是分包
  1840. func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
  1841. pac := 0 //包的数量
  1842. val := 0 //分值
  1843. index = []string{} //存储分包,使用tbale.SortKV的key和value使用正则等处理对值进行判断
  1844. index_pos := []int{} //下标
  1845. //是数组且能找到标段之类的提示
  1846. //arr_count := 0 //计数table.SortKV的value是数组的数量,后面没用
  1847. key_index := -1
  1848. hasPkgTd := map[string]bool{}
  1849. //初始化CheckMultiPackageByTable方法需要的数据
  1850. key_index, index, index_pos, val, pac, hasPkgTd = initCheckMultiPackageByTable(tn, key_index, index, index_pos, val, pac, hasPkgTd)
  1851. //key是分包的情况
  1852. //记录key对应的值
  1853. commonKeyVals := map[string][]string{}
  1854. //记录key出现的次数
  1855. keyExistsCount := map[string]int{}
  1856. if pac > 1 {
  1857. val = 10
  1858. } else {
  1859. //查找标签
  1860. if TableMultiPackageReg_4.MatchString(tn.Tag) {
  1861. val += 4
  1862. } else if TableMultiPackageReg_2.MatchString(tn.Tag) {
  1863. val += 4
  1864. }
  1865. //根据table.SortKV的key判断是否分包,如果没有再根据value判断
  1866. val, index, index_pos = foundPacBySortKV(tn, val, index, index_pos, &keyExistsCount, &commonKeyVals, key_index, hasPkgTd)
  1867. }
  1868. // u.Debug(index)
  1869. //过滤重复及标准化!
  1870. standIndex := []string{}
  1871. standIndex_pos := []int{}
  1872. oldIndex := []string{} //存放包的原始值
  1873. brepeat := map[string]bool{}
  1874. for k, v := range index {
  1875. v = u.PackageNumberConvert(v)
  1876. if !brepeat[v] {
  1877. brepeat[v] = true
  1878. standIndex = append(standIndex, v)
  1879. standIndex_pos = append(standIndex_pos, index_pos[k])
  1880. oldIndex = append(oldIndex, index[k])
  1881. }
  1882. }
  1883. index = standIndex
  1884. //有一个以上的包,并且相同的key出现一次以上,认为这个key是属于包里面的
  1885. if len(commonKeyVals) > 0 {
  1886. for k, v := range commonKeyVals {
  1887. if len(index) > 1 && keyExistsCount[k] < 2 {
  1888. continue
  1889. }
  1890. tn.SortKV.AddKey(k, v)
  1891. }
  1892. }
  1893. //
  1894. isGoonNext := false
  1895. if val > 4 && len(brepeat) > 0 {
  1896. b = true
  1897. //多包解析
  1898. if b {
  1899. tn.BPackage = true
  1900. //根据数组index分包长度添加table.BlockPackage子包数组
  1901. for nk, v := range index {
  1902. if tn.BlockPackage.Map[v] == nil {
  1903. bp := &u.BlockPackage{}
  1904. bp.Index = v //序号 (转换后编号,只有数字或字母)
  1905. bp.Origin = oldIndex[nk] //包的原始值
  1906. bp.TableKV = u.NewJobKv() //table kv (分出的对应的KV值)
  1907. tn.BlockPackage.AddKey(v, bp) //table子包数组
  1908. }
  1909. }
  1910. isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos) //多包处理,处理不同情况下的分包
  1911. }
  1912. } else {
  1913. isGoonNext = true
  1914. }
  1915. if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
  1916. tn.isGoonNext()
  1917. }
  1918. //查找分包中的中标人排序
  1919. if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
  1920. for _, v := range tn.BlockPackage.Keys {
  1921. vv := tn.BlockPackage.Map[v].(*u.BlockPackage)
  1922. if vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0 {
  1923. vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
  1924. }
  1925. }
  1926. }
  1927. return
  1928. }
  1929. //多包处理,处理不同情况下的分包
  1930. func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int) (isGoonNext bool) {
  1931. if len(index) == 1 { //是一个的情况
  1932. if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
  1933. beq := true
  1934. for _, v2 := range tn.SortKV.Keys {
  1935. if _, ok := tn.SortKV.Map[v2].(string); !ok {
  1936. beq = false
  1937. break
  1938. }
  1939. }
  1940. if beq { //统一处理为数组
  1941. td := tn.GetTdByRCNo(tn.RowNum-1, 0)
  1942. if !td.BH && FindVal2_1.MatchString(td.Val) {
  1943. for _, v2 := range tn.SortKV.Keys {
  1944. tn.SortKV.AddKey(v2, []string{tn.SortKV.Map[v2].(string)})
  1945. }
  1946. } else {
  1947. //没有处理成数组的情况下,继续调用正文查找分包的方法
  1948. isGoonNext = true
  1949. }
  1950. }
  1951. }
  1952. }
  1953. for _, k1 := range tn.SortKV.Keys {
  1954. v1 := tn.SortKV.Map[k1]
  1955. if _, bvs := v1.(string); bvs && len(index) > 1 && !strings.HasSuffix(k1, "_") { //table.SortKV.Map.value为字符串并且index有分包而且table.SortKV.Map.key没有_
  1956. v1_array := []string{v1.(string)}
  1957. underline := ""
  1958. for {
  1959. underline += "_"
  1960. if tn.SortKV.Map[k1+underline] == nil {
  1961. break
  1962. } else if v3, v2_ok := tn.SortKV.Map[k1+underline].(string); v2_ok && v3 != "" {
  1963. v1_array = append(v1_array, v3)
  1964. }
  1965. }
  1966. v1 = v1_array
  1967. }
  1968. if val, bvs := v1.([]string); bvs {
  1969. if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
  1970. for k, v := range val {
  1971. tn.assemblePackage(k1, v, index[k]) //组装解析到的分包
  1972. }
  1973. } else {
  1974. for sk1, sv2 := range index {
  1975. v := val[sk1]
  1976. //处理http://www.hljcg.gov.cn/xwzs!queryOneXwxxqx.action?xwbh=8145b599-a11e-45cb-a76a-12157a715570
  1977. if v == "" && strings.Index(k1, "供应商") > -1 {
  1978. if sk1 != len(index)-1 {
  1979. //u.Debug(val[sk1+1], val[sk1+2])
  1980. if standIndex_pos[sk1+1]-standIndex_pos[sk1] > 1 {
  1981. v = val[standIndex_pos[sk1]+1]
  1982. }
  1983. } else {
  1984. if standIndex_pos[sk1] < len(val)-1 {
  1985. v = val[standIndex_pos[sk1]+1]
  1986. }
  1987. }
  1988. }
  1989. tn.assemblePackage(k1, v, sv2)
  1990. }
  1991. }
  1992. //删除子包的kv
  1993. //u.Debug("----==1==-------", k1)
  1994. k1tags := u.GetTags(k1) //取得匹配
  1995. //if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
  1996. // tn.SortKV.RemoveKey(k1)
  1997. //}
  1998. for _, vcgdw := range k1tags {
  1999. if vcgdw.Value == "采购单位" {
  2000. }
  2001. }
  2002. } else if val, bvs := v1.(string); bvs && len(index) == 1 {
  2003. //删除子包的kv
  2004. kvTags, _ := CommonDataAnaly(k1, "", "", val)
  2005. for kvTag_k, kvTag_v := range kvTags {
  2006. hasValid := false
  2007. for _, kvTag_vv := range kvTag_v {
  2008. if kvTag_vv.IsInvalid {
  2009. continue
  2010. }
  2011. hasValid = true
  2012. }
  2013. if !hasValid {
  2014. continue
  2015. }
  2016. if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
  2017. tn.SortKV.RemoveKey(k1)
  2018. tn.assemblePackage(k1, val, index[0])
  2019. //log.Println("remove", k1, val)
  2020. }
  2021. }
  2022. //u.Debug("----==2==-------", k1)
  2023. }
  2024. }
  2025. return isGoonNext
  2026. }
  2027. //没有处理成数组的情况下,继续调用正文查找分包的方法
  2028. func (tn *Table) isGoonNext() {
  2029. blockPackage := map[string]*u.BlockPackage{}
  2030. for _, k := range tn.SortKV.Keys {
  2031. if excludeKey.MatchString(k) {
  2032. continue
  2033. }
  2034. str := "" //拼装为冒号kv
  2035. v := tn.SortKV.Map[k]
  2036. nk := regReplAllSpace.ReplaceAllString(k, "")
  2037. if vs, ok := v.([]string); ok {
  2038. str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " "))
  2039. } else {
  2040. str += fmt.Sprintf("%s:%s\n", nk, v)
  2041. }
  2042. b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false) //分块之后分包
  2043. if b && len(blockPackage) > 0 {
  2044. tn.BPackage = true
  2045. for mk, mv := range blockPackage {
  2046. if tn.BlockPackage.Map[mk] == nil {
  2047. tn.BlockPackage.AddKey(mk, mv)
  2048. } else {
  2049. bp := tn.BlockPackage.Map[mk].(*u.BlockPackage)
  2050. if bp.TableKV == nil {
  2051. bp.TableKV = u.NewJobKv()
  2052. }
  2053. if bp.SpaceKV == nil {
  2054. bp.SpaceKV = u.NewJobKv()
  2055. }
  2056. for k2, v2 := range mv.ColonKV.KvTags {
  2057. for _, v2v := range v2 {
  2058. isExists := false
  2059. for _, v2vv := range bp.TableKV.KvTags[k2] {
  2060. if v2v.Value == v2vv.Value {
  2061. isExists = true
  2062. break
  2063. }
  2064. }
  2065. if !isExists {
  2066. bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
  2067. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  2068. }
  2069. }
  2070. }
  2071. for k2, v2 := range mv.SpaceKV.KvTags {
  2072. for _, v2v := range v2 {
  2073. isExists := false
  2074. for _, v2vv := range bp.SpaceKV.KvTags[k2] {
  2075. if v2v.Value == v2vv.Value {
  2076. isExists = true
  2077. break
  2078. }
  2079. }
  2080. if !isExists {
  2081. bp.SpaceKV.KvTags[k2] = append(bp.SpaceKV.KvTags[k2], v2v)
  2082. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  2083. }
  2084. }
  2085. }
  2086. }
  2087. }
  2088. tn.BPackage = true
  2089. tn.SortKV.RemoveKey(k)
  2090. }
  2091. }
  2092. }
  2093. //根据table.SortKV的key判断是否分包,如果没有再根据value判断
  2094. func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
  2095. keyIsPkg := false
  2096. for in, k := range tn.SortKV.Keys {
  2097. if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) { //判断分包前排除
  2098. continue
  2099. }
  2100. v := tn.SortKV.Map[k]
  2101. //key是分包的情况
  2102. if ismatch := FindVal_1.MatchString(k); keyIsPkg || ismatch {
  2103. if ismatch {
  2104. keyIsPkg = true
  2105. val += 4
  2106. pkgFlag := FindVal_1.FindString(k) //对值进行分包判断
  2107. k = strings.Replace(k, pkgFlag, "", -1)
  2108. index = append(index, pkgFlag)
  2109. index_pos = append(index_pos, len(index))
  2110. val += 1
  2111. //pac++
  2112. } else {
  2113. k = strings.TrimRight(k, "_")
  2114. }
  2115. (*keyExistsCount)[k] = (*keyExistsCount)[k] + 1
  2116. (*commonKeyVals)[k] = append((*commonKeyVals)[k], qutil.ObjToString(v))
  2117. } else if k1 := FilterKey_2.ReplaceAllString(k, ""); FindKey_2.MatchString(k1) {
  2118. val += 4
  2119. //value数组分包
  2120. if vs, bvs1 := v.([]string); bvs1 {
  2121. L:
  2122. for in2, v1 := range vs {
  2123. if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
  2124. for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
  2125. if serial.MatchString(v1) {
  2126. break L
  2127. }
  2128. }
  2129. if key_index == -1 {
  2130. key_index = in
  2131. } else if key_index != in {
  2132. break
  2133. }
  2134. index = append(index, v1)
  2135. index_pos = append(index_pos, in2)
  2136. val += 1
  2137. //pac++
  2138. }
  2139. }
  2140. } else if v1, ok := v.(string); ok && !hasPkgTd[k] {
  2141. //value字符串分包
  2142. v1 = replPkgConfusion(v1) //替换分包中混淆的词
  2143. for _, v2 := range strings.Split(v1, "/") {
  2144. if len([]rune(v2)) < 20 && !moneyNum.MatchString(v2) && FindVal2_1.MatchString(v2) {
  2145. key_index = in
  2146. index = append(index, v1)
  2147. index_pos = append(index_pos, 0)
  2148. val += 1
  2149. //pac++
  2150. underline := ""
  2151. for {
  2152. underline += "_"
  2153. if tn.SortKV.Map[k+underline] == nil {
  2154. break
  2155. } else if v3, v2_ok := tn.SortKV.Map[k+underline].(string); v2_ok && v3 != "" {
  2156. index = append(index, v3)
  2157. index_pos = append(index_pos, 1)
  2158. } else if v3, v2_ok := tn.SortKV.Map[k+underline].([]string); v2_ok {
  2159. for v2_k, v2_v := range v3 {
  2160. index = append(index, v2_v)
  2161. index_pos = append(index_pos, v2_k+1)
  2162. }
  2163. }
  2164. }
  2165. break
  2166. }
  2167. }
  2168. }
  2169. break
  2170. }
  2171. }
  2172. return val, index, index_pos
  2173. }
  2174. //初始化CheckMultiPackageByTable方法需要的数据
  2175. func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
  2176. for in, k := range tn.SortKV.Keys {
  2177. //涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)就跳过
  2178. if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) {
  2179. continue
  2180. }
  2181. v := tn.SortKV.Map[k]
  2182. if vs, bvs := v.([]string); bvs {
  2183. //arr_count++
  2184. haspkgs := []string{}
  2185. for in2, v1 := range vs {
  2186. v1 = replPkgConfusion(v1) //替换分包中混淆的词
  2187. if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
  2188. if key_index == -1 {
  2189. key_index = in
  2190. } else if key_index != in {
  2191. break
  2192. }
  2193. index = append(index, FindVal_1.FindString(v1))
  2194. index_pos = append(index_pos, in2)
  2195. val += 1
  2196. pac++
  2197. } else {
  2198. if ok, v1new := isHasOnePkgAndNoKv(v1); ok { //td的值里面有一个包,并且没有冒号kv
  2199. haspkgs = append(haspkgs, v1new)
  2200. }
  2201. }
  2202. }
  2203. /*处理这种情况:
  2204. <tr><td>包一:xxxxxxxxx</td></tr>
  2205. <tr><td>包二:xxxxxxxxx</td></tr>
  2206. */
  2207. if len(index) == 0 && len(haspkgs) > 0 && len(haspkgs) == len(vs) {
  2208. for in2, v1 := range haspkgs {
  2209. if key_index == -1 {
  2210. key_index = in
  2211. } else if key_index != in {
  2212. break
  2213. }
  2214. index = append(index, v1)
  2215. index_pos = append(index_pos, in2)
  2216. val += 1
  2217. pac++
  2218. }
  2219. }
  2220. } else if v1, ok := v.(string); ok {
  2221. v1 = replPkgConfusion(v1) //替换分包中混淆的词
  2222. if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
  2223. key_index = in
  2224. index = append(index, FindVal_1.FindString(v1))
  2225. index_pos = append(index_pos, 0)
  2226. val += 1
  2227. pac++
  2228. } else if getTd := tn.GetTdByRCNo(0, tn.SortKV.Index[k]); getTd != nil && getTd.KVDirect == 2 { //纵向
  2229. /*处理这种情况:
  2230. <tr><td>包一:xxxxxxxxx</td></tr>
  2231. */
  2232. if ok, v1new := isHasOnePkgAndNoKv(v1); ok {
  2233. hasPkgTd[k] = true
  2234. key_index = in
  2235. index = append(index, v1new)
  2236. index_pos = append(index_pos, 0)
  2237. val += 1
  2238. pac++
  2239. }
  2240. }
  2241. }
  2242. }
  2243. return key_index, index, index_pos, val, pac, hasPkgTd
  2244. }
  2245. //组装解析到的分包,//key如果匹配到抽取关键词就添加到table.SortKV
  2246. func (tn *Table) assemblePackage(k1, v1, key string) {
  2247. bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
  2248. if bp.TableKV == nil {
  2249. bp.TableKV = u.NewJobKv()
  2250. }
  2251. if v1 != "" {
  2252. kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
  2253. for k3, v3 := range kvTags {
  2254. bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
  2255. }
  2256. }
  2257. k1 = regReplAllSpace.ReplaceAllString(k1, "")
  2258. //拼接内容
  2259. if !excludeKey.MatchString(k1) {
  2260. bp.Text += fmt.Sprintf("%v:%v\n", k1, v1)
  2261. }
  2262. tn.BlockPackage.AddKey(key, bp)
  2263. }
  // Table markup delivered by the crawler is sometimes malformed. The code
  // below finds and repairs irregular table strings. Only the full document
  // text is processed; tables inside blocks need no repair.
  var (
  	// thbf matches opening and closing thead, tbody and tfoot tags,
  	// ignoring case.
  	thbf = regexp.MustCompile("(?i)</?t(head|body|foot)>")
  	// saveThead captures the content of a thead section, which has to be kept.
  	saveThead = regexp.MustCompile("(?is)<thead>(.+?)</thead>")
  )
  2271. func RepairCon(con string) string {
  2272. res := saveThead.FindAllStringSubmatch(con, 1)
  2273. th := ""
  2274. if len(res) == 1 && len(res[0]) == 2 {
  2275. th = u.TrimLeftSpace(res[0][1], "")
  2276. }
  2277. con = thbf.ReplaceAllString(con, "")
  2278. con = u.TrimLeftSpace(con, "")
  2279. itbody := strings.Index(con, "<tr")
  2280. iLen := 3
  2281. if itbody == 0 {
  2282. con = findpos(con, iLen, itbody)
  2283. } else {
  2284. itable := strings.Index(con, "<table")
  2285. if itable == -1 || itable > itbody {
  2286. con = findpos(con, iLen, itbody)
  2287. }
  2288. }
  2289. //保留第一个thead
  2290. if th != "" {
  2291. con = strings.Replace(con, th, "<thead>"+th+"</thead>", 1)
  2292. }
  2293. //u.Debug(con)
  2294. return con
  2295. }
  2296. //修复表格
  2297. func findpos(con string, iLen, start int) (newcon string) {
  2298. defer qutil.Catch()
  2299. n := len(con)
  2300. layer := 0
  2301. pos := 0
  2302. if start >= 0 {
  2303. if iLen == 6 {
  2304. for i := iLen + start; i < len(con); i++ {
  2305. if con[i] == '<' && i+6 < n {
  2306. str := con[i : i+6]
  2307. if str == "</tbod" {
  2308. if layer == 0 {
  2309. pos = i
  2310. break
  2311. } else {
  2312. layer--
  2313. }
  2314. i += 6
  2315. } else if str == "<tbody" {
  2316. layer++
  2317. i += 6
  2318. }
  2319. }
  2320. }
  2321. if pos+7 <= n && start+6 < pos {
  2322. newcon = con[:start] + "<table" + con[start+6:pos] + "</table" + con[pos+7:]
  2323. }
  2324. } else {
  2325. layer++
  2326. nq := 0
  2327. lasttr := 0
  2328. for i := iLen + start; i < len(con); i++ {
  2329. if con[i] == '<' && i+4 < n {
  2330. if nq == 0 {
  2331. str := con[i : i+4]
  2332. if str == "</tr" {
  2333. if layer <= 0 {
  2334. pos = i //正常情况不会存在此类情况
  2335. break
  2336. } else {
  2337. layer--
  2338. lasttr = i
  2339. }
  2340. i += 4
  2341. } else if str[:3] == "<tr" {
  2342. layer++
  2343. i += 4
  2344. } else if str == "<tab" && i+6 < n && con[i+4:i+6] == "le" {
  2345. if layer == 0 {
  2346. break
  2347. } else {
  2348. //内嵌的表格
  2349. nq++
  2350. }
  2351. }
  2352. } else {
  2353. if i+6 < n {
  2354. str := con[i : i+6]
  2355. if str == "</tabl" {
  2356. nq--
  2357. } else if str == "<table" {
  2358. nq++
  2359. }
  2360. } else {
  2361. break
  2362. }
  2363. }
  2364. }
  2365. }
  2366. if pos == 0 && lasttr > 3 {
  2367. pos = lasttr + 5
  2368. } else if pos > 0 {
  2369. pos += 5
  2370. }
  2371. if pos <= n && pos < len(con) && start < pos {
  2372. newcon = con[:start] + "<table>" + con[start:pos] + "</table>" + con[pos:]
  2373. }
  2374. }
  2375. }
  2376. if newcon == "" {
  2377. newcon = con
  2378. }
  2379. return
  2380. }
  2381. //td的值里面有一个包,并且没有冒号kv
  2382. func isHasOnePkgAndNoKv(v1 string) (bool, string) {
  2383. v1s := FindVal_1.FindAllString(v1, -1)
  2384. colonCount := len(regDivision.FindAllString(v1, -1))
  2385. if len(v1s) == 1 && colonCount < 2 {
  2386. ispkgcolon := regexp.MustCompile(v1s[0] + "[::]").MatchString(v1)
  2387. if (ispkgcolon && colonCount == 1) || (!ispkgcolon && colonCount == 0) {
  2388. return true, v1s[0]
  2389. }
  2390. }
  2391. return false, v1
  2392. }
  2393. //替换分包中混淆的词
  2394. func replPkgConfusion(v1 string) string {
  2395. v1 = PreReg.ReplaceAllString(v1, "")
  2396. v1 = PreReg1.ReplaceAllString(v1, "")
  2397. v1 = PreCon.ReplaceAllString(v1, "")
  2398. v1 = PreCon2.ReplaceAllString(v1, "")
  2399. return v1
  2400. }
  2401. // TdContactFormat post-processes the cell values of the table to attribute
  // contact fields to a contact type (buyer, agency, ...).
  //
  // contactFormat carries the state shared between calls: IndexMap maps a
  // position to a contact type and MatchMap records which contact fields were
  // already handled per contact type. Both are read on entry and written back
  // on exit; they are reset to empty maps when nothing matched.
  2402. func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
  2403. // Handle the contact information found in the table.
  2404. indexMap := contactFormat.IndexMap
  2405. matchMap := contactFormat.MatchMap
  2406. //qutil.Debug("==============================td=======================", indexMap, matchMap)
  2407. weightMap := map[string]map[string]interface{}{} // weights
  2408. mustMatchFirst := len(indexMap) > 0 // the first key must match
  2409. reCreate := false
  2410. matchCount := 0
  2411. contactTypeTagMap := map[string]map[string][]interface{}{}
  2412. //u.Debug(mustMatchFirst, indexMap, matchMap)
  2413. notMatchTrCount := 0
  2414. allAscFind := true // enable forward-order lookup
  2415. // First pass, only when no contact type is known yet: try to seed indexMap. Involves allAscFind and indexMap.
  2416. if len(indexMap) == 0 {
  2417. isCanAddToIndexMap := false
  2418. matchPrevFlag := false
  2419. prevCanAddToIndexMap := false
  2420. LS:
  2421. for _, tr := range tn.TRs {
  2422. for td_index, td := range tr.TDs {
  2423. thisTdKvs := tn.tdkv(td) // colon key/values of the td
  // Only cells holding exactly one key/value are considered here.
  2424. if len(thisTdKvs) != 1 {
  2425. continue
  2426. }
  2427. // 1. Handle buyer/agency keys wrapped in brackets; 2. recognise buyer contact, phone, agency contact, phone.
  2428. goOnFunc, isContinue, td_k := tn.tdKV(thisTdKvs[0].Key, &matchPrevFlag, &isCanAddToIndexMap, &indexMap, "LS")
  2429. if !goOnFunc {
  2430. break LS
  2431. }
  2432. if isContinue {
  2433. continue
  2434. }
  2435. // Buyer, agency.
  2436. for _, k := range HasOrderContactType(td_k) {
  2437. if !ContactType[k].MatchString(td_k) { // skip keys that are not a buyer/agency key
  2438. continue
  2439. }
  2440. if len(indexMap) == 0 {
  2441. if isCanAddToIndexMap || (prevCanAddToIndexMap && len(tr.TDs) == 1) {
  2442. myPrevTdVal := ""
  // Look at the cell two positions to the left of the current one.
  2443. if td_index-2 >= 0 {
  2444. myPrevTdVal = tr.TDs[td_index-2].Val
  2445. if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
  2446. matchPrevFlag = true
  2447. }
  2448. }
  2449. indexMap[0] = k
  2450. break
  2451. }
  2452. } else {
  // A second contact type showed up: discard the seed and stop the first pass.
  2453. indexMap = map[int]string{}
  2454. break LS
  2455. }
  2456. }
  2457. }
  // Carry the flag over to the next row and reset it.
  2458. prevCanAddToIndexMap = isCanAddToIndexMap
  2459. isCanAddToIndexMap = false
  2460. }
  2461. if len(indexMap) > 0 {
  2462. allAscFind = false
  2463. }
  2464. }
  2465. //////
  // Second pass: walk every cell and attribute its contact key/values.
  2466. L:
  2467. for tr_index, tr := range tn.TRs {
  2468. thisTrHasMatch := false
  2469. jumpNextTd := false
  2470. for td_index, td := range tr.TDs {
  2471. // Several terms joined together (和|以?及|与|、).
  2472. jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
  2473. // Key/values obtained after splitting the text into blocks.
  2474. thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock)
  2475. if len(thisTdKvs) == 0 {
  2476. thisTdKvs = tn.tdkv(td) // fall back to colon key/values
  2477. }
  2478. tdAscFind := true // enable forward-order lookup for this td
  2479. if len(thisTdKvs) == 0 {
  2480. continue
  2481. } else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
  2482. // Handle the buyer appearing after the contact person / phone.
  2483. tdAscFind = tn.hasIndexMap(thisTdKvs, &indexMap, tdAscFind)
  2484. }
  2485. prevKey := ""
  2486. oldIndexMapLength := len(indexMap)
  2487. thidTdIndex := td_index
  2488. //notmatchCount := 0
  2489. kvTitle := ""
  2490. for _, td_kv := range thisTdKvs {
  2491. //u.Debug(td_kv.Key, td_kv.Value, td_kv.Title)
  2492. iscontinue := false
  2493. td_v := td_kv.Value
  2494. td_k := FilterContactKey(td_kv.Key) // handles buyer/agency keys wrapped in ()[]
  2495. td_k_length := len([]rune(td_k))
  // Keys shorter than 2 or longer than 15 runes are ignored.
  2496. if td_k_length < 2 || td_k_length > 15 {
  2497. continue
  2498. }
  2499. // Both lookups run in forward order.
  2500. if allAscFind && tdAscFind {
  2501. // Forward-order handling.
  2502. matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex)
  2503. }
  2504. if iscontinue {
  2505. continue
  2506. }
  2507. // Not in the same block: a title change on a non-contact key resets the collected state.
  2508. if td_kv.Title != "" && kvTitle != td_kv.Title && len(indexMap) > 0 && !ContactInfoMustReg.MatchString(td_kv.Key) {
  2509. thidTdIndex = 0
  2510. matchMap = map[string]map[string]bool{}
  2511. indexMap = map[int]string{}
  2512. }
  2513. kvTitle = td_kv.Title
  2514. //u.Debug(indexMap, td_k, td_v, matchMap)
  // From here on only keys of 2 to 10 runes are processed.
  2515. if td_k_length < 2 || td_k_length > 10 {
  2516. continue
  2517. }
  2518. if len(indexMap) > 0 {
  2519. // The key is not a recognised contact field (buyer contact, phone, agency contact, phone).
  2520. if !ContactInfoMustReg.MatchString(td_k) {
  2521. //notmatchCount++
  2522. //if notmatchCount < len(indexMap)*2 && false {//false???????
  2523. // notmatchCount = 0
  2524. // thidTdIndex = 0
  2525. // indexMap = map[int]string{}
  2526. // matchMap = map[string]map[string]bool{}
  2527. //}
  2528. if mustMatchFirst { // indexMap was non-empty on entry
  2529. break L
  2530. }
  2531. continue
  2532. }
  2533. reCreate = true
  2534. index := td_index
  2535. // indexMap was empty when this td started and now holds more than one entry.
  2536. if oldIndexMapLength == 0 && len(indexMap) > 1 {
  2537. if prevKey != td_k {
  2538. prevKey = td_k
  2539. index = td_index
  2540. } else if prevKey == td_k {
  2541. index++
  2542. }
  2543. }
  2544. // The value matches filterValue (presumably an empty value - confirm): count the row as matched and move on.
  2545. if filterValue.MatchString(td_v) {
  2546. thisTrHasMatch = true
  2547. continue
  2548. }
  2549. //u.Debug(indexMap, td_k, td_v, matchMap, index, modle)
  2550. // Resolve the contact type for this position; with a single entry in indexMap that entry is used.
  2551. myContactType := indexMap[index]
  2552. if myContactType == "" && len(indexMap) == 1 {
  2553. _, onlyContactType := u.FirstKeyValueInMap(indexMap)
  2554. myContactType, _ = onlyContactType.(string)
  2555. }
  2556. if myContactType == "" {
  2557. continue
  2558. }
  2559. matchCount++
  2560. if matchMap[myContactType] == nil {
  2561. matchMap[myContactType] = map[string]bool{}
  2562. }
  // Skip contact fields that IsContactKvHandle reports as already handled for this contact type.
  2563. if IsContactKvHandle(ContactInfoMustReg.FindString(td_k), matchMap[myContactType]) {
  2564. continue
  2565. }
  2566. matchMap[myContactType][ContactInfoMustReg.FindString(td_k)] = true
  // Skip keys that themselves match the contact type pattern.
  2567. if ContactType[myContactType].MatchString(td_k) {
  2568. continue
  2569. }
  2570. thisTrHasMatch = true
  2571. // Hand the key/value over to modle.
  2572. modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index)
  2573. }
  2574. }
  2575. //u.Debug(td.SortKV.Map)
  2576. }
  // In forward-order mode, two rows without a match reset indexMap.
  2577. if allAscFind && !thisTrHasMatch {
  2578. notMatchTrCount++
  2579. if notMatchTrCount >= 2 {
  2580. notMatchTrCount = 0
  2581. indexMap = map[int]string{}
  2582. }
  2583. }
  2584. }
  2585. //u.Debug("end", matchCount, indexMap, matchMap)
  // Nothing matched at all: hand back empty state.
  2586. if matchCount == 0 {
  2587. indexMap = map[int]string{}
  2588. matchMap = map[string]map[string]bool{}
  2589. }
  2590. (*contactFormat).IndexMap = indexMap
  2591. (*contactFormat).MatchMap = matchMap
  2592. // for _, tr := range tn.TRs {
  2593. // for _, td := range tr.TDs {
  2594. // qutil.Debug("td.sort.map---", td.SortKV.Map)
  2595. // }
  2596. // }
  2597. }
  2598. //modle
  2599. func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int) {
  2600. modle := 0
  2601. if len(thisTdKvs) == 1 {
  2602. if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
  2603. modle = 1
  2604. } else {
  2605. modle = 2
  2606. }
  2607. }
  2608. if modle == 1 {
  2609. td.Text = myContactType + td_k
  2610. td.Val = td.Text
  2611. } else {
  2612. //
  2613. if !strings.HasSuffix(td_k, "方式") {
  2614. kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts)
  2615. if len(kvTags) == 1 {
  2616. tagVal, _ := u.FirstKeyValueInMap(kvTags)
  2617. if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
  2618. return
  2619. }
  2620. if (*contactTypeTagMap)[myContactType] == nil {
  2621. (*contactTypeTagMap)[myContactType] = map[string][]interface{}{}
  2622. }
  2623. myOldKeyArray := (*contactTypeTagMap)[myContactType][tagVal]
  2624. if myOldKeyArray != nil {
  2625. tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
  2626. } else {
  2627. (*contactTypeTagMap)[myContactType][tagVal] = make([]interface{}, 3)
  2628. }
  2629. if (*weightMap)[myContactType] == nil {
  2630. (*weightMap)[myContactType] = map[string]interface{}{}
  2631. }
  2632. (*weightMap)[myContactType][tagVal] = 1
  2633. (*contactTypeTagMap)[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
  2634. }
  2635. }
  2636. td.SortKV.AddKey(myContactType+td_k, td_v)
  2637. }
  2638. }
// asdFind processes one key (td_k) for the case where every lookup runs in
// forward order, and returns the updated scan state.
//
// It works in three stages:
//  1. For each contact type returned by HasOrderContactType(td_k) whose
//     ContactType pattern matches td_k, it bumps matchCount, lets
//     matchContactType handle keys that match ContactInfoVagueReg (or
//     re-tags "采购单位" keys), and then registers the contact type in
//     indexMap at position thidTdIndex unless it is already present.
//  2. If indexMap is still empty and td_kv.PrevLine is set, it looks for a
//     contact type in the previous line.
//  3. If indexMap is still empty and td_kv.Title is set, it looks for a
//     contact type in the title via ContactTypeTitleMatch.
//
// All state is passed in by value and handed back through the eight return
// values, in this order: matchCount, weightMap, matchMap, thisTrHasMatch,
// indexMap, iscontinue, reCreate, thidTdIndex. The maps themselves are shared
// with the caller and may be mutated or replaced.
func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
	for _, k := range HasOrderContactType(td_k) { // candidate contact types (purchasing unit, agency)
		if !ContactType[k].MatchString(td_k) { // key does not mention this contact type
			continue
		}
		matchCount++
		if weightMap[k] == nil {
			weightMap[k] = map[string]interface{}{}
		}
		// The key mentions the contact type: process it.
		if ContactInfoVagueReg.MatchString(td_k) {
			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
		} else if k == "采购单位" { // tag it; an equal or higher weight overrides the stored one
			kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"})
			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
			if tagVal == k {
				// NOTE(review): the first and third alternatives do not check
				// weightVal, so the assignment below panics if weightVal is
				// nil or not an int — confirm GetKvTags always yields int weights.
				if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
					weightMap[k][k] = weightVal.(int)
					// A (re)accepted purchasing-unit key starts a fresh section.
					matchMap[k] = map[string]bool{}
					indexMap = map[int]string{}
				}
			}
		}
		if u.IsMapHasValue(k, indexMap) { // contact type is already one of indexMap's values
			thisTrHasMatch = true
			iscontinue = true
			continue
		}
		if reCreate {
			indexMap = map[int]string{}
			reCreate = false
		}
		// NOTE(review): writing here panics if the caller passed a nil indexMap
		// and none of the branches above replaced it — verify against callers.
		indexMap[thidTdIndex] = k
		iscontinue = true
		thisTrHasMatch = true
		thidTdIndex++
		break
	}
	if len(indexMap) == 0 && td_kv.PrevLine != "" {
		// Fall back to the line preceding this key/value, with FilterSerial
		// matches removed.
		prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
		// NOTE(review): Go map iteration order is unspecified, so when more
		// than one ContactType pattern qualifies, the index each contact type
		// receives can differ between runs.
		for k, v := range ContactType { // contact-type patterns (purchasing unit, agency)
			if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
				indexMap[thidTdIndex] = k
				thisTrHasMatch = true
				thidTdIndex++
			}
		}
	}
	if len(indexMap) == 0 && td_kv.Title != "" {
		// Fall back to the title attached to this key/value.
		if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
			thidTdIndex = 0
			matchMap = map[string]map[string]bool{}
			// The title's contact type is stored at index 1, not 0.
			indexMap = map[int]string{1: titleMatchType}
			// for i, t := range titleMatchType {
			// indexMap[i+1] = t
			// }
		}
	}
	return matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex
}
  2702. //匹配到进行处理
  2703. func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool) bool {
  2704. if (*matchMap)[k] == nil {
  2705. (*matchMap)[k] = map[string]bool{}
  2706. }
  2707. isAddToMatchMap := true
  2708. if !strings.HasSuffix(td_k, "方式") {
  2709. kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts)
  2710. if len(kvTags) == 1 {
  2711. tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
  2712. if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
  2713. isAddToMatchMap = false
  2714. }
  2715. if td.SortKV.Map[tagVal] != nil {
  2716. if (*weightMap)[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= (*weightMap)[k][tagVal].(int)) {
  2717. (*weightMap)[k][tagVal] = weightVal.(int)
  2718. td.SortKV.AddKey(tagVal, td_v)
  2719. thisTrHasMatch = true
  2720. }
  2721. } else {
  2722. (*weightMap)[k][tagVal] = weightVal.(int)
  2723. }
  2724. }
  2725. }
  2726. if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
  2727. (*matchMap)[k][ContactInfoVagueReg.FindString(td_k)] = true
  2728. }
  2729. return thisTrHasMatch
  2730. }
  2731. //采购人在联系人、电话后面的处理
  2732. func (tn *Table) hasIndexMap(thisTdKvs []*u.Kv, indexMap *map[int]string, tdAscFind bool) bool {
  2733. //采购人在联系人、电话后面的处理
  2734. isCanAddToIndexMap := false
  2735. LL:
  2736. for _, td_kv := range thisTdKvs {
  2737. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2738. goOnFunc, isContinue, td_k := tn.tdKV(td_kv.Key, nil, &isCanAddToIndexMap, indexMap, "LL")
  2739. if !goOnFunc {
  2740. break LL
  2741. }
  2742. if isContinue {
  2743. continue
  2744. }
  2745. if len(*indexMap) == 0 {
  2746. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  2747. if !ContactType[k].MatchString(td_k) {
  2748. continue
  2749. }
  2750. if isCanAddToIndexMap && len(*indexMap) == 0 {
  2751. (*indexMap)[0] = k
  2752. break
  2753. }
  2754. }
  2755. }
  2756. }
  2757. if len(*indexMap) > 0 {
  2758. tdAscFind = false
  2759. }
  2760. return tdAscFind
  2761. }
  2762. //和|以?及|与|、多个词和在一起
  2763. func (tn *Table) tdsMultipleWords(jumpNextTd bool, td *TD, td_index int, tr *TR, thisTrHasMatch bool, indexMap map[int]string) (jump, thisTr bool) {
  2764. if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
  2765. thisTdVals := regSplit.Split(td.Text, -1)
  2766. nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
  2767. if len(thisTdVals) == len(nextTdVals) { //本次和下个长度相等
  2768. for _, k := range HasOrderContactType(td.Text) { //采购单位,代理机构
  2769. if ContactType[k].MatchString(td.Text) { //采购单位,代理机构
  2770. for thisTdVals_k, thisTdVals_v := range thisTdVals {
  2771. thisTdVals_v = strings.TrimSpace(thisTdVals_v)
  2772. if ContactType[k].MatchString(thisTdVals_v) { //采购单位,代理机构
  2773. thisTrHasMatch = true
  2774. tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
  2775. continue
  2776. }
  2777. if !ContactInfoMustReg.MatchString(thisTdVals_v) {
  2778. continue
  2779. }
  2780. jumpNextTd = true
  2781. thisTrHasMatch = true
  2782. tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
  2783. }
  2784. break
  2785. }
  2786. }
  2787. if len(indexMap) > 0 {
  2788. _, onlyContactType := u.FirstKeyValueInMap(indexMap)
  2789. if myContactType, _ := onlyContactType.(string); myContactType != "" {
  2790. for thisTdVals_k, thisTdVals_v := range thisTdVals {
  2791. thisTdVals_v = strings.TrimSpace(thisTdVals_v)
  2792. if ContactInfoMustReg.MatchString(thisTdVals_v) {
  2793. jumpNextTd = true
  2794. thisTrHasMatch = true
  2795. tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
  2796. }
  2797. }
  2798. }
  2799. }
  2800. }
  2801. } else {
  2802. jumpNextTd = false
  2803. }
  2804. return jumpNextTd, thisTrHasMatch
  2805. }
  2806. //采购单位,代理机构
  2807. func (tn *Table) tdHasOrderContactType(td_k string, indexMap *map[int]string, tr *TR, prevCanAddToIndexMap, isCanAddToIndexMap, matchPrevFlag *bool, td_index int) (gotoFunc bool) {
  2808. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  2809. if !ContactType[k].MatchString(td_k) {
  2810. continue
  2811. }
  2812. if len(*indexMap) == 0 {
  2813. if (*isCanAddToIndexMap) || (*prevCanAddToIndexMap && len(tr.TDs) == 1) {
  2814. myPrevTdVal := ""
  2815. if td_index-2 >= 0 {
  2816. myPrevTdVal = tr.TDs[td_index-2].Val
  2817. }
  2818. if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
  2819. (*matchPrevFlag) = true
  2820. }
  2821. (*indexMap)[0] = k
  2822. break
  2823. }
  2824. } else {
  2825. (*indexMap) = map[int]string{}
  2826. return false
  2827. }
  2828. }
  2829. return true
  2830. }
  2831. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2832. func (tn *Table) tdKV(key string, matchPrevFlag, isCanAddToIndexMap *bool, indexMap *map[int]string, gotoName string) (goOnFunc, isContinue bool, td_k string) {
  2833. td_k = FilterContactKey(key) //带括号的()[]【】采购单位,代理机构处理
  2834. td_k_length := len([]rune(td_k))
  2835. if td_k_length < 2 || td_k_length > 15 {
  2836. return true, true, "" //继续执行,跳过当前循环
  2837. }
  2838. isContinue = ContactInfoMustReg.MatchString(td_k) //识别采购单位联系人、联系电话、代理机构联系人、联系电话
  2839. if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
  2840. if gotoName == "LS" && !(*matchPrevFlag) && len(*indexMap) > 0 {
  2841. (*indexMap) = map[int]string{}
  2842. return false, false, "" //中断外层循环
  2843. }
  2844. if gotoName == "LL" && len(*indexMap) > 0 {
  2845. (*indexMap) = map[int]string{}
  2846. return false, false, ""
  2847. }
  2848. (*isCanAddToIndexMap) = true
  2849. }
  2850. return true, false, td_k //继续执行,不结束当前循环,返回处理后的值
  2851. }
  2852. //获取td冒号kv
  2853. func (tn *Table) tdkv(td *TD) []*u.Kv {
  2854. thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取冒号kv
  2855. //获取冒号
  2856. if len(thisTdKvs) == 0 {
  2857. tdValue := regReplAllSpace.ReplaceAllString(td.Text, "") //去除空格换行
  2858. if tdValue != "" && len([]rune(tdValue)) < 10 {
  2859. thisTdKvs = append(thisTdKvs, &u.Kv{
  2860. Key: tdValue,
  2861. Value: "",
  2862. })
  2863. }
  2864. }
  2865. return thisTdKvs
  2866. }
  2867. //table中抽取品牌,table.BrandData
  2868. func (table *Table) analyBrand() {
  2869. //5c2d8c05a5cb26b9b782572b
  2870. //产品名称 品牌 规格 单价 单位 数量 小计 质保期
  2871. lineMapArr := make(map[string]*SortMap)
  2872. lineMap := make(map[string]*SortMap)
  2873. brandRule := u.BrandRules
  2874. //初始化lineMapArr,lineMap;
  2875. lineMapArr, lineMap = initLineMapLineMapArr(table) //处理数组数据后,匹配必须title和替换要保存的title
  2876. //qutil.Debug("lineMapArr----", len(lineMapArr))
  2877. if len(lineMapArr) > 0 {
  2878. for _, aMap := range lineMapArr {
  2879. maxNum := 0 //记录最大长度
  2880. arrcount1 := 0 //记录key是否存在必须title(数组数据)
  2881. arrcount2 := 0
  2882. ka := make(map[string][]string) //最终存储数据
  2883. //qutil.Debug("aMap.Keys----", aMap.Keys)
  2884. //匹配商品规则
  2885. arrcount1, arrcount2, ka = table.matchMapArrBrandRule(aMap, brandRule, ka, arrcount1, arrcount2)
  2886. //找最终存储数据的最小len(arr)
  2887. // for _, vf := range ka {
  2888. // //找最短的数组
  2889. // lenVal := len(vf)
  2890. // if minNum == 0 || minNum > lenVal { //maxNum = len(最短数组)
  2891. // minNum = lenVal
  2892. // }
  2893. // }
  2894. //找最终存储数据的最大len(arr),小的补空
  2895. for _, vf1 := range ka {
  2896. lenVal := len(vf1)
  2897. if lenVal > maxNum {
  2898. maxNum = lenVal
  2899. }
  2900. }
  2901. //table.BrandData商品存储
  2902. finishKa := make(map[string][]string)
  2903. for vf2K, vf2 := range ka {
  2904. if len(vf2) < maxNum {
  2905. if vf2K == "unitprice" { //价格的当前总数比最大的总数小就跳过,可能是总价格而不是单个的价格
  2906. continue
  2907. }
  2908. lenMv := maxNum - len(vf2)
  2909. for i := 0; i < lenMv; i++ {
  2910. vf2 = append(vf2, "")
  2911. }
  2912. }
  2913. finishKa[vf2K] = vf2
  2914. }
  2915. hasKey(table, arrcount1) //是否匹配到table中的标题
  2916. //qutil.Debug("finishKa----", finishKa)
  2917. if arrcount1 >= 1 {
  2918. if arrcount1+arrcount2 == 1 { //删除只匹配到一个价钱(总价)
  2919. delete(finishKa, "unitprice")
  2920. }
  2921. finishData := dealArrData(maxNum, finishKa)
  2922. table.BrandData = append(table.BrandData, finishData) //存储了table.BrandData
  2923. }
  2924. }
  2925. }
  2926. //处理string数据后,匹配必须title和替换要保存的title
  2927. //qutil.Debug("lineMap----", len(lineMap))
  2928. if len(lineMap) > 0 {
  2929. for _, sMap := range lineMap {
  2930. strcount1 := 0 //记录key是否存在必须title(字符串数据)
  2931. strcount2 := 0
  2932. endStrMap := make(map[string]string)
  2933. //qutil.Debug(k, "aMap.Keys----", sMap.Keys)
  2934. //匹配商品规则
  2935. strcount1, strcount2, endStrMap = table.matchMapBrandRule(sMap, brandRule, endStrMap, strcount1, strcount2)
  2936. //原始字符串数据处理
  2937. hasKey(table, strcount1) //是否匹配到table中的标题
  2938. //qutil.Debug("endStrMap----", endStrMap)
  2939. if strcount1 >= 1 {
  2940. if strcount1+strcount2 == 1 { //删除只匹配到一个价钱(总价)
  2941. delete(endStrMap, "unitprice")
  2942. }
  2943. finishData := dealStrData(endStrMap) //处理数据
  2944. if len(finishData) > 0 {
  2945. table.BrandData = append(table.BrandData, finishData)
  2946. }
  2947. }
  2948. }
  2949. }
  2950. }
  2951. //字符串匹配商品规则
  2952. func (table *Table) matchMapBrandRule(sMap *SortMap, brandRule map[string]map[string]string, endStrMap map[string]string, strcount1, strcount2 int) (int, int, map[string]string) {
  2953. for _, k1 := range sMap.Keys {
  2954. match := false //记录must是否匹配到
  2955. v1 := qutil.ObjToString(sMap.Map[k1])
  2956. // for k1, v1 := range sMap {
  2957. //qutil.Debug(k1, "++++++++++", v1)
  2958. if v1 == "" {
  2959. continue
  2960. }
  2961. //匹配必须title
  2962. for nameM, r := range brandRule["must"] {
  2963. if convert(k1, r) { //匹配成功
  2964. v1tmp1 := v1
  2965. match = true
  2966. if nameM == "itemname" || nameM == "modal" { //特殊处理itemname
  2967. hasGoods(table, v1)
  2968. if nameM == "itemname" {
  2969. v1tmp1 = filterItem(v1)[0] //过滤itemname
  2970. if v1tmp1 == "" {
  2971. break
  2972. }
  2973. }
  2974. }
  2975. if nameM == "brandname" || nameM == "modal" { //特殊处理brandname
  2976. if endStrMap["brandname"] == "" {
  2977. brand, allNull := hasBrand(table, v1)
  2978. if !allNull {
  2979. endStrMap["brandname"] = brand[0]
  2980. }
  2981. }
  2982. }
  2983. //unitprice
  2984. if nameM == "unitprice" { //处理金额
  2985. v1tmp1 = dealPrice(k1, v1)[0]
  2986. }
  2987. if nameM != "brandname" && endStrMap[nameM] == "" {
  2988. endStrMap[nameM] = v1tmp1
  2989. }
  2990. strcount1++
  2991. }
  2992. }
  2993. //替换其它要保存字段
  2994. if !match {
  2995. for nameR, r := range brandRule["replace"] {
  2996. if convert(k1, r) { //匹配成功
  2997. v1tmp2 := v1
  2998. //totalprice
  2999. if nameR == "totalprice" { //处理金额
  3000. v1tmp2 = dealPrice(k1, v1)[0]
  3001. }
  3002. //number
  3003. if nameR == "number" { //处理数量
  3004. varr1, uname1 := dealNumber(v1)
  3005. v1tmp2 = varr1[0]
  3006. //从number中获取到的单位
  3007. if endStrMap["unitname"] == "" && uname1[0] != "" {
  3008. endStrMap["unitname"] = uname1[0]
  3009. }
  3010. }
  3011. if v1tmp2 != "" {
  3012. endStrMap[nameR] = v1tmp2
  3013. }
  3014. strcount2++
  3015. }
  3016. }
  3017. }
  3018. //}
  3019. }
  3020. return strcount1, strcount2, endStrMap
  3021. }
  3022. //数组匹配商品规则
  3023. func (table *Table) matchMapArrBrandRule(aMap *SortMap, brandRule map[string]map[string]string, ka map[string][]string, arrcount1, arrcount2 int) (int, int, map[string][]string) {
  3024. for _, k0 := range aMap.Keys {
  3025. match := false //记录must是否匹配到
  3026. v0 := aMap.Map[k0].([]string)
  3027. //匹配必须title
  3028. for nameM, r := range brandRule["must"] {
  3029. if convert(k0, r) { //匹配成功
  3030. v0tmp1 := v0
  3031. match = true
  3032. if len(ka[nameM]) != 0 && strings.Contains(k0, "描述") { //防止k0匹配到多次 和特殊情况 物料名称 物料描述同时出现
  3033. continue
  3034. }
  3035. if nameM == "itemname" || nameM == "modal" {
  3036. hasGoods(table, v0...) //判断itemname和modal中有没有商品
  3037. if nameM == "itemname" {
  3038. v0tmp1 = filterItem(v0...) //过滤itemname
  3039. }
  3040. }
  3041. if nameM == "brandname" || nameM == "modal" {
  3042. if len(ka["brandname"]) == 0 {
  3043. brand, allNull := hasBrand(table, v0...)
  3044. if !allNull {
  3045. ka["brandname"] = brand
  3046. }
  3047. }
  3048. }
  3049. //unitprice
  3050. if nameM == "unitprice" { //处理金额
  3051. v0tmp1 = dealPrice(k0, v0...)
  3052. }
  3053. if nameM != "brandname" && len(ka[nameM]) == 0 {
  3054. ka[nameM] = v0tmp1
  3055. }
  3056. arrcount1++
  3057. }
  3058. }
  3059. //替换其它要保存字段
  3060. if !match { //must未匹配,匹配replace
  3061. for nameR, r := range brandRule["replace"] {
  3062. if convert(k0, r) { //匹配成功
  3063. v0tmp2 := v0
  3064. //totalprice
  3065. if nameR == "totalprice" { //处理金额
  3066. v0tmp2 = dealPrice(k0, v0...)
  3067. }
  3068. //number
  3069. if nameR == "number" { //处理数量
  3070. uname0 := []string{}
  3071. v0tmp2, uname0 = dealNumber(v0...)
  3072. if len(ka["unitname"]) == 0 && len(uname0) != 0 {
  3073. ka["unitname"] = uname0
  3074. }
  3075. }
  3076. if len(v0tmp2) > 0 {
  3077. ka[nameR] = v0tmp2
  3078. }
  3079. arrcount2++
  3080. }
  3081. }
  3082. }
  3083. }
  3084. return arrcount1, arrcount2, ka
  3085. }
// initLineMapLineMapArr splits the table's key/values into two groups of
// ordered maps, both keyed by the substring of the column key that the
// `underline` pattern matches (the sample keys in the comments below carry
// trailing underscores):
//
//   - lineMapArr holds columns whose value is a []string;
//   - lineMap holds columns whose value is a string.
//
// Values are cleaned with filterVal first; columns whose values are all empty
// after cleaning are skipped, as are values of any other type.
func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
	lineMapArr = make(map[string]*SortMap)
	lineMap = make(map[string]*SortMap)
	for _, key := range table.SortKV.Keys { // range over Keys rather than Map to keep the table header order
		val := table.SortKV.Map[key]
		key = regReplAllSpace.ReplaceAllString(key, "")
		// Handles one special "采购量" key that upstream whitespace stripping
		// did not clean.
		// NOTE(review): as written this replaces "" with "", which is a no-op;
		// the first argument presumably contained an invisible character that
		// has been lost — confirm against the original file.
		key = strings.Replace(key, "", "", -1)
		if realTypeVal, ok := val.([]string); ok { // array value, e.g. {"数量":["1","2","3"]}
			/*
				{
				"商品":["",""],
				"商品_"["",""],
				}
			*/
			// filterVal cleans the elements in place, so the slice stored in
			// table.SortKV.Map is modified as well.
			valArr, allempty := filterVal(realTypeVal...) // clean the values
			if allempty {
				continue
			}
			realTypeVal = valArr
			line := underline.FindString(key)
			lineValMap1 := lineMapArr[line]
			// i := 1
			// L:
			// for { // strip empty trailing array entries
			// last := realTypeVal[len(realTypeVal)-i]
			// if last == "" {
			// i++
			// if i > len(realTypeVal) {
			// break
			// }
			// goto L
			// } else {
			// break
			// }
			// }
			// dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] // strip empty array entries
			if len(realTypeVal) > 0 {
				if lineValMap1 == nil {
					// First column of this group: create its ordered map.
					tmp := NewSortMap()
					tmp.AddKey(key, realTypeVal)
					lineMapArr[line] = tmp
				} else {
					lineValMap1.AddKey(key, realTypeVal)
				}
			}
			//qutil.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
		} else if realTypeVal, b := val.(string); b { // string value, e.g. {"数量":"1"}
			/*
				{
				"商品:"",名称:"",
				"商品_:"",名称_:"",
				"商品__:"",名称__:"",
				}
			*/
			valArr, allempty := filterVal(realTypeVal) // clean the value
			if allempty {
				continue
			}
			realTypeVal = valArr[0]
			line := underline.FindString(key)
			lineValMap2 := lineMap[line]
			if lineValMap2 == nil {
				// First column of this group: create its ordered map.
				tmp := NewSortMap()
				tmp.AddKey(key, realTypeVal)
				lineMap[line] = tmp
			} else {
				lineValMap2.AddKey(key, realTypeVal)
			}
			//qutil.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
		} else {
			// Any other value type is ignored. Sample documents that reach this branch:
			// "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
			//成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
			//qutil.Debug("err data:", key, val)
		}
	}
	return lineMapArr, lineMap
}
  3164. func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
  3165. for k2, v2 := range ka {
  3166. //处理数组长度不相等,使长度一致
  3167. if len(v2) > maxNum {
  3168. ka[k2] = v2[:maxNum]
  3169. }
  3170. }
  3171. finalData := assembleData(ka, 1)
  3172. if len(finalData) > 0 {
  3173. return finalData
  3174. }
  3175. return nil
  3176. }
  3177. func dealStrData(kv map[string]string) []map[string]string {
  3178. finalData := []map[string]string{}
  3179. if len(kv) > 0 {
  3180. finalData = assembleData(kv, 2)
  3181. }
  3182. return finalData
  3183. }
  3184. //组装数据,每一行的数据为一数据集合
  3185. func assembleData(m interface{}, n int) []map[string]string {
  3186. defer qutil.Catch()
  3187. /*
  3188. {
  3189. "itemname":["计算机","打印机","机柜"],
  3190. "number" :["1","12","4"]
  3191. }
  3192. */
  3193. datas := []map[string]string{}
  3194. if n == 1 { //数组数据
  3195. realTypeM := m.(map[string][]string)
  3196. //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  3197. /*
  3198. arr1 ["a1","b1","c1"]
  3199. arr2 ["a2","b2","c2"]
  3200. [
  3201. {"a1","a2"},
  3202. {"b1","b2"},
  3203. {"c1","c2"}
  3204. ]
  3205. */
  3206. //start
  3207. for k3, v3 := range realTypeM {
  3208. for _, val := range v3 {
  3209. data := make(map[string]string)
  3210. data[k3] = val
  3211. datas = append(datas, data)
  3212. }
  3213. break
  3214. }
  3215. for i, data := range datas {
  3216. for k4, v4 := range realTypeM {
  3217. if i < len(v4) { //数组数据长度不一致
  3218. if v4[i] != " " {
  3219. data[k4] = v4[i]
  3220. } else {
  3221. delete(data, k4)
  3222. }
  3223. } else {
  3224. fmt.Println("err table")
  3225. }
  3226. }
  3227. datas[i] = data
  3228. }
  3229. //end
  3230. for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  3231. for fmk, fmv := range fdv {
  3232. if tabletdclear.ReplaceAllString(fmv, "") == "" {
  3233. delete(fdv, fmk)
  3234. }
  3235. }
  3236. }
  3237. } else { //字符串数据
  3238. realTypeM := m.(map[string]string)
  3239. datas = append(datas, realTypeM)
  3240. }
  3241. return datas
  3242. }
  3243. ////组装数据,每一行的数据为一数据集合
  3244. //func assembleData(m interface{}, n int) []map[string]string {
  3245. // defer qutil.Catch()
  3246. // /*
  3247. // {
  3248. // "itemname":["计算机","打印机","机柜"],
  3249. // "number" :["1","12","4"]
  3250. // }
  3251. // */
  3252. // datas := []map[string]string{}
  3253. // switch reflect.TypeOf(m).String() {
  3254. // case "map[string][]string": //数组数据
  3255. // realTypeM := m.(map[string][]string)
  3256. // //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  3257. // /*
  3258. // arr1 ["a1","b1","c1"]
  3259. // arr2 ["a2","b2","c2"]
  3260. // [
  3261. // {"a1","a2"},
  3262. // {"b1","b2"},
  3263. // {"c1","c2"}
  3264. // ]
  3265. // */
  3266. // //start
  3267. // for k3, v3 := range realTypeM {
  3268. // for _, val := range v3 {
  3269. // data := make(map[string]string)
  3270. // data[k3] = val
  3271. // datas = append(datas, data)
  3272. // }
  3273. // break
  3274. // }
  3275. // for i, data := range datas {
  3276. // for k4, v4 := range realTypeM {
  3277. // if i < len(v4) { //数组数据长度不一致
  3278. // if v4[i] != " " {
  3279. // data[k4] = v4[i]
  3280. // } else {
  3281. // delete(data, k4)
  3282. // //continue
  3283. // }
  3284. // } else {
  3285. // fmt.Println("err table")
  3286. // //continue
  3287. // }
  3288. // }
  3289. // datas[i] = data
  3290. // }
  3291. // //end
  3292. // // for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  3293. // // for fmk, fmv := range fdv {
  3294. // // if tabletdclear.ReplaceAllString(fmv, "") == "" {
  3295. // // delete(fdv, fmk)
  3296. // // }
  3297. // // }
  3298. // // }
  3299. // case "map[string]string": //字符串数据
  3300. // realTypeM := m.(map[string]string)
  3301. // datas = append(datas, realTypeM)
  3302. // default:
  3303. // }
  3304. // return datas
  3305. //}
  3306. func convert(key, r string) bool {
  3307. defer qutil.Catch()
  3308. flag := false
  3309. key = tabletitleclear.ReplaceAllString(key, "")
  3310. reg, err := regexp.Compile(r)
  3311. if err != nil {
  3312. fmt.Println("reg err:", err)
  3313. return false
  3314. }
  3315. flag = reg.MatchString(key)
  3316. return flag
  3317. }
  3318. func hasKey(table *Table, n int) {
  3319. defer qutil.Catch()
  3320. if table.TableResult.HasKey == 1 {
  3321. return
  3322. }
  3323. if n >= 1 {
  3324. table.TableResult.HasKey = 1
  3325. }
  3326. }
  3327. func hasGoods(table *Table, data ...string) {
  3328. defer qutil.Catch()
  3329. goodsArr := make([]string, len(data))
  3330. //fmt.Println("table.TableResult.HasGoods=====", table.TableResult.HasGoods)
  3331. if table.TableResult.HasGoods == 1 {
  3332. return
  3333. }
  3334. for i, d := range data {
  3335. if d != "" {
  3336. goods := u.GoodsGet.CheckSensitiveWord(d)
  3337. //fmt.Println("goods======", goods)
  3338. goodsArr[i] = goods
  3339. if len(goods) > 0 {
  3340. table.TableResult.HasGoods = 1
  3341. break
  3342. }
  3343. }
  3344. }
  3345. }
  3346. //func hasBrand(table *Table, data ...string) {
  3347. // defer qutil.Catch()
  3348. // if table.TableResult.HasBrand == 1 {
  3349. // return
  3350. // }
  3351. // for i, d := range data {
  3352. // if d != "" {
  3353. // brand := u.BrandGet.CheckSensitiveWord(d)
  3354. // qutil.Debug(d, brand)
  3355. // if brand != "" {
  3356. // table.TableResult.HasBrand = 1
  3357. // break
  3358. // }
  3359. // }
  3360. // }
  3361. //}
  3362. func hasBrand(table *Table, data ...string) ([]string, bool) {
  3363. defer qutil.Catch()
  3364. //fmt.Println("table.TableResult.HasBrand---------", table.TableResult.HasBrand)
  3365. brandArr := make([]string, len(data))
  3366. // if table.TableResult.HasBrand == 1 {
  3367. // return brandArr, 1
  3368. // }
  3369. allNull := true
  3370. for i, d := range data {
  3371. //if d != "" {
  3372. brand := u.BrandGet.CheckSensitiveWord(d)
  3373. if brand != "" {
  3374. allNull = false
  3375. }
  3376. //fmt.Println("brand======", brand)
  3377. brandArr[i] = brand
  3378. if len(brand) > 0 {
  3379. table.TableResult.HasBrand = 1
  3380. }
  3381. //}
  3382. }
  3383. return brandArr, allNull
  3384. }
  3385. //过滤td值
  3386. func filterVal(val ...string) ([]string, bool) {
  3387. defer qutil.Catch()
  3388. n := 0 //记录被过滤的个数
  3389. for i, v := range val {
  3390. afterFilter := tabletdclear.ReplaceAllString(v, "")
  3391. afterFilter = NullVal.ReplaceAllString(afterFilter, "")
  3392. if afterFilter == "" {
  3393. n++
  3394. }
  3395. val[i] = afterFilter
  3396. }
  3397. allempty := false
  3398. if n == len(val) { //所有都被过滤掉
  3399. allempty = true
  3400. }
  3401. return val, allempty
  3402. }
  3403. //过滤itemname全是数字
  3404. func filterItem(itemval ...string) []string {
  3405. defer qutil.Catch()
  3406. result := []string{}
  3407. for _, v := range itemval {
  3408. afterFilter := numclear.ReplaceAllString(v, "")
  3409. if afterFilter != "" {
  3410. result = append(result, v)
  3411. } else {
  3412. result = append(result, afterFilter)
  3413. }
  3414. }
  3415. return result
  3416. }
  3417. //处理价格
  3418. func dealPrice(key string, val ...string) []string {
  3419. defer qutil.Catch()
  3420. iswan := strings.Contains(key, "万") //表格title中带有万
  3421. result := []string{}
  3422. for _, v := range val { //1.00万元 1元
  3423. tmparr := strings.Split(v, ".")
  3424. tmparr[0] = moneyNum.ReplaceAllString(tmparr[0], "")
  3425. if iswan {
  3426. result = append(result, tmparr[0]+"0000")
  3427. } else {
  3428. if strings.Contains(v, "万") { //价格中带有万
  3429. result = append(result, tmparr[0]+"0000")
  3430. } else {
  3431. result = append(result, tmparr[0])
  3432. }
  3433. }
  3434. }
  3435. return result
  3436. }
  3437. //处理number
  3438. func dealNumber(val ...string) ([]string, []string) {
  3439. defer qutil.Catch()
  3440. unitnameArr := []string{}
  3441. result := []string{}
  3442. for _, v := range val { //1个 1.00个
  3443. n := numclear.FindString(v)
  3444. unitname := numclear.ReplaceAllString(v, "") //匹配个数后的单位
  3445. unitnameArr = append(unitnameArr, unitname)
  3446. //val[i] = strings.Split(n, ".")[0]
  3447. result = append(result, strings.Split(n, ".")[0])
  3448. }
  3449. return result, unitnameArr
  3450. }
// analyProNameAndItemNumber is an empty placeholder: it has no body and
// performs no work.
func (tn *Table) analyProNameAndItemNumber() {
}