analytable.go 128 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086
  1. package pretreated
  2. import (
  3. "fmt"
  4. "jy/clear"
  5. u "jy/util"
  6. qutil "qfw/util"
  7. "regexp"
  8. "strings"
  9. "unicode/utf8"
  10. "github.com/PuerkitoBio/goquery"
  11. )
  12. /**
  13. 全局变量,主要是一堆判断正则
  14. **/
  15. var (
  16. //key 的日期单位
  17. dateReg *regexp.Regexp = regexp.MustCompile(`[年|月|日|天]`)
  18. //清理品目中数字
  19. numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
  20. num1 = regexp.MustCompile("(\\d)")
  21. //清理表格title中的不需要的内容
  22. tabletitleclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、.,.。_/((人民币万元件个公斤户))]")
  23. tabletitleclear2 = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕]*")
  24. //清理表格中是key中包含的空格或数字等
  25. tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
  26. //清理表格td中的符号
  27. tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
  28. //判断key是金额,对万元的处理
  29. moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
  30. //key不需要清理-例如折扣 费率
  31. no_clear_key_reg = regexp.MustCompile(`[((](费率|年|月|日|天|日历天|历天)[))]`)
  32. //根据表格的内容判断是不是表头,如果含有金额则不是表头
  33. MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
  34. GSReg = regexp.MustCompile(".*公司.*")
  35. //判断分包时
  36. moneyNum = regexp.MustCompile("[元整¥万]")
  37. //对隐藏表格的判断
  38. display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
  39. //---------------
  40. //求是分包的概率
  41. //根据表格的标签对分包进行打分
  42. TableMultiPackageReg_4 = regexp.MustCompile("(标段|分包|包段|划分|子包|标包|合同段)")
  43. TableMultiPackageReg_2 = regexp.MustCompile("(概况|范围|情况|内容|详细|结果|信息)")
  44. //在判断分包打分前过虑表格key
  45. FilterKey_2 = regexp.MustCompile("招标|投标|项目")
  46. //根据表格的key进行分包打分
  47. FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数]|包[组件])")
  48. FindKey_3 = regexp.MustCompile("(标段编号|标包)")
  49. //对值进行分包判断
  50. FindVal_1 = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
  51. FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
  52. //判断分包前排除 包件号?
  53. excludeKey = regexp.MustCompile("(标识|数量|分包个数|标段代码|涉及包号|分包数量|项目标号|规格|型号|招标范围|业绩|废标|标段选择要求)|(^编号$)|([^包段标]编号)") //编号|划分
  54. excludeKey2 = regexp.MustCompile("包/[0-9]{0,4}[箱纸张]")
  55. excludeKey3 = regexp.MustCompile("(分包个数|每包[0-9]*元|标线|国标|享受一包服务)")
  56. //-------------
  57. cut = u.NewCut()
  58. //清理表格标签正则
  59. ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
  60. //查找表格标签正则
  61. ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
  62. //判断表格是表头的概率
  63. checkval = float32(0.6)
  64. //tdval_reg = regexp.MustCompile(`([\p{Han}][\p{Han}\s、()\\(\\)]{1,9})[::]([^::\\n。]{5,60})(?:[;;,,.。\\n\\t\\s])?`)
  65. //空格替换
  66. repSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0::]+|\\\\t+")
  67. //对表格kv的处理
  68. //对不能标准化的key做批识
  69. filter_tag_zb = regexp.MustCompile("(中标|成交|投标)[\\p{Han}]{0,6}(情况|结果|信息|明细)?")
  70. //中标金额
  71. //包含以下字眼做标准化处理
  72. filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
  73. //简单判断金额
  74. filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
  75. //预算金额
  76. filter_ysje_jd = regexp.MustCompile("(预算|预控价|项目概.|项目信息)")
  77. //且排队以下字眼的key
  78. filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|电话|要求|需求数量|发布规模$|第[2二3三4四5五]|地址|询价保证金|行号")
  79. //且值包含以下字眼
  80. filter_zbje_v = regexp.MustCompile("[¥$$0-9一二三四五六七八九十,,〇零点..壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正()::大小写]{2,16}")
  81. //中标单位的处理
  82. //包含以下字眼的Key标准化
  83. filter_zbdw_ky = regexp.MustCompile("(中标|成交|拦标|合同|选中|投标|拟|预|最终)[\\p{Han}、]{0,6}(供[应货]商|企业|单位|人|机构)(名称)?.{0,4}$")
  84. //识别中标单位相关信息
  85. filter_zbdw_info = regexp.MustCompile("(中标|成交|中选|供(货|应))[^候选]{0,}")
  86. //简单判断
  87. filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
  88. //且不包含以下字眼
  89. filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金") //且值包含以下字眼
  90. //且值包含以下字眼
  91. filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
  92. //且值包含以下字眼
  93. filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
  94. //Tg = map[string]interface{}{}
  95. //一些表格没有表头,是空的,对值是排序的做处理对应 NullTxBid
  96. NullTdReg = regexp.MustCompile("(首选|第[一二三四五1-5])(中标|成交)?(名(称)?|(候选|排序)?(人|单位|供应商))")
  97. NullTxtBid = "成交供应商排名"
  98. projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
  99. MhSpilt = regexp.MustCompile("[::]")
  100. //识别采购单位联系人、联系电话、代理机构联系人、联系电话 -- 名称有异常
  101. ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//及]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
  102. ContactInfoMustReg = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
  103. ContactType = map[string]*regexp.Regexp{
  104. "采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
  105. "代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
  106. "中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应))((成交))?)[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
  107. "监督部门": regexp.MustCompile("投诉受理部门"),
  108. }
  109. ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
  110. MultipleValueSplitReg = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
  111. BuyerContacts = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
  112. FilterSerial = regexp.MustCompile(".+[、..::,]")
  113. underline = regexp.MustCompile("_+$")
  114. iswinnertabletag = regexp.MustCompile("(中标|候选人|成交|结果|磋商情况)")
  115. nswinnertabletag = regexp.MustCompile("评得分估|标的信息|班子成员")
  116. jsonReg = regexp.MustCompile(`\{.+:[^}]*\} `) // \{".*\":\".+\"}
  117. regHz = regexp.MustCompile("[\u4e00-\u9fa5]")
  118. winnerOrderAndBidResult = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
  119. WinnerOrderStr = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|[大中小]+学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
  120. DoubtReg = regexp.MustCompile("(我中心|有(疑问|质疑|异议|意见)|(书面)?提出|不再受理|投诉|质疑|书面形式|监督|公示期(限)?)")
  121. )
  122. //在解析时,判断表格元素是否隐藏
  123. func IsHide(g *goquery.Selection) (b bool) {
  124. style, exists := g.Attr("style")
  125. if exists {
  126. b = display.MatchString(style)
  127. }
  128. return
  129. }
  130. //59.992664,33.495715,20.001306
  131. var clearnum *regexp.Regexp = regexp.MustCompile("(([0-9.]{1,6}[,,]+){4,}|(\\d{6}[,,]\\d{2}.){2,})")
  132. //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
  133. //待扩展,暂不支持正则标签库 清理key
  134. func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool, codeSite string) (kvTags map[string][]*u.Tag, returntag string) {
  135. kvTags = map[string][]*u.Tag{}
  136. v1 := ""
  137. if sv, sok := v.(string); sok { //取KV
  138. v1 = sv
  139. } else if sv, sok := v.([]string); sok { //是数组先默认取第一个
  140. if len(sv) >= 1 {
  141. v1 = sv[0]
  142. }
  143. }
  144. //对值单位的处理 (预算|费|价|额|规模|投资)
  145. if moneyreg.MatchString(k) {
  146. v1 += GetMoneyUnit(k, v1)
  147. }
  148. //先清理key
  149. //u.Debug(1, k, v1)
  150. //指定-key不清理 拦标价(费率或单价等)
  151. k1:=""
  152. if !no_clear_key_reg.MatchString(k) {
  153. k1 = ClearKey(k, 2)
  154. }
  155. //u.Debug(2, k)
  156. //取标准key
  157. if tabletag == "中标情况" {
  158. if k1=="价格" {
  159. k1="中标金额"
  160. }
  161. }
  162. res := u.GetTags(k1, isSite, codeSite)
  163. if len(res) == 0 && k1 != k {
  164. res = u.GetTags(k, isSite, codeSite)
  165. k1 = k
  166. }
  167. //log.Println(k, res)
  168. // if len(res) == 0 {
  169. // go u.AddtoNoMatchMap(tk)
  170. // }
  171. //当取到标准化值时,放入数组
  172. if len(res) > 0 {
  173. for _, t1 := range res {
  174. //降低冒号值的权重
  175. if MhSpilt.MatchString(v1) {
  176. t1.Weight -= 50
  177. }
  178. if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
  179. kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
  180. } else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" {
  181. kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 150})
  182. } else {
  183. kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
  184. }
  185. }
  186. res[0].IsInvalid = true
  187. //k1 = res[0].Value
  188. } /*else {
  189. kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
  190. //没有取到标准化key时,对中标金额和中标单位的逻辑处理
  191. if filter_zbje_k.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) && utf8.RuneCountInString(v1) < 20 {
  192. if tabletag == "" {
  193. returntag = "中标情况"
  194. }
  195. kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100, IsInvalid: true})
  196. } else if filter_zbdw_ky.MatchString(k) && !filter_zbdw_kn.MatchString(k) &&
  197. filter_zbdw_v.MatchString(v1) {
  198. kvTags["中标单位"] = append(kvTags["中标单位"], &u.Tag{Key: k, Value: v1, Weight: -100, IsInvalid: true})
  199. if tabletag == "" {
  200. returntag = "中标情况"
  201. }
  202. } else if !filter_zbje_jd.MatchString(tabletag) && !filter_zbje_jd.MatchString(k) && utf8.RuneCountInString(v1) < 13 {
  203. //对上一步没有取到标准化key的进一步处理
  204. if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
  205. //u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
  206. if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
  207. if filter_ysje_jd.MatchString(k) {
  208. kvTags["预算金额"] = append(kvTags["预算金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
  209. } else if !filter_zbdw_kn.MatchString(k) {
  210. kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
  211. }
  212. }
  213. }
  214. }
  215. }*/
  216. return
  217. }
  218. var glRex *regexp.Regexp = regexp.MustCompile("(成交|中标|候选|排名|名次|供应商排序|中标候选人|名单及其排序|排序)")
  219. var djReg *regexp.Regexp = regexp.MustCompile("^单价")
  220. //对解析后的表格的kv进行过滤
  221. func (table *Table) KVFilter(isSite bool, codeSite string) {
  222. //1.标准化值查找
  223. //2.对数组的处理
  224. //3.对分包的处理
  225. //4.对KV的处理
  226. //判断表格是否有用,调用abandontable正则数组进行判断
  227. //遍历每一行
  228. table.analyTdKV() //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
  229. as := NewSortMap()
  230. //遍历table.sortkv,进行过滤处理,并放入标准化KV中,如果值是数组跳到下一步处理
  231. for _, k := range table.SortKV.Keys {
  232. //遍历所有key sort.kv
  233. //表格描述处理,对成交结果的处理
  234. if k=="第一询价结果候选人" {
  235. //fmt.Println("标准化key")
  236. }
  237. if glRex.MatchString(k) {
  238. table.Desc += "成交结果,"
  239. }
  240. if djReg.MatchString(k) {
  241. continue
  242. }
  243. v := table.SortKV.Map[k]
  244. if _, ok := v.(string); ok { //table.SortKV.Value为字符串,匹配抽取关键词table.SortKV.Key,匹配到添加k,v到table.StandKV,table.StandKVWeight
  245. k = pkgFilter.ReplaceAllString(k, "")
  246. k = regSpliteSegment.ReplaceAllString(regReplAllSpace.ReplaceAllString(k, ""), "")
  247. kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite) //对key标准化处理,没有找到会走中标
  248. if tag != "" && table.Tag == "" {
  249. table.Tag = tag
  250. }
  251. MergeKvTags(table.StandKV, kvTags)
  252. } else {
  253. as.AddKey(k, v)
  254. }
  255. }
  256. //核心-候选人-相关
  257. //处理值是数组的kv 放入标准化kv中 standKV //处理table.SortKV.value为数组的情况
  258. table.sortKVArr(as, isSite, codeSite)
  259. //
  260. if len(table.WinnerOrder) > 0 || !table.BPackage {
  261. winnerOrder := []map[string]interface{}{}
  262. maxSort := 0
  263. //调整顺序
  264. for i := 0; i < 2; i++ {
  265. for _, v := range table.WinnerOrder {
  266. sortstr, _ := v["sortstr"].(string)
  267. if (i == 0 && sortstr == "") || (i == 1 && sortstr != "") {
  268. continue
  269. }
  270. sort, _ := v["sort"].(int)
  271. if i == 0 {
  272. if maxSort == 0 || sort > maxSort {
  273. maxSort = sort
  274. }
  275. } else {
  276. maxSort++
  277. v["sort"] = maxSort
  278. }
  279. winnerOrder = append(winnerOrder, v)
  280. }
  281. if len(winnerOrder) == len(table.WinnerOrder) {
  282. break
  283. }
  284. }
  285. table.WinnerOrder = winnerOrder
  286. if len(table.WinnerOrder) == 0 {
  287. winnerOrder = []map[string]interface{}{}
  288. //遍历每个td,查询中标人
  289. for _, tr := range table.TRs {
  290. for _, td := range tr.TDs {
  291. winnerOrder = winnerOrderEntity.Find(td.Val, true, 3, isSite, codeSite)
  292. if len(winnerOrder) > 0 {
  293. //中标候选人合并
  294. winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
  295. }
  296. }
  297. }
  298. }
  299. if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
  300. if len(winnerOrder) > 1 {
  301. table.WinnerOrder = winnerOrder
  302. }
  303. }
  304. }
  305. //对中标候选人进行排序
  306. winnerOrderEntity.Order(table.WinnerOrder)
  307. //该表格有一个分包,并且有中标候选人排序的情况下,把中标候选人放到包里面
  308. if table.BlockPackage != nil && table.BlockPackage.Keys != nil && len(table.BlockPackage.Keys) == 1 {
  309. if table.BlockPackage.Map != nil {
  310. onePkgKey := table.BlockPackage.Keys[0]
  311. onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
  312. if onePkg != nil && (onePkg.WinnerOrder != nil || len(onePkg.WinnerOrder) == 0) {
  313. onePkg.WinnerOrder = table.WinnerOrder
  314. table.BlockPackage.AddKey(onePkgKey, onePkg)
  315. }
  316. }
  317. }
  318. }
  319. var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标总价)")
  320. //处理table.SortKV.value为数组的情况
  321. func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
  322. winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
  323. if !winnertag {
  324. winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
  325. }
  326. if !winnertag {
  327. winnertag = iswinnertabletag.MatchString(table.Desc)
  328. }
  329. if !winnertag {
  330. winnertag = iswinnertabletag.MatchString(table.Html)
  331. }
  332. checkKey := map[int]bool{}
  333. //tmpBidmout := []string{}
  334. //log.Println(tmpBidmout)
  335. for kn, k := range as.Keys { //遍历table.SortKV.value为数组的key
  336. v := as.Map[k]
  337. if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
  338. if table.WinnerOrder == nil {
  339. table.WinnerOrder = []map[string]interface{}{}
  340. }
  341. table.WinnerOrder = append(table.WinnerOrder, vm...)
  342. } else {
  343. //增加候选人排序逻辑
  344. if (table.WinnerOrder == nil || len(table.WinnerOrder) == 0) && !checkKey[kn] {
  345. if vs1, ok := v.([]string); ok {
  346. smap := make([]map[string]interface{}, len(vs1))
  347. for n1, _ := range vs1 {
  348. smap[n1] = map[string]interface{}{}
  349. }
  350. //hadSort := false
  351. tmpEntname := make([]string, len(vs1))
  352. tmpPrice := make([]string, len(vs1))
  353. for kn1, k := range as.Keys[kn:] {
  354. v := as.Map[k]
  355. if ContactType["采购单位"].MatchString(k) || ContactType["代理机构"].MatchString(k) {
  356. kvTags, _ := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite) //对key标准化处理,没有找到会走中标
  357. for k := range kvTags {
  358. if table.StandKV[k] == nil {
  359. MergeKvTags(table.StandKV, kvTags)
  360. }
  361. }
  362. continue
  363. }
  364. //目前对数组数据的key做判断,但是某些额可以是不满足情况的
  365. //载明内容:[第一中标候选人 第二中标候选人] id:5d00587da5cb26b9b75e367b
  366. if vs, ok := v.([]string); ok && len(vs) == len(vs1) { //数组值的个数相同
  367. res, _, _, _, repl := CheckCommon(k, "bidorder")
  368. kv := ""
  369. if !res {
  370. kt := u.GetTags(k, isSite, codeSite)
  371. if kt.Len() > 0 {
  372. if kt[0].Value == "单品报价" && winnertag {
  373. kv = "中标金额"
  374. } else {
  375. kv = kt[0].Value
  376. }
  377. }
  378. }
  379. if !res && kv == "" { //key未验证出,验证数组的val值
  380. checkKey[kn+kn1] = true
  381. if winnertag { //如果是中标信息 在根据val数组信息解析候选人
  382. for vsk, vsv := range vs {
  383. if NullTdReg.MatchString(vsv) { //数据先验证val是否有排序
  384. //hadSort = true
  385. smap[vsk]["sortstr"] = vsv
  386. smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
  387. } else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" { //数据验证val是否是候选人
  388. entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  389. if entname != "" {
  390. tmpEntname[vsk] = entname
  391. }
  392. } else if winMoneyReg.MatchString(k) && len(tmpPrice[vsk]) == 0 {
  393. kv = "中标金额"
  394. }else { //验证val时如果数组中的第一条数据既不满足sort或者entname 判定此数组数据错误
  395. break
  396. }
  397. }
  398. }
  399. }
  400. if res || kv != "" { //连续往下找几个key
  401. checkKey[kn+kn1] = true
  402. SORT:
  403. if repl == "sort" {
  404. //hadSort = true
  405. for vsk, vsv := range vs {
  406. smap[vsk]["sortstr"] = vsv
  407. smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
  408. if findCandidate2.MatchString(vsv) && kv == "中标单位" && tmpEntname[vsk] == "" { //数据验证val是否是候选人
  409. entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  410. if entname != "" {
  411. tmpEntname[vsk] = entname
  412. }
  413. }
  414. }
  415. } else if repl == "entname" || kv == "中标单位" {
  416. for vsk, vsv := range vs {
  417. if winnerReg6.MatchString(vsv) { //k:中标候选人 v:["第一名","第二名"]
  418. repl = "sort"
  419. goto SORT
  420. }
  421. //if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 {
  422. // break
  423. //}
  424. //entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  425. //if entname != "" {
  426. // smap[vsk]["entname"] = entname
  427. if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 { //排除 单位:["台","个","套"]
  428. break
  429. }
  430. entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
  431. if entname != "" {
  432. tmpEntname[vsk] = entname
  433. }
  434. }
  435. } else if kv == "中标金额" {
  436. for vsk, vsv := range vs {
  437. //过滤price 2348273.432元(万元)-->2348273.432
  438. //tmp1, _ := smap[vsk]["price"].(string)
  439. tmp1 := tmpPrice[vsk]
  440. p1num := numberReg2.FindString(tmp1)
  441. p2num := numberReg2.FindString(vsv)
  442. p1 := qutil.Float64All(p1num)
  443. p2 := qutil.Float64All(p2num)
  444. if p2 > p1 {
  445. //smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
  446. price := winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
  447. if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 {
  448. tmpPrice[vsk] = pricestr
  449. }
  450. }
  451. }
  452. } else if kv == "预算" {
  453. if strings.Contains(k, "万元") {
  454. for vsk, vsv := range vs {
  455. if !strings.Contains(vsv, "万元") {
  456. vs[vsk] = vsv + "万元"
  457. }
  458. }
  459. }
  460. }
  461. }
  462. } else {
  463. //break
  464. }
  465. }
  466. newSmap := []map[string]interface{}{}
  467. //qutil.Debug("smap=======", smap)
  468. //qutil.Debug("tmpEntname--", len(tmpEntname), tmpEntname)
  469. //qutil.Debug("tmpPrice--", len(tmpPrice), tmpPrice)
  470. for n, smap_v := range smap {
  471. //if hadSort { //有排序,再添加entname和price
  472. if len(tmpEntname) > 0 && n < len(tmpEntname) && tmpEntname[n] != "" {
  473. smap_v["entname"] = tmpEntname[n]
  474. if len(tmpPrice) > 0 && n < len(tmpPrice) && tmpPrice[n] != "" {
  475. smap_v["price"] = tmpPrice[n]
  476. }
  477. }
  478. //} else if len(tmpEntname) > 0 {
  479. //fmt.Println("table winnerorder only has entname", tmpEntname)
  480. //}
  481. //qutil.Debug("len-smap_v--", len(smap_v))
  482. if len(smap_v) > 2 { //只有排序信息 sort和sortstr
  483. newSmap = append(newSmap, smap_v)
  484. }
  485. }
  486. if len(newSmap) > 0 {
  487. table.WinnerOrder = newSmap
  488. }
  489. }
  490. } else if vsss, ok := v.([]string); ok {
  491. if (len(table.WinnerOrder) > 0 && table.WinnerOrder[0]["price"] == nil && len(vsss) == len(table.WinnerOrder)) ||
  492. (len(table.WinnerOrder) > 0 && strings.Contains(k,"总报价") && len(vsss) == len(table.WinnerOrder)){
  493. kv := ""
  494. if winMoneyReg.MatchString(k) {
  495. kv = "中标金额"
  496. } else {
  497. kt := u.GetTags(k, isSite, codeSite)
  498. if kt.Len() > 0 {
  499. if kt[0].Value == "单品报价" && winnertag {
  500. kv = "中标金额"
  501. } else {
  502. kv = kt[0].Value
  503. }
  504. }
  505. }
  506. if kv == "中标金额" {
  507. for i, vx := range vsss {
  508. p1num := numberReg2.FindString(vx)
  509. if strings.Contains(p1num, ",") && strings.Contains(p1num, ".") {
  510. p1num = strings.ReplaceAll(p1num, ",", "")
  511. }
  512. p1 := qutil.Float64All(p1num)
  513. if p1 > 0 {
  514. //smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
  515. price := winnerOrderEntity.clear(kv, vx+GetMoneyUnit(k, vx))
  516. if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 && !clearnum.MatchString(pricestr) {
  517. table.WinnerOrder[i]["price"] = pricestr
  518. }
  519. }
  520. }
  521. }
  522. } else if table.StandKV[k] == nil {
  523. kvTags, _ := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite) //对key标准化处理,没有找到会走中标
  524. MergeKvTags(table.StandKV, kvTags)
  525. }
  526. }
  527. }
  528. }
  529. //特殊处理--组合候选人
  530. }
  531. //1.遍历每行每列td的sortkv添加到table.SorkVK中;2.td有子表格的处理
  532. func (table *Table) analyTdKV() {
  533. //遍历每一行
  534. for _, tr := range table.TRs {
  535. for _, td := range tr.TDs {
  536. //fmt.Println(td.BH, td.MustBH, td.Val, td.SortKV.Map)
  537. bc := false
  538. if !td.BH {
  539. //表头是否是无用内容
  540. if td.HeadTd != nil {
  541. bc, _, _, _, _ = CheckCommon(td.HeadTd.Val, "abandontable")
  542. }
  543. }
  544. if !bc {
  545. //td元素有内嵌kv,遍历放入table的Kv中
  546. if len(td.SortKV.Keys) > 0 {
  547. for _, k3 := range td.SortKV.Keys {
  548. _val := td.SortKV.Map[k3]
  549. //thisFlag := false
  550. if td.HeadTd != nil && len([]rune(k3)) < 4 {
  551. k3 = td.HeadTd.Val + k3
  552. }
  553. if table.SortKV.Map[k3] == nil && _val != nil && _val != "" {
  554. //u.Debug(k3, _val)
  555. //if !thisFlag || (thisFlag && table.SortKV.Map[k3] == nil) {
  556. table.SortKV.AddKey(k3, _val)
  557. }
  558. }
  559. }
  560. }
  561. //td有子表格的处理
  562. //u.Debug(td.BH, td.Val, td.SonTableResult)
  563. if td.SonTableResult != nil {
  564. //u.Debug(td.SonTableResult.SortKV.Map, "-------", td.SonTableResult.Tabs)
  565. for k3, v3 := range td.SonTableResult.KvTags {
  566. table.StandKV[k3] = append(table.StandKV[k3], v3...)
  567. }
  568. //中标候选人排序
  569. if table.WinnerOrder == nil || len(table.WinnerOrder) == 0 {
  570. table.WinnerOrder = td.SonTableResult.WinnerOrder
  571. } else {
  572. winnerOrderEntity.Merge(table.WinnerOrder, td.SonTableResult.WinnerOrder)
  573. }
  574. }
  575. }
  576. }
  577. }
  578. //表格结果合并到父表格集中
  579. func (table *Table) MergerToTableresult() {
  580. //对多包表格的多包值的合并处理
  581. if table.BPackage {
  582. table.TableResult.IsMultiPackage = true
  583. for _, v2 := range table.BlockPackage.Keys {
  584. package1 := table.TableResult.PackageMap.Map[v2]
  585. if package1 == nil {
  586. table.TableResult.PackageMap.AddKey(v2, table.BlockPackage.Map[v2])
  587. if vvv, ok := table.BlockPackage.Map[v2].(*u.BlockPackage); ok {
  588. if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 {
  589. MergeKvTags(table.TableResult.KvTags, vvv.TableKV.KvTags)
  590. }
  591. }
  592. } else {
  593. bp := package1.(*u.BlockPackage)
  594. if bp.TableKV == nil {
  595. bp.TableKV = u.NewJobKv()
  596. }
  597. v1 := table.BlockPackage.Map[v2].(*u.BlockPackage)
  598. if v1.TableKV != nil && len(v1.TableKV.KvTags) > 0 {
  599. for k2, v2 := range v1.TableKV.KvTags {
  600. if k2 == "" {
  601. continue
  602. }
  603. isExists := false
  604. for _, v2v := range v2 {
  605. if v2v.Value == "" {
  606. continue
  607. }
  608. for _, v2vv := range bp.TableKV.KvTags[k2] {
  609. if v2v.Value == v2vv.Value {
  610. isExists = true
  611. break
  612. }
  613. }
  614. if !isExists {
  615. bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
  616. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  617. }
  618. }
  619. }
  620. }
  621. if bp.Bidamount <= 0 && !bp.IsTrueBidamount {
  622. bp.Bidamount = v1.Bidamount
  623. bp.IsTrueBidamount = v1.IsTrueBidamount
  624. }
  625. if bp.Budget <= 0 && !bp.IsTrueBudget {
  626. bp.Budget = v1.Budget
  627. bp.IsTrueBudget = v1.IsTrueBudget
  628. }
  629. bp.Text += bp.Text
  630. if len(v1.WinnerOrder) > 0 && len(bp.WinnerOrder) == 0 {
  631. bp.WinnerOrder = v1.WinnerOrder
  632. }
  633. }
  634. }
  635. }
  636. //遍历标准key到tableresult.sortkv中
  637. for fieldKey, v := range table.StandKV {
  638. for _, vv := range v {
  639. if fieldKey=="项目周期"||fieldKey=="工期单位"||fieldKey=="工期时长" {
  640. dateStr := dateReg.FindString(vv.Key)
  641. if dateStr !="" && !strings.Contains(vv.Value,dateStr) {
  642. vv.Value = vv.Value+dateStr
  643. }
  644. }
  645. vv.Value = strings.Replace(vv.Value, "__", "", -1)
  646. }
  647. }
  648. MergeKvTags(table.TableResult.KvTags, table.StandKV)
  649. //表格的块标签
  650. if table.TableResult.BlockTag == "" && table.Tag != "" {
  651. table.TableResult.BlockTag = table.Tag
  652. }
  653. //中标候选人(多个table,现在默认取第一个table的信息,考虑需不需要多个table分析合并数据?)
  654. if table.TableResult.WinnerOrder == nil || len(table.TableResult.WinnerOrder) == 0 {
  655. table.TableResult.WinnerOrder = table.WinnerOrder
  656. }
  657. //增加brand 并列table
  658. if len(table.BrandData) > 0 {
  659. for _, v := range table.BrandData {
  660. if len(v) > 0 {
  661. table.TableResult.BrandData = append(table.TableResult.BrandData, v)
  662. }
  663. }
  664. }
  665. //抽取prince和number 并列table
  666. if len(table.PriceNumberData) > 0 {
  667. for _, v := range table.PriceNumberData {
  668. if len(v) > 0 {
  669. table.TableResult.PriceNumberData = append(table.TableResult.PriceNumberData, v)
  670. }
  671. }
  672. }
  673. if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 0 {
  674. for _, v := range table.BlockPackage.Keys {
  675. if table.BlockPackage.Map[v] != nil {
  676. if vvv, ok := table.BlockPackage.Map[v].((*u.BlockPackage)); ok {
  677. if vvv.TableKV != nil && len(vvv.TableKV.KvTags) > 0 {
  678. for kk, vv := range vvv.TableKV.KvTags {
  679. if kk == "" {
  680. continue
  681. }
  682. if len(table.TableResult.KvTags[kk]) == 0 {
  683. table.TableResult.KvTags[kk] = vv
  684. }
  685. }
  686. }
  687. }
  688. }
  689. }
  690. }
  691. }
  692. /**
  693. 解析表格入口
  694. 返回:汇总表格对象
  695. **/
  696. func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock, isSite bool, codeSite string) (tabres *TableResult) {
  697. defer qutil.Catch()
  698. //u.Debug(con)
  699. if itype == 1 {
  700. //修复表格
  701. con = RepairCon(con)
  702. }
  703. //生成tableresult对象
  704. tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
  705. if fblbReg.MatchString(blockTag) {
  706. return
  707. }
  708. //可以有多个table
  709. //for _, table := range tabs {
  710. //隐藏表格跳过
  711. if IsHide(tabs) {
  712. return
  713. }
  714. tabres.GoqueryTabs = tabs
  715. //}
  716. //解析表格集
  717. tabres.Analy(isSite, codeSite)
  718. return
  719. }
  720. //开始解析表格集
  721. func (ts *TableResult) Analy(isSite bool, codeSite string) {
  722. tabs := []*Table{}
  723. contactFormat := &u.ContactFormat{
  724. IndexMap: map[int]string{},
  725. MatchMap: map[string]map[string]bool{},
  726. }
  727. //for _, table := range ts.GoqueryTabs {
  728. tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
  729. //核心模块
  730. tsw := tn.Analy(contactFormat, isSite, codeSite)
  731. for _, tab := range tsw {
  732. if len(tab.TRs) > 0 {
  733. tabs = append(tabs, tab)
  734. }
  735. //fmt.Println("tab.SortKV.Map", tab.SortKV.Keys)
  736. }
  737. //tn.SonTables = append(tn.SonTables, tn)
  738. //}
  739. //统一合并,考虑统一多表格是多包的情况---新增 与子表格合并
  740. if len(tabs) > 1 {
  741. pns := map[string]string{}
  742. pnarr := []string{}
  743. for _, table := range tabs {
  744. if len(table.StandKV["项目名称"]) == 0 {
  745. continue
  746. }
  747. pn := table.StandKV["项目名称"][0]
  748. if pn != nil && pn.Value != "" && TitleReg.MatchString(pn.Value) {
  749. pnarr = append(pnarr, pn.Value)
  750. matchres := TitleReg.FindAllStringSubmatch(pn.Value, -1)
  751. if len(matchres) == 1 && len(matchres[0]) > 0 {
  752. v1 := u.PackageNumberConvert(matchres[0][0])
  753. pns[v1] = matchres[0][0]
  754. bp := &u.BlockPackage{}
  755. bp.Index = v1
  756. bp.Origin = matchres[0][0]
  757. if bp.TableKV == nil {
  758. bp.TableKV = u.NewJobKv()
  759. }
  760. for _, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
  761. if len(table.StandKV[k]) > 0 {
  762. bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
  763. }
  764. }
  765. bp.WinnerOrder = table.WinnerOrder
  766. if table.BlockPackage.Map[v1] == nil {
  767. table.BPackage = true
  768. table.BlockPackage.AddKey(v1, bp)
  769. } else {
  770. table.BlockPackage.RemoveKey(v1)
  771. table.BlockPackage.AddKey(v1, bp)
  772. }
  773. }
  774. }
  775. }
  776. if len(tabs) == len(pns) {
  777. //多个表格,每个表格都是一个分包 http://www.cxzwfw.gov.cn/info/1009/6963.htm
  778. //项目名称、项目编号、采购单位、招标机构、预算
  779. pname := projectnameReg.ReplaceAllString(pnarr[0], "")
  780. btrue := true
  781. for _, pn := range pnarr[1:] {
  782. pn = projectnameReg.ReplaceAllString(pn, "")
  783. //u.Debug(pn, pname)
  784. if pn != pname {
  785. //项目名称不一致
  786. btrue = false
  787. break
  788. }
  789. }
  790. if btrue {
  791. ts.KvTags["项目名称"] = append(ts.KvTags["项目名称"], &u.Tag{Key: "项目名称", Value: pname, Weight: 100})
  792. for _, table := range tabs {
  793. table.BPackage = true
  794. //预算、中标金额、NullTxtBid成交供应商排名 中标单位 成交状态
  795. if table.BlockPackage != nil && len(table.BlockPackage.Keys) == 1 {
  796. bp := table.BlockPackage.Map[table.BlockPackage.Keys[0]].(*u.BlockPackage)
  797. if table.TableResult.WinnerOrder != nil {
  798. bp.WinnerOrder = table.WinnerOrder
  799. }
  800. if bp != nil && table.StandKV != nil {
  801. if bp.TableKV == nil {
  802. bp.TableKV = u.NewJobKv()
  803. }
  804. for nk, k := range []string{"中标金额", "中标单位", "预算", "成交状态", "项目名称", "项目编号", "采购范围"} {
  805. if len(table.StandKV[k]) > 0 {
  806. bp.TableKV.KvTags[k] = append(bp.TableKV.KvTags[k], &u.Tag{Key: k, Value: table.StandKV[k][0].Value})
  807. }
  808. if nk < 4 {
  809. delete(table.StandKV, k)
  810. }
  811. }
  812. }
  813. }
  814. }
  815. }
  816. }
  817. }
  818. for _, table := range tabs {
  819. table.MergerToTableresult()
  820. MergeKvTags(ts.KvTags, table.TableResult.KvTags)
  821. if !table.Brule {
  822. ts.isUnRulesTab = true
  823. }
  824. }
  825. }
  826. //解析表格
  827. func (table *Table) Analy(contactFormat *u.ContactFormat, isSite bool, codeSite string) []*Table {
  828. //查找表体中的tr对象
  829. trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
  830. if trs.Size() == 0 {
  831. trs = table.Goquery.ChildrenFiltered("tr")
  832. }
  833. ztb := table.Goquery.Find("table").Size()
  834. if ztb >= 9 {
  835. return []*Table{}
  836. }
  837. //遍历节点,初始化table 结构 TRs Sorts
  838. table.createTabe(trs, isSite, codeSite)
  839. if len(table.TRs) == 0 {
  840. return []*Table{}
  841. }
  842. //重置行列
  843. table.ComputeRowColSpan()
  844. //对table结构体进行整体解析处理
  845. ts := table.AnalyTables(contactFormat, isSite, codeSite)
  846. return ts
  847. }
  848. var fblbReg *regexp.Regexp = regexp.MustCompile("(废标|流标|负责人资格|负责人业绩|相关业绩|技术评分明细表|开标记录|附件[:0-9]|越南盾|技术分|填报项目业绩|未通过.*原因)")
  849. //遍历节点,初始化table 结构体
  850. func (table *Table) createTabe(trs *goquery.Selection, isSite bool, codeSite string) {
  851. trs.Each(func(n int, sel *goquery.Selection) {
  852. //隐藏行不处理
  853. if IsHide(sel) {
  854. return
  855. }
  856. //遍历每行的td
  857. tds := sel.ChildrenFiltered("td,th")
  858. TR := NewTR(table)
  859. tdTextIsNull := false
  860. var empty int
  861. tds.Each(func(m int, selm *goquery.Selection) {
  862. //对隐藏列不处理!!!
  863. if IsHide(selm) {
  864. return
  865. }
  866. //进入每一个单元格
  867. td := NewTD(selm, TR, table, isSite, codeSite) //初始化td,kv处理,td中有table处理,td的方向
  868. //num++
  869. TR.AddTD(td)
  870. if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
  871. empty++
  872. if tds.Size() == empty {
  873. tdTextIsNull = true
  874. }
  875. }
  876. })
  877. //向table添加每行不为空的tr
  878. if !tdTextIsNull {
  879. table.AddTR(TR)
  880. }
  881. })
  882. }
  883. //对table进行整体解析处理
  884. func (tn *Table) AnalyTables(contactFormat *u.ContactFormat, isSite bool, codeSite string) []*Table {
  885. ts := tn.tableSubDemolitionTable() //分包,拆表
  886. for n, table := range ts {
  887. //处理每个table
  888. if len(table.TRs) > 0 {
  889. //删除尾部空白行
  890. table.deleteTrimTr()
  891. //table.Print()
  892. //校对表格
  893. table.Adjust(isSite, codeSite)
  894. //查找表格的标签,table.Tag字段
  895. table.FindTag()
  896. //分割表格
  897. table.bSplit(n, ts, isSite, codeSite)
  898. table.TdContactFormat(contactFormat, isSite, codeSite) //contactFormat,处理采购单位,代理机构
  899. //开始查找kv,核心模块,table.SortKV
  900. table.FindKV(isSite, codeSite)
  901. //table中抽取品牌,table.BrandData
  902. if u.IsBrandGoods {
  903. table.analyBrand()
  904. }
  905. //table中抽取单价和个数
  906. if u.IsPriceNumber {
  907. //qutil.Debug("======================抽取price和number===========")
  908. table.extractPriceNumber()
  909. }
  910. res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
  911. if !res {
  912. //过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
  913. table.KVFilter(isSite, codeSite)
  914. }
  915. //对有表头表格的处理
  916. if table.Tag != "" {
  917. co, m, b := CheckMultiPackage(table.Tag) //分包处理
  918. if b {
  919. table.BPackage = b
  920. if len(table.BlockPackage.Map) == 0 {
  921. for _, av := range m {
  922. kv := u.NewJobKv()
  923. kv.KvTags = table.StandKV
  924. bd := u.PackageNumberConvert(av[0])
  925. blockPackage := &u.BlockPackage{
  926. Origin: av[0],
  927. Name: av[0],
  928. Text: co,
  929. TableKV: kv,
  930. Index: bd,
  931. }
  932. if bd != "" {
  933. table.BlockPackage.AddKey(bd, blockPackage)
  934. } else {
  935. table.BlockPackage.AddKey(av[0], blockPackage)
  936. }
  937. }
  938. }
  939. table.StandKV["项目名称"] = append(table.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: table.Tag, Weight: -300})
  940. }
  941. }
  942. //判断是否是多包,并处理分包的//遍历td分块
  943. table.CheckMultiPackageByTable(isSite, codeSite) //分包处理
  944. //MergeKvTags(table.TableResult.KvTags, table.StandKV)
  945. }
  946. }
  947. return ts
  948. }
  949. //分包,拆表
  950. func (table *Table) tableSubDemolitionTable() []*Table {
  951. tm := []map[string]interface{}{}
  952. tmk := map[string]bool{}
  953. tmn := map[int]map[string]interface{}{}
  954. for rownum, tr := range table.TRs {
  955. if len(tr.TDs) == 1 && table.ColNum > 1 { //tr里面有一列,table里面有多列
  956. td := tr.TDs[0] //取每行第一个td
  957. //td开始列等于0 && td结束列+1等于table列数 && td长度大于1小于50
  958. if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 1 && len([]rune(td.Val)) < 50 {
  959. con, m1, b := CheckMultiPackage(td.Val) //判断分包
  960. if b {
  961. for k, _ := range m1 {
  962. numstr := u.PackageNumberConvert(k)
  963. m2 := map[string]interface{}{
  964. "tag": con,
  965. //"num": numstr,
  966. //"numtxt": v[0],
  967. "startrow": rownum,
  968. }
  969. tmk[numstr] = true
  970. tmn[rownum] = m2
  971. tm = append(tm, m2)
  972. break
  973. }
  974. }
  975. }
  976. }
  977. }
  978. //拆表
  979. ts := []*Table{}
  980. if len(tmk) > 1 && len(tmk) == len(tm) {
  981. var tab1 *Table
  982. for rownum, tr := range table.TRs {
  983. if tab1 == nil {
  984. tab1 = NewTable("", table.TableResult, table.Goquery)
  985. tab1.BSplit = true
  986. if tmn[rownum] != nil {
  987. tab1.StandKV["项目名称"] = append(tab1.StandKV["项目名称"], &u.Tag{Key: "项目名称", Value: tmn[rownum]["tag"].(string), Weight: -100})
  988. }
  989. ts = append(ts, tab1)
  990. }
  991. if tmn[rownum] != nil {
  992. tab1.Tag = tmn[rownum]["tag"].(string)
  993. } else {
  994. tab1.AddTR(tr)
  995. }
  996. if tmn[rownum+1] != nil {
  997. tab1 = nil
  998. }
  999. }
  1000. } else {
  1001. ts = append(ts, table)
  1002. }
  1003. return ts
  1004. }
  1005. //分割表格
  1006. func (table *Table) bSplit(n int, ts []*Table, isSite bool, codeSite string) {
  1007. if table.BSplit {
  1008. if !table.BHeader && n > 0 {
  1009. for i := n - 1; i > -1; i-- {
  1010. if ts[i].BHeader {
  1011. if ts[i].BFirstRow {
  1012. //取第一行插入到
  1013. table.InsertTR(ts[i].TRs[0])
  1014. table.Adjust(isSite, codeSite)
  1015. }
  1016. break
  1017. }
  1018. }
  1019. }
  1020. }
  1021. }
  1022. //删除尾部空白行
  1023. func (table *Table) deleteTrimTr() {
  1024. for len(table.TRs) > 0 {
  1025. npos := len(table.TRs)
  1026. tailTR := table.TRs[npos-1] //最后一个tr,取最后一行
  1027. bspace := true
  1028. for _, v := range tailTR.TDs {
  1029. if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 {
  1030. bspace = false
  1031. break
  1032. }
  1033. }
  1034. //删除尾部空行,是空行的话就删除
  1035. if bspace {
  1036. table.TRs = table.TRs[:npos-1]
  1037. } else {
  1038. break
  1039. }
  1040. }
  1041. }
  1042. //校对表格
  1043. func (table *Table) Adjust(isSite bool, codeSite string) {
  1044. //计算行列起止位置,跨行跨列处理
  1045. table.ComputeRowColSpan()
  1046. // for k1, tr := range table.TRs {
  1047. // for k2, td := range tr.TDs {
  1048. // qutil.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
  1049. // }
  1050. // }
  1051. //大概计算每个起止行列的概率
  1052. table.GetKeyRation()
  1053. /*
  1054. for k, v := range table.StartAndEndRation {
  1055. for k1, v1 := range v.Poss {
  1056. bs, _ := json.Marshal(v1)
  1057. str := ""
  1058. for _, td := range v.Tdmap[v1] {
  1059. str += "__" + td.Val + fmt.Sprintf("%d_%d_%d_%d", td.StartRow, td.EndRow, td.StartCol, td.EndCol)
  1060. }
  1061. qutil.Debug(k, k1, string(bs), v.Rationmap[v1], str)
  1062. }
  1063. }
  1064. */
  1065. //u.Debug("tdnum:", num, table.RowNum, table.ColNum)
  1066. //是否是规则的表格,单元各个数=行数*列数
  1067. table.Brule = table.TDNum == table.RowNum*table.ColNum
  1068. count := 0
  1069. for _, trs := range table.TRs {
  1070. for _, td := range trs.TDs {
  1071. if td.BH {
  1072. count++
  1073. }
  1074. }
  1075. }
  1076. if float32(count)/float32(table.TDNum) < 0.85 {
  1077. //精确计算起止行列是表头的概率
  1078. table.ComputeRowColIsKeyRation(isSite, codeSite)
  1079. bhead := false
  1080. L:
  1081. for i, tr := range table.TRs {
  1082. for _, td := range tr.TDs {
  1083. if td.BH {
  1084. //qutil.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1)
  1085. if i == len(table.TRs)-1 && len(table.TRs[len(table.TRs)-1].TDs) == 2 {
  1086. res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
  1087. if res {
  1088. //删除此行
  1089. table.TRs = table.TRs[:len(table.TRs)-1]
  1090. table.Adjust(isSite, codeSite)
  1091. return
  1092. }
  1093. }
  1094. bhead = true
  1095. break L
  1096. }
  1097. }
  1098. }
  1099. table.BHeader = bhead
  1100. }
  1101. }
  1102. //计算行/列表格的结束位置 StartRow=0 EndRow=0,table.TDNum td个数 table.RowNum 行数
  1103. func (table *Table) ComputeRowColSpan() {
  1104. n := 0 //td总个数
  1105. mapRC := map[int]map[int]int{} //记录第几行pos,起始列对应的合并值
  1106. for k, v := range table.TRs {
  1107. n += len(v.TDs) //每行的td总数相加
  1108. nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
  1109. ball := true
  1110. rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
  1111. for k1, v1 := range v.TDs {
  1112. if k1 == 0 && k == 0 {
  1113. table.TRs[k].TDs[k1].MustBH = true
  1114. table.TRs[k].TDs[k1].BH = true
  1115. }
  1116. if v1.Rowspan != rowspans {
  1117. ball = false
  1118. break
  1119. }
  1120. }
  1121. for _, v1 := range v.TDs {
  1122. if ball {
  1123. v1.Rowspan = 1
  1124. }
  1125. mc := mapRC[k]
  1126. for {
  1127. if mc != nil && mc[nk] > 0 {
  1128. nk += mc[nk]
  1129. } else {
  1130. break
  1131. }
  1132. }
  1133. v1.StartCol = nk
  1134. nk += v1.Colspan - 1
  1135. v1.EndCol = nk
  1136. if nk >= table.ColNum {
  1137. table.ColNum = nk + 1
  1138. }
  1139. nk++
  1140. v1.StartRow = k
  1141. v1.EndRow = k + v1.Rowspan - 1
  1142. ck := fmtkey("c", v1.StartCol, v1.EndCol)
  1143. tdcs := table.StartAndEndRation[ck]
  1144. if tdcs == nil {
  1145. tdcs = NewTDRationScope(ck)
  1146. table.StartAndEndRation[ck] = tdcs
  1147. table.StartAndEndRationKSort.AddKey(ck, 1)
  1148. }
  1149. tdcs.Addtd(v1)
  1150. rk := fmtkey("r", v1.StartRow, v1.EndRow)
  1151. tdrs := table.StartAndEndRation[rk]
  1152. if tdrs == nil {
  1153. tdrs = NewTDRationScope(rk)
  1154. table.StartAndEndRation[rk] = tdrs
  1155. table.StartAndEndRationKSort.AddKey(rk, 1)
  1156. }
  1157. tdrs.Addtd(v1)
  1158. if v1.Rowspan > 1 {
  1159. for i := 1; i < v1.Rowspan; i++ {
  1160. r := k + i
  1161. if r < len(table.TRs) {
  1162. mc := mapRC[r]
  1163. if mc == nil {
  1164. mc = map[int]int{}
  1165. }
  1166. mc[v1.StartCol] = v1.Colspan
  1167. mapRC[r] = mc
  1168. }
  1169. }
  1170. }
  1171. }
  1172. }
  1173. table.TDNum = n //td总个数
  1174. table.RowNum = len(table.TRs) //tr总行数
  1175. }
  1176. func fmtkey(t string, start, end int) string {
  1177. return fmt.Sprintf("%s_%d_%d", t, start, end)
  1178. }
  1179. //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
  1180. func (table *Table) FindTag() {
  1181. //查找每个table的标签,如果有标签可按标签处理,否则根据表格去判断
  1182. if table.Tag != "" {
  1183. return
  1184. }
  1185. if table.Tag == "" {
  1186. table.Tag = table.TableResult.BlockTag
  1187. }
  1188. //u.Debug(table.Tag)
  1189. }
  1190. //计算r/c_start_end的概率
  1191. func (table *Table) GetKeyRation() {
  1192. for _, vn := range table.StartAndEndRationKSort.Keys {
  1193. v := table.StartAndEndRation[vn]
  1194. for _, v1 := range v.Poss {
  1195. count := 0
  1196. n := 0
  1197. for _, td := range v.Tdmap[v1] {
  1198. n++
  1199. if td.BH {
  1200. count++
  1201. }
  1202. }
  1203. v.Rationmap[v1] = float32(count) / float32(n)
  1204. }
  1205. }
  1206. }
// ComputeRowColIsKeyRation refines which cells are headers (BH) and in which
// direction keys and values run (KeyDirect / KVDirect), based on the header
// ratios stored in table.StartAndEndRation. It works in three stages:
//
//  1. Regular tables (table.Brule): decide from the first row whether the
//     first row or the first column carries the keys, then reset or set
//     header flags on the remaining rows/columns accordingly.
//  2. Irregular tables, or regular ones where stage 1 decided nothing: walk
//     every row/column range and set or clear header flags by comparing the
//     range's ratio with the ratio of the crossing range.
//  3. Recompute the ratios (GetKeyRation), set table.BFirstRow, and treat
//     full-width single-cell rows as non-header rows whose text is mined for
//     key/values or, for the first row, used as the table tag.
//
// NOTE(review): the meaning of the KeyDirect/KVDirect values 1 and 2 is not
// defined in this part of the file; the code pairs (1, 2) with row-based
// decisions and (2, 1) with column-based ones.
func (table *Table) ComputeRowColIsKeyRation(isSite bool, codeSite string) {
	// Limits the correction applied across spanning rows.
	// u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum)
	bkeyfirstrow := false
	bkeyfirstcol := false
	if table.Brule { // regular table: no rowspan/colspan
		checkCompute := map[string]bool{}
		for k, tr := range table.TRs {
			rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow)
			if k == 0 { // ratios of the first row
				ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol)
				//u.Debug(table.BFirstRow, "--", table.StartAndEndRation[rk], table.StartAndEndRation[ck])
				ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
				ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0])
				if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { // first row holds the keys
					bkeyfirstrow = true
					ball := true
					// A money-like value in the first row cancels the decision.
					for _, td := range tr.TDs {
						if MoneyReg.MatchString(td.Val) {
							bkeyfirstrow = false
							ball = false
							td.BH = false
							break
						}
					}
					for _, td := range tr.TDs {
						if ball {
							//td.BH = true
							td.KeyDirect = 1
							td.KVDirect = 2
						}
					}
				} else if ration2 > 0.55 { // first column holds the keys
					bkeyfirstcol = true
					if !checkCompute[ck] {
						checkCompute[ck] = true
						// Reset the first column.
						for _, tr1 := range table.TRs {
							for _, td1 := range tr1.TDs {
								if td1.StartCol == 0 {
									if !MoneyReg.MatchString(td1.Val) {
										//td1.BH = true
										td1.KeyDirect = 2
										td1.KVDirect = 1
									}
								}
							}
						}
					}
				}
				// Fallback with looser thresholds when neither decision was taken.
				if !bkeyfirstrow && !bkeyfirstcol {
					if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 {
						bkeyfirstrow = true
						for _, td := range tr.TDs {
							if !MoneyReg.MatchString(td.Val) {
								//td.BH = true
								td.KeyDirect = 1
								td.KVDirect = 2
							}
						}
					} else if tr.Table.ColNum > 1 && ration2 > 0.5 {
						bkeyfirstcol = true
						if !checkCompute[ck] {
							checkCompute[ck] = true
							// Reset the first column; unlike the branch above this
							// one also sets BH.
							for _, tr1 := range table.TRs {
								for _, td1 := range tr1.TDs {
									if td1.StartCol == 0 {
										if !MoneyReg.MatchString(td1.Val) {
											td1.BH = true
											td1.KeyDirect = 2
											td1.KVDirect = 1
										}
									}
								}
							}
						}
					}
				}
			} else {
				if bkeyfirstrow {
					// Ratio of this row.
					ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
					// The second row, and any row below the threshold, loses its
					// header flags (except cells that must be headers).
					if k == 1 || ration1 < checkval {
						for _, td := range tr.TDs {
							if !td.MustBH {
								td.BH = false
								td.KeyDirect = 0
								td.KVDirect = 0
							}
						}
					} //else {for _, td := range tr.TDs {}}
				} else {
					// Columns drive the layout.
					if bkeyfirstcol {
						for _, td := range tr.TDs {
							ck := fmtkey("c", td.StartCol, td.EndCol)
							ration1, _ := table.StartAndEndRation[ck].GetTDRation(td)
							// Each column range is processed once.
							if !checkCompute[ck] {
								checkCompute[ck] = true
								if ration1 >= checkval && td.ColPos != 1 {
									// Whole column becomes a key column (money-like cells excluded).
									for _, tr1 := range table.TRs {
										for _, td1 := range tr1.TDs {
											if td1.StartCol == td.StartCol {
												if !MoneyReg.MatchString(td1.Val) {
													td1.BH = true
													td1.KeyDirect = 2
													td1.KVDirect = 1
												}
											}
										}
									}
								} else {
									// Column is a value column: clear flags, skipping the
									// first row and the first cell of each row.
									for _, tr1 := range table.TRs[1:] {
										for _, td1 := range tr1.TDs[1:] {
											if td1.StartCol == td.StartCol && !td1.MustBH {
												td1.BH = false
												td1.KeyDirect = 0
												td1.KVDirect = 0
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}
	//qutil.Debug("table.Brule", table.Brule, !bkeyfirstcol && !bkeyfirstrow)
	if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) {
		// Broken rows: cells share a row or column but are cut by a span in
		// between, so the table direction is adjusted per range.
		for _, k := range table.StartAndEndRationKSort.Keys {
			v := table.StartAndEndRation[k]
			// Judge by the dominant direction; iterating the sorted key list
			// avoids the unstable results of map iteration.
			k1 := k[:1] // "r" for row ranges, "c" for column ranges
			for _, v2 := range v.Poss {
				lentds := len(v.Tdmap[v2])
				if v.Rationmap[v2] > checkval {
					// Mostly headers: promote undecided cells; stop at the first
					// cell that is decided or looks like money / matches GSReg.
					for _, td := range v.Tdmap[v2] {
						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) && !GSReg.MatchString(td.Val) {
							if k1 == "r" {
								// Compare with the crossing column range.
								ck := fmtkey("c", td.StartCol, td.EndCol)
								rt := table.StartAndEndRation[ck]
								//clen := 0
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									//clen = len(tdn)
								}
								if lentds > 1 {
									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
										td.KeyDirect = 1
										td.KVDirect = 2
										//td.BH = true
									}
								}
							} else {
								// Compare with the crossing row range.
								ck := fmtkey("r", td.StartRow, td.EndRow)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								//clen := 0
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									//clen = len(tdn)
								}
								if lentds > 1 {
									if td.Valtype != "NOHEAD" && utf8.RuneCountInString(td.Val) < 15 && ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
										td.KeyDirect = 2
										td.KVDirect = 1
										td.BH = true
									}
								}
							}
						} else {
							break
						}
					}
				} else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 {
					// Mostly values: demote undecided header cells; stop at the
					// first cell that does not qualify.
					for _, td := range v.Tdmap[v2] {
						// u.Debug(td.Val, "-----", td.BH)
						if td.KeyDirect == 0 && td.BH && !td.MustBH {
							if k1 == "r" {
								ck := fmtkey("c", td.StartCol, td.EndCol)
								rt := table.StartAndEndRation[ck]
								clen := 0
								var fv float32
								var tdn []*TD
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									clen = len(tdn)
								}
								if lentds >= clen && lentds > 1 {
									if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
										td.BH = false
									}
								}
							} else {
								ck := fmtkey("r", td.StartRow, td.EndRow)
								rt := table.StartAndEndRation[ck]
								var fv float32
								var tdn []*TD
								clen := 0
								if rt != nil {
									fv, tdn = rt.GetTDRation(td)
									clen = len(tdn)
								}
								if lentds >= clen && lentds > 1 {
									if (tdn != nil && v.Rationmap[v2] < fv) || tdn == nil {
										td.BH = false
									}
								}
							}
						} else {
							break
						}
					}
				}
			}
		}
	}
	// Header flags may have changed above: refresh the ratios.
	table.GetKeyRation()
	if len(table.TRs) > 0 && len(table.TRs[0].TDs) > 0 {
		t0 := table.TRs[0].TDs[0]
		key := fmtkey("r", t0.StartRow, t0.EndRow)
		r, t := table.StartAndEndRation[key].GetTDRation(t0)
		// The first row counts as a header row when over 90% of it is header
		// and it has more than one cell.
		if r > 0.9 && len(t) > 1 {
			table.BFirstRow = true
		}
		for k, tr := range table.TRs {
			// Single cell spanning the full table width.
			if len(tr.TDs) == 1 && tr.TDs[0].StartCol == 0 && tr.TDs[0].EndCol+1 == table.ColNum {
				tr.TDs[0].BH = false
				tr.TDs[0].KVDirect = 0
				// Mine the cell text for key/values.
				sv := FindKv(tr.TDs[0].Val, "", 2)
				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2, isSite, codeSite)
				for k, v := range resm {
					sv.AddKey(k, v)
				}
				if len(sv.Keys) > 0 {
					// Only keys the cell itself does not already hold are added
					// to the table.
					for _, v1 := range sv.Keys {
						if tr.TDs[0].SortKV.Map[v1] == nil {
							table.SortKV.AddKey(v1, sv.Map[v1])
						}
					}
				} else if table.Tag == "" && k == 0 && len(tr.TDs[0].Val) > 11 {
					// No key/values: a first-row text longer than 11 bytes becomes
					// the table tag.
					table.Tag = tr.TDs[0].Val
				}
			}
		}
	}
}
  1462. //查找表格的kv,调用FindTdVal
  1463. func (table *Table) FindKV(isSite bool, codeSite string) {
  1464. //判断全是key的表格不再查找
  1465. if table.BHeader { //只要一个是key即为true
  1466. direct := If(table.BFirstRow, 2, 1).(int) //kv,2查找方向,向上查找
  1467. vdirect := If(direct == 2, 1, 2).(int)
  1468. //控制跨行表格
  1469. bcon := false
  1470. //增加表格切块判断,只判断切块分包
  1471. //控制中标人排序方向
  1472. //bodirect := 0
  1473. //控制中标人排序数值
  1474. //sort := 1
  1475. nextdirect, nextvdirect := 0, 0
  1476. //开始抽取
  1477. //若第一排全为头-临时让第二排-新增 左临 查询,zhengkun
  1478. tb_first_allhead := false
  1479. for tr_index, tr := range table.TRs {
  1480. if tr_index==6 {
  1481. //fmt.Println("调试指定tr")
  1482. }
  1483. bcon = trSingleColumn(tr, bcon, table) //tr单列,是否丢弃内容
  1484. if bcon {
  1485. continue
  1486. }
  1487. if tr.TDs[0].StartRow >= 0 {
  1488. numbh := 0
  1489. for _, td := range tr.TDs {
  1490. //log.Println(tr_index,kkk,td.Val)
  1491. if td.BH {
  1492. numbh++
  1493. }
  1494. }
  1495. if numbh != 0 && numbh == len(tr.TDs) { //5e0d53ef0cf41612e0640495
  1496. if tr_index==0 {
  1497. tb_first_allhead = true
  1498. }
  1499. nextdirect, nextvdirect = 2, 1
  1500. continue
  1501. } else if nextdirect > 0 && nextvdirect > 0 {
  1502. direct, vdirect = 2, 1
  1503. } else if numbh > 0 && numbh <= len(tr.TDs)/2 {
  1504. direct, vdirect = 1, 2
  1505. } else {
  1506. direct, vdirect = 2, 1
  1507. }
  1508. }
  1509. for _, td := range tr.TDs {
  1510. if !td.BH && td.KVDirect < 3 {
  1511. if !table.FindTdVal(td, direct, vdirect) { //table.FindTdVal()存储了table.SortKV
  1512. if !table.FindTdVal(td, vdirect, direct) {
  1513. ////都识别不到时,对第一、二中标候选人的处理
  1514. //bo, res := GetBidOrder(td, bodirect, sort)
  1515. //if res {
  1516. // sort++
  1517. // bodirect = bo
  1518. //}
  1519. //if len(td.SortKV.Map) > 0 {
  1520. // for _, tdv := range td.SortKV.Keys {
  1521. // if tdv == "" || td.SortKV.Map[tdv] == "" { //value为空或者null不再添加到table.SortKV
  1522. // continue
  1523. // }
  1524. // table.SortKV.AddKey(tdv, td.SortKV.Map[tdv])
  1525. // }
  1526. //}
  1527. }
  1528. }
  1529. if tb_first_allhead && tr_index==1 { //临时-让第二排-向左比对
  1530. if !table.FindTdVal(td, 1, 2) { //table.FindTdVal()存储了table.SortKV
  1531. if !table.FindTdVal(td, vdirect, direct) {
  1532. }
  1533. }
  1534. tb_first_allhead = false
  1535. }
  1536. //fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect)
  1537. }
  1538. }
  1539. nextdirect, nextvdirect = 0, 0
  1540. }
  1541. //qutil.Debug("FindKV", table.SortKV.Map)
  1542. } else if len(table.TRs) > 0 { //没有表头的表格处理,默认纵向吧
  1543. res := initLongitudinalData(table) //拼装纵向数组
  1544. //再拆值,类似http://www.ggzy.hi.gov.cn/cgzbgg/16553.jhtml第二列,有多个值
  1545. nmapkeys := []int{}
  1546. nmap := map[int][]*u.Kv{}
  1547. L:
  1548. for _, r1 := range res {
  1549. for n, r := range r1 {
  1550. if len([]rune(r)) < 60 { // 长度小于60才去分
  1551. //res1, _ := GetKVAll(r, "", nil)
  1552. res1, _ := colonkvEntity.entrance(r, "", nil, 2, isSite, codeSite)
  1553. if res1 != nil {
  1554. nmap[n] = res1
  1555. nmapkeys = append(nmapkeys, n)
  1556. /**
  1557. //截取串
  1558. for _k1, _ := range res1 {
  1559. r = regexp.MustCompile(_k1+".*").ReplaceAllString(r, "")
  1560. }
  1561. r1[n] = r
  1562. res[pos] = r1
  1563. **/
  1564. } else if nmap[n] != nil {
  1565. //放空值
  1566. nmap[n] = append(nmap[n], &u.Kv{})
  1567. }
  1568. } else {
  1569. nmap = nil
  1570. nmapkeys = nil
  1571. break L
  1572. }
  1573. }
  1574. }
  1575. //调整
  1576. if len(nmap) > 0 {
  1577. kmapkeys := []string{}
  1578. kmap := map[string][]string{}
  1579. for _, mk := range nmapkeys { //同是第n列
  1580. for pos, m1 := range nmap[mk] {
  1581. k, v := m1.Key, m1.Value
  1582. kv := kmap[k]
  1583. if kv == nil {
  1584. kv = []string{}
  1585. }
  1586. kv = append(kv, v)
  1587. kmap[k] = kv
  1588. kmapkeys = append(kmapkeys, k)
  1589. for _, k := range kmapkeys {
  1590. arr := kmap[k]
  1591. if len(arr) < pos {
  1592. arr = append(arr, "")
  1593. kmap[k] = arr
  1594. kmapkeys = append(kmapkeys, k)
  1595. }
  1596. }
  1597. }
  1598. }
  1599. if len(kmap) > 0 {
  1600. for _, k := range kmapkeys {
  1601. if len(kmap[k]) == 1 {
  1602. table.SortKV.AddKey(k, kmap[k][0])
  1603. } else if len(kmap[k]) > 1 {
  1604. table.SortKV.AddKey(k, kmap[k])
  1605. }
  1606. }
  1607. }
  1608. }
  1609. //=================
  1610. //解析值放到map中
  1611. for _, arr := range res {
  1612. if len(arr) > 0 {
  1613. v1 := arr[0]
  1614. _, _, _, _, repl := CheckCommon(v1, "con")
  1615. if repl == "ENT" {
  1616. table.SortKV.AddKey("中标单位", arr)
  1617. continue
  1618. } else if repl == "BO" {
  1619. table.SortKV.AddKey("排名", arr)
  1620. continue
  1621. }
  1622. }
  1623. }
  1624. }
  1625. //qutil.Debug("Table-FindKV", table.SortKV.Map)
  1626. }
  1627. //初始化组装纵向数据
  1628. func initLongitudinalData(table *Table) [][]string {
  1629. res := make([][]string, len(table.TRs[0].TDs)) //创建table第一行的列数长度
  1630. for n, _ := range res {
  1631. res[n] = []string{}
  1632. }
  1633. for _, tr := range table.TRs {
  1634. for n, td := range table.TRs[0].TDs { //第一行的所有td
  1635. td1 := table.GetTdByRCNo(tr.TDs[0].StartRow, td.StartCol) //根据行号列号获取td对象
  1636. if td1 != nil {
  1637. res[n] = append(res[n], td1.Val)
  1638. } else {
  1639. res[n] = append(res[n], "")
  1640. }
  1641. }
  1642. }
  1643. return res
  1644. }
  1645. //tr单列,是否丢弃内容
  1646. func trSingleColumn(tr *TR, bcon bool, table *Table) bool {
  1647. if len(tr.TDs) == 1 {
  1648. bcon = false
  1649. td := tr.TDs[0]
  1650. if td.StartCol == 0 && td.EndCol+1 == table.ColNum && len([]rune(td.Val)) > 4 && len([]rune(td.Val)) < 50 {
  1651. res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
  1652. if res { //以下内容丢弃
  1653. bcon = true
  1654. }
  1655. }
  1656. }
  1657. return bcon
  1658. }
  1659. //获取中标人顺序
  1660. //direct 0默认 1横向 2纵向
  1661. func GetBidOrder(td *TD, direct, n int) (d int, res bool) {
  1662. if td.Valtype != "BO" {
  1663. return
  1664. }
  1665. if td.Rowspan > 1 {
  1666. for i := 0; i < td.Rowspan; i++ {
  1667. nextcol := 1
  1668. L1:
  1669. for {
  1670. vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.EndCol+nextcol)
  1671. if vtd == nil {
  1672. break L1
  1673. }
  1674. nextcol += vtd.Colspan
  1675. if filter_zbdw_v2.MatchString(vtd.Val) {
  1676. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1677. if arrbo == nil {
  1678. arrbo = []map[string]interface{}{}
  1679. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1680. }
  1681. a1 := arrbo.([]map[string]interface{})
  1682. a1 = append(a1, map[string]interface{}{
  1683. "entname": vtd.Val,
  1684. "sortstr": td.Val,
  1685. "sort": GetBidSort(td.Val, n),
  1686. })
  1687. res = true
  1688. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1689. }
  1690. }
  1691. }
  1692. } else if td.Colspan > 1 {
  1693. for i := 1; i < td.Colspan; i++ {
  1694. nextcol := 0
  1695. L2:
  1696. for {
  1697. vtd := td.TR.Table.GetTdByRCNo(td.StartRow+i, td.StartCol+nextcol)
  1698. if vtd == nil || vtd.Colspan >= td.Colspan {
  1699. break L2
  1700. }
  1701. nextcol += vtd.Colspan
  1702. if filter_zbdw_v2.MatchString(vtd.Val) {
  1703. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1704. if arrbo == nil {
  1705. arrbo = []map[string]interface{}{}
  1706. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1707. }
  1708. a1 := arrbo.([]map[string]interface{})
  1709. a1 = append(a1, map[string]interface{}{
  1710. "entname": vtd.Val,
  1711. "sortstr": td.Val,
  1712. "sort": GetBidSort(td.Val, n),
  1713. })
  1714. res = true
  1715. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1716. }
  1717. }
  1718. }
  1719. } else {
  1720. rtd := td.TR.Table.GetTdByRCNo(td.StartRow, td.EndCol+1)
  1721. btd := td.TR.Table.GetTdByRCNo(td.EndRow+1, td.StartCol)
  1722. //if ((rtd != nil && !rtd.BH && rtd.Valtype == "BO") || direct == 1) && btd != nil && filter_zbdw_v.MatchString(btd.Val) {
  1723. if ((rtd != nil && !rtd.BH) || direct == 1) && btd != nil && filter_zbdw_v2.MatchString(btd.Val) {
  1724. d = 1
  1725. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1726. if arrbo == nil {
  1727. arrbo = []map[string]interface{}{}
  1728. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1729. }
  1730. a1 := arrbo.([]map[string]interface{})
  1731. a1 = append(a1, map[string]interface{}{
  1732. "entname": btd.Val,
  1733. "sortstr": td.Val,
  1734. "sort": GetBidSort(td.Val, n),
  1735. })
  1736. res = true
  1737. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1738. //} else if ((btd != nil && !btd.BH && btd.Valtype == "BO") || direct == 2) && rtd != nil && filter_zbdw_v.MatchString(rtd.Val) {
  1739. } else if ((btd != nil && !btd.BH) || direct == 2) && rtd != nil && filter_zbdw_v2.MatchString(rtd.Val) {
  1740. d = 2
  1741. arrbo := td.TR.Table.SortKV.Map[NullTxtBid]
  1742. if arrbo == nil {
  1743. arrbo = []map[string]interface{}{}
  1744. td.TR.Table.SortKV.AddKey(NullTxtBid, arrbo)
  1745. }
  1746. a1 := arrbo.([]map[string]interface{})
  1747. a1 = append(a1, map[string]interface{}{
  1748. "entname": rtd.Val,
  1749. "sortstr": td.Val,
  1750. "sort": GetBidSort(td.Val, n),
  1751. })
  1752. res = true
  1753. td.TR.Table.SortKV.AddKey(NullTxtBid, a1)
  1754. }
  1755. }
  1756. return
  1757. }
  1758. func GetBidSort(str string, n int) int {
  1759. val := n
  1760. if strings.Index(str, "首选") > -1 {
  1761. val = 1
  1762. } else {
  1763. val = winnerOrderEntity.toNumber(str, n)
  1764. }
  1765. return val
  1766. }
  1767. var cleardwReg *regexp.Regexp = regexp.MustCompile("[((]{1}\\d*[人元件个公斤户]/[人元件个公斤户][))]")
  1768. var zbhxrReg *regexp.Regexp = regexp.MustCompile("(中标候选人|投标单位名称)")
// FindTdVal looks up the header cell for td through FindNear and, when one is
// accepted, stores td's value in table.SortKV under a key derived from that
// header.
//
// direct is the direction in which the header is searched (1 = left, 2 = up,
// see FindNear); vdirect is the direction recorded on the header as its
// KeyDirect. A header is accepted only when it is marked BH, its KeyDirect is
// vdirect or still 0, its KVDirect is direct or still 0, and its KVDirect is
// below 3.
//
// The function returns true when a header was accepted and the value stored.
// It returns false, without touching anything, for an empty or
// whitespace-only td.Val or when no acceptable header exists.
//
// Besides table.SortKV it updates near.KVDirect, near.KeyDirect, td.KVDirect,
// td.HeadTd, table.kvscope and table.kTD.
func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
	if td.Val == "" || strings.TrimSpace(td.Val) == "" {
		return
	}
	near := table.FindNear(td, direct)
	if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
		near.KVDirect = direct
		near.KeyDirect = vdirect
		td.KVDirect = direct
		// The key is the header text with every repSpace match removed.
		key := repSpace.ReplaceAllString(near.Val, "")
		if key == "名称" && near.StartCol == 0 && near.Rowspan > 0 {
			// A generic "名称" header in the first column is specialised by
			// the texts found in row near.Rowspan-1 of the table.
			for _, vn := range table.TRs[near.Rowspan-1].TDs {
				if strings.Contains(vn.Val, "代理") {
					key = "代理机构"
					break
				} else if strings.Contains(vn.Val, "招标") {
					key = "采购单位"
					break
				} else if strings.Contains(vn.Val, "中标") {
					key = "中标单位"
					break
				}
			}
		} else if zbhxrReg.MatchString(key) && findCandidate2.MatchString(td.Val) {
			key = "中标单位"
		} else if key == "单位名称" {
			// Prefix the key with the header above; the header to the left is
			// consulted only when there is no cell above at all.
			tmpnewnear := table.FindNear(near, 2)
			if tmpnewnear != nil {
				if tmpnewnear.MustBH || tmpnewnear.BH {
					key = tmpnewnear.Val + near.Val
				}
			} else {
				tmpnewnear = table.FindNear(near, 1)
				if tmpnewnear != nil {
					if tmpnewnear.MustBH || tmpnewnear.BH {
						key = tmpnewnear.Val + near.Val
					}
				}
			}
		}
		// A header without text gets a synthetic, position-based key.
		if near.Val == "" {
			key = fmtkey("k", near.TR.RowPos, near.ColPos)
		}
		val := table.SortKV.Map[key]
		//qutil.Debug("====================", "key:", key, "val:", val)
		// bthiskey: the key already holds a value that has to be merged with
		// td's value.
		bthiskey := false
		if val != nil {
			curpos := table.SortKV.Index[key]
			thistr := table.kTD[curpos]
			if thistr != near {
				// Same key text but recorded for a different header cell.
				if strings.TrimSpace(near.Val) == "名称" && near.TR != nil && len(near.TR.TDs) > 0 && near.ColPos-1 >= 0 {
					// For a "名称" header, switch to the cell on its left when
					// that cell names a tenderer/agency/purchaser/winner. In
					// this branch the existing value is not merged.
					rv := near.TR.TDs[near.ColPos-1].Val
					if near.ColPos > 0 && (strings.Contains(rv, "招标") || strings.Contains(rv, "代理") || strings.Contains(rv, "采购") || strings.Contains(rv, "中标")) {
						near = near.TR.TDs[near.ColPos-1]
					}
				} else {
					bthiskey = true
				}
			} else {
				bthiskey = true
			}
		}
		bfind := false  // the key has to be replaced by NullTxtBid
		barr := false   // the value was merged with an existing one
		varrpos := -1   // slot in table.kvscope[pos] that td belongs to
		if bthiskey {
			// Array values combined with merged rows/columns: kvscope records,
			// per value slot, the cells that already contributed to it.
			pos := table.SortKV.Index[key]
			mval := table.kvscope[pos]
			bvalfind := false
			if direct == 1 { // branch taken for direct == 1
				// Look for a recorded cell that ends in the same column and
				// directly above td.
			L1:
				for k3, v3 := range mval {
					for _, v4 := range v3 {
						if v4.EndRow+1 == td.StartRow && v4.EndCol == td.EndCol {
							varrpos = k3
							bvalfind = true
							break L1
						}
					}
				}
			} else { // branch taken for any other direct
				// Look for a recorded cell that ends in the same row and
				// directly to the left of td.
			L2:
				for k3, v3 := range mval {
					for _, v4 := range v3 {
						if v4.EndCol+1 == td.StartCol && v4.EndRow == td.EndRow {
							varrpos = k3
							bvalfind = true
							break L2
						}
					}
				}
			}
			if vals, ok := val.([]string); ok {
				if near.Val == "" {
					// Header without text: when every existing value is
					// non-empty and matches NullTdReg, the header is renamed
					// to NullTxtBid.
					bn := false
					for _, vs := range vals {
						if vs != "" && NullTdReg.MatchString(vs) {
							bn = true
						} else {
							bn = false
							break
						}
					}
					if bn {
						near.Val = NullTxtBid
						key = NullTxtBid
						bfind = true
					}
				}
				if bvalfind && varrpos > -1 && len(vals) > varrpos {
					// Append td's value, preferring the text with the unit
					// annotation (cleardwReg) removed when that is not empty.
					tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
					if tmapval == "" {
						vals = append(vals, td.Val) // accumulate
					} else {
						vals = append(vals, tmapval) // accumulate
					}
					val = vals
					//vals[varrpos] = td.Val // += "__" + td.Val
				} else {
					// Drop empty values while rebuilding the slice.
					newVals := []string{}
					for _, isval := range vals {
						if isval == "" {
							continue
						}
						newVals = append(newVals, isval)
					}
					//vals = append(vals, td.Val)
					if td.Val != "" {
						newVals = append(newVals, td.Val)
					}
					val = newVals
					// NOTE(review): the slot index is computed from the
					// unfiltered vals, not from newVals - confirm intent.
					varrpos = len(vals) - 1
				}
			} else if vals, ok := val.(string); ok && vals != "" && td.Val != "" {
				tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(vals, ""))//value already stored for the key
				tmapvaltd := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
				if bvalfind {
					//if tmapvaltd == "" {
					//	val = td.Val //vals + "__" + td.Val
					//} else {
					//	val = tmapvaltd
					//}
					if key=="中标单位" {
						// an existing winner must not be overwritten
					}else {
						if tmapvaltd == "" {
							val = td.Val //vals + "__" + td.Val
						} else {
							val = tmapvaltd
						}
					}
				} else{
					if key=="中标单位" {
						// a winner must not be turned into an array
					}else {
						// Turn the single value into a two-element array:
						// old value first, td's value second.
						tval := []string{}
						if tmapval == "" {
							tval = append(tval, vals)
						} else {
							tval = append(tval, tmapval)
						}
						if tmapvaltd == "" {
							tval = append(tval, td.Val)
						} else {
							tval = append(tval, tmapvaltd)
						}
						val = tval
						varrpos = 1
					}
				}
			}
			barr = true
		} else {
			// First value for this key: use td's text, preferring the version
			// with the unit annotation removed when that is not empty.
			if td.Val != "" {
				tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
				if tmapval == "" {
					val = td.Val
				} else {
					val = tmapval
				}
			} else if len(near.SortKV.Map) == 1 && near.SortKV.Map[near.Val] != "" {
				val = near.SortKV.Map[near.Val]
			}
		}
		td.HeadTd = near
		if bfind {
			// Replace the synthetic position key by the (possibly renamed) key.
			tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
			table.SortKV.ReplaceKey(key, val, tkey)
		} else {
			// A bare "单位名称" key is replaced by the text of the first cell
			// of the header's row unless that cell reads "序号".
			if key == "单位名称" && len(near.TR.TDs) > 1 {
				if near.TR.TDs[0].Val != "序号" {
					key = near.TR.TDs[0].Val
				}
			}
			table.SortKV.AddKey(key, val)
			pos := table.SortKV.Index[key]
			if barr {
				// Record td in the slot it was merged into; nothing is written
				// back when no slot was determined (varrpos == -1).
				mval := table.kvscope[pos]
				if mval != nil {
					tds := mval[varrpos]
					if tds != nil {
						tds = append(tds, td)
					} else {
						tds = []*TD{td}
					}
					if varrpos > -1 {
						mval[varrpos] = tds
						table.kvscope[pos] = mval
					}
				}
			} else {
				// First value: start the scope bookkeeping for this key and
				// remember which header cell owns it.
				table.kvscope[pos] = map[int][]*TD{
					0: []*TD{td},
				}
				table.kTD[pos] = near
			}
		}
		b = true
	}
	return
}
  1993. //查找单元格的表头时,横向或纵向
  1994. func (table *Table) FindNear(td *TD, direct int) *TD {
  1995. if direct == 1 && td.StartCol > 0 { //左临
  1996. tr := table.TRs[:td.TR.RowPos+1]
  1997. for i := len(tr) - 1; i > -1; i-- {
  1998. tds := tr[i].TDs
  1999. for _, td1 := range tds {
  2000. if td1.StartRow <= td.StartRow && td1.EndRow >= td.EndRow && td1.EndCol+1 == td.StartCol {
  2001. //找到左临节点
  2002. if td1.BH {
  2003. return td1
  2004. } else {
  2005. if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
  2006. return td1.HeadTd
  2007. }
  2008. }
  2009. }
  2010. }
  2011. }
  2012. } else if direct == 2 && td.StartRow > 0 { //上临
  2013. tr := table.TRs[:td.TR.RowPos]
  2014. for i := len(tr) - 1; i > -1; i-- {
  2015. tds := tr[i].TDs
  2016. for it, td1 := range tds {
  2017. if td1.StartCol <= td.StartCol && td1.EndCol >= td.EndCol && td1.EndRow+1 == td.StartRow {
  2018. //找到左临节点
  2019. if td1.BH {
  2020. return td1
  2021. } else if len(tr[i].TDs) == len(td.TR.TDs) && td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
  2022. return td1.HeadTd
  2023. } else if it > 0 && td1.Val == "" && td1.TR.TopTR == nil && len(td.TR.TDs)-(td.StartCol-1) > 0 && strings.Contains(td.TR.TDs[td.StartCol-1].Val, "中标候选人") {
  2024. return tds[it-1]
  2025. } else if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct && td.Colspan == td1.Colspan && td.Rowspan == td.Rowspan {
  2026. return td1.HeadTd
  2027. }
  2028. } else if td1.StartCol <= td.StartCol && td1.EndCol >= td.EndCol && td1.EndRow+1 == td.StartRow {
  2029. }
  2030. }
  2031. }
  2032. }
  2033. return nil
  2034. }
  2035. //根据行号列号获取td对象
  2036. func (tn *Table) GetTdByRCNo(row, col int) *TD {
  2037. for _, tr := range tn.TRs {
  2038. for _, td := range tr.TDs {
  2039. if td.StartCol <= col && td.EndCol >= col && td.StartRow <= row && td.EndRow >= row {
  2040. return td
  2041. }
  2042. }
  2043. }
  2044. return nil
  2045. }
// CheckMultiPackageByTable decides whether the table describes several
// packages (lots) and, if so, fills tn.BlockPackage with one entry per package.
//
// The decision is score based: val is raised by the helpers
// initCheckMultiPackageByTable and foundPacBySortKV and by the table tag, and
// the table counts as multi-package when val > 4 and at least one distinct
// package label was collected.
//
// b reports that decision; index holds the normalised (u.PackageNumberConvert),
// de-duplicated package labels. isSite and codeSite are passed on to the
// helpers; codeSite additionally enables one site-specific rule below.
func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool, index []string) {
	pac := 0             // number of packages found
	val := 0             // score
	index = []string{}   // package labels taken from keys/values of tn.SortKV
	index_pos := []int{} // position recorded for each label
	// array values that carry a hint such as a section label
	//arr_count := 0 // counted array values in table.SortKV; no longer used
	key_index := -1
	hasPkgTd := map[string]bool{}
	// collect the initial data from the values of tn.SortKV
	key_index, index, index_pos, val, pac, hasPkgTd = initCheckMultiPackageByTable(tn, key_index, index, index_pos, val, pac, hasPkgTd)
	// case: the key itself names a package
	// values recorded per key
	commonKeyVals := map[string][]string{}
	// number of occurrences per key
	keyExistsCount := map[string]int{}
	if pac > 1 {
		val = 10
	} else {
		// look at the table tag; both patterns add the same score
		if TableMultiPackageReg_4.MatchString(tn.Tag) {
			val += 4
		} else if TableMultiPackageReg_2.MatchString(tn.Tag) {
			val += 4
		}
		// decide by the keys of tn.SortKV, falling back to the values
		val, index, index_pos = foundPacBySortKV(tn, val, index, index_pos, &keyExistsCount, &commonKeyVals, key_index, hasPkgTd)
	}
	//	u.Debug(index)
	// normalise the labels and drop duplicates
	standIndex := []string{}
	standIndex_pos := []int{}
	oldIndex := []string{} // original (non-normalised) labels
	brepeat := map[string]bool{}
	for k, v := range index {
		v = u.PackageNumberConvert(v)
		if !brepeat[v] {
			brepeat[v] = true
			standIndex = append(standIndex, v)
			standIndex_pos = append(standIndex_pos, index_pos[k])
			oldIndex = append(oldIndex, index[k])
		}
	}
	index = standIndex
	// With more than one package, a key collected by foundPacBySortKV is
	// written back only when it occurred at least twice.
	if len(commonKeyVals) > 0 {
		for k, v := range commonKeyVals {
			if len(index) > 1 && keyExistsCount[k] < 2 {
				continue
			}
			tn.SortKV.AddKey(k, v)
		}
	}
	//
	isGoonNext := false
	if val > 4 && len(brepeat) > 0 {
		b = true
		// multi-package handling
		if b {
			tn.BPackage = true
			//pnum := len(index)
			// create one tn.BlockPackage entry per package label
			for nk, v := range index {
				if tn.BlockPackage.Map[v] == nil {
					kv := u.NewJobKv()
					// Distribute tn.StandKV: a key is split across the
					// packages only when it has exactly one value per package.
					for tnk, tnv := range tn.StandKV {
						if nk >= len(tnv) {
							continue
						} else if len(index) == len(tnv) {
							// Site-specific rule: for this site the budget is
							// assigned per package only when the budget values
							// differ from each other.
							if tnk=="预算"&& codeSite=="ha_zmdszfcgw_cgxx" && len(tnv)>1{
								isEqErr,budget_v := false,""
								for bk,bv:=range tnv {
									if bk==0 {
										budget_v = bv.Value
									}else {
										if budget_v != bv.Value {
											isEqErr = true
											break
										}
									}
								}
								if isEqErr {
									kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
								}
							}else {
								kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
							}
						}
					}
					//kv.KvTags = tn.StandKV
					bp := &u.BlockPackage{}
					bp.Index = v             // normalised label
					bp.Origin = oldIndex[nk] // original label
					bp.TableKV = kv          // key/values assigned to this package
					bp.Name = v
					if bp.TableKV != nil && bp.TableKV.KvTags != nil && len(bp.TableKV.KvTags) > 0 {
						for kc, cv := range bp.TableKV.KvTags {
							// Only the first value of each key is used. The
							// last element returned by clear.ObjToMoney is
							// asserted to be a bool and stored as the
							// IsTrue... flag.
							if kc == "预算" && bp.Budget <= 0 {
								moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
								if len(moneys) > 0 {
									if vf, ok := moneys[0].(float64); ok {
										bp.Budget = vf
										bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
									} else if vi, ok := moneys[0].(int); ok {
										bp.Budget = float64(vi)
										bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
									}
								}
							} else if kc == "中标金额" && bp.Bidamount <= 0 {
								moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
								if len(moneys) > 0 {
									if vf, ok := moneys[0].(float64); ok {
										bp.Bidamount = vf
										bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
									} else if vi, ok := moneys[0].(int); ok {
										bp.Bidamount = float64(vi)
										bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
									}
								}
							} else if kc == "中标单位" && bp.Winner == "" {
								bp.Winner = cv[0].Value
							}
							// build the package text as "key:value" lines
							if !excludeKey.MatchString(kc) {
								bp.Text += fmt.Sprintf("%v:%v\n", kc, cv[0].Value)
							}
						}
					}
					tn.BlockPackage.AddKey(v, bp) // register the package
				}
			}
			isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos, isSite, codeSite) // distribute the SortKV values over the packages
		}
	} else {
		isGoonNext = true
	}
	if isGoonNext { // nothing was turned into arrays: fall back to the text-based package detection
		tn.isGoonNext(isSite, codeSite)
	}
	// determine the winner order of every package that has none yet
	if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
		for _, v := range tn.BlockPackage.Keys {
			vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)
			if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2, isSite, codeSite)
			}
		}
	}
	return
}
  2198. //多包处理,处理不同情况下的分包
  2199. func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int, isSite bool, codeSite string) (isGoonNext bool) {
  2200. if len(index) == 1 { //是一个的情况
  2201. if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
  2202. beq := true
  2203. for _, v2 := range tn.SortKV.Keys {
  2204. if _, ok := tn.SortKV.Map[v2].(string); !ok {
  2205. beq = false
  2206. break
  2207. }
  2208. }
  2209. if beq { //统一处理为数组
  2210. td := tn.GetTdByRCNo(tn.RowNum-1, 0)
  2211. if !td.BH && FindVal2_1.MatchString(td.Val) {
  2212. for _, v2 := range tn.SortKV.Keys {
  2213. tn.SortKV.AddKey(v2, []string{tn.SortKV.Map[v2].(string)})
  2214. }
  2215. } else {
  2216. //没有处理成数组的情况下,继续调用正文查找分包的方法
  2217. isGoonNext = true
  2218. }
  2219. }
  2220. }
  2221. }
  2222. for _, k1 := range tn.SortKV.Keys {
  2223. v1 := tn.SortKV.Map[k1]
  2224. var v1_arr []string
  2225. if vtmpv1, ok := v1.(string); ok {
  2226. v1_arr = PreCon4.FindAllString(qutil.ObjToString(vtmpv1), -1)
  2227. if len(v1_arr) > 0 {
  2228. if dw := Precon4dw.FindString(vtmpv1); dw != "" {
  2229. for i, v := range v1_arr {
  2230. v1_arr[i] = v + dw
  2231. }
  2232. }
  2233. }
  2234. } else if vtmpv1s, ok := v1.([]string); ok {
  2235. v1_arr = vtmpv1s
  2236. }
  2237. if len(v1_arr) > 0 && len(v1_arr) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
  2238. for k, v := range v1_arr {
  2239. tn.assemblePackage(k1, v, index[k], isSite, codeSite) //组装解析到的分包
  2240. }
  2241. }
  2242. }
  2243. return isGoonNext
  2244. }
  2245. //没有处理成数组的情况下,继续调用正文查找分包的方法
  2246. func (tn *Table) isGoonNext(isSite bool, codeSite string) {
  2247. blockPackage := map[string]*u.BlockPackage{}
  2248. for _, k := range tn.SortKV.Keys {
  2249. if excludeKey.MatchString(k) || strings.Contains(k, "批复") || excludeKey3.MatchString(k) {
  2250. continue
  2251. }
  2252. str := "" //拼装为冒号kv
  2253. v := tn.SortKV.Map[k]
  2254. nk := regReplAllSpace.ReplaceAllString(k, "")
  2255. if vs, ok := v.([]string); ok {
  2256. str += fmt.Sprintf("%s:%s\n", nk, strings.Join(vs, " "))
  2257. } else {
  2258. str += fmt.Sprintf("%s:%s\n", nk, v)
  2259. }
  2260. if excludeKey2.MatchString(str) {
  2261. continue
  2262. }
  2263. b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false, isSite, codeSite) //分块之后分包
  2264. if b && len(blockPackage) > 0 {
  2265. tn.BPackage = true
  2266. for mk, mv := range blockPackage {
  2267. if tn.BlockPackage.Map[mk] == nil {
  2268. tn.BlockPackage.AddKey(mk, mv)
  2269. } else {
  2270. bp := tn.BlockPackage.Map[mk].(*u.BlockPackage)
  2271. if bp.TableKV == nil {
  2272. bp.TableKV = u.NewJobKv()
  2273. }
  2274. if bp.SpaceKV == nil {
  2275. bp.SpaceKV = u.NewJobKv()
  2276. }
  2277. for k2, v2 := range mv.ColonKV.KvTags {
  2278. for _, v2v := range v2 {
  2279. isExists := false
  2280. for _, v2vv := range bp.TableKV.KvTags[k2] {
  2281. if v2v.Value == v2vv.Value {
  2282. isExists = true
  2283. break
  2284. }
  2285. }
  2286. if !isExists {
  2287. bp.TableKV.KvTags[k2] = append(bp.TableKV.KvTags[k2], v2v)
  2288. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  2289. }
  2290. }
  2291. }
  2292. for k2, v2 := range mv.SpaceKV.KvTags {
  2293. for _, v2v := range v2 {
  2294. isExists := false
  2295. for _, v2vv := range bp.SpaceKV.KvTags[k2] {
  2296. if v2v.Value == v2vv.Value {
  2297. isExists = true
  2298. break
  2299. }
  2300. }
  2301. if !isExists {
  2302. bp.SpaceKV.KvTags[k2] = append(bp.SpaceKV.KvTags[k2], v2v)
  2303. bp.Text += fmt.Sprintf("%v:%v\n", k2, v2)
  2304. }
  2305. }
  2306. }
  2307. }
  2308. }
  2309. tn.BPackage = true
  2310. tn.SortKV.RemoveKey(k)
  2311. }
  2312. }
  2313. }
// foundPacBySortKV looks for package labels in tn.SortKV, first in the keys
// and otherwise in the values of keys that look like package columns.
//
// val, index and index_pos are the score, labels and label positions
// collected so far; the updated values are returned. keyExistsCount and
// commonKeyVals are filled, through their pointers, with the keys that belong
// to a package-labelled key and with their values. key_index is the position
// of the key that already supplied labels (-1 for none). hasPkgTd marks keys
// whose string value must not be examined here.
func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
	// keyIsPkg stays true once a key containing a package label was seen;
	// every later, non-excluded key is then treated as belonging to a package.
	keyIsPkg := false
	for in, k := range tn.SortKV.Keys {
		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) || excludeKey3.MatchString(k) || regFJWarap.MatchString(k) || regAZWarap.MatchString(k) { // excluded from package detection
			continue
		}
		v := tn.SortKV.Map[k]
		// case: the key itself contains the package label
		if ismatch := FindVal_1.MatchString(k); keyIsPkg || ismatch {
			if ismatch {
				keyIsPkg = true
				val += 4
				pkgFlag := FindVal_1.FindString(k) // the label found in the key
				// the label is removed from the key; the rest is the common key
				k = strings.Replace(k, pkgFlag, "", -1)
				index = append(index, pkgFlag)
				index_pos = append(index_pos, len(index))
				val += 1
				//pac++
			} else {
				// strip trailing underscores from the key
				k = strings.TrimRight(k, "_")
			}
			(*keyExistsCount)[k] = (*keyExistsCount)[k] + 1
			(*commonKeyVals)[k] = append((*commonKeyVals)[k], qutil.ObjToString(v))
		} else if k1 := FilterKey_2.ReplaceAllString(k, ""); FindKey_2.MatchString(k1) {
			val += 4
			// case: array value
			if vs, bvs1 := v.([]string); bvs1 {
			L:
				for in2, v1 := range vs {
					if len([]rune(v1)) < 20 && !moneyNum.MatchString(v1) && FindVal2_1.MatchString(v1) {
						// a value matching one of the title patterns ends the
						// scan of this array
						for _, serial := range tn.TableResult.RuleBlock.TitleRegs {
							if serial.MatchString(v1) {
								break L
							}
						}
						// labels are taken from a single key only
						if key_index == -1 {
							key_index = in
						} else if key_index != in {
							break
						}
						index = append(index, v1)
						index_pos = append(index_pos, in2)
						val += 1
						//pac++
					}
				}
			} else if v1, ok := v.(string); ok && !hasPkgTd[k] {
				// case: string value
				v1 = replPkgConfusion(v1) // replace words that are easily mistaken for package labels
				for _, v2 := range strings.Split(v1, "/") {
					if len([]rune(v2)) < 20 && !moneyNum.MatchString(v2) && FindVal2_1.MatchString(v2) {
						key_index = in
						// note: the whole value v1 is recorded, not the part v2
						index = append(index, v1)
						index_pos = append(index_pos, 0)
						val += 1
						//pac++
						// also collect the values of the keys k+"_", k+"__", ...
						// until the first such key that is missing
						underline := ""
						for {
							underline += "_"
							if tn.SortKV.Map[k+underline] == nil {
								break
							} else if v3, v2_ok := tn.SortKV.Map[k+underline].(string); v2_ok && v3 != "" {
								index = append(index, v3)
								index_pos = append(index_pos, 1)
							} else if v3, v2_ok := tn.SortKV.Map[k+underline].([]string); v2_ok {
								for v2_k, v2_v := range v3 {
									index = append(index, v2_v)
									index_pos = append(index_pos, v2_k+1)
								}
							}
						}
						break
					}
				}
			}
			// Only the first package-like key is examined, except that a
			// "标段" key which produced no label lets the search continue.
			if k1=="标段" && len(index)==0 {
				continue
			}else {
				break
			}
		}
	}
	return val, index, index_pos
}
// initCheckMultiPackageByTable performs the first pass of the multi-package
// detection: it scans the values of tn.SortKV for short texts that are package
// labels.
//
// All collecting parameters are passed in and returned updated: key_index
// (position of the key that supplied labels), index (labels), index_pos
// (position of each label), val (score, +1 per label), pac (number of labels)
// and hasPkgTd (keys whose string value consists of one package label without
// a colon key/value, as reported by isHasOnePkgAndNoKv).
func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, index_pos []int, val int, pac int, hasPkgTd map[string]bool) (rkey_index int, rindex []string, rindex_pos []int, rval int, rpac int, rhasPkgTd map[string]bool) {
	for in, k := range tn.SortKV.Keys {
		// skip keys matched by excludeKey (after removing BracketsTextReg
		// matches) or excludeKey3, and keys containing "批复"
		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) || excludeKey3.MatchString(k) || strings.Contains(k, "批复") {
			continue
		}
		v := tn.SortKV.Map[k]
		if vs, bvs := v.([]string); bvs {
			//arr_count++
			haspkgs := []string{}
			for in2, v1 := range vs {
				v1 = replPkgConfusion(v1) // replace words that are easily mistaken for package labels
				if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
					// labels are taken from a single key only
					if key_index == -1 {
						key_index = in
					} else if key_index != in {
						break
					}
					index = append(index, FindVal_1.FindString(v1))
					index_pos = append(index_pos, in2)
					val += 1
					pac++
				} else if FindKey_3.MatchString(k) {
					// sample document: 5db2a101a5cb26b9b73054ac
					index = append(index, v1)
					index_pos = append(index_pos, in2)
					val += 1
					pac++
				} else {
					if ok, v1new := isHasOnePkgAndNoKv(v1); ok { // the cell holds exactly one package and no colon key/value
						haspkgs = append(haspkgs, v1new)
					}
				}
			}
			/*Handles this layout:
			<tr><td>包一:xxxxxxxxx</td></tr>
			<tr><td>包二:xxxxxxxxx</td></tr>
			Applies only when no label was collected so far and every value of
			the array holds one package.
			*/
			if len(index) == 0 && len(haspkgs) > 0 && len(haspkgs) == len(vs) {
				for in2, v1 := range haspkgs {
					if key_index == -1 {
						key_index = in
					} else if key_index != in {
						break
					}
					index = append(index, v1)
					index_pos = append(index_pos, in2)
					val += 1
					pac++
				}
			}
		} else if v1, ok := v.(string); ok {
			v1 = replPkgConfusion(v1) // replace words that are easily mistaken for package labels
			if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
				key_index = in
				index = append(index, FindVal_1.FindString(v1))
				index_pos = append(index_pos, 0)
				val += 1
				pac++
			} else if getTd := tn.GetTdByRCNo(0, tn.SortKV.Index[k]); getTd != nil && getTd.KVDirect == 2 { // the cell found in row 0 at the key's index has KVDirect 2
				/*Handles this layout:
				<tr><td>包一:xxxxxxxxx</td></tr>
				*/
				if ok, v1new := isHasOnePkgAndNoKv(v1); ok {
					hasPkgTd[k] = true
					key_index = in
					index = append(index, v1new)
					index_pos = append(index_pos, 0)
					val += 1
					pac++
				}
			}
		}
	}
	return key_index, index, index_pos, val, pac, hasPkgTd
}
  2476. //组装解析到的分包,//key如果匹配到抽取关键词就添加到table.SortKV
  2477. func (tn *Table) assemblePackage(k1, v1, key string, isSite bool, codeSite string) {
  2478. bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
  2479. if bp.TableKV == nil {
  2480. bp.TableKV = u.NewJobKv()
  2481. }
  2482. if v1 != "" {
  2483. kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1, isSite, codeSite) //匹配抽取关键词
  2484. for k3, v3 := range kvTags {
  2485. if bp.TableKV.KvTags[k3] == nil {
  2486. bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
  2487. } else if k3 == "预算" && bp.Budget <= 0 {
  2488. moneys := clear.ObjToMoney([]interface{}{v3[0].Value, ""})
  2489. if len(moneys) > 0 {
  2490. if vf, ok := moneys[0].(float64); ok {
  2491. bp.Budget = vf
  2492. bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
  2493. } else if vi, ok := moneys[0].(int); ok {
  2494. bp.Budget = float64(vi)
  2495. bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
  2496. }
  2497. }
  2498. } else if k3 == "中标金额" && bp.Bidamount <= 0 {
  2499. moneys := clear.ObjToMoney([]interface{}{v3[0].Value, ""})
  2500. if len(moneys) > 0 {
  2501. if vf, ok := moneys[0].(float64); ok {
  2502. bp.Bidamount = vf
  2503. bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
  2504. } else if vi, ok := moneys[0].(int); ok {
  2505. bp.Bidamount = float64(vi)
  2506. bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
  2507. }
  2508. }
  2509. }
  2510. }
  2511. }
  2512. k1 = regReplAllSpace.ReplaceAllString(k1, "")
  2513. //拼接内容
  2514. if !excludeKey.MatchString(k1) {
  2515. bp.Text += fmt.Sprintf("%v:%v\n", k1, v1)
  2516. }
  2517. tn.BlockPackage.AddKey(key, bp)
  2518. }
  /*
  Table repair for data where an earlier crawler captured tables incorrectly.
  The helpers below find and fix malformed table markup. Only the full document
  text is processed; tables inside blocks do not need repairing.
  */
  var (
  	// thbf matches opening and closing thead/tbody/tfoot tags, ignoring case.
  	thbf = regexp.MustCompile("(?i)</?t(head|body|foot)>")
  	// saveThead captures the content of a thead element, which must be kept.
  	saveThead = regexp.MustCompile("(?is)<thead>(.+?)</thead>")
  	// clearpkg matches the words 标示 and 标识.
  	clearpkg = regexp.MustCompile("(标示|标识)")
  )
  2527. func RepairCon(con string) string {
  2528. con = clearpkg.ReplaceAllString(con, "")
  2529. res := saveThead.FindAllStringSubmatch(con, 1)
  2530. th := ""
  2531. if len(res) == 1 && len(res[0]) == 2 {
  2532. th = u.TrimLeftSpace(res[0][1], "")
  2533. }
  2534. con = thbf.ReplaceAllString(con, "")
  2535. con = u.TrimLeftSpace(con, "")
  2536. itbody := strings.Index(con, "<tr")
  2537. iLen := 3
  2538. if itbody == 0 {
  2539. con = findpos(con, iLen, itbody)
  2540. } else {
  2541. itable := strings.Index(con, "<table")
  2542. if itable == -1 || itable > itbody {
  2543. con = findpos(con, iLen, itbody)
  2544. }
  2545. }
  2546. //保留第一个thead
  2547. if th != "" {
  2548. con = strings.Replace(con, th, "<thead>"+th+"</thead>", 1)
  2549. }
  2550. //u.Debug(con)
  2551. return con
  2552. }
  2553. //修复表格
  2554. func findpos(con string, iLen, start int) (newcon string) {
  2555. defer qutil.Catch()
  2556. n := len(con)
  2557. layer := 0
  2558. pos := 0
  2559. if start >= 0 {
  2560. if iLen == 6 {
  2561. for i := iLen + start; i < len(con); i++ {
  2562. if con[i] == '<' && i+6 < n {
  2563. str := con[i : i+6]
  2564. if str == "</tbod" {
  2565. if layer == 0 {
  2566. pos = i
  2567. break
  2568. } else {
  2569. layer--
  2570. }
  2571. i += 6
  2572. } else if str == "<tbody" {
  2573. layer++
  2574. i += 6
  2575. }
  2576. }
  2577. }
  2578. if pos+7 <= n && start+6 < pos {
  2579. newcon = con[:start] + "<table" + con[start+6:pos] + "</table" + con[pos+7:]
  2580. }
  2581. } else {
  2582. layer++
  2583. nq := 0
  2584. lasttr := 0
  2585. for i := iLen + start; i < len(con); i++ {
  2586. if con[i] == '<' && i+4 < n {
  2587. if nq == 0 {
  2588. str := con[i : i+4]
  2589. if str == "</tr" {
  2590. if layer <= 0 {
  2591. pos = i //正常情况不会存在此类情况
  2592. break
  2593. } else {
  2594. layer--
  2595. lasttr = i
  2596. }
  2597. i += 4
  2598. } else if str[:3] == "<tr" {
  2599. layer++
  2600. i += 4
  2601. } else if str == "<tab" && i+6 < n && con[i+4:i+6] == "le" {
  2602. if layer == 0 {
  2603. break
  2604. } else {
  2605. //内嵌的表格
  2606. nq++
  2607. }
  2608. }
  2609. } else {
  2610. if i+6 < n {
  2611. str := con[i : i+6]
  2612. if str == "</tabl" {
  2613. nq--
  2614. } else if str == "<table" {
  2615. nq++
  2616. }
  2617. } else {
  2618. break
  2619. }
  2620. }
  2621. }
  2622. }
  2623. if pos == 0 && lasttr > 3 {
  2624. pos = lasttr + 5
  2625. } else if pos > 0 {
  2626. pos += 5
  2627. }
  2628. if pos <= n && pos < len(con) && start < pos {
  2629. newcon = con[:start] + "<table>" + con[start:pos] + "</table>" + con[pos:]
  2630. }
  2631. }
  2632. }
  2633. if newcon == "" {
  2634. newcon = con
  2635. }
  2636. return
  2637. }
  2638. //td的值里面有一个包,并且没有冒号kv
  2639. func isHasOnePkgAndNoKv(v1 string) (bool, string) {
  2640. v1s := FindVal_1.FindAllString(v1, -1)
  2641. colonCount := len(regDivision.FindAllString(v1, -1))
  2642. if len(v1s) == 1 && colonCount < 2 {
  2643. ispkgcolon := regexp.MustCompile(v1s[0] + "[::]").MatchString(v1)
  2644. if (ispkgcolon && colonCount == 1) || (!ispkgcolon && colonCount == 0) {
  2645. return true, v1s[0]
  2646. }
  2647. }
  2648. return false, v1
  2649. }
  2650. //替换分包中混淆的词
  2651. func replPkgConfusion(v1 string) string {
  2652. v1 = PreReg.ReplaceAllString(v1, "")
  2653. v1 = PreReg1.ReplaceAllString(v1, "")
  2654. v1 = PreCon.ReplaceAllString(v1, "")
  2655. v1 = PreCon2.ReplaceAllString(v1, "")
  2656. return v1
  2657. }
  // TdContactFormat re-processes the values of the table's td cells so that
  // contact details are attributed to a contact type (a key of ContactType; the
  // original comments name these purchaser, agency and winning bidder).
  //
  // It starts from contactFormat.IndexMap and contactFormat.MatchMap, makes two
  // passes over tn.TRs, and writes the resulting maps back to contactFormat
  // before returning. Matched values are stored on the cells' SortKV, either
  // directly here or through tdsMultipleWords, asdFind and modle. isSite and
  // codeSite are only forwarded to the helpers.
  //
  // NOTE(review): the maps taken from contactFormat are written to without a nil
  // check — confirm callers always initialise them.
  func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, codeSite string) {
  	// Handle the contact information found in the table.
  	indexMap := contactFormat.IndexMap
  	matchMap := contactFormat.MatchMap
  	weightMap := map[string]map[string]interface{}{} // weights
  	mustMatchFirst := len(indexMap) > 0 // the first key must match when an index map was handed in
  	reCreate := false
  	matchCount := 0
  	contactTypeTagMap := map[string]map[string][]interface{}{}
  	notMatchTrCount := 0
  	allAscFind := true // forward-order lookup is enabled
  	// Pass 1 (only when no index map was handed in): decides allAscFind and may
  	// seed indexMap.
  	if len(indexMap) == 0 {
  		isCanAddToIndexMap := false
  		matchPrevFlag := false
  		prevCanAddToIndexMap := false
  	LS:
  		for _, tr := range tn.TRs {
  			for td_index, td := range tr.TDs {
  				thisTdKvs := tn.tdkv(td) // colon key/values of this td
  				if len(thisTdKvs) != 1 {
  					preTdIndex := td_index - 1
  					if preTdIndex >= 0 {
  						preTdVal := tr.TDs[td_index-1].Val
  						tdType := "" // contact type matched by the previous td, if any
  						for k, v := range ContactType {
  							if v.MatchString(preTdVal) {
  								tdType = k
  								break
  							}
  						}
  						if tdType != "" {
  							// Store each contact-like key of this td under the previous td's type.
  							for _, this := range thisTdKvs {
  								if str := ContactInfoVagueReg.FindString(this.Key); str != "" {
  									td.SortKV.AddKey(tdType+str, this.Value)
  								}
  							}
  						}
  					}
  					continue
  				}
  				// 1. normalise keys that carry brackets; 2. recognise contact fields
  				// (contact person, phone, ...) of the purchaser or agency.
  				goOnFunc, isContinue, td_k := tn.tdKV(thisTdKvs[0].Key, &matchPrevFlag, &isCanAddToIndexMap, &indexMap, "LS")
  				if !goOnFunc {
  					break LS
  				}
  				if isContinue {
  					continue
  				}
  				// Purchaser, agency, winning bidder.
  				for _, k := range HasOrderContactType(td_k) {
  					if !ContactType[k].MatchString(td_k) { // not one of the contact types: skip
  						continue
  					}
  					if len(indexMap) == 0 {
  						if isCanAddToIndexMap || (prevCanAddToIndexMap && len(tr.TDs) == 1) {
  							myPrevTdVal := ""
  							if td_index-2 >= 0 {
  								myPrevTdVal = tr.TDs[td_index-2].Val
  								if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
  									matchPrevFlag = true
  								}
  							}
  							indexMap[0] = k
  							break
  						}
  					} else {
  						// A second contact type shows up after one was recorded:
  						// discard the index map and stop pass 1.
  						indexMap = map[int]string{}
  						break LS
  					}
  				}
  			}
  			// Carry this row's flag over to the next row.
  			prevCanAddToIndexMap = isCanAddToIndexMap
  			isCanAddToIndexMap = false
  		}
  		if len(indexMap) > 0 {
  			allAscFind = false
  		}
  	}
  	// Pass 2: attribute the contact key/values of every td.
  L:
  	for tr_index, tr := range tn.TRs {
  		thisTrHasMatch := false
  		jumpNextTd := false
  		for td_index, td := range tr.TDs {
  			// Several words joined by 和 / 以及 / 与 / 、 in one cell.
  			jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
  			// Key/values after the text was divided into blocks.
  			thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock, isSite, codeSite)
  			if len(thisTdKvs) == 0 {
  				thisTdKvs = tn.tdkv(td) // fall back to colon key/values
  			}
  			tdAscFind := true // forward-order lookup for this td
  			if len(thisTdKvs) == 0 {
  				continue
  			} else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
  				// The purchaser appears after its contact person / phone.
  				tdAscFind = tn.hasIndexMap(thisTdKvs, &indexMap, tdAscFind)
  			}
  			// A disabled branch here used to rebuild indexMap from every kv when a
  			// row with a single td held two or more kvs (sample 5d6b2aa2a5cb26b9b73e79d2).
  			prevKey := ""
  			oldIndexMapLength := len(indexMap)
  			thidTdIndex := td_index
  			kvTitle := ""
  			if len(thisTdKvs) >= 2 { // several kvs in one td: reset indexMap
  				indexMap = map[int]string{}
  				allAscFind = true
  			}
  			for _, td_kv := range thisTdKvs {
  				iscontinue := false
  				td_v := td_kv.Value
  				td_k := FilterContactKey(td_kv.Key) // normalise keys that carry brackets
  				td_k_length := len([]rune(td_k))
  				if td_k_length < 2 || td_k_length > 15 {
  					continue
  				}
  				// Both lookups run in forward order.
  				if allAscFind && tdAscFind {
  					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex, isSite, codeSite)
  				}
  				if iscontinue {
  					continue
  				}
  				// The kv belongs to a different block (its title changed) and is
  				// not itself a contact field: start over.
  				if td_kv.Title != "" && kvTitle != td_kv.Title && len(indexMap) > 0 && !ContactInfoMustReg.MatchString(td_kv.Key) {
  					thidTdIndex = 0
  					matchMap = map[string]map[string]bool{}
  					indexMap = map[int]string{}
  				}
  				kvTitle = td_kv.Title
  				if td_k_length < 2 || td_k_length > 10 {
  					continue
  				}
  				if len(indexMap) > 0 {
  					// The key is not a recognised contact field.
  					if !ContactInfoMustReg.MatchString(td_k) {
  						// (A disabled variant reset the maps after repeated misses.)
  						if mustMatchFirst { // an index map was handed in: stop everything
  							break L
  						}
  						continue
  					}
  					reCreate = true
  					index := td_index
  					// indexMap was empty when this td started and now holds more than one entry.
  					if oldIndexMapLength == 0 && len(indexMap) > 1 {
  						if prevKey != td_k {
  							prevKey = td_k
  							index = td_index
  						} else if prevKey == td_k {
  							index++
  						}
  					}
  					// The value counts as empty.
  					if filterValue.MatchString(td_v) {
  						thisTrHasMatch = true
  						continue
  					}
  					// Contact type for this position; with a single entry in
  					// indexMap that entry is used whatever its position.
  					myContactType := indexMap[index]
  					if myContactType == "" && len(indexMap) == 1 {
  						_, onlyContactType := u.FirstKeyValueInMap(indexMap)
  						myContactType, _ = onlyContactType.(string)
  					}
  					if myContactType == "" {
  						continue
  					}
  					matchCount++
  					if matchMap[myContactType] == nil {
  						matchMap[myContactType] = map[string]bool{}
  					}
  					if IsContactKvHandle(ContactInfoMustReg.FindString(td_k), matchMap[myContactType]) {
  						continue
  					}
  					matchMap[myContactType][ContactInfoMustReg.FindString(td_k)] = true
  					// The key already names the contact type itself: nothing to prefix.
  					if ContactType[myContactType].MatchString(td_k) {
  						continue
  					}
  					thisTrHasMatch = true
  					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index, isSite, codeSite)
  				}
  			}
  		}
  		// In forward mode, drop indexMap once two rows without a match were seen.
  		if allAscFind && !thisTrHasMatch {
  			notMatchTrCount++
  			if notMatchTrCount >= 2 {
  				notMatchTrCount = 0
  				indexMap = map[int]string{}
  			}
  		}
  	}
  	// Nothing matched at all: hand back empty maps.
  	if matchCount == 0 {
  		indexMap = map[int]string{}
  		matchMap = map[string]map[string]bool{}
  	}
  	(*contactFormat).IndexMap = indexMap
  	(*contactFormat).MatchMap = matchMap
  }
  2903. //modle
  2904. func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int, isSite bool, codeSite string) {
  2905. modle := 0
  2906. if len(thisTdKvs) == 1 {
  2907. if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
  2908. modle = 1
  2909. } else {
  2910. modle = 2
  2911. }
  2912. }
  2913. if modle == 1 {
  2914. td.Text = myContactType + td_k
  2915. td.Val = td.Text
  2916. } else {
  2917. //
  2918. if !strings.HasSuffix(td_k, "方式") {
  2919. kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, isSite, codeSite)
  2920. if len(kvTags) == 1 {
  2921. tagVal, _ := u.FirstKeyValueInMap(kvTags)
  2922. if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
  2923. return
  2924. }
  2925. if (*contactTypeTagMap)[myContactType] == nil {
  2926. (*contactTypeTagMap)[myContactType] = map[string][]interface{}{}
  2927. }
  2928. myOldKeyArray := (*contactTypeTagMap)[myContactType][tagVal]
  2929. if myOldKeyArray != nil {
  2930. tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
  2931. } else {
  2932. (*contactTypeTagMap)[myContactType][tagVal] = make([]interface{}, 3)
  2933. }
  2934. if (*weightMap)[myContactType] == nil {
  2935. (*weightMap)[myContactType] = map[string]interface{}{}
  2936. }
  2937. (*weightMap)[myContactType][tagVal] = 1
  2938. (*contactTypeTagMap)[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
  2939. }
  2940. }
  2941. td.SortKV.AddKey(myContactType+td_k, td_v)
  2942. }
  2943. }
  2944. //都为正序查询
  2945. func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int, isSite bool, codeSite string) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
  2946. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构,中标单位
  2947. if !ContactType[k].MatchString(td_k) { //没有匹配到采购单位,代理机构,中标单位
  2948. continue
  2949. }
  2950. matchCount++
  2951. if weightMap[k] == nil {
  2952. weightMap[k] = map[string]interface{}{}
  2953. }
  2954. //匹配到进行处理
  2955. if ContactInfoVagueReg.MatchString(td_k) {
  2956. thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch, isSite, codeSite)
  2957. } else if k == "采购单位" { //打标签,权重高的重新覆盖
  2958. kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"}, isSite, codeSite)
  2959. tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
  2960. if tagVal == k {
  2961. if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
  2962. weightMap[k][k] = weightVal.(int)
  2963. matchMap[k] = map[string]bool{}
  2964. indexMap = map[int]string{}
  2965. }
  2966. }
  2967. }
  2968. if u.IsMapHasValue(k, indexMap) { //map中是否存在value
  2969. thisTrHasMatch = true
  2970. iscontinue = true
  2971. continue
  2972. }
  2973. if reCreate {
  2974. indexMap = map[int]string{}
  2975. reCreate = false
  2976. }
  2977. indexMap[thidTdIndex] = k
  2978. iscontinue = true
  2979. thisTrHasMatch = true
  2980. thidTdIndex++
  2981. break
  2982. }
  2983. if len(indexMap) == 0 && td_kv.PrevLine != "" {
  2984. //td_kv.PrevLine
  2985. prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
  2986. for k, v := range ContactType { //采购单位,代理机构正则
  2987. if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
  2988. indexMap[thidTdIndex] = k
  2989. thisTrHasMatch = true
  2990. thidTdIndex++
  2991. }
  2992. }
  2993. }
  2994. if len(indexMap) == 0 && td_kv.Title != "" {
  2995. //td_kv.Title
  2996. if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
  2997. thidTdIndex = 0
  2998. matchMap = map[string]map[string]bool{}
  2999. indexMap = map[int]string{1: titleMatchType}
  3000. // for i, t := range titleMatchType {
  3001. // indexMap[i+1] = t
  3002. // }
  3003. }
  3004. }
  3005. return matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex
  3006. }
  3007. //匹配到进行处理
  3008. func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool, isSite bool, codeSite string) bool {
  3009. if (*matchMap)[k] == nil {
  3010. (*matchMap)[k] = map[string]bool{}
  3011. }
  3012. isAddToMatchMap := true
  3013. if !strings.HasSuffix(td_k, "方式") {
  3014. kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, isSite, codeSite)
  3015. if len(kvTags) == 1 {
  3016. tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
  3017. if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
  3018. isAddToMatchMap = false
  3019. }
  3020. if td.SortKV.Map[tagVal] != nil {
  3021. if (*weightMap)[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= (*weightMap)[k][tagVal].(int)) {
  3022. (*weightMap)[k][tagVal] = weightVal.(int)
  3023. td.SortKV.AddKey(tagVal, td_v)
  3024. thisTrHasMatch = true
  3025. }
  3026. } else {
  3027. (*weightMap)[k][tagVal] = weightVal.(int)
  3028. }
  3029. }
  3030. }
  3031. if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
  3032. (*matchMap)[k][ContactInfoVagueReg.FindString(td_k)] = true
  3033. }
  3034. return thisTrHasMatch
  3035. }
  3036. //采购人在联系人、电话后面的处理
  3037. func (tn *Table) hasIndexMap(thisTdKvs []*u.Kv, indexMap *map[int]string, tdAscFind bool) bool {
  3038. //采购人在联系人、电话后面的处理
  3039. isCanAddToIndexMap := false
  3040. LL:
  3041. for _, td_kv := range thisTdKvs {
  3042. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  3043. goOnFunc, isContinue, td_k := tn.tdKV(td_kv.Key, nil, &isCanAddToIndexMap, indexMap, "LL")
  3044. if !goOnFunc {
  3045. break LL
  3046. }
  3047. if isContinue {
  3048. continue
  3049. }
  3050. if len(*indexMap) == 0 {
  3051. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  3052. if !ContactType[k].MatchString(td_k) {
  3053. continue
  3054. }
  3055. if isCanAddToIndexMap && len(*indexMap) == 0 {
  3056. (*indexMap)[0] = k
  3057. break
  3058. }
  3059. }
  3060. }
  3061. }
  3062. if len(*indexMap) > 0 {
  3063. tdAscFind = false
  3064. }
  3065. return tdAscFind
  3066. }
  3067. //和|以?及|与|、多个词和在一起
  3068. func (tn *Table) tdsMultipleWords(jumpNextTd bool, td *TD, td_index int, tr *TR, thisTrHasMatch bool, indexMap map[int]string) (jump, thisTr bool) {
  3069. if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
  3070. thisTdVals := regSplit.Split(td.Text, -1)
  3071. nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
  3072. if len(thisTdVals) == len(nextTdVals) { //本次和下个长度相等
  3073. for _, k := range HasOrderContactType(td.Text) { //采购单位,代理机构
  3074. if ContactType[k].MatchString(td.Text) { //采购单位,代理机构
  3075. for thisTdVals_k, thisTdVals_v := range thisTdVals {
  3076. thisTdVals_v = strings.TrimSpace(thisTdVals_v)
  3077. if ContactType[k].MatchString(thisTdVals_v) { //采购单位,代理机构
  3078. thisTrHasMatch = true
  3079. tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
  3080. continue
  3081. }
  3082. if !ContactInfoMustReg.MatchString(thisTdVals_v) {
  3083. continue
  3084. }
  3085. jumpNextTd = true
  3086. thisTrHasMatch = true
  3087. tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
  3088. }
  3089. break
  3090. }
  3091. }
  3092. if len(indexMap) > 0 {
  3093. _, onlyContactType := u.FirstKeyValueInMap(indexMap)
  3094. if myContactType, _ := onlyContactType.(string); myContactType != "" {
  3095. for thisTdVals_k, thisTdVals_v := range thisTdVals {
  3096. thisTdVals_v = strings.TrimSpace(thisTdVals_v)
  3097. if ContactInfoMustReg.MatchString(thisTdVals_v) {
  3098. jumpNextTd = true
  3099. thisTrHasMatch = true
  3100. tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
  3101. }
  3102. }
  3103. }
  3104. }
  3105. }
  3106. } else {
  3107. jumpNextTd = false
  3108. }
  3109. return jumpNextTd, thisTrHasMatch
  3110. }
  3111. //采购单位,代理机构
  3112. func (tn *Table) tdHasOrderContactType(td_k string, indexMap *map[int]string, tr *TR, prevCanAddToIndexMap, isCanAddToIndexMap, matchPrevFlag *bool, td_index int) (gotoFunc bool) {
  3113. for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
  3114. if !ContactType[k].MatchString(td_k) {
  3115. continue
  3116. }
  3117. if len(*indexMap) == 0 {
  3118. if (*isCanAddToIndexMap) || (*prevCanAddToIndexMap && len(tr.TDs) == 1) {
  3119. myPrevTdVal := ""
  3120. if td_index-2 >= 0 {
  3121. myPrevTdVal = tr.TDs[td_index-2].Val
  3122. }
  3123. if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
  3124. (*matchPrevFlag) = true
  3125. }
  3126. (*indexMap)[0] = k
  3127. break
  3128. }
  3129. } else {
  3130. (*indexMap) = map[int]string{}
  3131. return false
  3132. }
  3133. }
  3134. return true
  3135. }
  3136. //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
  3137. func (tn *Table) tdKV(key string, matchPrevFlag, isCanAddToIndexMap *bool, indexMap *map[int]string, gotoName string) (goOnFunc, isContinue bool, td_k string) {
  3138. td_k = FilterContactKey(key) //带括号的()[]【】采购单位,代理机构处理
  3139. td_k_length := len([]rune(td_k))
  3140. if td_k_length < 2 || td_k_length > 15 {
  3141. return true, true, "" //继续执行,跳过当前循环
  3142. }
  3143. isContinue = ContactInfoMustReg.MatchString(td_k) //识别采购单位联系人、联系电话、代理机构联系人、联系电话
  3144. if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
  3145. if gotoName == "LS" && !(*matchPrevFlag) && len(*indexMap) > 0 {
  3146. (*indexMap) = map[int]string{}
  3147. return false, false, "" //中断外层循环
  3148. }
  3149. if gotoName == "LL" && len(*indexMap) > 0 {
  3150. (*indexMap) = map[int]string{}
  3151. return false, false, ""
  3152. }
  3153. (*isCanAddToIndexMap) = true
  3154. }
  3155. return true, false, td_k //继续执行,不结束当前循环,返回处理后的值
  3156. }
  3157. //获取td冒号kv
  3158. func (tn *Table) tdkv(td *TD) []*u.Kv {
  3159. thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取冒号kv
  3160. //获取冒号
  3161. if len(thisTdKvs) == 0 {
  3162. tdValue := regReplAllSpace.ReplaceAllString(td.Text, "") //去除空格换行
  3163. if tdValue != "" && len([]rune(tdValue)) < 10 {
  3164. thisTdKvs = append(thisTdKvs, &u.Kv{
  3165. Key: tdValue,
  3166. Value: "",
  3167. })
  3168. }
  3169. }
  3170. return thisTdKvs
  3171. }
  3172. func (table *Table) extractPriceNumber() {
  3173. lineMapArr := make(map[string]*SortMap)
  3174. lineMap := make(map[string]*SortMap)
  3175. lineMapArr, lineMap = initLineMapLineMapArr(table) //不同数据类型的数据组合
  3176. //qutil.Debug("lineMapArr----", lineMapArr)
  3177. if len(lineMapArr) > 0 {
  3178. for _, arrMap := range lineMapArr {
  3179. resultArrMap := table.matchMapArrPrinceNumber(arrMap) //最终数据
  3180. //qutil.Debug("resultArrMap-------------------", resultArrMap)
  3181. //处理数组长度不一致情况
  3182. if len(resultArrMap) > 0 {
  3183. numLen := len(resultArrMap["number"])
  3184. priceLen := len(resultArrMap["price"])
  3185. itemLen := len(resultArrMap["item"])
  3186. maxNum := numLen //获取最大长度
  3187. if numLen == 0 { //没有
  3188. maxNum = priceLen
  3189. }
  3190. //取个数数据的长度为基准(数据长度可能不一致)
  3191. if numLen != priceLen && numLen > 0 && priceLen > 0 { //有number和price数据且长度不同,进行数据增减补齐
  3192. if priceLen > numLen { //price多,删
  3193. tmpArr := resultArrMap["price"]
  3194. resultArrMap["price"] = tmpArr[:numLen]
  3195. } else if priceLen < numLen { //price少,补空
  3196. for {
  3197. resultArrMap["price"] = append(resultArrMap["price"], "")
  3198. //qutil.Debug("=============price==============")
  3199. if len(resultArrMap["price"]) == numLen {
  3200. break
  3201. }
  3202. }
  3203. }
  3204. }
  3205. if maxNum > 0 && itemLen > 0 && maxNum != itemLen { //有price或者number,item长度保持一致
  3206. if itemLen > maxNum {
  3207. tmpArr := resultArrMap["item"]
  3208. resultArrMap["item"] = tmpArr[:maxNum]
  3209. } else if itemLen < maxNum {
  3210. for {
  3211. resultArrMap["item"] = append(resultArrMap["item"], "")
  3212. //qutil.Debug("=============item==============")
  3213. if len(resultArrMap["item"]) == maxNum {
  3214. break
  3215. }
  3216. }
  3217. }
  3218. }
  3219. //封装成一一对应数据
  3220. /*
  3221. {
  3222. "price" :["123","125"],
  3223. "number" :["1","12"]
  3224. }
  3225. 转换为:
  3226. [
  3227. {"price":"123","number":"1"},
  3228. {"price":"125","number":"12"}
  3229. ]
  3230. */
  3231. finishData := []map[string]interface{}{}
  3232. //qutil.Debug("maxNum--------------------", maxNum)
  3233. for t := 0; t < maxNum; t++ {
  3234. tmpMap := make(map[string]interface{})
  3235. if len(resultArrMap["number"]) > 0 {
  3236. number := resultArrMap["number"][t]
  3237. tmpMap["number"] = number
  3238. }
  3239. if len(resultArrMap["price"]) > 0 {
  3240. price := resultArrMap["price"][t]
  3241. tmpMap["price"] = price
  3242. }
  3243. if len(resultArrMap["item"]) > 0 {
  3244. item := resultArrMap["item"][t]
  3245. runeItem := []rune(qutil.ObjToString(item))
  3246. if len(runeItem) > 50 {
  3247. tmpMap["item"] = string(runeItem[:50])
  3248. } else {
  3249. tmpMap["item"] = item
  3250. }
  3251. }
  3252. finishData = append(finishData, tmpMap)
  3253. }
  3254. //qutil.Debug(finishData)
  3255. if len(finishData) > 0 {
  3256. table.PriceNumberData = append(table.PriceNumberData, finishData)
  3257. }
  3258. }
  3259. }
  3260. }
  3261. if len(lineMap) > 0 {
  3262. for _, strMap := range lineMap {
  3263. resultStrMap := table.matchMapPrinceNumber(strMap)
  3264. //qutil.Debug("resultStrMap---", resultStrMap)
  3265. if len(resultStrMap) > 0 {
  3266. if resultStrMap["price"] != nil || resultStrMap["number"] != nil { //有price或者number在保存
  3267. if item := qutil.ObjToString(resultStrMap["item"]); item != "" { //item过长截取
  3268. runeItem := []rune(item)
  3269. if len(runeItem) > 50 {
  3270. resultStrMap["item"] = string(runeItem[:50])
  3271. }
  3272. }
  3273. finishData := []map[string]interface{}{}
  3274. finishData = append(finishData, resultStrMap)
  3275. //qutil.Debug("finishData---", finishData)
  3276. if len(finishData) > 0 {
  3277. table.PriceNumberData = append(table.PriceNumberData, finishData)
  3278. }
  3279. }
  3280. }
  3281. }
  3282. }
  3283. //qutil.Debug("table.PriceNumberData---------", table.PriceNumberData)
  3284. }
  3285. //数组数据抽取price和number
  3286. func (table *Table) matchMapArrPrinceNumber(arrMap *SortMap) map[string][]interface{} {
  3287. result := make(map[string][]interface{}) //最终存储数据
  3288. for _, key := range arrMap.Keys {
  3289. val := arrMap.Map[key].([]string)
  3290. for f, reg := range u.PriceNumberReg {
  3291. key = tabletitleclear2.ReplaceAllString(key, "") //过滤
  3292. if reg.MatchString(key) { //匹配成功
  3293. //qutil.Debug("arr----key", key, val, f)
  3294. tmp := []interface{}{}
  3295. if f == "price" {
  3296. tmp = dealPriceInterface(key, val...) //处理金额,单位转换
  3297. } else if f == "number" {
  3298. tmp = dealNumberInterface(val...) //处理数量
  3299. } else {
  3300. for _, v := range val {
  3301. tmp = append(tmp, v)
  3302. }
  3303. }
  3304. if len(tmp) > 0 {
  3305. result[f] = tmp
  3306. }
  3307. }
  3308. }
  3309. }
  3310. return result
  3311. }
  3312. //字符串数据抽取price和number
  3313. func (table *Table) matchMapPrinceNumber(strMap *SortMap) map[string]interface{} {
  3314. result := make(map[string]interface{})
  3315. for _, key := range strMap.Keys {
  3316. val := qutil.ObjToString(strMap.Map[key])
  3317. for f, reg := range u.PriceNumberReg {
  3318. key = tabletitleclear2.ReplaceAllString(key, "") //过滤
  3319. if reg.MatchString(key) { //匹配成功
  3320. //qutil.Debug("str----key", key, val)
  3321. if f == "price" {
  3322. if len(regHz.FindAllString(val, -1)) > 5 { //price中汉字过多视为内容错误
  3323. continue
  3324. }
  3325. tmp := dealPriceInterface(key, val)[0] //处理金额,单位转换
  3326. result[f] = tmp
  3327. } else if f == "number" {
  3328. tmp := dealNumberInterface(val)[0]
  3329. result[f] = tmp
  3330. } else {
  3331. result[f] = val
  3332. }
  3333. }
  3334. }
  3335. }
  3336. return result
  3337. }
  3338. //table中抽取品牌,table.BrandData
  3339. func (table *Table) analyBrand() {
  3340. //5c2d8c05a5cb26b9b782572b
  3341. //产品名称 品牌 规格 单价 单位 数量 小计 质保期
  3342. lineMapArr := make(map[string]*SortMap)
  3343. lineMap := make(map[string]*SortMap)
  3344. brandRule := u.BrandRules
  3345. //初始化lineMapArr,lineMap;
  3346. lineMapArr, lineMap = initLineMapLineMapArr(table) //处理数组数据后,匹配必须title和替换要保存的title
  3347. //qutil.Debug("lineMapArr----", len(lineMapArr))
  3348. if len(lineMapArr) > 0 {
  3349. for _, aMap := range lineMapArr {
  3350. maxNum := 0 //记录最大长度
  3351. arrcount1 := 0 //记录key是否存在必须title(数组数据)
  3352. arrcount2 := 0
  3353. ka := make(map[string][]string) //最终存储数据
  3354. //qutil.Debug("aMap.Keys----", aMap.Keys)
  3355. //匹配商品规则
  3356. arrcount1, arrcount2, ka = table.matchMapArrBrandRule(aMap, brandRule, ka, arrcount1, arrcount2)
  3357. //找最终存储数据的最小len(arr)
  3358. // for _, vf := range ka {
  3359. // //找最短的数组
  3360. // lenVal := len(vf)
  3361. // if minNum == 0 || minNum > lenVal { //maxNum = len(最短数组)
  3362. // minNum = lenVal
  3363. // }
  3364. // }
  3365. //找最终存储数据的最大len(arr),小的补空
  3366. for _, vf1 := range ka {
  3367. lenVal := len(vf1)
  3368. if lenVal > maxNum {
  3369. maxNum = lenVal
  3370. }
  3371. }
  3372. //table.BrandData商品存储
  3373. finishKa := make(map[string][]string)
  3374. for vf2K, vf2 := range ka {
  3375. if len(vf2) < maxNum {
  3376. if vf2K == "unitprice" { //价格的当前总数比最大的总数小就跳过,可能是总价格而不是单个的价格
  3377. continue
  3378. }
  3379. lenMv := maxNum - len(vf2)
  3380. for i := 0; i < lenMv; i++ {
  3381. vf2 = append(vf2, "")
  3382. }
  3383. }
  3384. finishKa[vf2K] = vf2
  3385. }
  3386. hasKey(table, arrcount1) //是否匹配到table中的标题
  3387. //qutil.Debug("finishKa----", finishKa)
  3388. if arrcount1 >= 1 {
  3389. if arrcount1+arrcount2 == 1 { //删除只匹配到一个价钱(总价)
  3390. delete(finishKa, "unitprice")
  3391. }
  3392. finishData := dealArrData(maxNum, finishKa)
  3393. table.BrandData = append(table.BrandData, finishData) //存储table.BrandData
  3394. }
  3395. }
  3396. }
  3397. //处理string数据后,匹配必须title和替换要保存的title
  3398. //qutil.Debug("lineMap----", len(lineMap))
  3399. if len(lineMap) > 0 {
  3400. for _, sMap := range lineMap {
  3401. strcount1 := 0 //记录key是否存在必须title(字符串数据)
  3402. strcount2 := 0
  3403. endStrMap := make(map[string]string)
  3404. //qutil.Debug(k, "aMap.Keys----", sMap.Keys)
  3405. //匹配商品规则
  3406. strcount1, strcount2, endStrMap = table.matchMapBrandRule(sMap, brandRule, endStrMap, strcount1, strcount2)
  3407. //原始字符串数据处理
  3408. hasKey(table, strcount1) //是否匹配到table中的标题
  3409. //qutil.Debug("endStrMap----", endStrMap)
  3410. if strcount1 >= 1 {
  3411. if strcount1+strcount2 == 1 { //删除只匹配到一个价钱(总价)
  3412. delete(endStrMap, "unitprice")
  3413. }
  3414. finishData := dealStrData(endStrMap) //处理数据
  3415. if len(finishData) > 0 {
  3416. table.BrandData = append(table.BrandData, finishData)
  3417. }
  3418. }
  3419. }
  3420. }
  3421. }
  3422. //字符串匹配商品规则
  3423. func (table *Table) matchMapBrandRule(sMap *SortMap, brandRule map[string]map[string]string, endStrMap map[string]string, strcount1, strcount2 int) (int, int, map[string]string) {
  3424. for _, k1 := range sMap.Keys {
  3425. match := false //记录must是否匹配到
  3426. v1 := qutil.ObjToString(sMap.Map[k1])
  3427. // for k1, v1 := range sMap {
  3428. //qutil.Debug(k1, "++++++++++", v1)
  3429. if v1 == "" {
  3430. continue
  3431. }
  3432. //匹配必须title
  3433. for nameM, r := range brandRule["must"] {
  3434. if convert(k1, r) { //匹配成功
  3435. v1tmp1 := v1
  3436. match = true
  3437. if nameM == "itemname" || nameM == "modal" { //特殊处理itemname
  3438. hasGoods(table, v1)
  3439. if nameM == "itemname" {
  3440. v1tmp1 = filterItem(v1)[0] //过滤itemname
  3441. if v1tmp1 == "" {
  3442. break
  3443. }
  3444. }
  3445. }
  3446. if nameM == "brandname" || nameM == "modal" { //特殊处理brandname
  3447. if endStrMap["brandname"] == "" {
  3448. brand, allNull := hasBrand(table, v1)
  3449. if !allNull {
  3450. endStrMap["brandname"] = brand[0]
  3451. }
  3452. }
  3453. }
  3454. //unitprice
  3455. if nameM == "unitprice" { //处理金额
  3456. v1tmp1 = dealPrice(k1, v1)[0]
  3457. }
  3458. if nameM != "brandname" && endStrMap[nameM] == "" {
  3459. endStrMap[nameM] = v1tmp1
  3460. }
  3461. strcount1++
  3462. }
  3463. }
  3464. //替换其它要保存字段
  3465. if !match {
  3466. for nameR, r := range brandRule["replace"] {
  3467. if convert(k1, r) { //匹配成功
  3468. v1tmp2 := v1
  3469. //totalprice
  3470. if nameR == "totalprice" { //处理金额
  3471. v1tmp2 = dealPrice(k1, v1)[0]
  3472. }
  3473. //number
  3474. if nameR == "number" { //处理数量
  3475. varr1, uname1 := dealNumber(v1)
  3476. v1tmp2 = varr1[0]
  3477. //从number中获取到的单位
  3478. if endStrMap["unitname"] == "" && uname1[0] != "" {
  3479. endStrMap["unitname"] = uname1[0]
  3480. }
  3481. }
  3482. if v1tmp2 != "" {
  3483. endStrMap[nameR] = v1tmp2
  3484. }
  3485. strcount2++
  3486. }
  3487. }
  3488. }
  3489. //}
  3490. }
  3491. return strcount1, strcount2, endStrMap
  3492. }
  3493. //数组匹配商品规则
  3494. func (table *Table) matchMapArrBrandRule(aMap *SortMap, brandRule map[string]map[string]string, ka map[string][]string, arrcount1, arrcount2 int) (int, int, map[string][]string) {
  3495. for _, k0 := range aMap.Keys {
  3496. match := false //记录must是否匹配到
  3497. v0 := aMap.Map[k0].([]string)
  3498. //匹配必须title
  3499. for nameM, r := range brandRule["must"] {
  3500. if convert(k0, r) { //匹配成功
  3501. v0tmp1 := v0
  3502. match = true
  3503. if len(ka[nameM]) != 0 && strings.Contains(k0, "描述") { //防止k0匹配到多次 和特殊情况 物料名称 物料描述同时出现
  3504. continue
  3505. }
  3506. if nameM == "itemname" || nameM == "modal" {
  3507. hasGoods(table, v0...) //判断itemname和modal中有没有商品
  3508. if nameM == "itemname" {
  3509. v0tmp1 = filterItem(v0...) //过滤itemname
  3510. }
  3511. }
  3512. if nameM == "brandname" || nameM == "modal" {
  3513. if len(ka["brandname"]) == 0 {
  3514. brand, allNull := hasBrand(table, v0...)
  3515. if !allNull {
  3516. ka["brandname"] = brand
  3517. }
  3518. }
  3519. }
  3520. //unitprice
  3521. if nameM == "unitprice" { //处理金额
  3522. v0tmp1 = dealPrice(k0, v0...)
  3523. }
  3524. if nameM != "brandname" && len(ka[nameM]) == 0 {
  3525. ka[nameM] = v0tmp1
  3526. }
  3527. arrcount1++
  3528. }
  3529. }
  3530. //替换其它要保存字段
  3531. if !match { //must未匹配,匹配replace
  3532. for nameR, r := range brandRule["replace"] {
  3533. if convert(k0, r) { //匹配成功
  3534. v0tmp2 := v0
  3535. //totalprice
  3536. if nameR == "totalprice" { //处理金额
  3537. v0tmp2 = dealPrice(k0, v0...)
  3538. }
  3539. //number
  3540. if nameR == "number" { //处理数量
  3541. uname0 := []string{}
  3542. v0tmp2, uname0 = dealNumber(v0...)
  3543. if len(ka["unitname"]) == 0 && len(uname0) != 0 {
  3544. ka["unitname"] = uname0
  3545. }
  3546. }
  3547. if len(v0tmp2) > 0 {
  3548. ka[nameR] = v0tmp2
  3549. }
  3550. arrcount2++
  3551. }
  3552. }
  3553. }
  3554. }
  3555. return arrcount1, arrcount2, ka
  3556. }
  3557. //初始化lineMapArr,lineMap
  3558. func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
  3559. lineMapArr = make(map[string]*SortMap)
  3560. lineMap = make(map[string]*SortMap)
  3561. for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
  3562. val := table.SortKV.Map[key]
  3563. key = regReplAllSpace.ReplaceAllString(key, "")
  3564. key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
  3565. //qutil.Debug(key, "---------------------------", val)
  3566. if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
  3567. /*
  3568. {
  3569. "商品":["",""],
  3570. "商品_"["",""],
  3571. }
  3572. */
  3573. valArr, allempty := filterVal(realTypeVal...) //过滤数据
  3574. if allempty {
  3575. continue
  3576. }
  3577. realTypeVal = valArr
  3578. line := underline.FindString(key)
  3579. lineValMap1 := lineMapArr[line]
  3580. // i := 1
  3581. // L:
  3582. // for { //去除数组空数据
  3583. // last := realTypeVal[len(realTypeVal)-i]
  3584. // if last == "" {
  3585. // i++
  3586. // if i > len(realTypeVal) {
  3587. // break
  3588. // }
  3589. // goto L
  3590. // } else {
  3591. // break
  3592. // }
  3593. // }
  3594. // dislodgeNull := realTypeVal[:(len(realTypeVal) - i + 1)] //去除数组中空数据
  3595. if len(realTypeVal) > 0 {
  3596. if lineValMap1 == nil {
  3597. tmp := NewSortMap()
  3598. tmp.AddKey(key, realTypeVal)
  3599. lineMapArr[line] = tmp
  3600. } else {
  3601. lineValMap1.AddKey(key, realTypeVal)
  3602. }
  3603. }
  3604. //qutil.Debug("lineMapArr---", lineMapArr[line].Keys, lineMapArr[line].Map)
  3605. } else if realTypeVal, b := val.(string); b { //val为字符串 {"数量":"1"}
  3606. /*
  3607. {
  3608. "商品:"",名称:"",
  3609. "商品_:"",名称_:"",
  3610. "商品__:"",名称__:"",
  3611. }
  3612. */
  3613. valArr, allempty := filterVal(realTypeVal) //过滤数据
  3614. if allempty {
  3615. continue
  3616. }
  3617. realTypeVal = valArr[0]
  3618. line := underline.FindString(key)
  3619. lineValMap2 := lineMap[line]
  3620. if lineValMap2 == nil {
  3621. tmp := NewSortMap()
  3622. tmp.AddKey(key, realTypeVal)
  3623. lineMap[line] = tmp
  3624. } else {
  3625. lineValMap2.AddKey(key, realTypeVal)
  3626. }
  3627. //qutil.Debug("lineMap---", lineMap[line].Keys, lineMap[line].Map)
  3628. } else {
  3629. // "_id" : ObjectId("5c2c3802a5cb26b9b78646c4")5c2b0551a5cb26b9b7cb05db否5c2a42e6a5cb26b9b763ba5a采购人:一、采购人5c2b06f5a5cb26b9b7cc4409
  3630. //成交供应商排名 [map[entname:昆明合优科技有限公司 sortstr:第一中标候选人 sort:1] map[sort:2 entname:昆明厚起科技有限公司 sortstr:第二中标候选人] map[entname:云南远安科技发展有限公司 sortstr:第三中标候选人 sort:3]]
  3631. //qutil.Debug("err data:", key, val)
  3632. }
  3633. }
  3634. return lineMapArr, lineMap
  3635. }
  3636. func dealArrData(maxNum int, ka map[string][]string) []map[string]string {
  3637. for k2, v2 := range ka {
  3638. //处理数组长度不相等,使长度一致
  3639. if len(v2) > maxNum {
  3640. ka[k2] = v2[:maxNum]
  3641. }
  3642. }
  3643. finalData := assembleData(ka, 1)
  3644. if len(finalData) > 0 {
  3645. return finalData
  3646. }
  3647. return nil
  3648. }
  3649. func dealStrData(kv map[string]string) []map[string]string {
  3650. finalData := []map[string]string{}
  3651. if len(kv) > 0 {
  3652. finalData = assembleData(kv, 2)
  3653. }
  3654. return finalData
  3655. }
  3656. //组装数据,每一行的数据为一数据集合
  3657. func assembleData(m interface{}, n int) []map[string]string {
  3658. defer qutil.Catch()
  3659. /*
  3660. {
  3661. "itemname":["计算机","打印机","机柜"],
  3662. "number" :["1","12","4"]
  3663. }
  3664. */
  3665. datas := []map[string]string{}
  3666. if n == 1 { //数组数据
  3667. realTypeM := m.(map[string][]string)
  3668. //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  3669. /*
  3670. arr1 ["a1","b1","c1"]
  3671. arr2 ["a2","b2","c2"]
  3672. [
  3673. {"a1","a2"},
  3674. {"b1","b2"},
  3675. {"c1","c2"}
  3676. ]
  3677. */
  3678. //start
  3679. for k3, v3 := range realTypeM {
  3680. for _, val := range v3 {
  3681. data := make(map[string]string)
  3682. data[k3] = val
  3683. datas = append(datas, data)
  3684. }
  3685. break
  3686. }
  3687. for i, data := range datas {
  3688. for k4, v4 := range realTypeM {
  3689. if i < len(v4) { //数组数据长度不一致
  3690. if v4[i] != " " {
  3691. data[k4] = v4[i]
  3692. } else {
  3693. delete(data, k4)
  3694. }
  3695. } else {
  3696. fmt.Println("err table")
  3697. }
  3698. }
  3699. datas[i] = data
  3700. }
  3701. //end
  3702. for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  3703. for fmk, fmv := range fdv {
  3704. if tabletdclear.ReplaceAllString(fmv, "") == "" {
  3705. delete(fdv, fmk)
  3706. }
  3707. }
  3708. }
  3709. } else { //字符串数据
  3710. realTypeM := m.(map[string]string)
  3711. datas = append(datas, realTypeM)
  3712. }
  3713. return datas
  3714. }
  3715. ////组装数据,每一行的数据为一数据集合
  3716. //func assembleData(m interface{}, n int) []map[string]string {
  3717. // defer qutil.Catch()
  3718. // /*
  3719. // {
  3720. // "itemname":["计算机","打印机","机柜"],
  3721. // "number" :["1","12","4"]
  3722. // }
  3723. // */
  3724. // datas := []map[string]string{}
  3725. // switch reflect.TypeOf(m).String() {
  3726. // case "map[string][]string": //数组数据
  3727. // realTypeM := m.(map[string][]string)
  3728. // //根据数组数据的顺序 将多个数组中索引相同的数据拼装成一个map,并将这多个map放入一个arr
  3729. // /*
  3730. // arr1 ["a1","b1","c1"]
  3731. // arr2 ["a2","b2","c2"]
  3732. // [
  3733. // {"a1","a2"},
  3734. // {"b1","b2"},
  3735. // {"c1","c2"}
  3736. // ]
  3737. // */
  3738. // //start
  3739. // for k3, v3 := range realTypeM {
  3740. // for _, val := range v3 {
  3741. // data := make(map[string]string)
  3742. // data[k3] = val
  3743. // datas = append(datas, data)
  3744. // }
  3745. // break
  3746. // }
  3747. // for i, data := range datas {
  3748. // for k4, v4 := range realTypeM {
  3749. // if i < len(v4) { //数组数据长度不一致
  3750. // if v4[i] != " " {
  3751. // data[k4] = v4[i]
  3752. // } else {
  3753. // delete(data, k4)
  3754. // //continue
  3755. // }
  3756. // } else {
  3757. // fmt.Println("err table")
  3758. // //continue
  3759. // }
  3760. // }
  3761. // datas[i] = data
  3762. // }
  3763. // //end
  3764. // // for _, fdv := range datas { //清除空数据和只含特殊符号的数据
  3765. // // for fmk, fmv := range fdv {
  3766. // // if tabletdclear.ReplaceAllString(fmv, "") == "" {
  3767. // // delete(fdv, fmk)
  3768. // // }
  3769. // // }
  3770. // // }
  3771. // case "map[string]string": //字符串数据
  3772. // realTypeM := m.(map[string]string)
  3773. // datas = append(datas, realTypeM)
  3774. // default:
  3775. // }
  3776. // return datas
  3777. //}
  3778. func convert(key, r string) bool {
  3779. defer qutil.Catch()
  3780. flag := false
  3781. key = tabletitleclear.ReplaceAllString(key, "")
  3782. reg, err := regexp.Compile(r)
  3783. if err != nil {
  3784. fmt.Println("reg err:", err)
  3785. return false
  3786. }
  3787. flag = reg.MatchString(key)
  3788. return flag
  3789. }
  3790. func hasKey(table *Table, n int) {
  3791. defer qutil.Catch()
  3792. if table.TableResult.HasKey == 1 {
  3793. return
  3794. }
  3795. if n >= 1 {
  3796. table.TableResult.HasKey = 1
  3797. }
  3798. }
  3799. func hasGoods(table *Table, data ...string) {
  3800. defer qutil.Catch()
  3801. goodsArr := make([]string, len(data))
  3802. //fmt.Println("table.TableResult.HasGoods=====", table.TableResult.HasGoods)
  3803. if table.TableResult.HasGoods == 1 {
  3804. return
  3805. }
  3806. for i, d := range data {
  3807. if d != "" {
  3808. goods := u.GoodsGet.CheckSensitiveWord(d)
  3809. //fmt.Println("goods======", goods)
  3810. goodsArr[i] = goods
  3811. if len(goods) > 0 {
  3812. table.TableResult.HasGoods = 1
  3813. break
  3814. }
  3815. }
  3816. }
  3817. }
  3818. //func hasBrand(table *Table, data ...string) {
  3819. // defer qutil.Catch()
  3820. // if table.TableResult.HasBrand == 1 {
  3821. // return
  3822. // }
  3823. // for i, d := range data {
  3824. // if d != "" {
  3825. // brand := u.BrandGet.CheckSensitiveWord(d)
  3826. // qutil.Debug(d, brand)
  3827. // if brand != "" {
  3828. // table.TableResult.HasBrand = 1
  3829. // break
  3830. // }
  3831. // }
  3832. // }
  3833. //}
  3834. func hasBrand(table *Table, data ...string) ([]string, bool) {
  3835. defer qutil.Catch()
  3836. //fmt.Println("table.TableResult.HasBrand---------", table.TableResult.HasBrand)
  3837. brandArr := make([]string, len(data))
  3838. // if table.TableResult.HasBrand == 1 {
  3839. // return brandArr, 1
  3840. // }
  3841. allNull := true
  3842. for i, d := range data {
  3843. //if d != "" {
  3844. brand := u.BrandGet.CheckSensitiveWord(d)
  3845. if brand != "" {
  3846. allNull = false
  3847. }
  3848. //fmt.Println("brand======", brand)
  3849. brandArr[i] = brand
  3850. if len(brand) > 0 {
  3851. table.TableResult.HasBrand = 1
  3852. }
  3853. //}
  3854. }
  3855. return brandArr, allNull
  3856. }
  3857. var clearnn *regexp.Regexp = regexp.MustCompile("([\\d.]*)[\\n\\s]*[\\((][\\d.]+[)\\)]")
  3858. //过滤td值
  3859. func filterVal(val ...string) ([]string, bool) {
  3860. defer qutil.Catch()
  3861. n := 0 //记录被过滤的个数
  3862. for i, v := range val {
  3863. if len(clearnn.FindStringSubmatch(v)) > 0 {
  3864. tmpv := clearnn.FindStringSubmatch(v)[1]
  3865. if tmpv != "" {
  3866. v = tmpv
  3867. }
  3868. }
  3869. afterFilter := tabletdclear.ReplaceAllString(v, "")
  3870. afterFilter = NullVal.ReplaceAllString(afterFilter, "")
  3871. if afterFilter == "" {
  3872. n++
  3873. }
  3874. val[i] = afterFilter
  3875. }
  3876. allempty := false
  3877. if n == len(val) { //所有都被过滤掉
  3878. allempty = true
  3879. }
  3880. return val, allempty
  3881. }
  3882. //过滤itemname全是数字
  3883. func filterItem(itemval ...string) []string {
  3884. defer qutil.Catch()
  3885. result := []string{}
  3886. for _, v := range itemval {
  3887. afterFilter := numclear.ReplaceAllString(v, "")
  3888. if afterFilter != "" {
  3889. result = append(result, v)
  3890. } else {
  3891. result = append(result, afterFilter)
  3892. }
  3893. }
  3894. return result
  3895. }
  3896. //处理价格
  3897. func dealPriceInterface(key string, val ...string) (result []interface{}) {
  3898. defer qutil.Catch()
  3899. for _, v := range val {
  3900. if num1.MatchString(v) { //含数字
  3901. tdIsWan := strings.Contains(v, "万")
  3902. if !tdIsWan {
  3903. if strings.Contains(key, "万") {
  3904. v = v + "万"
  3905. }
  3906. }
  3907. data := []interface{}{v, ""}
  3908. money := clear.ObjToMoney(data)[0]
  3909. result = append(result, money)
  3910. } else {
  3911. result = append(result, "")
  3912. }
  3913. }
  3914. return
  3915. }
  3916. //处理number
  3917. func dealNumberInterface(val ...string) (result []interface{}) {
  3918. defer qutil.Catch()
  3919. for _, v := range val { //1个 1.00个
  3920. n := numclear.FindString(v)
  3921. if n == "" {
  3922. result = append(result, "")
  3923. } else if tmp := clear.NumChar[n]; tmp != nil { //一二三...
  3924. result = append(result, tmp)
  3925. } else { //数字
  3926. result = append(result, qutil.IntAll(strings.Split(n, ".")[0]))
  3927. }
  3928. }
  3929. return
  3930. }
  3931. //处理价格
  3932. func dealPrice(key string, val ...string) []string {
  3933. defer qutil.Catch()
  3934. result := []string{}
  3935. for _, v := range val {
  3936. data := []interface{}{v, key}
  3937. money := clear.ObjToMoney(data)[0]
  3938. result = append(result, fmt.Sprintf("%v", money))
  3939. }
  3940. // result := []string{}
  3941. // for _, v := range val { //1.00万元 1元 2.25元/斤
  3942. // tmparr := strings.Split(v, ".")
  3943. // tmparr[0] = moneyNum.ReplaceAllString(tmparr[0], "")
  3944. // if iswan {
  3945. // result = append(result, tmparr[0]+"0000")
  3946. // } else { //td val值带万
  3947. // if strings.Contains(v, "万") { //价格中带有万
  3948. // result = append(result, tmparr[0]+"0000")
  3949. // } else {
  3950. // result = append(result, tmparr[0])
  3951. // }
  3952. // }
  3953. // }
  3954. return result
  3955. }
  3956. //处理number
  3957. func dealNumber(val ...string) ([]string, []string) {
  3958. defer qutil.Catch()
  3959. unitnameArr := []string{}
  3960. result := []string{}
  3961. for _, v := range val { //1个 1.00个
  3962. n := numclear.FindString(v)
  3963. unitname := numclear.ReplaceAllString(v, "") //匹配个数后的单位
  3964. unitnameArr = append(unitnameArr, unitname)
  3965. //val[i] = strings.Split(n, ".")[0]
  3966. result = append(result, strings.Split(n, ".")[0])
  3967. }
  3968. return result, unitnameArr
  3969. }
  3970. func (tn *Table) analyProNameAndItemNumber() {
  3971. }