# Overview of this module (HTML clean-up helpers built on `re`):
#
#   th(neirong)   -- finds every `<...>` tag in `neirong` with
#                    re.findall("<[^>]+>", ...), replaces each tag with its
#                    lower-cased form, then walks the `tihuan` table and
#                    applies every key as a regex pattern and every value as
#                    its replacement; returns the resulting string.
#   th_1(neirong) -- same procedure as th(), but with a much smaller
#                    `tihuan` table.
#
# NOTE(review): the text of this file looks damaged. Many `tihuan` keys appear
#   as the empty string '' and several string literals contain raw line
#   breaks; presumably the keys/values were HTML tags that were stripped when
#   the file was extracted. Restore the table from the original source before
#   relying on it -- TODO confirm.
# NOTE(review): `re.sub(k, v, thh(k, v, nr), re.S, re.M)` passes re.S in the
#   4th positional slot, which re.sub defines as `count`, and re.M in the 5th,
#   which is `flags`. So re.S is not acting as a flag here; if DOTALL matching
#   was intended, it should be `flags=re.S | re.M` -- confirm intent.
# NOTE(review): each pattern is substituted twice per iteration: once inside
#   the nested helper thh() (no flags) and once more by the outer re.sub call.
# NOTE(review): the '"' -> "'" entry is listed before the `style=".*?"`,
#   `class=".*?"`, ... entries, and tags are lower-cased before the table is
#   applied (relevant to the 'BORDERCOLOR=".*?"' key) -- verify that these
#   double-quoted / upper-case patterns can still match at that point.
# HTML replacement import re def th(neirong): tihuan = { '': '', '"': "'", '\n': '', '\xa0': "", '': '', '': '', ' ': '', '': '
', '

': '
', '
': '
', '
': '
', '': '
', '
': '
', '': '', '': '', '': '', '': '', '': '', '': '', '': '', '': '
', '': '', 'style=".*?"': '', "style='.*?'": '', 'class=".*?"': '', "class='.*?'": '', "bordercolor='.*?'": '', 'bgcolor=".*?"': '', 'BORDERCOLOR=".*?"': '', 'width=".*?"': '', '': '', '': '', '': '', '': '', '': '', '': '', '': '', '': '', '': '', '': '', '.*': '', '': '', '': '', '【关闭】': '', '【打印】': '', } nr = neirong all_tag = re.findall("<[^>]+>", nr) for tag in all_tag: nr = nr.replace(tag, str(tag).lower()) def thh(k, v, c): return re.sub(k, v, c) for k, v in tihuan.items(): nr = re.sub(k, v, thh(k, v, nr), re.S, re.M) return nr def th_1(neirong): tihuan = { '': '', '"': "'", '\n': '', '\xa0': "", '': '', } nr = neirong all_tag = re.findall("<[^>]+>", nr) for tag in all_tag: nr = nr.replace(tag, str(tag).lower()) def thh(k, v, c): return re.sub(k, v, c) for k, v in tihuan.items(): nr = re.sub(k, v, thh(k, v, nr), re.S, re.M) return nr