# HTML replacement helpers
import re
def th(neirong):
# Clean an HTML string: lower-case every `<...>` tag, then apply each
# pattern -> replacement pair of `tihuan`, in order, as a regular-expression
# substitution, and return the resulting text.
#
# neirong: the HTML text to clean.
#
# NOTE(review): many keys in the table below are empty strings and several
# literals are broken across physical lines. This looks like markup that was
# lost from the literals when the file was extracted -- restore the table
# from the original file before relying on it.
tihuan = {
'': '',
'"': "'",
'\n': '',
'\xa0': "",
'': '',
'': '',
' ': '',
'
': '
',
'': '
',
'': '
',
'
': '
',
'
![]()
': '
',
'
': '
',
'
': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '
',
'': '',
'style=".*?"': '',
"style='.*?'": '',
'class=".*?"': '',
"class='.*?'": '',
"bordercolor='.*?'": '',
'bgcolor=".*?"': '',
'BORDERCOLOR=".*?"': '',
'width=".*?"': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'': '',
'.*': '',
'': '',
'': '',
'【关闭】': '',
'【打印】': '',
}
nr = neirong
# Lower-case every tag found in the input. The whole tag text is lower-cased,
# attribute values included.
all_tag = re.findall("<[^>]+>", nr)
for tag in all_tag:
nr = nr.replace(tag, str(tag).lower())
# Helper: plain substitution of pattern k by v in c, with no flags.
def thh(k, v, c):
return re.sub(k, v, c)
# Each pair is applied twice: once through thh() without flags, then again
# on that result by the outer re.sub().
# NOTE(review): the 4th and 5th positional parameters of re.sub are `count`
# and `flags`, so re.S is passed as the count and only re.M is passed as
# flags. Presumably flags=re.S | re.M was intended -- confirm.
for k, v in tihuan.items():
nr = re.sub(k, v, thh(k, v, nr), re.S, re.M)
return nr
def th_1(neirong):
    """Normalize an HTML fragment with a small set of textual substitutions.

    Every ``<...>`` tag is lower-cased in full (tag name, attribute names and
    attribute values), then each ``(pattern, replacement)`` pair of the table
    is applied in order as a regular-expression substitution.

    Args:
        neirong: The HTML text to clean.

    Returns:
        The cleaned text: tags lower-cased, double quotes turned into single
        quotes, and newlines and non-breaking spaces (U+00A0) removed.
    """
    # Applied in order; each entry is (regex pattern, replacement).
    # NOTE(review): an empty pattern replaces '' with '' and changes nothing;
    # presumably these two entries are placeholders whose original text was
    # lost -- confirm against the original table.
    tihuan = (
        ('', ''),
        ('"', "'"),
        ('\n', ''),
        ('\xa0', ''),
        ('', ''),
    )

    # Lower-case each tag in one pass. A callback rewrites every match
    # exactly once, so a tag whose text overlaps another tag's text is still
    # lower-cased completely.
    nr = re.sub(r"<[^>]+>", lambda m: m.group(0).lower(), neirong)

    # Flags must be passed by keyword: the 4th positional parameter of
    # re.sub is `count`, not `flags`.
    for pattern, replacement in tihuan:
        nr = re.sub(pattern, replacement, nr, flags=re.S | re.M)
    return nr