|
@@ -8,8 +8,11 @@ import (
|
|
|
|
|
|
//
|
|
|
type Cut struct {
|
|
|
- scripttag *regexp.Regexp
|
|
|
- sa *regexp.Regexp
|
|
|
+ scripttag *regexp.Regexp
|
|
|
+ sa *regexp.Regexp
|
|
|
+ replBlankLine *regexp.Regexp
|
|
|
+ replStartWrap *regexp.Regexp
|
|
|
+ multiCR *regexp.Regexp
|
|
|
//annotate *regexp.Regexp
|
|
|
//tag *regexp.Regexp
|
|
|
//inputag *regexp.Regexp
|
|
@@ -19,9 +22,6 @@ type Cut struct {
|
|
|
//colstag *regexp.Regexp
|
|
|
//rowstag *regexp.Regexp
|
|
|
//display *regexp.Regexp
|
|
|
- //multiCR *regexp.Regexp
|
|
|
- //replBlankLine *regexp.Regexp
|
|
|
- //replStartWrap *regexp.Regexp
|
|
|
//replTags2CR []string
|
|
|
//retainTags2CR []string
|
|
|
}
|
|
@@ -30,9 +30,11 @@ type Cut struct {
|
|
|
func NewCut() *Cut {
|
|
|
scs := regexp.MustCompile("(?s)<(script|style)[^>]*>.+?</(script|style)>")
|
|
|
sa := regexp.MustCompile("<[a|A]\\s*[^>]*>(.*?)</[a|A]>")
|
|
|
+ bl := regexp.MustCompile("\\s+[\r\n]")
|
|
|
+ sw := regexp.MustCompile("^[\u3000\u2003\u00a0\\s]+|[\u3000\u2003\u00a0\\s]+$")
|
|
|
+ m, _ := regexp.Compile("([\r\n][\u3000\u2003\u00a0\\s]*)+|[\r\n]+")
|
|
|
|
|
|
//t, _ := regexp.Compile("<[^>]+>")
|
|
|
- //m, _ := regexp.Compile("([\r\n][\u3000\u2003\u00a0\\s]*)+|[\r\n]+")
|
|
|
////sc, _ := regexp.Compile("\\<script[^\\>]*\\>*[^\\>]+\\</script\\>")
|
|
|
////ss, _ := regexp.Compile("\\<style[^\\>]*\\>*[^\\>]+\\</style\\>")
|
|
|
//at := regexp.MustCompile("(?s)<(!%-%-|!--).*?(%-%-|--)>") //注释 css
|
|
@@ -43,8 +45,11 @@ func NewCut() *Cut {
|
|
|
//border, _ := regexp.Compile(`(border="(\d+)")|(cellpadding="(\d+)")|(cellspacing="(\d+)")`)
|
|
|
//dis, _ := regexp.Compile(`display:none`)
|
|
|
return &Cut{
|
|
|
- scripttag: scs,
|
|
|
- sa: sa,
|
|
|
+ scripttag: scs,
|
|
|
+ sa: sa,
|
|
|
+ replBlankLine: bl,
|
|
|
+ replStartWrap: sw,
|
|
|
+ multiCR: m,
|
|
|
//annotate: at,
|
|
|
//tag: t,
|
|
|
//hiddentag: hiddentag,
|
|
@@ -53,9 +58,6 @@ func NewCut() *Cut {
|
|
|
//isborder: border,
|
|
|
//rowstag: rows,
|
|
|
//display: dis,
|
|
|
- //multiCR: m,
|
|
|
- //replBlankLine: regexp.MustCompile("\\s+[\r\n]"),
|
|
|
- //replStartWrap: regexp.MustCompile("^[\u3000\u2003\u00a0\\s]+|[\u3000\u2003\u00a0\\s]+$"),
|
|
|
//replTags2CR: []string{"div", "p", "br", "h1", "h2", "h3", "h4", "h5"},
|
|
|
//retainTags2CR: []string{"table", "thead", "tfoot", "tbody", "th", "td", "tr"},
|
|
|
}
|
|
@@ -66,6 +68,10 @@ func (c *Cut) ClearHtml(src string) string {
|
|
|
//清script,style
|
|
|
src = c.scripttag.ReplaceAllString(src, "")
|
|
|
src = c.sa.ReplaceAllString(src, "$1")
|
|
|
+ src = c.replStartWrap.ReplaceAllString(src, "")
|
|
|
+ src = c.replBlankLine.ReplaceAllString(src, "\n")
|
|
|
+ //清除多余换行
|
|
|
+ src = c.multiCR.ReplaceAllString(src, "\n")
|
|
|
return strings.Replace(src, "\n", "<br/>", -1)
|
|
|
}
|
|
|
|