1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- package util
- import (
- mc "app.yhyue.com/moapp/jybase/common"
- "regexp"
- "strings"
- )
- //
// Cut holds the precompiled regular expressions that ClearHtml applies to an
// HTML fragment. Build it with NewCut; the zero value has nil patterns and
// must not be used.
type Cut struct {
	// scriptTag matches a <script> or <style> start tag through the nearest
	// closing </script> or </style> tag, across line breaks.
	scriptTag *regexp.Regexp
	// sa matches an anchor element on a single line; submatch 1 is the
	// content between the start and end tags.
	sa *regexp.Regexp
	// replBlankLine matches a run of whitespace that ends in CR or LF.
	replBlankLine *regexp.Regexp
	// replStartWrap matches whitespace (including U+3000, U+2003 and U+00A0)
	// at the very start or very end of the text.
	replStartWrap *regexp.Regexp
	// multiCR matches one or more line breaks, each optionally followed by
	// whitespace (including U+3000, U+2003 and U+00A0).
	multiCR *regexp.Regexp
	// tag matches any single "<...>" tag.
	tag *regexp.Regexp
}

// NewCut returns a Cut with all of its patterns compiled.
//
// Every pattern is a compile-time constant, so MustCompile is used
// throughout: a malformed pattern is a programming error and should fail
// loudly instead of leaving a nil *regexp.Regexp behind.
func NewCut() *Cut {
	return &Cut{
		// ".*?" (not ".+?") so that an empty element such as
		// <script src="x"></script> is matched on its own; with ".+?" the
		// match was forced to run on to the next closing tag and removed
		// the page content in between.
		scriptTag: regexp.MustCompile("(?s)<(script|style)[^>]*>.*?</(script|style)>"),
		// "[aA]" rather than "[a|A]": inside a character class '|' is a
		// literal, not an alternation.
		sa:            regexp.MustCompile("<[aA]\\s*[^>]*>(.*?)</[aA]>"),
		replBlankLine: regexp.MustCompile("\\s+[\r\n]"),
		replStartWrap: regexp.MustCompile("^[\u3000\u2003\u00a0\\s]+|[\u3000\u2003\u00a0\\s]+$"),
		multiCR:       regexp.MustCompile("([\r\n][\u3000\u2003\u00a0\\s]*)+|[\r\n]+"),
		tag:           regexp.MustCompile("<[^>]+>"),
	}
}

// ClearHtml normalizes an HTML fragment: it removes <script>/<style>
// elements, unwraps anchors (keeping their inner content), trims surrounding
// whitespace, collapses runs of line breaks, and finally turns every
// remaining line break into "<br/>". Other tags are kept, lowercased.
func (c *Cut) ClearHtml(src string) string {
	// Drop line breaks that directly follow a tag so they do not become <br/>.
	src = strings.Replace(src, ">\n", ">", -1)
	// NOTE(review): this removes every space, including the ones separating
	// a tag name from its attributes (<a href> becomes <ahref>). It may have
	// been meant to strip "&nbsp;" only - confirm against upstream.
	src = strings.Replace(src, " ", "", -1)

	// Lowercase whole tags (names and attributes) so the patterns below can
	// match case-insensitively.
	src = c.tag.ReplaceAllStringFunc(src, strings.ToLower)

	// Remove script and style elements together with their bodies.
	src = c.scriptTag.ReplaceAllString(src, "")
	// Unwrap anchors, keeping only the content between the tags.
	// NOTE(review): the pattern accepts any tag name that starts with "a"
	// (e.g. <abbr>); tightening it is only possible once the space removal
	// above is settled.
	src = c.sa.ReplaceAllString(src, "$1")

	src = c.replStartWrap.ReplaceAllString(src, "")
	src = c.replBlankLine.ReplaceAllString(src, "\n")

	// Collapse redundant line breaks. The result must be assigned back:
	// ReplaceAllString returns a new string and leaves src untouched.
	src = c.multiCR.ReplaceAllString(src, "\n")

	return strings.Replace(src, "\n", "<br/>", -1)
}
- //判断table是否加表格线
- func isHasBoder(con string, reg *regexp.Regexp) bool {
- res := reg.FindAllStringSubmatch(con, -1)
- hasBorder := false
- for _, v := range res {
- for k, val := range v {
- if k > 0 && k%2 == 0 && mc.IntAll(val) > 0 {
- hasBorder = true
- break
- }
- }
- }
- return hasBorder
- }
|