fengweiqiang 6 年之前
父節點
當前提交
5350d7adf9
共有 2 個文件被更改,包括 2 次插入和 4 次删除
  1. +1 -2
      src/jy/pretreated/analykv.go
  2. +1 -2
      src/jy/util/clearHtml.go

+ 1 - 2
src/jy/pretreated/analykv.go

@@ -13,7 +13,7 @@ var Key = regexp.MustCompile("[:::]")
 var Time = regexp.MustCompile("[\\d]")
 var dh = regexp.MustCompile("[,,.]")
 var space = regexp.MustCompile("[\\s\\n \u3000\u2003\u00a0]+")
-var val = regexp.MustCompile("[^\\s\\n \u3000\u2003\u00a0,,。!;;\\-]")
+var val = regexp.MustCompile("[^\\s\\n \u3000\u2003\u00a0,,。!;;]")
 var matchkh = map[string]string{
 	"(": ")",
 	"(": ")",
@@ -165,7 +165,6 @@ func FindKv(con, tag string, from int) (m *SortMap) {
 		}
 		return false
 	})
-
 	if len(s1) > 0 {
 		str := strings.Join(s1, "")
 		str = u.TrimLRSpace(str, "")

+ 1 - 2
src/jy/util/clearHtml.go

@@ -2,7 +2,6 @@ package util
 
 import (
 	"github.com/PuerkitoBio/goquery"
-	"log"
 	"regexp"
 	"strings"
 	"unicode/utf8"
@@ -110,7 +109,7 @@ func (c *Cut) ClearHtml(src string) string {
 
 			}
 		}
-		if tmp == "<br>" || tmp == "<br/>" {
+		if tmp == "<br>" || tmp == "<br/>" || tmp == "<center>" || tmp == "</center>"{
 			return "\n"
 		}
 		if tmp[1] != 47 { //开始标签