spiderutil.go 308 B

1234567891011
  1. package spiderutil
  2. import "regexp"
  // Precompiled patterns shared by the helpers in this package. They are built
  // once at package initialisation so callers never pay the compile cost.
  var (
  	// Reg matches every run of one or more characters that are not ASCII
  	// digits, ASCII letters, or code points in the range U+4E00 to U+9FA5.
  	Reg = regexp.MustCompile("[^0-9A-Za-z\u4e00-\u9fa5]+")

  	// Filter matches either a single angle-bracket tag ("<" up to the next
  	// ">") or one whitespace character: anything regexp `\s` accepts, plus
  	// U+3000, U+2003 and U+00A0.
  	Filter = regexp.MustCompile("<[^>]*?>|[\\s\u3000\u2003\u00a0]")
  )
  5. // FilterDetail 去除无效文本
  6. func FilterDetail(con string) string {
  7. return Reg.ReplaceAllString(Filter.ReplaceAllString(con, ""), "")
  8. }