|
@@ -21,6 +21,7 @@ var (
|
|
|
// regKV matches a "key: value" pair.
//   - Group 1 (key): one Han character followed by 1-30 characters (lazy)
//     that are not a comma, full stop, enumeration comma, semicolon
//     (full-width or ASCII variants as listed), CR, or LF.
//   - Separator: a full-width or ASCII colon.
//   - Group 2 (value): everything after the colon up to the end of the line
//     ("." does not match a newline without the s flag).
// The pattern is unanchored, so the key may start anywhere in the input.
regKV = regexp.MustCompile("([\\p{Han}][^,,。、.;;\r\n]{1,30}?)[::](.*)")
|
|
|
// filterK matches any of four kinds of text (presumably noise to strip from
// a key; confirm against the call sites):
//  1. a bracketed span: an opening full-width/ASCII parenthesis, "[" or "【",
//     the shortest run of characters, then a closing parenthesis, "]" or "】";
//  2. a tag-like span: "<", one non-">" character, at least one more
//     character (lazy), then ">";
//  3. a run of the listed punctuation, symbols and whitespace, including
//     U+3000 (ideographic space), U+2003 (em space) and U+00A0 (no-break space);
//  4. a run of Chinese numerals, ASCII digits or full-width digits at the
//     very start of the input.
filterK = regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
|
|
|
// filterValue matches a value that is one of:
//   - exactly "无" (Chinese for "none");
//   - made up entirely of whitespace (CR, LF, \s, U+3000, U+2003, U+00A0);
//   - starting with "<" and containing a later ">" on the same line
//     (i.e. it begins with something tag-like).
// Presumably such values are discarded by the caller; confirm at the use site.
filterValue = regexp.MustCompile("(^(无)$|^[\r\n\\s\u3000\u2003\u00a0]+$|^<.*>)")
|
|
|
// filterWinner matches 2-40 arbitrary characters (greedy) followed by one of
// the listed organization-type suffixes, e.g. "集团" (group), "公司" (company),
// "学校" (school), "中心" (center), "家具城" (furniture mall), "门诊" (clinic),
// "大学"/"中学"/"小学", or a single-character suffix such as "部", "院", "局".
// The pattern is unanchored, so it matches anywhere in the input; group 1
// captures only the suffix.
// NOTE(review): the name suggests it recognizes winning-bidder names; confirm.
+ filterWinner = regexp.MustCompile(".{2,40}(集团|公司|学校|中心|家具城|门诊|[大中小]学|部|院|局|厂|店|所|队|社|室|厅|会|场|行)")
|
|
|
// regReplKey matches either a prefix or a suffix of a string. Note that "^"
// binds only to the first group and "$" only to the second.
//   - Prefix (at the start): "包", optionally followed by characters ending in
//     an ASCII letter or digit; or "本项目", "推荐", "的", "本次".
//   - Suffix (at the end): a run of the characters 约/为/元/万/亿; "大写" or
//     "小写"; "人民币"; "全称"; or "姓名".
// NOTE(review): the name suggests the matches are replaced/removed from keys;
// confirm against the call sites.
regReplKey = regexp.MustCompile("^(包(.+[A-Za-z\\d])?|本项目|推荐|的|本次)|([约为元万亿]+|[大小]写|人民币|[全]称|姓名)$")
|
|
|
// buyerAndAgency matches any of the literal terms "代理机构" (agency),
// "代理人" (agent), "采购人" (purchaser) or "采购单位" (purchasing unit),
// anywhere in the input.
buyerAndAgency = regexp.MustCompile("(代理(机构|人)|采购(人|单位))")
|
|
|
BlockTagMap = map[string]bool{
|