package clear import ( "fmt" "regexp" "sync" ) //方法清单 var clearfns = make(map[string]func(data []interface{},spidercode ... string) []interface{}) var lock sync.RWMutex func init() { BindFn("cutspace", CutSpace) //去除首尾空格 BindFn("cutallspace", CutAllSpace) //去除所有空格 BindFn("toint", ObjToInt) //转int BindFn("tofloat", ObjToFloat) //转float BindFn("totimestamp", ObjToTimestamp) //转时间戳 BindFn("tomoney", ObjToMoney) //转换金额 BindFn("getcurrency", GetCurrency) //获取币种 BindFn("cutSymbol", CutSymbol) //清理符号 BindFn("cutNotPrs", CutNotPrs) //不成对出现的符号,把符号后面的内容清理掉 BindFn("rateToFloat", RateToFloat) //费率转小数 BindFn("clearAllWord", ClearAllWord) //全部是汉字或者特殊符号的情况,清理掉 BindFn("clearMaxAmount", ClearMaxAmount) //大于1万亿的过滤掉 BindFn("clearProjectName", ClearProjectName) //清理项目名称 BindFn("getPhone", GetPhone) //取手机号 BindFn("chiToEng", ChiToEng) //中文符号转英文 BindFn("clearBuyerPerson", ClearBuyerPerson) //处理较长采购联系人 BindFn("clearNumber", ClearNumber) //一般用于处理抽取联系人后带有电话的情况 BindFn("clearEndSymblo", ClearEndSymblo) //去除尾部特殊符号 BindFn("chiToInt", ChiToFloat) //中文转数字(费率、折扣率) } //绑定清理方法 func BindFn(fnname string, fn func(data []interface{},spidercode ...string) []interface{}) { lock.Lock() clearfns[fnname] = fn lock.Unlock() } //执行清理动作,如果调用的清理方法不存在,则不做处理 func DoClearFn(clear []string, data []interface{},spidercode ...string) []interface{} { if len(clear) == 0 { return data } for _, fnname := range clear { if v, ok := clearfns[fnname]; ok { data = v(data,spidercode...) } } return data } //取手机号 var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,5})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|\\d{3,4}\\*{3,4}\\d{3,4}|\\d{3,4}[\u3000\u2003\u00a0\\s]*\\d{4,5}[\u3000\u2003\u00a0\\s]*\\d{3,4}|(\\d{2,}[×―—-\\-])+\\d{2,}[×―—-\\-]+(\\d{3,})+|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)") func GetPhone(data []interface{},spidercode ...string) []interface{} { data[0] = PhoneReg.FindString(fmt.Sprint(data[0])) return data } //去除数字 func ClearNumber(data []interface{},spidercode ...string) []interface{} { data[0] = clearNum.ReplaceAllString(fmt.Sprint(data[0]), "") return data }