clear.go 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. package clear
  2. import (
  3. "fmt"
  4. "regexp"
  5. "sync"
  6. )
  7. //方法清单
  8. var clearfns = make(map[string]func(data []interface{},spidercode ... string) []interface{})
  9. var lock sync.RWMutex
  10. func init() {
  11. BindFn("cutspace", CutSpace) //去除首尾空格
  12. BindFn("cutallspace", CutAllSpace) //去除所有空格
  13. BindFn("toint", ObjToInt) //转int
  14. BindFn("tofloat", ObjToFloat) //转float
  15. BindFn("totimestamp", ObjToTimestamp) //转时间戳
  16. BindFn("tomoney", ObjToMoney) //转换金额
  17. BindFn("getcurrency", GetCurrency) //获取币种
  18. BindFn("cutSymbol", CutSymbol) //清理符号
  19. BindFn("cutNotPrs", CutNotPrs) //不成对出现的符号,把符号后面的内容清理掉
  20. BindFn("rateToFloat", RateToFloat) //费率转小数
  21. BindFn("clearAllWord", ClearAllWord) //全部是汉字或者特殊符号的情况,清理掉
  22. BindFn("clearMaxAmount", ClearMaxAmount) //大于1万亿的过滤掉
  23. BindFn("clearProjectName", ClearProjectName) //清理项目名称
  24. BindFn("getPhone", GetPhone) //取手机号
  25. BindFn("chiToEng", ChiToEng) //中文符号转英文
  26. BindFn("clearBuyerPerson", ClearBuyerPerson) //处理较长采购联系人
  27. BindFn("clearNumber", ClearNumber) //一般用于处理抽取联系人后带有电话的情况
  28. BindFn("clearEndSymblo", ClearEndSymblo) //去除尾部特殊符号
  29. BindFn("chiToInt", ChiToFloat) //中文转数字(费率、折扣率)
  30. }
  31. //绑定清理方法
  32. func BindFn(fnname string, fn func(data []interface{},spidercode ...string) []interface{}) {
  33. lock.Lock()
  34. clearfns[fnname] = fn
  35. lock.Unlock()
  36. }
  37. //执行清理动作,如果调用的清理方法不存在,则不做处理
  38. func DoClearFn(clear []string, data []interface{},spidercode ...string) []interface{} {
  39. if len(clear) == 0 {
  40. return data
  41. }
  42. for _, fnname := range clear {
  43. if v, ok := clearfns[fnname]; ok {
  44. data = v(data,spidercode...)
  45. }
  46. }
  47. return data
  48. }
  49. //取手机号
  50. var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,5})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|\\d{3,4}\\*{3,4}\\d{3,4}|\\d{3,4}[\u3000\u2003\u00a0\\s]*\\d{4,5}[\u3000\u2003\u00a0\\s]*\\d{3,4}|(\\d{2,}[×―—-\\-])+\\d{2,}[×―—-\\-]+(\\d{3,})+|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
  51. func GetPhone(data []interface{},spidercode ...string) []interface{} {
  52. data[0] = PhoneReg.FindString(fmt.Sprint(data[0]))
  53. return data
  54. }
  55. //去除数字
  56. func ClearNumber(data []interface{},spidercode ...string) []interface{} {
  57. data[0] = clearNum.ReplaceAllString(fmt.Sprint(data[0]), "")
  58. return data
  59. }