c_money.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. package clean
  2. import (
  3. "fmt"
  4. "github.com/shopspring/decimal"
  5. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  6. "math"
  7. "regexp"
  8. "strconv"
  9. "strings"
  10. "unicode/utf8"
  11. )
  12. var (
  13. moneyReg1 = regexp.MustCompile("([\\s ,]+)")
  14. moneyReg2 = regexp.MustCompile("^([0-9.]+)E([1-7])$")
  15. numReg1 = regexp.MustCompile("([0-9\\.]+)")
  16. )
  17. var unpkvBidamountReg = regexp.MustCompile("^([Xx]\\+[1-9\\.]+元/每)")
  18. var specBidamountReg = regexp.MustCompile("^([0-9.]+)E([1-7])$")
  19. var regUnitMoneyClean = regexp.MustCompile("^(.*单价[0-9.]+元[/][袋|块])[,,](含税总价[0-9.]+[万元]+)[.。]$")
  20. var blackMoneyClean = regexp.MustCompile("^([0-9.]+以下[万]?|分)$")
  21. var impactMoneyClean = regexp.MustCompile("(分二串口|分站模块)")
  22. // 大写金额补充
  23. var impactMoneyeplenish = regexp.MustCompile("^([壹贰叁肆伍陆柒捌玖]分)")
  24. // 特殊金额-格式-重置
  25. var resetAamountReg = regexp.MustCompile("[.](0|00)[.](0|00)")
  26. var regPercentMoney, _ = regexp.Compile(`[0-9.]+[((]?[%|%][))]?`)
  27. var regQianw, _ = regexp.Compile(`\d{1,2}千万`)
  28. var kxjsReg = regexp.MustCompile("[0-9][E|e]{1}[-—+]{1}[0-9]{1,2}")
  29. var regOperator, _ = regexp.Compile(`[*|+|)*)]`)
  30. var regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
  31. var regStrUnit, _ = regexp.Compile(`[元|万|亿]`)
  32. var regStrJe = regexp.MustCompile(`([1-9]\d*|0)(\.\d_+)?[\s|元|万|亿]{0,3}`)
  33. var regStrChar = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
  34. var moneyRegChar, _ = regexp.Compile(regStrChar)
  35. var contentUnit, _ = regexp.Compile(`(万元|单位/万)`)
  36. var numCapitals, _ = regexp.Compile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
  37. var moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金额|中标价|成交价|成交价格|中标\(成交\)金额|投标报价|中标标价|成交结果)?[::\s]?(0|零|0.0|¥0)+(0|\.)*[\s]?(万|元|){0,2}[\s]?((人民币))?$`)
  38. var cutAllSpace, _ = regexp.Compile(`\s*`)
  39. var spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n", "\u0001"}
  40. var moneyClearSpidercode map[string]interface{}
  41. var moneyChar = map[string]interface{}{ //"〇": "0", "零": "0",壹贰叁肆伍陆柒捌玖
  42. "一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
  43. "六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
  44. "百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
  45. "零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
  46. }
  47. var NumChar = map[string]interface{}{
  48. "一": 1, "二": 1, "三": 1, "四": 1, "五": 1, "六": 1, "七": 1, "八": 1, "久": 1, "十": 1,
  49. }
  50. var moneyUnit = map[string]float64{
  51. "元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000), //单位
  52. }
  53. func init() {
  54. regOperator, _ = regexp.Compile(`[*|+|)*)]`)
  55. regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
  56. regStrUnit, _ = regexp.Compile(`[元|万|亿]`)
  57. regStrJe = regexp.MustCompile(`([1-9]\d*|0)(\.\d_+)?[\s|元|万|亿]{0,3}`)
  58. regStrChar = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
  59. moneyRegChar, _ = regexp.Compile(regStrChar)
  60. contentUnit, _ = regexp.Compile(`(万元|单位/万)`)
  61. numCapitals, _ = regexp.Compile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
  62. regQianw, _ = regexp.Compile(`\d{1,2}千万`)
  63. kxjsReg = regexp.MustCompile("[0-9][E|e]{1}[-—+]{1}[0-9]{1,2}")
  64. regPercentMoney, _ = regexp.Compile(`[0-9.]+[((]?[%|%][))]?`)
  65. }
  66. // 转换金额
  67. func ConvertMoney(money float64, unit string) float64 {
  68. if strings.Contains(unit, "万") && money > 0.0 {
  69. //倍率
  70. num1 := decimal.NewFromFloat(money)
  71. num2 := decimal.NewFromFloat(10000)
  72. decimalValue := num1.Mul(num2)
  73. res, _ := decimalValue.Float64()
  74. if res < 1000000000.0 {
  75. return res
  76. }
  77. }
  78. if strings.Contains(unit, "亿") && money > 0.0 {
  79. //倍率
  80. num1 := decimal.NewFromFloat(money)
  81. num2 := decimal.NewFromFloat(100000000)
  82. decimalValue := num1.Mul(num2)
  83. res, _ := decimalValue.Float64()
  84. if res < 1000000000.0 {
  85. return res
  86. }
  87. }
  88. return money
  89. }
  90. // 金额转换
  91. func CleanMoney(data []interface{}) (float64, bool) {
  92. isFindUnit := false
  93. tmpstr := (data)[0]
  94. totmpstr := ""
  95. if _, ok := tmpstr.(float64); ok {
  96. totmpstr = fmt.Sprintf("%f", tmpstr)
  97. } else {
  98. totmpstr = util.ObjToString(tmpstr)
  99. }
  100. //去除空格
  101. totmpstr = strings.ReplaceAll(totmpstr, " ", "")
  102. (data)[0] = totmpstr
  103. //特殊转换-科学计数法
  104. if specBidamountReg.MatchString(totmpstr) {
  105. price := util.Float64All(specBidamountReg.ReplaceAllString(totmpstr, "${1}"))
  106. if unit := util.Float64All(specBidamountReg.ReplaceAllString(totmpstr, "${2}")); unit > 0.0 && price > 0.0 {
  107. totmpstr = fmt.Sprintf("%f", math.Pow(10, unit)*price)
  108. (data)[0] = totmpstr
  109. }
  110. }
  111. //异常替换
  112. if unpkvBidamountReg.MatchString(totmpstr) {
  113. totmpstr = unpkvBidamountReg.ReplaceAllString(totmpstr, "")
  114. (data)[0] = totmpstr
  115. }
  116. if resetAamountReg.MatchString(totmpstr) {
  117. totmpstr = resetAamountReg.ReplaceAllString(totmpstr, ".0")
  118. (data)[0] = totmpstr
  119. }
  120. //单位指定
  121. if regUnitMoneyClean.MatchString(totmpstr) {
  122. totmpstr = regUnitMoneyClean.ReplaceAllString(totmpstr, "$2")
  123. (data)[0] = totmpstr
  124. }
  125. //特殊替换
  126. if impactMoneyClean.MatchString(totmpstr) {
  127. totmpstr = impactMoneyClean.ReplaceAllString(totmpstr, "")
  128. (data)[0] = totmpstr
  129. }
  130. //大写金额补充
  131. if impactMoneyeplenish.MatchString(totmpstr) {
  132. totmpstr = "零元" + totmpstr
  133. (data)[0] = totmpstr
  134. }
  135. //黑名单
  136. if blackMoneyClean.MatchString(totmpstr) {
  137. totmpstr = ""
  138. (data)[0] = totmpstr
  139. }
  140. //未含税总价1454400.00元,税率6%,含税总价1541664.00元
  141. Percent := regPercentMoney.FindAllString(totmpstr, -1)
  142. for _, v := range Percent {
  143. totmpstr = strings.ReplaceAll(totmpstr, v, "")
  144. }
  145. totmpstr = strings.ReplaceAll(totmpstr, "_", "")
  146. (data)[0] = totmpstr //过滤到%相关数字
  147. if utf8.RuneCountInString(totmpstr) > 100 { //过长-字符无有效金额
  148. (data)[0] = 0
  149. data = append(data, false)
  150. return 0.0, isFindUnit
  151. }
  152. if utf8.RuneCountInString(totmpstr) > 20 {
  153. if numCapitals.MatchString(totmpstr) {
  154. tmpstr = numCapitals.FindString(totmpstr)
  155. } else if regStrJe.MatchString(totmpstr) {
  156. tmpstr = regStrJe.FindString(totmpstr)
  157. } else {
  158. (data)[0] = 0
  159. data = append(data, false)
  160. return 0.0, isFindUnit
  161. }
  162. }
  163. //是否发现单位
  164. if strings.Contains(fmt.Sprint(data[0]), "万") || strings.Contains(fmt.Sprint(data[0]), "亿") {
  165. isFindUnit = true
  166. }
  167. ret := capitalMoney(data)[0]
  168. if ret.(float64) < float64(10000) || ret.(float64) > float64(50000000000) {
  169. ret2, _ := numMoney(data)
  170. //isfindUnit = b
  171. if ret2[0].(float64) > ret.(float64) {
  172. ret = ret2[0]
  173. }
  174. }
  175. f := util.Float64All(ret)
  176. //f, _ := strconv.ParseFloat(strconv.FormatFloat(ret.(float64), 'f', 4, 64), 64)
  177. //if f < 1 {
  178. // f = 0
  179. //}
  180. //若果金额小于50,全文检索单位:万
  181. // if f < 50 && f > 0 && isfindUnit {
  182. // rep := contentUnit.FindAllStringIndex(fmt.Sprint(data[1]), -1)
  183. // if len(rep) > 0 {
  184. // f = f * 10000
  185. // }
  186. // }
  187. data[0] = util.Float64All(ret)
  188. if f == 0 && !moneyUnitRegBool.MatchString(fmt.Sprint(tmpstr)) {
  189. data = append(data, false)
  190. return 0.0, isFindUnit
  191. }
  192. data = append(data, true)
  193. if len(data) > 0 {
  194. return util.Float64All(data[0]), isFindUnit
  195. } else {
  196. return 0.0, isFindUnit
  197. }
  198. }
  199. // 数字金额转换
  200. func numMoney(data []interface{}) ([]interface{}, bool) {
  201. tmp := ""
  202. if _, ok := data[0].(float64); ok {
  203. tmp = fmt.Sprintf("%f", data[0])
  204. } else {
  205. tmp = util.ObjToString(data[0])
  206. }
  207. tmp = strings.ReplaceAll(tmp, "(不含税)", "")
  208. //费率转换% ‰
  209. flv := float64(1)
  210. if strings.HasSuffix(tmp, "%") {
  211. flv = 0.01
  212. } else if strings.HasSuffix(tmp, "‰") {
  213. flv = 0.001
  214. }
  215. repUnit := float64(1)
  216. if regQianw.MatchString(tmp) {
  217. tmp = strings.Replace(tmp, "千万", "万", -1)
  218. repUnit = float64(1000)
  219. }
  220. tmp = replaceSymbol(tmp, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
  221. tmp = replaceString(tmp, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
  222. tmp = fmt.Sprint(CutAllSpace([]interface{}{tmp, data[1]})[0])
  223. rets := regNumFloat.FindAllString(tmp, -1)
  224. fnums := []float64{}
  225. unitstrs := []string{}
  226. if len(rets) > 0 {
  227. pindex := 0 //单位前置
  228. for k, v := range rets {
  229. f, err := strconv.ParseFloat(v, 64)
  230. if err == nil {
  231. fnums = append(fnums, f)
  232. index := strings.Index(tmp, v)
  233. //单位后置
  234. start := index + len(v)
  235. end := start + 3
  236. //log.Println("vvv", tmp, v, pindex, index, start)
  237. if k > 0 {
  238. if start >= pindex+3 {
  239. pstart := pindex + 3
  240. if pstart >= index {
  241. pstart = index
  242. }
  243. if len(tmp) > end {
  244. unitstrs = append(unitstrs, tmp[pstart:index]+tmp[start:end])
  245. } else {
  246. unitstrs = append(unitstrs, tmp[pstart:index]+tmp[start:])
  247. }
  248. } else {
  249. if len(tmp) > end {
  250. unitstrs = append(unitstrs, tmp[start:end])
  251. } else {
  252. unitstrs = append(unitstrs, tmp[start:])
  253. }
  254. }
  255. } else {
  256. if len(tmp) > end {
  257. if index-3 >= 0 {
  258. unitstrs = append(unitstrs, tmp[index-3:index]+tmp[start:end])
  259. } else {
  260. unitstrs = append(unitstrs, tmp[start:end])
  261. }
  262. } else {
  263. if index-3 >= 0 {
  264. unitstrs = append(unitstrs, tmp[index-3:index]+tmp[start:])
  265. } else {
  266. unitstrs = append(unitstrs, tmp[start:])
  267. }
  268. }
  269. }
  270. pindex = start
  271. }
  272. }
  273. }
  274. //log.Println("unitstrs", fnums, unitstrs)
  275. unit := float64(0)
  276. fnum := float64(0)
  277. for k, v := range fnums {
  278. fnum = v
  279. units := regStrUnit.FindAllString(unitstrs[k], -1)
  280. for _, v := range units {
  281. if moneyUnit[v] != 0 {
  282. unit = moneyUnit[v]
  283. break
  284. }
  285. }
  286. if unit != float64(0) { //取第一个
  287. break
  288. }
  289. }
  290. fnum = fnum * repUnit
  291. if unit == float64(0) {
  292. num1 := decimal.NewFromFloat(fnum)
  293. num2 := decimal.NewFromFloat(flv)
  294. decimalValue := num1.Mul(num2)
  295. decimal_res, _ := decimalValue.Float64()
  296. data[0] = decimal_res
  297. } else {
  298. num1 := decimal.NewFromFloat(fnum)
  299. num2 := decimal.NewFromFloat(unit)
  300. num3 := decimal.NewFromFloat(flv)
  301. decimalValue := num1.Mul(num2).Mul(num3)
  302. decimal_res, _ := decimalValue.Float64()
  303. data[0] = decimal_res
  304. }
  305. if unit == 10000 {
  306. return data, false
  307. } else {
  308. return data, true
  309. }
  310. }
  311. // 大写数子金额转换
  312. func capitalMoney(data []interface{}) []interface{} {
  313. nodes := []float64{}
  314. node := float64(0)
  315. tmp := float64(0)
  316. decimals := 0.0
  317. ishaspoint := false //是否含小数点
  318. fnum := float64(0)
  319. end := false
  320. str := fmt.Sprint(data[0])
  321. //提取第一个大写信息
  322. if strings.Contains(str, "壹") {
  323. str = strings.ReplaceAll(str, "一", "壹")
  324. }
  325. strmatch := numCapitals.FindAllStringSubmatch(str, -1)
  326. if len(strmatch) > 0 {
  327. str = strmatch[0][0]
  328. }
  329. suffixUnit := float64(1)
  330. if strings.HasSuffix(str, "万") || strings.HasSuffix(str, "万元") || strings.HasSuffix(str, "万元整") {
  331. index := strings.LastIndex(str, "万")
  332. str = str[0:index]
  333. suffixUnit = float64(10000)
  334. }
  335. yy := false
  336. moneyRegChar.ReplaceAllStringFunc(str, func(key string) string {
  337. if key == "元" || key == "圆" || key == "点" {
  338. ishaspoint = true
  339. }
  340. if v, ok := moneyChar[key].(float64); ok && !end {
  341. if ishaspoint && v > 10 { //排除后面有其他的单位
  342. return ""
  343. }
  344. //fmt.Println(key, v, fnum)
  345. if v < 10 && v >= 0 {
  346. if ishaspoint { //小数部分
  347. if v >= 1 {
  348. fnum = v
  349. } else if v < 1 && v > 0 {
  350. decimals += fnum * v
  351. }
  352. } else {
  353. if tmp != float64(0) {
  354. node += tmp
  355. }
  356. tmp = float64(v)
  357. }
  358. } else if v == 10000 || v == 100000000 { //单位万、亿
  359. if tmp != float64(0) {
  360. node += tmp
  361. tmp = float64(0)
  362. }
  363. nodes = append(nodes, node*util.Float64All(v))
  364. if v == 100000000 {
  365. yy = true
  366. }
  367. node = float64(0)
  368. } else {
  369. if v == 10 && tmp == 0 {
  370. tmp = 1
  371. }
  372. tmp = tmp * util.Float64All(v)
  373. node += tmp
  374. tmp = float64(0)
  375. }
  376. }
  377. if key == "整" || key == "正" || key == "分" {
  378. end = true
  379. }
  380. return ""
  381. })
  382. if yy {
  383. nodes = append(nodes, node*suffixUnit, tmp)
  384. } else {
  385. nodes = append(nodes, node, tmp)
  386. }
  387. ret := float64(0)
  388. for _, v := range nodes {
  389. ret += v
  390. }
  391. if yy {
  392. return []interface{}{(ret + decimals), data[1]}
  393. } else {
  394. return []interface{}{(ret + decimals) * suffixUnit, data[1]}
  395. }
  396. }
  397. // 过滤符号
  398. func replaceSymbol(con string, rep []string) string {
  399. for _, v := range rep {
  400. con = strings.Replace(con, v, "", -1)
  401. }
  402. return con
  403. }
  404. // 符号替换
  405. func replaceString(con string, ret, rep []string) string {
  406. for k, v := range ret {
  407. if len(rep) > k {
  408. con = strings.Replace(con, v, rep[k], -1)
  409. }
  410. }
  411. return con
  412. }
  413. // 清理所有空白符
  414. func CutAllSpace(data []interface{}) []interface{} {
  415. tmp := cutAllSpace.ReplaceAllString(fmt.Sprint(data[0]), "")
  416. tmp = replaceSymbol(tmp, spaces)
  417. data[0] = tmp
  418. return data
  419. }