util.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. package main
  2. import (
  3. "regexp"
  4. "strconv"
  5. "strings"
  6. )
// TagMatching holds the configuration of one tag rule: the tag itself plus
// the keyword, additional-keyword, exclusion and clean-up settings attached
// to it. The *Reg fields hold RegexpInfo entries.
// NOTE(review): presumably each *Reg field is built from the matching *Key
// string via GetRegex — confirm against the code that fills this struct.
type TagMatching struct {
	tagName       string   // tag name
	tagCode       string   // tag value (the one that is saved)
	matchField    []string // fields the keywords are matched against
	matchKey      string   // keyword expression
	matchKeyReg   []*RegexpInfo
	addField      []string // fields the additional keywords are matched against
	addKey        string   // additional-keyword expression
	addKeyReg     []*RegexpInfo
	excludeField  []string // fields the exclusion words are matched against
	excludeKey    string   // exclusion-word expression
	excludeKeyReg []*RegexpInfo
	//clearField []string // fields the clean-up words are matched against
	clearKey []string // clean-up words; they use the same fields as the keywords
}
// RegexpInfo pairs one keyword with its compiled pattern.
// GetRegex leaves regs nil for keywords that contain "&&".
type RegexpInfo struct {
	keyStr string         // keyword text, one element of the comma-separated expression
	regs   *regexp.Regexp // compiled pattern for keyStr; nil for "&&" keywords
}
  26. func GetRegex(key string) []*RegexpInfo {
  27. var infos []*RegexpInfo
  28. for _, s := range strings.Split(key, ",") {
  29. if strings.Contains(s, "&&") {
  30. info := &RegexpInfo{
  31. keyStr: s,
  32. regs: nil,
  33. }
  34. infos = append(infos, info)
  35. } else {
  36. if s == "" {
  37. continue
  38. }
  39. info := &RegexpInfo{
  40. keyStr: s,
  41. regs: regexp.MustCompile(".*(?i)" + s + ".*"),
  42. }
  43. infos = append(infos, info)
  44. }
  45. }
  46. return infos
  47. }
  48. var (
  49. regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
  50. regStrUnit, _ = regexp.Compile(`[元|万|亿]`)
  51. contentUnit, _ = regexp.Compile(`(万元|单位/万)`)
  52. numCapitals, _ = regexp.Compile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
  53. regStrChar = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
  54. moneyRegChar, _ = regexp.Compile(regStrChar)
  55. regQianw, _ = regexp.Compile(`\d{1,2}千万`)
  56. cutAllSpace, _ = regexp.Compile(`\s*`)
  57. spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
  58. moneyChar = map[string]interface{}{ //"〇": "0", "零": "0",
  59. "一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
  60. "六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
  61. "百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
  62. "零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
  63. }
  64. moneyUnit = map[string]float64{
  65. "元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000), //单位
  66. }
  67. )
  68. var currencyItem = map[string]string{
  69. "人民币": "人民币",
  70. "rmb": "人民币",
  71. "RMB": "人民币",
  72. "$": "美元",
  73. "$": "美元",
  74. "美元": "美元",
  75. "港元": "港币",
  76. "港币": "港币",
  77. "澳币": "澳币",
  78. "澳元": "澳币",
  79. }
  80. //获取币种
  81. func GetCurrency(text string) (currency string) {
  82. if text == "" {
  83. return
  84. }
  85. currency = "人民币"
  86. for k, v := range currencyItem {
  87. if strings.Contains(text, k) {
  88. currency = v
  89. return
  90. }
  91. }
  92. return
  93. }
  94. //金额转换
  95. func ObjToMoney(text string) float64 {
  96. isfindUnit := true
  97. ret := capitalMoney(text)
  98. if ret < float64(10000) || ret > float64(50000000000) {
  99. ret2, b := numMoney(text)
  100. isfindUnit = b
  101. if ret2 > ret {
  102. ret = ret2
  103. }
  104. }
  105. f, _ := strconv.ParseFloat(strconv.FormatFloat(ret, 'f', 4, 64), 64)
  106. // if f < 1 {
  107. // f = 0
  108. // }
  109. //如果金额小于50,全文检索单位:万
  110. if f < 50 && f > 0 && isfindUnit {
  111. rep := contentUnit.FindAllStringIndex(text, -1)
  112. if len(rep) > 0 {
  113. f = f * 10000
  114. }
  115. }
  116. return f
  117. }
  118. func capitalMoney(text string) float64 {
  119. nodes := []float64{}
  120. node := float64(0)
  121. tmp := float64(0)
  122. decimals := 0.0
  123. ishaspoint := false //是否含小数点
  124. fnum := float64(0)
  125. end := false
  126. //str := fmt.Sprint(data[0])
  127. //提取第一个大写信息
  128. strmatch := numCapitals.FindAllStringSubmatch(text, -1)
  129. if len(strmatch) > 0 {
  130. text = strmatch[0][0]
  131. }
  132. suffixUnit := float64(1)
  133. if strings.HasSuffix(text, "万") || strings.HasSuffix(text, "万元") || strings.HasSuffix(text, "万元整") {
  134. index := strings.LastIndex(text, "万")
  135. text = text[0:index]
  136. suffixUnit = float64(10000)
  137. }
  138. moneyRegChar.ReplaceAllStringFunc(text, func(key string) string {
  139. if key == "元" || key == "圆" || key == "点" {
  140. ishaspoint = true
  141. }
  142. if v, ok := moneyChar[key].(float64); ok && !end {
  143. if ishaspoint && v > 10 { //排除后面有其他的单位
  144. return ""
  145. }
  146. //fmt.Println(key, v, fnum)
  147. if v < 10 && v >= 0 {
  148. if ishaspoint { //小数部分
  149. if v >= 1 {
  150. fnum = v
  151. } else if v < 1 && v > 0 {
  152. decimals += fnum * v
  153. }
  154. } else {
  155. if tmp != float64(0) {
  156. node += tmp
  157. }
  158. tmp = float64(v)
  159. }
  160. } else if v == 10000 || v == 100000000 { //单位万、亿
  161. if tmp != float64(0) {
  162. node += tmp
  163. tmp = float64(0)
  164. }
  165. nodes = append(nodes, node*float64(v))
  166. node = float64(0)
  167. } else {
  168. if v == 10 && tmp == 0 {
  169. tmp = 1
  170. }
  171. tmp = tmp * float64(v)
  172. node += tmp
  173. tmp = float64(0)
  174. }
  175. }
  176. if key == "整" || key == "正" || key == "分" {
  177. end = true
  178. }
  179. return ""
  180. })
  181. nodes = append(nodes, node, tmp)
  182. ret := float64(0)
  183. for _, v := range nodes {
  184. ret += v
  185. }
  186. return (ret + decimals) * suffixUnit
  187. }
  188. //数字金额转换
  189. func numMoney(text string) (moneyFloat float64, flag bool) {
  190. //tmp := fmt.Sprintf("%f", data[0])
  191. repUnit := float64(1)
  192. if regQianw.MatchString(text) {
  193. text = strings.Replace(text, "千万", "万", -1)
  194. repUnit = float64(1000)
  195. }
  196. text = replaceSymbol(text, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
  197. text = replaceString(text, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
  198. text = CutAllSpace(text)
  199. rets := regNumFloat.FindAllString(text, -1)
  200. fnums := []float64{}
  201. unitstrs := []string{}
  202. if len(rets) > 0 {
  203. pindex := 0 //单位前置
  204. for k, v := range rets {
  205. f, err := strconv.ParseFloat(v, 64)
  206. if err == nil {
  207. fnums = append(fnums, f)
  208. index := strings.Index(text, v)
  209. //单位后置
  210. start := index + len(v)
  211. end := start + 3
  212. //log.Println("vvv", tmp, v, pindex, index, start)
  213. if k > 0 {
  214. if start >= pindex+3 {
  215. pstart := pindex + 3
  216. if pstart >= index {
  217. pstart = index
  218. }
  219. if len(text) > end {
  220. unitstrs = append(unitstrs, text[pstart:index]+text[start:end])
  221. } else {
  222. unitstrs = append(unitstrs, text[pstart:index]+text[start:])
  223. }
  224. } else {
  225. if len(text) > end {
  226. unitstrs = append(unitstrs, text[start:end])
  227. } else {
  228. unitstrs = append(unitstrs, text[start:])
  229. }
  230. }
  231. } else {
  232. if len(text) > end {
  233. if index-3 >= 0 {
  234. unitstrs = append(unitstrs, text[index-3:index]+text[start:end])
  235. } else {
  236. unitstrs = append(unitstrs, text[start:end])
  237. }
  238. } else {
  239. if index-3 >= 0 {
  240. unitstrs = append(unitstrs, text[index-3:index]+text[start:])
  241. } else {
  242. unitstrs = append(unitstrs, text[start:])
  243. }
  244. }
  245. }
  246. pindex = start
  247. }
  248. }
  249. }
  250. //log.Println("unitstrs", fnums, unitstrs)
  251. unit := float64(0)
  252. fnum := float64(0)
  253. for k, v := range fnums {
  254. fnum = v
  255. units := regStrUnit.FindAllString(unitstrs[k], -1)
  256. for _, v := range units {
  257. if moneyUnit[v] != 0 {
  258. unit = moneyUnit[v]
  259. break
  260. }
  261. }
  262. if unit != float64(0) { //取第一个
  263. break
  264. }
  265. }
  266. fnum = fnum * repUnit
  267. if unit == float64(0) {
  268. moneyFloat = fnum
  269. } else {
  270. moneyFloat = fnum * unit
  271. }
  272. if unit == 10000 {
  273. flag = false
  274. } else {
  275. flag = true
  276. }
  277. return
  278. }
  279. //清理所有空白符
  280. func CutAllSpace(text string) string {
  281. tmp := cutAllSpace.ReplaceAllString(text, "")
  282. tmp = replaceSymbol(tmp, spaces)
  283. return tmp
  284. }
  285. //符号替换
  286. func replaceString(con string, ret, rep []string) string {
  287. for k, v := range ret {
  288. if len(rep) > k {
  289. con = strings.Replace(con, v, rep[k], -1)
  290. }
  291. }
  292. return con
  293. }
  294. //过滤符号
  295. func replaceSymbol(con string, rep []string) string {
  296. for _, v := range rep {
  297. con = strings.Replace(con, v, "", -1)
  298. }
  299. return con
  300. }