util.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. package main
  2. import (
  3. "github.com/xuri/excelize/v2"
  4. "log"
  5. "regexp"
  6. "strconv"
  7. "strings"
  8. )
  // Shared patterns and lookup tables used by the amount parser and the company
  // classifier. All patterns are constants, so they are built with MustCompile:
  // a malformed pattern stops the program at start-up instead of silently
  // leaving a nil *regexp.Regexp behind.
  var (
  	// regNumFloat matches an Arabic-numeral amount with an optional fraction.
  	regNumFloat = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?`)
  	// regStrUnit matches a single unit character (元, 万 or 亿). Inside a
  	// character class "|" is a literal pipe, not alternation, so it is omitted.
  	regStrUnit = regexp.MustCompile(`[元万亿]`)
  	// contentUnit detects a "ten-thousand" unit declaration anywhere in the text.
  	contentUnit = regexp.MustCompile(`(万元|单位/万)`)
  	// regStrChar is the character class of symbols accepted in a capital-form
  	// (大写) amount.
  	regStrChar = `[〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]`
  	// numCapitals captures a run of 4 to 40 capital-form characters.
  	numCapitals = regexp.MustCompile(`(` + regStrChar + `{4,40})`)
  	// moneyRegChar matches one capital-form character at a time.
  	moneyRegChar = regexp.MustCompile(regStrChar)
  	// regQianw matches one or two digits followed by 千万.
  	regQianw = regexp.MustCompile(`\d{1,2}千万`)
  	// yangMap holds the central enterprises (98 of them, per the original note).
  	yangMap = make(map[string]bool)
  	// yangChildMap holds subsidiaries of central enterprises.
  	yangChildMap = make(map[string]bool)
  	// cutAllSpace matches runs of ASCII whitespace. The pattern also matches the
  	// empty string; it is kept as-is because callers outside this block may
  	// depend on that.
  	cutAllSpace = regexp.MustCompile(`\s*`)
  	// spaces lists whitespace characters removed literally by CutAllSpace,
  	// including the ideographic, em and no-break spaces that \s does not cover.
  	spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
  	// moneyChar maps a numeral or unit character to its numeric value. Every
  	// value is a float64 except "点", which is the string ".". "〇" has no entry.
  	moneyChar = map[string]interface{}{
  		"一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
  		"六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
  		"百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
  		"零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
  	}
  	// moneyUnit maps a unit character to its multiplier.
  	moneyUnit = map[string]float64{
  		"元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
  	}
  )
  // currencyItem maps a currency marker that may appear in free text to the
  // normalized currency name returned by GetCurrency.
  var currencyItem = map[string]string{
  	"人民币":    "人民币",
  	"rmb":    "人民币",
  	"RMB":    "人民币",
  	"$":      "美元",
  	"\uFF04": "美元", // full-width dollar sign; written as an escape so it cannot collapse into "$"
  	"美元":     "美元",
  	"港元":     "港币",
  	"港币":     "港币",
  	"澳币":     "澳币",
  	"澳元":     "澳币",
  }

  // GetCurrency reports the currency mentioned in text.
  //
  // It returns "" for empty text and "人民币" when no known marker is present.
  // When several markers occur, the one that appears earliest in text wins (the
  // longer marker on a tie), so the result does not depend on map iteration
  // order.
  func GetCurrency(text string) (currency string) {
  	if text == "" {
  		return ""
  	}
  	currency = "人民币"
  	bestPos, bestLen := -1, 0
  	for marker, name := range currencyItem {
  		pos := strings.Index(text, marker)
  		if pos < 0 {
  			continue
  		}
  		if bestPos < 0 || pos < bestPos || (pos == bestPos && len(marker) > bestLen) {
  			bestPos, bestLen, currency = pos, len(marker), name
  		}
  	}
  	return currency
  }
  57. // 金额转换
  58. func ObjToMoney(text string) float64 {
  59. isfindUnit := true
  60. ret := capitalMoney(text)
  61. if ret < float64(10000) || ret > float64(50000000000) {
  62. ret2, b := numMoney(text)
  63. isfindUnit = b
  64. if ret2 > ret {
  65. ret = ret2
  66. }
  67. }
  68. f, _ := strconv.ParseFloat(strconv.FormatFloat(ret, 'f', 4, 64), 64)
  69. // if f < 1 {
  70. // f = 0
  71. // }
  72. //如果金额小于50,全文检索单位:万
  73. if f < 50 && f > 0 && isfindUnit {
  74. rep := contentUnit.FindAllStringIndex(text, -1)
  75. if len(rep) > 0 {
  76. f = f * 10000
  77. }
  78. }
  79. return f
  80. }
  81. func capitalMoney(text string) float64 {
  82. nodes := []float64{}
  83. node := float64(0)
  84. tmp := float64(0)
  85. decimals := 0.0
  86. ishaspoint := false //是否含小数点
  87. fnum := float64(0)
  88. end := false
  89. //str := fmt.Sprint(data[0])
  90. //提取第一个大写信息
  91. strmatch := numCapitals.FindAllStringSubmatch(text, -1)
  92. if len(strmatch) > 0 {
  93. text = strmatch[0][0]
  94. }
  95. suffixUnit := float64(1)
  96. if strings.HasSuffix(text, "万") || strings.HasSuffix(text, "万元") || strings.HasSuffix(text, "万元整") {
  97. index := strings.LastIndex(text, "万")
  98. text = text[0:index]
  99. suffixUnit = float64(10000)
  100. }
  101. moneyRegChar.ReplaceAllStringFunc(text, func(key string) string {
  102. if key == "元" || key == "圆" || key == "点" {
  103. ishaspoint = true
  104. }
  105. if v, ok := moneyChar[key].(float64); ok && !end {
  106. if ishaspoint && v > 10 { //排除后面有其他的单位
  107. return ""
  108. }
  109. //fmt.Println(key, v, fnum)
  110. if v < 10 && v >= 0 {
  111. if ishaspoint { //小数部分
  112. if v >= 1 {
  113. fnum = v
  114. } else if v < 1 && v > 0 {
  115. decimals += fnum * v
  116. }
  117. } else {
  118. if tmp != float64(0) {
  119. node += tmp
  120. }
  121. tmp = float64(v)
  122. }
  123. } else if v == 10000 || v == 100000000 { //单位万、亿
  124. if tmp != float64(0) {
  125. node += tmp
  126. tmp = float64(0)
  127. }
  128. nodes = append(nodes, node*float64(v))
  129. node = float64(0)
  130. } else {
  131. if v == 10 && tmp == 0 {
  132. tmp = 1
  133. }
  134. tmp = tmp * float64(v)
  135. node += tmp
  136. tmp = float64(0)
  137. }
  138. }
  139. if key == "整" || key == "正" || key == "分" {
  140. end = true
  141. }
  142. return ""
  143. })
  144. nodes = append(nodes, node, tmp)
  145. ret := float64(0)
  146. for _, v := range nodes {
  147. ret += v
  148. }
  149. return (ret + decimals) * suffixUnit
  150. }
  151. // 数字金额转换
  152. func numMoney(text string) (moneyFloat float64, flag bool) {
  153. //tmp := fmt.Sprintf("%f", data[0])
  154. repUnit := float64(1)
  155. if regQianw.MatchString(text) {
  156. text = strings.Replace(text, "千万", "万", -1)
  157. repUnit = float64(1000)
  158. }
  159. text = replaceSymbol(text, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
  160. text = replaceString(text, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
  161. text = CutAllSpace(text)
  162. rets := regNumFloat.FindAllString(text, -1)
  163. fnums := []float64{}
  164. unitstrs := []string{}
  165. if len(rets) > 0 {
  166. pindex := 0 //单位前置
  167. for k, v := range rets {
  168. f, err := strconv.ParseFloat(v, 64)
  169. if err == nil {
  170. fnums = append(fnums, f)
  171. index := strings.Index(text, v)
  172. //单位后置
  173. start := index + len(v)
  174. end := start + 3
  175. //log.Println("vvv", tmp, v, pindex, index, start)
  176. if k > 0 {
  177. if start >= pindex+3 {
  178. pstart := pindex + 3
  179. if pstart >= index {
  180. pstart = index
  181. }
  182. if len(text) > end {
  183. unitstrs = append(unitstrs, text[pstart:index]+text[start:end])
  184. } else {
  185. unitstrs = append(unitstrs, text[pstart:index]+text[start:])
  186. }
  187. } else {
  188. if len(text) > end {
  189. unitstrs = append(unitstrs, text[start:end])
  190. } else {
  191. unitstrs = append(unitstrs, text[start:])
  192. }
  193. }
  194. } else {
  195. if len(text) > end {
  196. if index-3 >= 0 {
  197. unitstrs = append(unitstrs, text[index-3:index]+text[start:end])
  198. } else {
  199. unitstrs = append(unitstrs, text[start:end])
  200. }
  201. } else {
  202. if index-3 >= 0 {
  203. unitstrs = append(unitstrs, text[index-3:index]+text[start:])
  204. } else {
  205. unitstrs = append(unitstrs, text[start:])
  206. }
  207. }
  208. }
  209. pindex = start
  210. }
  211. }
  212. }
  213. //log.Println("unitstrs", fnums, unitstrs)
  214. unit := float64(0)
  215. fnum := float64(0)
  216. for k, v := range fnums {
  217. fnum = v
  218. units := regStrUnit.FindAllString(unitstrs[k], -1)
  219. for _, v := range units {
  220. if moneyUnit[v] != 0 {
  221. unit = moneyUnit[v]
  222. break
  223. }
  224. }
  225. if unit != float64(0) { //取第一个
  226. break
  227. }
  228. }
  229. fnum = fnum * repUnit
  230. if unit == float64(0) {
  231. moneyFloat = fnum
  232. } else {
  233. moneyFloat = fnum * unit
  234. }
  235. if unit == 10000 {
  236. flag = false
  237. } else {
  238. flag = true
  239. }
  240. return
  241. }
  242. // 清理所有空白符
  243. func CutAllSpace(text string) string {
  244. tmp := cutAllSpace.ReplaceAllString(text, "")
  245. tmp = replaceSymbol(tmp, spaces)
  246. return tmp
  247. }
  // replaceString replaces, in order, every occurrence of ret[i] in con with
  // rep[i]. Entries of ret without a counterpart in rep are ignored. The
  // replacements are applied one after another, so a later pair sees the output
  // of the earlier ones.
  func replaceString(con string, ret, rep []string) string {
  	pairs := len(ret)
  	if len(rep) < pairs {
  		pairs = len(rep)
  	}
  	for i := 0; i < pairs; i++ {
  		con = strings.ReplaceAll(con, ret[i], rep[i])
  	}
  	return con
  }
  // replaceSymbol deletes every occurrence of each string in rep from con,
  // processing the entries of rep in order.
  func replaceSymbol(con string, rep []string) string {
  	for i := range rep {
  		con = strings.ReplaceAll(con, rep[i], "")
  	}
  	return con
  }
  // IsInSlice reports whether s is equal to one of the elements of slice.
  // A nil or empty slice never contains s.
  func IsInSlice(slice []string, s string) bool {
  	for i := range slice {
  		if slice[i] == s {
  			return true
  		}
  	}
  	return false
  }
  278. // readXlsx 读取央企
  279. func readXlsx() {
  280. filePath := "央企.xlsx"
  281. // 1. 读取 Excel(获取 A 列数据)
  282. f, err := excelize.OpenFile(filePath)
  283. if err != nil {
  284. log.Fatal("❌ 无法打开 Excel 文件:", err)
  285. }
  286. defer f.Close()
  287. //读取央企
  288. rows, err := f.GetRows("Sheet1")
  289. if err != nil {
  290. log.Fatal("❌ 无法读取 Sheet1:", err)
  291. }
  292. for i := 1; i < len(rows); i++ {
  293. name := rows[i][0]
  294. if name != "" {
  295. yangMap[name] = true
  296. }
  297. }
  298. // 央企下属
  299. rows2, err := f.GetRows("Sheet2")
  300. if err != nil {
  301. log.Fatal("❌ 无法读取 Sheet2:", err)
  302. }
  303. for i := 1; i < len(rows2); i++ {
  304. name := rows2[i][1]
  305. if name != "" {
  306. yangChildMap[name] = true
  307. }
  308. }
  309. }
  310. // getCompanyType 获取公司类型;央企、国企、央企下属、事业单位、民企
  311. func getCompanyType(name, ctype string) (company_type string) {
  312. if name == "" {
  313. return
  314. }
  315. if yangMap[name] {
  316. company_type = "央企"
  317. return
  318. }
  319. if yangChildMap[name] {
  320. company_type = "央企"
  321. return
  322. }
  323. if strings.Contains(ctype, "国有独资") || strings.Contains(ctype, "国有控股") ||
  324. ctype == "全民所有制" || ctype == "集体所有制" || ctype == "全民所有制分支机构(非法人)" ||
  325. ctype == "集体分支机构(非法人)" {
  326. company_type = "国企"
  327. return
  328. }
  329. company_type = "其他"
  330. return
  331. }