// Package main contains helpers that extract a monetary amount and currency
// from free-form Chinese text and classify companies against a list of
// central state-owned enterprises loaded from an Excel workbook.
package main

import (
	"log"
	"regexp"
	"strconv"
	"strings"

	"github.com/xuri/excelize/v2"
)

var (
	// regNumFloat matches an unsigned decimal number without leading zeros.
	regNumFloat = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?`)
	// regStrUnit matches a single unit character (yuan, ten-thousand, hundred-million).
	regStrUnit = regexp.MustCompile(`[元万亿]`)
	// contentUnit matches a document-wide hint that amounts are quoted in 万.
	contentUnit = regexp.MustCompile(`(万元|单位/万)`)
	// numCapitals matches a run of 4 to 40 capital-numeral / money characters.
	numCapitals = regexp.MustCompile(`([〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]{4,40})`)
	// regStrChar is the character class of a single capital-numeral / money
	// character. A "|" inside [...] is a literal, not an alternation, so the
	// members are listed without separators.
	regStrChar   = `[〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]`
	moneyRegChar = regexp.MustCompile(regStrChar)
	// regQianw matches amounts written as "<1-2 digits>千万".
	regQianw = regexp.MustCompile(`\d{1,2}千万`)

	yangMap      = make(map[string]bool) // names of central state-owned enterprises (Sheet1)
	yangChildMap = make(map[string]bool) // names of their subsidiaries (Sheet2)

	cutAllSpace = regexp.MustCompile(`\s*`)
	// spaces lists whitespace characters removed literally by CutAllSpace in
	// addition to what the \s class covers.
	spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}

	// moneyChar maps a numeral or magnitude character to its numeric value.
	// "点" maps to a string, so it never passes the float64 type assertion in
	// capitalMoney and only acts as a decimal-point marker there.
	// "〇" is deliberately left unmapped (previously commented out: "〇": "0", "零": "0").
	moneyChar = map[string]interface{}{
		"一": float64(1), "壹": float64(1),
		"二": float64(2), "贰": float64(2),
		"三": float64(3), "叁": float64(3),
		"四": float64(4), "肆": float64(4),
		"五": float64(5), "伍": float64(5),
		"六": float64(6), "陆": float64(6),
		"七": float64(7), "柒": float64(7),
		"八": float64(8), "捌": float64(8),
		"九": float64(9), "玖": float64(9),
		"十": float64(10), "拾": float64(10),
		"百": float64(100), "佰": float64(100),
		"千": float64(1000), "仟": float64(1000),
		"万": float64(10000),
		"亿": float64(100000000), "億": float64(100000000),
		"零": float64(0),
		"点": ".",
		"角": float64(0.1),
		"分": float64(0.01),
	}

	// moneyUnit maps a unit character to its multiplier.
	moneyUnit = map[string]float64{
		"元": float64(1),
		"万": float64(10000),
		"亿": float64(100000000),
		"億": float64(100000000),
	}
)

// currencyItem maps a keyword found in text to the canonical currency name.
//
// NOTE(review): the source listed "$" twice, which is a duplicate-key compile
// error. The second entry is presumably the full-width dollar sign (U+FF04)
// that was folded to ASCII by a text normalizer; it is written as an escape
// here so it survives such tooling. Confirm against the original file.
var currencyItem = map[string]string{
	"人民币":    "人民币",
	"rmb":    "人民币",
	"RMB":    "人民币",
	"$":      "美元",
	"\uff04": "美元",
	"美元":     "美元",
	"港元":     "港币",
	"港币":     "港币",
	"澳币":     "澳币",
	"澳元":     "澳币",
}

// GetCurrency returns the currency mentioned in text.
//
// It returns "" for empty text and "人民币" when no keyword from currencyItem
// is present. When several keywords are present, the one that occurs earliest
// in text wins (ties are broken by keyword order), so the result no longer
// depends on map iteration order.
func GetCurrency(text string) (currency string) {
	if text == "" {
		return ""
	}
	currency = "人民币"
	bestPos, bestKey := -1, ""
	for key, name := range currencyItem {
		pos := strings.Index(text, key)
		if pos < 0 {
			continue
		}
		if bestPos < 0 || pos < bestPos || (pos == bestPos && key < bestKey) {
			bestPos, bestKey, currency = pos, key, name
		}
	}
	return currency
}

// ObjToMoney extracts a monetary amount from text.
//
// The capital-numeral reading (capitalMoney) is tried first. When it is below
// 10,000 or above 50,000,000,000, the Arabic-digit reading (numMoney) is also
// computed and the larger of the two is kept. The result is rounded to four
// decimal places. If it is then in (0, 50) and numMoney did not apply a 万
// unit, the amount is multiplied by 10,000 when the text contains "万元" or
// "单位/万" anywhere.
func ObjToMoney(text string) float64 {
	noWanUnit := true
	amount := capitalMoney(text)
	if amount < 10000 || amount > 50000000000 {
		digits, flag := numMoney(text)
		noWanUnit = flag
		if digits > amount {
			amount = digits
		}
	}

	// Round to 4 decimals via a format/parse round trip. The formatted value
	// of a float64 always parses, so the error can be ignored.
	f, _ := strconv.ParseFloat(strconv.FormatFloat(amount, 'f', 4, 64), 64)

	// Small amounts: look for a document-wide "unit: 万" hint.
	if f > 0 && f < 50 && noWanUnit && contentUnit.MatchString(text) {
		f *= 10000
	}
	return f
}

// capitalMoney converts the first run of capital numerals in text (for
// example "壹佰贰拾叁元伍角") to a number. If text contains no run of at
// least four such characters, the whole text is scanned instead. A trailing
// "万", "万元" or "万元整" is stripped and applied as a final ×10,000.
//
// NOTE(review): digits after "点" are recorded but only contribute when they
// are followed by "角" or "分"; confirm whether "壹点伍" style decimals are
// expected to be supported.
func capitalMoney(text string) float64 {
	if m := numCapitals.FindString(text); m != "" {
		text = m
	}

	suffixUnit := float64(1)
	if strings.HasSuffix(text, "万") || strings.HasSuffix(text, "万元") || strings.HasSuffix(text, "万元整") {
		text = text[:strings.LastIndex(text, "万")]
		suffixUnit = 10000
	}

	var (
		sections []float64 // completed 万 / 亿 sections
		section  float64   // value accumulated in the current section
		pending  float64   // last digit, waiting for its magnitude
		decimals float64   // fractional part built from 角 / 分
		lastFrac float64   // last digit seen after the decimal marker
		hasPoint bool      // a 元 / 圆 / 点 has been seen
		finished bool      // a 整 / 正 / 分 has been seen
	)

	for _, key := range moneyRegChar.FindAllString(text, -1) {
		if key == "元" || key == "圆" || key == "点" {
			hasPoint = true
		}
		if v, ok := moneyChar[key].(float64); ok && !finished {
			switch {
			case hasPoint && v > 10:
				// Ignore larger magnitudes that follow the decimal marker.
			case v >= 0 && v < 10:
				if hasPoint {
					// Fractional part: remember the digit, apply it on 角 / 分.
					if v >= 1 {
						lastFrac = v
					} else if v > 0 {
						decimals += lastFrac * v
					}
				} else {
					if pending != 0 {
						section += pending
					}
					pending = v
				}
			case v == 10000 || v == 100000000:
				// 万 / 亿 close the current section.
				if pending != 0 {
					section += pending
					pending = 0
				}
				sections = append(sections, section*v)
				section = 0
			default:
				// 拾 / 佰 / 仟; a bare 拾 counts as 10.
				if v == 10 && pending == 0 {
					pending = 1
				}
				section += pending * v
				pending = 0
			}
		}
		if key == "整" || key == "正" || key == "分" {
			finished = true
		}
	}

	total := section + pending
	for _, s := range sections {
		total += s
	}
	return (total + decimals) * suffixUnit
}

// numMoney extracts an amount written with Arabic digits.
//
// Punctuation and whitespace are stripped, then each number is examined in
// order together with up to three bytes of text on either side of it. The
// first number with a unit character (元 / 万 / 亿) in that window is used and
// multiplied by the unit; if no number has a unit, the last number is
// returned unscaled. Text matching "<digits>千万" is read as 万 with an extra
// ×1,000. flag is false when the applied unit is 万 and true otherwise.
func numMoney(text string) (moneyFloat float64, flag bool) {
	repUnit := float64(1)
	if regQianw.MatchString(text) {
		text = strings.Replace(text, "千万", "万", -1)
		repUnit = 1000
	}

	// NOTE(review): the source repeated the ASCII ",", "(" and ")" entries and
	// replaced "." with ".". Those look like full-width forms folded to ASCII
	// by a normalizer, so the full-width code points are restored as escapes.
	// Confirm against the original file.
	text = replaceSymbol(text, []string{
		",", "\uff0c", // comma, full-width comma
		"(", ")", "\uff08", "\uff09", // parentheses, full-width parentheses
		":", "\uff1a", // colon, full-width colon
		"\n",
	})
	text = replaceString(text, []string{"万元", "亿元", "\uff0e"}, []string{"万", "亿", "."})
	text = CutAllSpace(text)

	const window = 3 // bytes inspected on each side of a number (one CJK character)

	var fnum, unit float64
	prevEnd := 0 // end offset of the previous successfully parsed number
	// Use match offsets rather than strings.Index so that a number whose
	// digits also occur earlier in the text is located at its real position.
	for k, loc := range regNumFloat.FindAllStringIndex(text, -1) {
		index, start := loc[0], loc[1]
		f, err := strconv.ParseFloat(text[index:start], 64)
		if err != nil {
			continue
		}
		fnum = f

		// Unit after the number.
		end := start + window
		if end > len(text) {
			end = len(text)
		}
		suffix := text[start:end]

		// Unit before the number.
		prefix := ""
		switch {
		case k == 0:
			if index >= window {
				prefix = text[index-window : index]
			}
		case start >= prevEnd+window:
			// Skip the bytes already used as the previous number's suffix.
			pstart := prevEnd + window
			if pstart > index {
				pstart = index
			}
			prefix = text[pstart:index]
		}
		prevEnd = start

		for _, u := range regStrUnit.FindAllString(prefix+suffix, -1) {
			if moneyUnit[u] != 0 {
				unit = moneyUnit[u]
				break
			}
		}
		if unit != 0 {
			// The first number that carries a unit wins.
			break
		}
	}

	moneyFloat = fnum * repUnit
	if unit != 0 {
		moneyFloat *= unit
	}
	return moneyFloat, unit != 10000
}

// CutAllSpace removes every whitespace character from text: everything the
// \s class matches plus the characters listed in spaces.
func CutAllSpace(text string) string {
	return replaceSymbol(cutAllSpace.ReplaceAllString(text, ""), spaces)
}

// replaceString replaces every occurrence of ret[i] in con with rep[i].
// Entries of ret that have no counterpart in rep are skipped.
func replaceString(con string, ret, rep []string) string {
	for i, old := range ret {
		if i < len(rep) {
			con = strings.Replace(con, old, rep[i], -1)
		}
	}
	return con
}

// replaceSymbol removes every occurrence of each string in rep from con.
func replaceSymbol(con string, rep []string) string {
	for _, sym := range rep {
		con = strings.Replace(con, sym, "", -1)
	}
	return con
}

// IsInSlice reports whether s is an element of slice.
func IsInSlice(slice []string, s string) bool {
	for _, item := range slice {
		if item == s {
			return true
		}
	}
	return false
}

// readXlsx loads "央企.xlsx" from the working directory into yangMap
// (Sheet1, first column) and yangChildMap (Sheet2, second column). The first
// row of each sheet is skipped. Any failure to open or read the workbook
// terminates the process via log.Fatal.
func readXlsx() {
	filePath := "央企.xlsx"
	f, err := excelize.OpenFile(filePath)
	if err != nil {
		log.Fatal("❌ 无法打开 Excel 文件:", err)
	}
	defer f.Close()

	// Central state-owned enterprises.
	rows, err := f.GetRows("Sheet1")
	if err != nil {
		log.Fatal("❌ 无法读取 Sheet1:", err)
	}
	for i := 1; i < len(rows); i++ {
		// GetRows can return rows shorter than expected (blank rows, trailing
		// empty cells), so check the length before indexing.
		if len(rows[i]) == 0 {
			continue
		}
		if name := rows[i][0]; name != "" {
			yangMap[name] = true
		}
	}

	// Subsidiaries of central state-owned enterprises.
	rows2, err := f.GetRows("Sheet2")
	if err != nil {
		log.Fatal("❌ 无法读取 Sheet2:", err)
	}
	for i := 1; i < len(rows2); i++ {
		if len(rows2[i]) < 2 {
			continue
		}
		if name := rows2[i][1]; name != "" {
			yangChildMap[name] = true
		}
	}
}

// getCompanyType classifies a company by its name and registered type.
//
// It returns "" for an empty name, "央企" when the name is in yangMap or
// yangChildMap, "国企" when ctype indicates state or collective ownership,
// and "其他" otherwise.
func getCompanyType(name, ctype string) string {
	switch {
	case name == "":
		return ""
	case yangMap[name], yangChildMap[name]:
		return "央企"
	case strings.Contains(ctype, "国有独资"),
		strings.Contains(ctype, "国有控股"),
		ctype == "全民所有制",
		ctype == "集体所有制",
		ctype == "全民所有制分支机构(非法人)",
		ctype == "集体分支机构(非法人)":
		return "国企"
	default:
		return "其他"
	}
}