123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543 |
- package main
- import (
- "fmt"
- "github.com/xuri/excelize/v2"
- "log"
- "regexp"
- "strings"
- )
// privateCodes is the set of entity-type codes that dealZhuTi tags as
// "私营企业" (private enterprise). It lives at package scope so the set is
// built once instead of inside the per-row loop.
var privateCodes = map[string]bool{
	"1130": true,
	"1151": true,
	"1152": true,
	"1212": true,
	"1222": true,

	"2130": true,
	"2151": true,
	"2152": true,
	"2212": true,
	"2222": true,

	"4531": true,
	"4532": true,
	"4533": true,
	"4540": true,
	"4551": true,
	"4552": true,
	"4553": true,
	"4560": true,
}
- // dealZhuTi 处理经营主体
- func dealZhuTi() {
- // 打开 Excel
- filePath := "经营主体类型.xlsx" // TODO: 改成真实的 xlsx 文件路径
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- log.Fatalf("打开文件失败: %v", err)
- }
- // 假设只有一个 sheet
- sheetName := f.GetSheetName(0)
- // 先构造代码 -> 名称 map
- codeNameMap := make(map[string]string)
- rows, err := f.GetRows(sheetName)
- if err != nil {
- log.Fatalf("读取行失败: %v", err)
- }
- for idx, row := range rows {
- // 跳过表头
- if idx == 0 {
- continue
- }
- if len(row) < 2 {
- continue
- }
- code := strings.TrimSpace(row[0])
- name := strings.TrimSpace(row[1])
- // 替换中文括号为英文括号
- name = strings.ReplaceAll(name, "(", "(")
- name = strings.ReplaceAll(name, ")", ")")
- codeNameMap[code] = name
- }
- // 遍历数据行,新增四列
- for idx, row := range rows {
- if idx == 0 {
- // 表头
- _ = f.SetCellValue(sheetName, fmt.Sprintf("D%d", idx+1), "第一位名称")
- _ = f.SetCellValue(sheetName, fmt.Sprintf("E%d", idx+1), "第二位名称")
- _ = f.SetCellValue(sheetName, fmt.Sprintf("F%d", idx+1), "第三位名称")
- _ = f.SetCellValue(sheetName, fmt.Sprintf("G%d", idx+1), "第四位名称")
- _ = f.SetCellValue(sheetName, fmt.Sprintf("H%d", idx+1), "层级")
- _ = f.SetCellValue(sheetName, fmt.Sprintf("I%d", idx+1), "标签1")
- continue
- }
- if len(row) < 2 {
- continue
- }
- code := strings.TrimSpace(row[0])
- name := strings.TrimSpace(row[1])
- name = strings.ReplaceAll(name, "(", "(")
- name = strings.ReplaceAll(name, ")", ")")
- // 判断标签
- tag := "内资企业"
- if strings.HasPrefix(code, "5") || strings.HasPrefix(code, "7") || code == "8500" {
- tag = "外企"
- } else if strings.HasPrefix(code, "6") {
- tag = "外企-港澳台"
- } else if strings.HasPrefix(code, "91") || strings.HasPrefix(code, "92") {
- tag = "农合"
- } else if strings.HasPrefix(code, "95") {
- tag = "个体工商户"
- } else if privateCodes[code] {
- tag = "私营企业"
- }
- if len(code) != 4 {
- continue
- }
- // 层级判断
- level := 4
- if strings.HasSuffix(code, "000") {
- level = 1
- } else if strings.HasSuffix(code, "00") {
- level = 2
- } else if strings.HasSuffix(code, "0") {
- level = 3
- }
- // 各层级代码
- firstCode := code[:1] + "000"
- secondCode := code[:2] + "00"
- thirdCode := code[:3] + "0"
- fourthCode := code
- firstName := ""
- secondName := ""
- thirdName := ""
- fourthName := ""
- // 根据层级决定填哪些
- if level >= 1 {
- firstName = codeNameMap[firstCode]
- }
- if level >= 2 {
- secondName = codeNameMap[secondCode]
- }
- if level >= 3 {
- thirdName = codeNameMap[thirdCode]
- }
- if level == 4 {
- fourthName = codeNameMap[fourthCode]
- }
- // B列加缩进
- indent := strings.Repeat(" ", level-1) // 每层两个空格
- newName := indent + name
- rowNum := idx + 1
- // 写回新的名称到 B 列
- _ = f.SetCellValue(sheetName, fmt.Sprintf("B%d", rowNum), newName)
- // 写层级名称列
- _ = f.SetCellValue(sheetName, fmt.Sprintf("D%d", rowNum), firstName)
- _ = f.SetCellValue(sheetName, fmt.Sprintf("E%d", rowNum), secondName)
- _ = f.SetCellValue(sheetName, fmt.Sprintf("F%d", rowNum), thirdName)
- _ = f.SetCellValue(sheetName, fmt.Sprintf("G%d", rowNum), fourthName)
- _ = f.SetCellValue(sheetName, fmt.Sprintf("H%d", rowNum), level)
- _ = f.SetCellValue(sheetName, fmt.Sprintf("I%d", rowNum), tag)
- }
- // 保存新文件
- err = f.SaveAs("经营主体类型-result3.xlsx")
- if err != nil {
- log.Fatalf("保存文件失败: %v", err)
- }
- fmt.Println("处理完成,结果保存在 经营主体类型-result3.xlsx")
- }
- // matchCompanyType 根据企业类型,返回标准经营主体代码
- func matchCompanyType() {
- // ⚙️ Excel 文件名
- inputFile := "company_types.xlsx"
- outputFile := "company_types_out.xlsx"
- // 📂 打开 Excel 文件
- f, err := excelize.OpenFile(inputFile)
- if err != nil {
- log.Fatalf("无法打开文件: %v", err)
- }
- defer f.Close()
- // ✅ 加载经营主体类型 Sheet
- codeMap, err := loadCodeMap(f, "经营主体类型")
- nameMap, err := loadNameMap(f, "经营主体类型")
- if err != nil {
- log.Fatalf("加载经营主体类型失败: %v", err)
- }
- // 🔍 正则,用于提取4位数字
- re4 := regexp.MustCompile(`\d{4}`)
- // 📄 遍历 Sheet2
- sheetName := "Sheet2"
- rows, err := f.GetRows(sheetName)
- if err != nil {
- log.Fatalf("读取 Sheet2 失败: %v", err)
- }
- for idx, row := range rows {
- if len(row) == 0 {
- continue
- }
- originText := strings.TrimSpace(row[0])
- if originText == "" {
- continue
- }
- var code string
- // 去除引号
- cleanText := strings.ReplaceAll(originText, `"`, "")
- cleanText = strings.TrimSpace(cleanText)
- // 替换中文括号为英文括号
- cleanText = strings.ReplaceAll(cleanText, "(", "(")
- cleanText = strings.ReplaceAll(cleanText, ")", ")")
- cleanText = strings.ReplaceAll(cleanText, "外资比例低于25%", "")
- // 判断是不是日期(如 2008/10/31)
- if matched, _ := regexp.MatchString(`^\d{4}/\d{1,2}/\d{1,2}$`, cleanText); matched {
- log.Printf("跳过日期行: %s", cleanText)
- continue
- }
- // 如果包含“年”/“月”/“日”等,也跳过(不提取数字)
- if strings.Contains(cleanText, "年") || strings.Contains(cleanText, "月") || strings.Contains(cleanText, "日") {
- log.Printf("跳过含日期描述的行: %s", cleanText)
- continue
- }
- // 优先提取4位数字
- if m := re4.FindString(cleanText); m != "" {
- code = m
- } else {
- // 去除引号后完全匹配
- if c, ok := codeMap[cleanText]; ok {
- code = c
- }
- }
- //匹配不到时,需要 提取信息;然后重新匹配
- code = getCodeByCompanyType(cleanText, codeMap)
- // ✏️ 如果匹配到,就写到 B 列
- if code != "" {
- cell, _ := excelize.CoordinatesToCellName(2, idx+1) // B列
- cell2, _ := excelize.CoordinatesToCellName(3, idx+1) // B列
- if err := f.SetCellValue(sheetName, cell, code); err != nil {
- log.Printf("写入单元格失败 %s: %v", cell, err)
- }
- if err = f.SetCellValue(sheetName, cell2, nameMap[code]); err != nil {
- log.Printf("写入单元格失败 %s: %v", cell2, err)
- }
- } else {
- log.Printf("未匹配: %s", originText)
- }
- }
- // 💾 保存结果
- if err := f.SaveAs(outputFile); err != nil {
- log.Fatalf("保存文件失败: %v", err)
- }
- fmt.Printf("✅ 处理完成!输出文件:%s\n", outputFile)
- }
// companyTypeDateRe matches a value that is nothing but a slash-separated
// date such as 2008/10/31. Compiled once rather than on every call.
var companyTypeDateRe = regexp.MustCompile(`^\d{4}/\d{1,2}/\d{1,2}$`)

// companyTypeBrackets converts full-width parentheses (U+FF08 / U+FF09) to
// ASCII. Escapes are used so the literals cannot be silently normalised.
var companyTypeBrackets = strings.NewReplacer("\uFF08", "(", "\uFF09", ")")

// getCodeByCompanyType maps a free-text company type to an entity code.
//
// cleanText is the (already de-quoted) company type; codeMap maps standard
// names to codes and may be nil. Resolution order:
//  1. If the text, trimmed and with brackets unified, is itself a key of
//     codeMap, that code is returned. This keeps the heuristics below from
//     remapping a name that is already canonical.
//  2. Otherwise the heuristic rules either yield a code directly or rewrite
//     the text to a standard name.
//  3. If no code was produced and the (possibly rewritten) text looks like a
//     date, the row is skipped and "" is returned.
//  4. Otherwise the rewritten name is looked up in codeMap.
//
// It returns "" when nothing matches.
func getCodeByCompanyType(cleanText string, codeMap map[string]string) (code string) {
	text := companyTypeBrackets.Replace(strings.TrimSpace(cleanText))

	if c, ok := codeMap[text]; ok {
		return c
	}

	name, ruleCode := resolveCompanyType(text)
	if ruleCode != "" {
		return ruleCode
	}

	// Date-like values carry no company type.
	if companyTypeDateRe.MatchString(name) {
		log.Printf("跳过日期行: %s", name)
		return ""
	}
	if strings.Contains(name, "年") || strings.Contains(name, "月") || strings.Contains(name, "日") {
		log.Printf("跳过含日期描述的行: %s", name)
		return ""
	}

	return codeMap[name]
}

// resolveCompanyType applies the heuristic rules to text. It returns either a
// hard-coded code, or a standard name to look up (text itself when no rule
// rewrites it). Exact-text rules run first; the substring rules follow and
// the first matching rule wins, so more specific patterns must stay ahead of
// the broader ones that would also match.
func resolveCompanyType(text string) (name, code string) {
	has := func(sub string) bool { return strings.Contains(text, sub) }

	// Rules keyed on the complete text. They are unambiguous, so they are
	// evaluated before any substring rule can shadow them.
	switch text {
	case "有限责任(公司)":
		return "有限责任公司", ""
	case "上市股份有限公司分公司":
		return "股份有限公司分公司(上市)", ""
	case "上市股份有限公司分公司(非上市)":
		return "股份有限公司分公司(非上市)", ""
	case "内资企业法人联营":
		return "联营", ""
	case "股份有限公司分支机构(台港澳与境内合资)":
		return text, "6220"
	case "国外投资":
		return text, "5000"
	case "股份有限公司(其他)":
		return text, "5290"
	case "股份有限公司(台、港、澳资)":
		return text, "6270"
	case "股份有限公司(港、澳、台)":
		return text, "6200"
	}

	name = text
	switch {
	// Single-member limited liability companies.
	case has("一人有限责任公司分公司"):
		name = "一人有限责任公司分公司"
	case has("一人有限责任公司"):
		name = "一人有限责任公司"

	// Individual businesses. The negative and positive tests form one
	// condition so that non-matching text continues to the rules below.
	// "户体商工个" is the reversed spelling matched as-is.
	case !has("非个人") && !has("非个体") && (has("个人") || has("个体") || has("户体商工个")):
		name = "个体工商户"

	// Foreign-invested forms.
	case has("中外合作"):
		if has("非公司外商投资企业(中外合作)") {
			name = "非公司外商投资企业(中外合作)"
		} else {
			name = "有限责任公司(中外合作)"
		}
	case has("有限责任公司(中外合资)"):
		name = "有限责任公司(中外合资)"
	case has("外商投资企业分支机构"):
		if has("非公司") {
			name = "非公司外商投资企业分支机构"
		} else {
			name = "外商投资企业分支机构"
		}

	// "其他" combined with "港、澳、台". The full condition sits in the case
	// so other "其他" texts still reach the later 5290/6290/6190/5390 rules.
	// NOTE(review): the target name below differs from the one used for
	// "有限责任公司(港、澳、台)" further down — confirm which is intended.
	case has("其他") && has("港、澳、台") && has("有限责任公司"):
		name = "有限责任公司(港、澳、台)"
	case has("其他") && has("港、澳、台") && has("非公司"):
		code = "6300"

	// Limited liability companies, Taiwan/Hong Kong/Macao and foreign variants.
	case has("有限责任公司(台港澳与境内合作)"):
		name = "有限责任公司(台港澳与境内合作)"
	case has("有限责任公司(台港澳与境内合资)"):
		name = "有限责任公司(台港澳与境内合资)"
	case has("有限责任公司(台港澳与外国投资者合资)"):
		name = "有限责任公司(台港澳与外国投资者合资)"
	case has("有限责任公司(台港澳合资)") || (has("台、港、澳资") && has("有限责任公司")):
		name = "有限责任公司(台港澳合资)"
	case has("有限责任公司(台港澳法人独资)"):
		name = "有限责任公司(港澳台法人独资)"
	case has("有限责任公司(台港澳自然人独资)"):
		name = "有限责任公司(港澳台自然人独资)"
	case has("有限责任公司(外商合资)"):
		name = "有限责任公司(外商合资)"
	case has("有限责任公司(外商投资"):
		name = "有限责任公司(外商投资、非独资)"
	case has("有限责任公司(外国法人独资)"):
		name = "有限责任公司(外国法人独资)"
	case has("外国非法人经济组织独资"):
		name = "外国非法人经济组织独资"
	case has("有限责任公司(外国自然人独资)") || has("有限责任公司(外自然人独资)"):
		name = "有限责任公司(外国自然人独资)"
	case has("有限责任公司(法人独资)(外商投资企业投资)"):
		// NOTE(review): maps a company to a *branch* name — confirm.
		name = "有限责任公司分公司(外商投资企业法人独资)"
	case has("有限责任公司(港、澳、台)") || has("有限责任公司(港澳台合资)"):
		name = "有限责任公司(台港澳合资)"
	// Both substrings are required, as in the sibling rules; matching on
	// "有限责任公司" alone would capture every limited liability company.
	case has("有限责任公司") && has("港澳台与境内合作"):
		name = "有限责任公司(台港澳与境内合作)"
	case has("港澳台与境内合资") && has("有限责任公司"):
		name = "有限责任公司(台港澳与境内合资)"
	// NOTE(review): the original author flagged this rule as needing
	// confirmation with the domain owner.
	case has("有限责任公司(法人独资") && has("私营"):
		name = "有限责任公司(非自然人投资或控股的法人独资)"
	case has("港澳台与外国投资者合资") && has("有限责任公司"):
		name = "有限责任公司(台港澳与外国投资者合资)"
	case has("港澳台法人独资") && has("有限责任公司"):
		name = "有限责任公司(港澳台法人独资)"
	case has("港澳台自然人独资") && has("有限责任公司"):
		name = "有限责任公司(港澳台自然人独资)"
	case has("有限责任公司(港澳台非法人经济组织独资)"):
		name = "有限责任公司(台港澳非法人经济组织独资)"
	case has("港澳台投资、非独资") && has("有限责任公司"):
		name = "有限责任公司(港澳台投资、非独资)"
	case has("港澳台合资") && has("未上市"):
		if has("股份有限公司") {
			code = "6230"
		}

	case has("集体所有制"):
		name = "集体所有制"

	// State-owned sole proprietorship. The spelling without "任" is kept for
	// compatibility; the correct spelling is accepted as well.
	case has("国有独资"):
		switch {
		case has("有限责公司分公司") || has("有限责任公司分公司"):
			name = "有限责任公司分公司(国有独资)"
		case has("有限责公司") || has("有限责任公司"):
			name = "有限责任公司(国有独资)"
		}

	// "台、港、澳" offices and branches: most specific pattern first, so the
	// generic "投资企业" code cannot override a branch or office code.
	case has("台、港、澳"):
		switch {
		case has("台、港、澳投资企业其他"):
			code = "6190"
		case has("台、港、澳分公司") || has("台、港、澳投资企业分公司") || has("台、港、澳投资公司分公司"):
			code = "6810"
		case has("台、港、澳办事处"):
			code = "6830"
		case has("台、港、澳投资企业"):
			code = "6000"
		}

	case has("台港澳投资企业办事处"):
		code = "6830"
	case has("台港澳投资有限合伙企业"):
		code = "6400"
	case has("台港澳投资普通合伙企业分支机构"):
		code = "6840"
	case has("台港澳投资特殊普通合伙企业分支机构"):
		code = "6420"
	// Must precede the broader "台港澳股份有限公司" rule, which it contains.
	case has("股份有限公司(其他台港澳股份有限公司)"):
		code = "6290"
	case has("台港澳股份有限公司"):
		code = "6200"
	case has("台港澳非公司"):
		code = "6300"

	case has("国有事业单位营业"):
		name = "国有事业单位营业"

	// State-controlled companies. "非上市" is tested before "上市" because
	// the former contains the latter.
	case has("国有控股"):
		switch {
		case has("非上市"):
			if has("股份有限公司分公司") {
				// NOTE(review): 1223 for a branch looks inconsistent with
				// the 2xxx branch codes used below — confirm.
				code = "1223"
			}
		case has("上市"):
			if has("股份有限公司分公司") {
				code = "2213"
			}
		case has("股份有限公司分公司"):
			code = "2223"
		case has("有限责任公司分公司"):
			code = "2140"
		case has("有限责任公司"):
			code = "1140"
		}
	case has("国有"):
		switch {
		case has("经营单位") && has("非法人"):
			code = "4410"
		case has("国有联营"):
			code = "4600"
		}

	// Partnerships. "特殊普通合伙" is tested before "普通合伙" because the
	// former contains the latter.
	case has("合伙企业"):
		switch {
		case has("特殊普通合伙"):
			code = "4532"
		case has("普通合伙"):
			code = "4530"
		case has("有限合伙"):
			code = "4533"
		case has("合伙企业分支机构"):
			code = "4550"
		}
	case has("合伙私营企业"):
		code = "4530"

	// Farmers' cooperatives.
	case has("合作社"):
		switch {
		case has("农民专业合作社分支机构"):
			name = "农民专业合作社分支机构"
		case has("农民专业合作社"):
			name = "农民专业合作社"
		case has("合作社分支机构"):
			name = "农民专业合作社分支机构"
		default:
			name = "农民专业合作社"
		}

	// Non-company enterprises.
	case has("非公司"):
		switch {
		case has("港、澳、台投资企业分支机构"):
			code = "6820"
		case has("港、澳、台企业(港澳台合资)"):
			code = "6310"
		case has("非公司外商投资企业") && has("其他"):
			code = "5390"
		}

	// Joint-stock companies. Where the separator may be a comma, the ASCII
	// and full-width (U+FF0C) forms are both accepted.
	case has("股份有限公司(上市公司)"):
		code = "1210"
	case has("股份有限公司(中外合资、上市)"):
		code = "5220"
	case has("股份有限公司(中外合资、未上市)") ||
		has("股份有限公司(中外合资,未上市)") ||
		has("股份有限公司(中外合资\uFF0C未上市)"):
		code = "5210"
	case has("股份有限公司(台港澳与境内合资、上市)") ||
		has("股份有限公司(台港澳与境内合资,上市)") ||
		has("股份有限公司(台港澳与境内合资\uFF0C上市)"):
		code = "6220"
	case has("股份有限公司(台港澳与境内合资、未上市)"):
		code = "6210"
	case has("股份有限公司(台港澳与外国投资者合资、未上市)"):
		code = "6250"
	case has("股份有限公司(台港澳合资、上市)"):
		code = "6240"
	case has("股份有限公司(台港澳合资、未上市)"):
		code = "6230"
	}

	return name, code
}
- // 📦 加载经营主体类型 Sheet,返回 名称=>代码 的映射
- func loadCodeMap(f *excelize.File, sheetName string) (map[string]string, error) {
- codeMap := make(map[string]string)
- rows, err := f.GetRows(sheetName)
- if err != nil {
- return nil, err
- }
- for idx, row := range rows {
- if idx == 0 {
- continue // 跳过表头
- }
- if len(row) >= 2 {
- name := strings.TrimSpace(row[1])
- code := strings.TrimSpace(row[0])
- // 替换中文括号为英文括号
- name = strings.ReplaceAll(name, "(", "(")
- name = strings.ReplaceAll(name, ")", ")")
- codeMap[name] = code
- }
- }
- return codeMap, nil
- }
- func loadNameMap(f *excelize.File, sheetName string) (map[string]string, error) {
- codeMap := make(map[string]string)
- rows, err := f.GetRows(sheetName)
- if err != nil {
- return nil, err
- }
- for idx, row := range rows {
- if idx == 0 {
- continue // 跳过表头
- }
- if len(row) >= 2 {
- name := strings.TrimSpace(row[1])
- code := strings.TrimSpace(row[0])
- // 替换中文括号为英文括号
- name = strings.ReplaceAll(name, "(", "(")
- name = strings.ReplaceAll(name, ")", ")")
- codeMap[code] = name
- }
- }
- return codeMap, nil
- }
|