Ver Fonte

修复专项债 发行利率 格式错误

wangchengcheng há 3 dias
pai
commit
8f61af518b
4 ficheiros alterados com 13 adições e 251 exclusões
  1. 0 243
      zhaiquan/attach.go
  2. 1 1
      zhaiquan/config.go
  3. 12 7
      zhaiquan/main.go
  4. BIN
      zhaiquan/zhaiquan-import

+ 0 - 243
zhaiquan/attach.go

@@ -1,243 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"github.com/yanyiwu/gojieba"
-	"gorm.io/gorm"
-	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"regexp"
-	"strings"
-)
-
-// dealAttachment 处理债券附件
-//func dealAttachment1() {
-//	sess := Mgo.GetMgoConn()
-//	defer Mgo.DestoryMongoConn(sess)
-//	query := sess.DB("py_theme").C("special_purpose_bond_files_detail").Find(nil).Select(nil).Iter()
-//	count := 0
-//
-//	bonds := make([]BondInfo, 0) // MySQL 债券数据
-//	for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
-//		if count%100 == 0 {
-//			log.Info("current:", zap.Int("count", count), zap.Any("title", tmp["title"]))
-//		}
-//		//
-//		if atta, ok := tmp["attachments"]; ok {
-//			if atm, ok := atta.(map[string]interface{}); ok {
-//				for _, v := range atm {
-//					if dd, ok := v.(map[string]interface{}); ok {
-//						//attachments 里面的文件名称
-//						filename := util.ObjToString(dd["filename"])
-//						fmt.Println(filename)
-//					}
-//				}
-//			}
-//		}
-//	}
-//}
-
-func dealAttachment(db *gorm.DB) {
-	sess := Mgo.GetMgoConn()
-	defer Mgo.DestoryMongoConn(sess)
-
-	// 1. 获取 MySQL 所有债券数据
-	var bonds []BondInfo
-	if err := db.Table("zxz_bond_info").Find(&bonds).Error; err != nil {
-		fmt.Println("获取 MySQL 债券数据失败:", err)
-		return
-	}
-
-	// 2. 初始化分词器
-	tokenizer := gojieba.NewJieba()
-	defer tokenizer.Free()
-
-	// 3. 遍历 MongoDB
-	query := sess.DB("py_theme").C("special_purpose_bond_files_detail").Find(nil).Iter()
-	count := 0
-	tmp := make(map[string]interface{})
-
-	for query.Next(&tmp) {
-		count++
-		if count%100 == 0 {
-			fmt.Println("Progress:", count)
-		}
-
-		if attachments, ok := tmp["attachments"].(map[string]interface{}); ok {
-			for _, item := range attachments {
-				if attMap, ok := item.(map[string]interface{}); ok {
-					filename := util.ObjToString(attMap["filename"])
-					cleanFilename := strings.TrimSuffix(filename, ".pdf")
-					// 提取 MongoDB 附件关键词
-					k1 := ExtractKeywords(cleanFilename, tokenizer)
-
-					// 逐个匹配 MySQL 中的 bond_name
-					for _, bond := range bonds {
-						k2 := ExtractKeywords(bond.BondName, tokenizer)
-						if IsMatch(k1, k2) {
-							fmt.Printf("匹配成功: [%s] <=> [%s]\n", cleanFilename, bond.BondName)
-							// 可记录匹配对结果到文件/数据库
-							break
-						}
-					}
-				}
-			}
-		}
-	}
-}
-
-func ExtractKeywordsV4(text string) []map[string]string {
-	text = strings.ReplaceAll(text, "(", "(")
-	text = strings.ReplaceAll(text, ")", ")")
-
-	// 匹配模式:完整提取年 + 省 + 债券类型 + 期数 + 括号说明
-	mainReg := regexp.MustCompile(`(?P<year>\d{4}年)(?P<province>[\p{Han}]{2,3}省)(?P<type>[\p{Han}]{2,20}债券)(?P<phase>[一二三四五六七八九十百至]{1,6}期)?(?:\((?P<bracket>[^)]+)\))?`)
-	matches := mainReg.FindAllStringSubmatch(text, -1)
-
-	results := make([]map[string]string, 0, len(matches))
-
-	for _, match := range matches {
-		if len(match) < 6 {
-			continue
-		}
-		year := match[1]
-		province := match[2]
-		bondType := match[3]
-		phase := match[4]
-		bracket := match[5]
-
-		results = append(results, map[string]string{
-			"year":     year,
-			"province": province,
-			"type":     bondType,
-			"phase":    phase,
-			"bracket":  bracket,
-		})
-	}
-
-	return results
-}
-
-func ExtractKeywordsV3WithJieba(text string, tokenizer *gojieba.Jieba) []map[string]string {
-	text = strings.ReplaceAll(text, "(", "(")
-	text = strings.ReplaceAll(text, ")", ")")
-
-	// 提取年份
-	yearReg := regexp.MustCompile(`\d{4}年`)
-	year := yearReg.FindString(text)
-
-	// 提取“年”后的省份
-	province := ""
-	if year != "" {
-		afterYear := text[strings.Index(text, year)+len(year):]
-		provinceReg := regexp.MustCompile(`[\p{Han}]{2,3}省`)
-		province = provinceReg.FindString(afterYear)
-	}
-
-	// 债券类型词典
-	bondTypes := []string{
-		"专项债券", "政府专项债券", "一般债券", "政府一般债券",
-		"再融资专项债券", "再融资一般债券", "再融资债券",
-	}
-
-	// combo 匹配正则
-	comboReg := regexp.MustCompile(`(?P<type>[\p{Han}]{2,12}债券)[,、,]?(?P<phase>[一二三四五六七八九十百至]{1,6}期)?(?:\((?P<bracket>[^)]+)\))?`)
-	matches := comboReg.FindAllStringSubmatch(text, -1)
-
-	results := make([]map[string]string, 0, len(matches))
-
-	for _, match := range matches {
-		if len(match) < 4 {
-			continue
-		}
-		bondType := match[1]
-		phase := match[2]
-		bracket := match[3]
-
-		// 精确类型匹配
-		bestMatch := ""
-		for _, t := range bondTypes {
-			if strings.Contains(bondType, t) {
-				bestMatch = t
-				break
-			}
-		}
-		if bestMatch == "" {
-			bestMatch = bondType // fallback
-		}
-
-		results = append(results, map[string]string{
-			"year":     year,
-			"province": province,
-			"type":     bestMatch,
-			"phase":    phase,
-			"bracket":  bracket,
-		})
-	}
-
-	return results
-}
-
-// ExtractKeywords 提取五类关键词
-func ExtractKeywords(text string, tokenizer *gojieba.Jieba) map[string]string {
-	text = removeFileExtension(text)
-	// 分词
-	words := tokenizer.Cut(text, true)
-	wordSet := make(map[string]bool)
-	for _, w := range words {
-		wordSet[w] = true
-	}
-
-	// 正则抽取
-	yearReg := regexp.MustCompile(`\d{4}(年|年度)`)
-	provinceReg := regexp.MustCompile(`20\d{2}年([\p{Han}]{2,3}省)`)
-	phaseReg := regexp.MustCompile(`(第[一二三四五六七八九十百]{1,3}期|[一二三四五六七八九十百]{1,3}至[一二三四五六七八九十百]{1,3}期)`)
-	bracketReg := regexp.MustCompile(`([^)]+)`)
-
-	// 提取关键字段
-	year := yearReg.FindString(text)
-	province := ""
-	if match := provinceReg.FindStringSubmatch(text); len(match) == 2 {
-		province = match[1]
-	}
-	phase := phaseReg.FindString(text)
-	bracket := bracketReg.FindString(text)
-
-	// 通过词判断类型
-	bondType := ""
-	for _, t := range []string{"专项债券", "专项债", "一般债券", "一般债", "再融资债", "再融资一般债"} {
-		if wordSet[t] {
-			bondType = t
-			break
-		}
-	}
-
-	return map[string]string{
-		"year":     year,
-		"province": province,
-		"phase":    phase,
-		"bracket":  bracket,
-		"type":     bondType,
-	}
-}
-
-// removeFileExtension 去除常见扩展名
-func removeFileExtension(text string) string {
-	suffixes := []string{".pdf", ".doc", ".docx", ".xls", ".xlsx", ".txt", ".zip"}
-	for _, ext := range suffixes {
-		if strings.HasSuffix(text, ext) {
-			return strings.TrimSuffix(text, ext)
-		}
-	}
-	return text
-}
-
-// IsMatch 判断关键词是否完全一致
-func IsMatch(k1, k2 map[string]string) bool {
-	for k := range k1 {
-		if k1[k] != "" && k1[k] == k2[k] {
-			continue
-		}
-		return false
-	}
-	return true
-}

+ 1 - 1
zhaiquan/config.go

@@ -173,7 +173,7 @@ type ProjectIssueDetails struct {
 	FirstPublishDate       string  `json:"first_publish_date" gorm:"size:50;column:first_publish_date;comment:发布时间"`
 	BatchNum               int     `json:"batch_num" gorm:"column:batch_num;comment:批次"`
 	PresentIssueAmount     float64 `json:"present_issue_amount" gorm:"column:present_issue_amount;comment:发行额"`
-	IssueInterestRate      float64 `json:"issue_interest_rate" gorm:"column:issue_interest_rate;comment:发行利率"`
+	IssueInterestRate      string  `json:"issue_interest_rate" gorm:"column:issue_interest_rate;comment:发行利率"`
 	PresentAsSpecialAmount float64 `json:"present_as_special_amount" gorm:"column:present_as_special_amount;comment:专项债作为资本金发行额"`
 	TotalIssueAmount       float64 `json:"total_issue_amount" gorm:"column:total_issue_amount;comment:累计发行金额"`
 	ReviseLog              string  `json:"revise_log" gorm:"size:1000;column:revise_log;comment:调整记录"`

+ 12 - 7
zhaiquan/main.go

@@ -1,14 +1,15 @@
 package main
 
 import (
+	"strings"
+	"time"
+
 	"github.com/robfig/cron/v3"
 	"go.uber.org/zap"
 	"gorm.io/gorm"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
-	"strings"
-	"time"
 )
 
 var (
@@ -18,9 +19,13 @@ var (
 
 func main() {
 	Init()
-	exportData()
-	return
-	
+
+	//导出债券数据
+	/**
+	//exportData()
+	//return
+	*/
+
 	local, _ := time.LoadLocation("Asia/Shanghai")
 	c := cron.New(cron.WithLocation(local), cron.WithSeconds())
 	eid, err := c.AddFunc(GF.Cron.Spec, importData) // 处理增量专项债
@@ -297,7 +302,7 @@ func dealProjectIssueDetails(fxmx []interface{}, projectName string, projectId i
 				issue_detail_exist.FirstPublishDate = util.ObjToString(fx["firstPublishDate"])
 				issue_detail_exist.BatchNum = util.IntAll(fx["batchNum"])
 				issue_detail_exist.PresentIssueAmount = util.Float64All(fx["presentIssueAmount"])
-				issue_detail_exist.IssueInterestRate = util.Float64All(fx["issueInterestRate"])
+				issue_detail_exist.IssueInterestRate = util.ObjToString(fx["issueInterestRate"])
 				issue_detail_exist.PresentAsSpecialAmount = util.Float64All(fx["presentAsSpecialAmount"])
 				issue_detail_exist.TotalIssueAmount = util.Float64All(fx["totalIssueAmount"])
 				issue_detail_exist.ReviseLog = util.ObjToString(fx["revise_log"])
@@ -314,7 +319,7 @@ func dealProjectIssueDetails(fxmx []interface{}, projectName string, projectId i
 					FirstPublishDate:       util.ObjToString(fx["firstPublishDate"]),
 					BatchNum:               util.IntAll(fx["batchNum"]),
 					PresentIssueAmount:     util.Float64All(fx["presentIssueAmount"]),
-					IssueInterestRate:      util.Float64All(fx["issueInterestRate"]),
+					IssueInterestRate:      util.ObjToString(fx["issueInterestRate"]),
 					PresentAsSpecialAmount: util.Float64All(fx["presentAsSpecialAmount"]),
 					TotalIssueAmount:       util.Float64All(fx["totalIssueAmount"]),
 					ReviseLog:              util.ObjToString(fx["revise_log"]),

BIN
zhaiquan/zhaiquan-import