123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280 |
- package main
- import (
- "context"
- "encoding/json"
- "fmt"
- ckgo "github.com/ClickHouse/clickhouse-go/v2"
- "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
- "io/ioutil"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "log"
- "os"
- "strings"
- "time"
- )
- // connectClickhouse 连接Clickhouse,其它方式连接;bitmap字段无法处理,需要使用下面方法
- func connectClickhouse(host, username, password, dbname string) (driver.Conn, error) {
- //host := "192.168.3.207:19000"
- //username := "jytop"
- //password := "pwdTopJy123"
- //dbname := "pub_tags"
- var (
- ctx = context.Background()
- conn, err = ckgo.Open(&ckgo.Options{
- Addr: []string{host},
- DialTimeout: 10 * time.Second,
- MaxIdleConns: 3,
- MaxOpenConns: 30,
- Auth: ckgo.Auth{
- Database: dbname,
- Username: username,
- Password: password,
- },
- Debugf: func(format string, v ...interface{}) {
- log.Println(format, v)
- },
- })
- )
- if err != nil {
- return nil, err
- }
- if err := conn.Ping(ctx); err != nil {
- if exception, ok := err.(*ckgo.Exception); ok {
- log.Println("Exception [%d] %s \n%s\n", exception.Code, exception.Message, exception.StackTrace)
- }
- return nil, err
- }
- return conn, nil
- }
// buildToUInt64Array renders labels as a bracketed, comma-separated list in
// which every element is wrapped in toUInt64(), for example
// "[toUInt64(1), toUInt64(2)]". A nil or empty slice yields "[]".
func buildToUInt64Array(labels []uint64) string {
	var sb strings.Builder
	sb.WriteByte('[')
	for idx, value := range labels {
		if idx > 0 {
			sb.WriteString(", ")
		}
		fmt.Fprintf(&sb, "toUInt64(%d)", value)
	}
	sb.WriteByte(']')
	return sb.String()
}
// IndustryClassification is one node of the national-standard industry
// classification tree. Nodes nest through Children, and the JSON tags define
// the field names used when the tree is decoded from or encoded to JSON.
type IndustryClassification struct {
	// Code is the classification code of this node.
	Code string `json:"code"`
	// EntCode is the bitmap value used for this node in the legal-entity
	// (enterprise) library.
	EntCode uint64 `json:"ent_code"`
	// Name is the display name of this node.
	Name string `json:"name"`
	// Children holds the direct sub-classifications of this node.
	Children []IndustryClassification `json:"children"`
	// Level is the depth of this node within the tree.
	Level int `json:"level"`
}
// seen is a package-level map from a classification Name to the EntCode that
// was assigned to it. It starts out empty and non-nil.
// NOTE(review): the only writes to it visible in this part of the file are
// commented out — confirm whether it is still used elsewhere.
var seen = map[string]uint64{}
- // assignLevel 为每个分类分配层级
- func assignLevel(classifications []IndustryClassification, level int) {
- for i := range classifications {
- classifications[i].Level = level
- // 递归处理子分类
- if len(classifications[i].Children) > 0 {
- assignLevel(classifications[i].Children, level+1)
- }
- }
- }
- // assignEntCode 为每个分类分配 EntCode
- func assignEntCode(classifications []IndustryClassification, entCode *uint64) {
- for i := range classifications {
- // 如果是第三级或更高层级,且只有一个子分类且名称相同,父子共享EntCode
- if len(classifications[i].Children) == 1 && classifications[i].Level == 3 && classifications[i].Name == classifications[i].Children[0].Name {
- classifications[i].EntCode = *entCode
- classifications[i].Children[0].EntCode = *entCode
- //seen[classifications[i].Name] = *entCode
- (*entCode)++
- continue
- } else {
- classifications[i].EntCode = *entCode
- //seen[classifications[i].Name] = *entCode
- (*entCode)++
- }
- // 如果有子分类,则递归处理
- if len(classifications[i].Children) > 0 {
- assignEntCode(classifications[i].Children, entCode)
- }
- }
- }
- // readIndustryClassifications 读取行业分类数据
- func readIndustryClassifications(filePath string) ([]IndustryClassification, error) {
- // 打开文件
- file, err := os.Open(filePath)
- if err != nil {
- return nil, err
- }
- defer file.Close()
- // 读取文件内容
- bytes, err := ioutil.ReadAll(file)
- if err != nil {
- return nil, err
- }
- // 解析 JSON
- var classifications []IndustryClassification
- err = json.Unmarshal(bytes, &classifications)
- if err != nil {
- return nil, err
- }
- // 先分配层级
- assignLevel(classifications, 1)
- // 然后分配EntCode
- startEntCode := uint64(192)
- assignEntCode(classifications, &startEntCode)
- return classifications, nil
- }
- // readFile d读取文件
- func readFile() []IndustryClassification {
- // 读取 JSON 文件
- filePath := "./国民经济行业分类_2017.json"
- //var err error
- classifications, err := readIndustryClassifications(filePath)
- if err != nil {
- fmt.Println("Error reading industry classifications:", err)
- return []IndustryClassification{}
- }
- return classifications
- //// 打印结果(调试用)
- //output, _ := json.MarshalIndent(classifications, "", " ")
- //fmt.Println(string(output))
- }
// removeDuplicates returns the strings of arr with repeated values dropped,
// keeping the first occurrence of each value in its original order. The
// result is nil when arr contains no elements.
func removeDuplicates(arr []string) []string {
	var unique []string
	visited := make(map[string]struct{})
	for _, s := range arr {
		if _, dup := visited[s]; dup {
			continue
		}
		visited[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
// getMondayOfCurrentWeek returns 00:00:00 on the Monday of the week that
// contains the current moment, in the location of time.Now(). Weeks run
// Monday through Sunday, so on a Sunday the result is six days earlier.
func getMondayOfCurrentWeek() time.Time {
	now := time.Now()

	// time.Weekday numbers days from Sunday = 0. Shifting by six modulo seven
	// turns that into "days elapsed since Monday" (Monday = 0 … Sunday = 6).
	daysSinceMonday := (int(now.Weekday()) + 6) % 7

	// Step back to Monday first, then strip the time-of-day component.
	day := now.AddDate(0, 0, -daysSinceMonday)
	year, month, dayOfMonth := day.Date()
	return time.Date(year, month, dayOfMonth, 0, 0, 0, 0, day.Location())
}
- // CalculateRegionCode 处理地域代码
- func CalculateRegionCode(area string, city string, district string) (area_code string, city_code string, district_code string) {
- area_code, city_code, district_code = "000000", "", ""
- if district != "" {
- key := area + "~" + city + "~" + district + "~"
- code := RegionCodeData[key]
- if code != "" {
- district_code = code
- city_code = code[:4] + "00"
- area_code = code[:2] + "0000"
- return
- }
- }
- if city != "" {
- key := area + "~" + city + "~" + "" + "~"
- code := RegionCodeData[key]
- if code != "" {
- city_code = code
- area_code = city_code[:2] + "0000"
- return
- }
- }
- if area != "" {
- key := area + "~" + "" + "~" + "" + "~"
- code := RegionCodeData[key]
- if code != "" {
- area_code = code
- return
- }
- }
- return
- }
- // getCompanyLabelBitmap 根据统一信用代码和ID获取bitmap 和对应标签字符串
- func getCompanyLabelBitmap(creditNo, companyID string) (bitLabels []uint64, nameLabels []string) {
- //var bitLabels = make([]uint64, 0) //bitmap数组
- //var nameLabels = make([]string, 0) //标签数组
- //1 dealCompanyNo
- tags1 := dealCompanyNo(creditNo)
- if len(tags1) > 0 {
- bitLabels = append(bitLabels, tags1...)
- }
- //获取对应Bitmap的标签字符串
- for _, v := range tags1 {
- if la, ok := entLabelMap[v]; ok {
- nameLabels = append(nameLabels, la)
- }
- }
- //2.获取凭安库里的国标经济行业分类
- label := getLabelsByID(companyID)
- if label != "" {
- labels := strings.Split(label, "-")
- for _, v := range labels {
- if bit, ok := nameBitMap[v]; ok {
- bitLabels = append(bitLabels, bit)
- nameLabels = append(nameLabels, v)
- }
- }
- }
- return
- }
- // getLabelsByID 根据名称,获取凭安库的国标经济行业分类
- func getLabelsByID(companyID string) string {
- whereIndustry := map[string]interface{}{
- "company_id": companyID,
- }
- industry, _ := MgoQY.FindOne("company_industry", whereIndustry)
- if industry != nil && len(*industry) > 0 {
- label := ""
- if util.ObjToString((*industry)["industry_l1_name"]) != "" {
- label = util.ObjToString((*industry)["industry_l1_name"])
- } else {
- return ""
- }
- //二级名称
- if util.ObjToString((*industry)["industry_l2_name"]) != "" {
- label = label + "-" + util.ObjToString((*industry)["industry_l2_name"])
- }
- if util.ObjToString((*industry)["industry_l3_name"]) != "" {
- label = label + "-" + util.ObjToString((*industry)["industry_l3_name"])
- }
- if util.ObjToString((*industry)["industry_l4_name"]) != "" {
- label = label + "-" + util.ObjToString((*industry)["industry_l4_name"])
- }
- return label
- }
- return ""
- }
|