package main import ( "context" "encoding/json" "fmt" ckgo "github.com/ClickHouse/clickhouse-go/v2" "github.com/ClickHouse/clickhouse-go/v2/lib/driver" "io/ioutil" util "jygit.jydev.jianyu360.cn/data_processing/common_utils" "log" "os" "strings" "time" ) // connectClickhouse 连接Clickhouse,其它方式连接;bitmap字段无法处理,需要使用下面方法 func connectClickhouse(host, username, password, dbname string) (driver.Conn, error) { //host := "192.168.3.207:19000" //username := "jytop" //password := "pwdTopJy123" //dbname := "pub_tags" var ( ctx = context.Background() conn, err = ckgo.Open(&ckgo.Options{ Addr: []string{host}, DialTimeout: 10 * time.Second, MaxIdleConns: 3, MaxOpenConns: 30, Auth: ckgo.Auth{ Database: dbname, Username: username, Password: password, }, Debugf: func(format string, v ...interface{}) { log.Println(format, v) }, }) ) if err != nil { return nil, err } if err := conn.Ping(ctx); err != nil { if exception, ok := err.(*ckgo.Exception); ok { log.Println("Exception [%d] %s \n%s\n", exception.Code, exception.Message, exception.StackTrace) } return nil, err } return conn, nil } // 动态构建 toUInt64 数组字符串 func buildToUInt64Array(labels []uint64) string { if len(labels) == 0 { return "[]" } toUInt64Labels := make([]string, len(labels)) for i, label := range labels { toUInt64Labels[i] = fmt.Sprintf("toUInt64(%d)", label) } return fmt.Sprintf("[%s]", strings.Join(toUInt64Labels, ", ")) } // IndustryClassification 国标行业分类的结构体 type IndustryClassification struct { Code string `json:"code"` EntCode uint64 `json:"ent_code"` // 法人库bitmap Name string `json:"name"` Children []IndustryClassification `json:"children"` Level int `json:"level"` // 新增字段 Level } // 全局变量 seen 用于记录已经分配的 Name 和 EntCode var seen = make(map[string]uint64) // assignLevel 为每个分类分配层级 func assignLevel(classifications []IndustryClassification, level int) { for i := range classifications { classifications[i].Level = level // 递归处理子分类 if len(classifications[i].Children) > 0 { assignLevel(classifications[i].Children, level+1) } } } // assignEntCode 为每个分类分配 EntCode func assignEntCode(classifications []IndustryClassification, entCode *uint64) { for i := range classifications { // 如果是第三级或更高层级,且只有一个子分类且名称相同,父子共享EntCode if len(classifications[i].Children) == 1 && classifications[i].Level == 3 && classifications[i].Name == classifications[i].Children[0].Name { classifications[i].EntCode = *entCode classifications[i].Children[0].EntCode = *entCode //seen[classifications[i].Name] = *entCode (*entCode)++ continue } else { classifications[i].EntCode = *entCode //seen[classifications[i].Name] = *entCode (*entCode)++ } // 如果有子分类,则递归处理 if len(classifications[i].Children) > 0 { assignEntCode(classifications[i].Children, entCode) } } } // readIndustryClassifications 读取行业分类数据 func readIndustryClassifications(filePath string) ([]IndustryClassification, error) { // 打开文件 file, err := os.Open(filePath) if err != nil { return nil, err } defer file.Close() // 读取文件内容 bytes, err := ioutil.ReadAll(file) if err != nil { return nil, err } // 解析 JSON var classifications []IndustryClassification err = json.Unmarshal(bytes, &classifications) if err != nil { return nil, err } // 先分配层级 assignLevel(classifications, 1) // 然后分配EntCode startEntCode := uint64(192) assignEntCode(classifications, &startEntCode) return classifications, nil } // readFile d读取文件 func readFile() []IndustryClassification { // 读取 JSON 文件 filePath := "./国民经济行业分类_2017.json" //var err error classifications, err := readIndustryClassifications(filePath) if err != nil { fmt.Println("Error reading industry classifications:", err) return []IndustryClassification{} } return classifications //// 打印结果(调试用) //output, _ := json.MarshalIndent(classifications, "", " ") //fmt.Println(string(output)) } // removeDuplicates 去除重复字符串 func removeDuplicates(arr []string) []string { uniqueMap := make(map[string]bool) var result []string for _, str := range arr { if !uniqueMap[str] { uniqueMap[str] = true result = append(result, str) } } return result } // getMondayOfCurrentWeek 获取本周周一 func getMondayOfCurrentWeek() time.Time { // 获取当前时间 now := time.Now() // 获取今天是本周的第几天(0 表示周日,1 表示周一,...,6 表示周六) weekday := int(now.Weekday()) // 如果是周日,转换为 7(以便计算为上一周的最后一天) if weekday == 0 { weekday = 7 } // 计算周一日期 monday := now.AddDate(0, 0, -weekday+1) // 清除时间部分,保留日期 monday = time.Date(monday.Year(), monday.Month(), monday.Day(), 0, 0, 0, 0, monday.Location()) return monday } // CalculateRegionCode 处理地域代码 func CalculateRegionCode(area string, city string, district string) (area_code string, city_code string, district_code string) { area_code, city_code, district_code = "000000", "", "" if district != "" { key := area + "~" + city + "~" + district + "~" code := RegionCodeData[key] if code != "" { district_code = code city_code = code[:4] + "00" area_code = code[:2] + "0000" return } } if city != "" { key := area + "~" + city + "~" + "" + "~" code := RegionCodeData[key] if code != "" { city_code = code area_code = city_code[:2] + "0000" return } } if area != "" { key := area + "~" + "" + "~" + "" + "~" code := RegionCodeData[key] if code != "" { area_code = code return } } return } // getCompanyLabelBitmap 根据统一信用代码和ID获取bitmap 和对应标签字符串 func getCompanyLabelBitmap(creditNo, companyID string) (bitLabels []uint64, nameLabels []string) { //var bitLabels = make([]uint64, 0) //bitmap数组 //var nameLabels = make([]string, 0) //标签数组 //1 dealCompanyNo tags1 := dealCompanyNo(creditNo) if len(tags1) > 0 { bitLabels = append(bitLabels, tags1...) } //获取对应Bitmap的标签字符串 for _, v := range tags1 { if la, ok := entLabelMap[v]; ok { nameLabels = append(nameLabels, la) } } //2.获取凭安库里的国标经济行业分类 label := getLabelsByID(companyID) if label != "" { labels := strings.Split(label, "-") for _, v := range labels { if bit, ok := nameBitMap[v]; ok { bitLabels = append(bitLabels, bit) nameLabels = append(nameLabels, v) } } } return } // getLabelsByID 根据名称,获取凭安库的国标经济行业分类 func getLabelsByID(companyID string) string { whereIndustry := map[string]interface{}{ "company_id": companyID, } industry, _ := MgoQY.FindOne("company_industry", whereIndustry) if industry != nil && len(*industry) > 0 { label := "" if util.ObjToString((*industry)["industry_l1_name"]) != "" { label = util.ObjToString((*industry)["industry_l1_name"]) } else { return "" } //二级名称 if util.ObjToString((*industry)["industry_l2_name"]) != "" { label = label + "-" + util.ObjToString((*industry)["industry_l2_name"]) } if util.ObjToString((*industry)["industry_l3_name"]) != "" { label = label + "-" + util.ObjToString((*industry)["industry_l3_name"]) } if util.ObjToString((*industry)["industry_l4_name"]) != "" { label = label + "-" + util.ObjToString((*industry)["industry_l4_name"]) } return label } return "" } // dealCompanyNo dealCompanyNo 统一信用代码 func dealCompanyNo(companyNo string) (newTags []uint64) { // 前缀与标签映射表 prefixTagMap := map[string]uint64{ "11": 151, "12": 152, "13": 153, "19": 154, "21": 155, "29": 156, "31": 157, "32": 158, "33": 159, "34": 160, "35": 161, "39": 162, "41": 163, "49": 164, "51": 165, "52": 166, "53": 167, "59": 168, "61": 169, "62": 170, "69": 171, "71": 172, "72": 173, "79": 174, "81": 175, "89": 176, "91": 177, "92": 178, "93": 179, "A1": 180, "A2": 181, "N1": 182, "N2": 183, "N3": 184, "N9": 185, "Y1": 186, } // 遍历映射表进行前缀匹配 for prefix, tag := range prefixTagMap { if strings.HasPrefix(companyNo, prefix) { newTags = append(newTags, tag) } } return } // dealOrgTags 处理国民经济行业分类 func dealOrgTags(org_tags string) (newTags []uint64) { var categoryMap = map[string]uint64{ "外交": 2193, "发展和改革": 2194, "科学技术/科技": 2195, "民族事务": 2196, "保密局": 2197, "国安局": 2198, "司法": 2199, "法院": 2200, "检察院": 2201, "人力资源和社会保障": 2202, "生态环境": 2203, "交通运输": 2204, "农业农村": 2205, "退役军人事务": 2206, "人民银行": 2207, "国防": 2208, "教育": 2209, "党校": 2210, "工业和信息化": 2211, "公安": 2212, "民政": 2213, "财政": 2214, "自然资源(包含规划)": 2215, "住建": 2216, "水利": 2217, "商务": 2218, "卫生健康": 2219, "应急管理": 2220, "审计": 2221, "国有资产监督管理": 2222, "海关": 2223, "市场监督": 2224, "证券监督管理": 2225, "体育": 2226, "统计": 2227, "国际发展合作": 2228, "税务": 2229, "金融": 2230, "广播电视": 2231, "信访": 2232, "知识产权": 2233, "医疗保障": 2234, "新华通讯社": 2235, "气象": 2236, "科学院": 2237, "工程院": 2238, "粮食和物资储备": 2239, "数据": 2240, "烟草专卖": 2241, "林业和草原": 2242, "民用航空": 2243, "文物": 2244, "疾病预防控制": 2245, "消防救援": 2246, "药品监督": 2247, "能源": 2248, "移民": 2249, "铁路": 2250, "邮政": 2251, "中医药": 2252, "外汇": 2253, "供销合作社": 2254, "公共资源交易中心": 2255, "监狱": 2256, "城乡建设": 2257, "文旅": 2258, "人民防空": 2259, "园林": 2260, "物流口岸": 2261, "大数据": 2262, "政务服务": 2263, "地方史志": 2264, "住房公积金管理中心": 2265, "仲裁": 2266, "招商": 2267, "社保中心": 2268, "管委会": 2269, "人民政府": 2270, "工商联": 2271, "残联": 2272, "妇联": 2273, "艺术联": 2274, "侨联": 2275, "台联": 2276, "城管": 2277, "编办": 2278, "政协": 2279, "民主党派": 2280, "党委": 2281, "团委": 2282, "人大": 2283, "档案局": 2284, "武装": 2285, "医院": 2286, "渔业": 2287, "学校": 2288, } orgTags := strings.TrimSpace(org_tags) if strings.Contains(orgTags, "-") { classifications := readFile() //读取国标行业分类 if len(classifications) > 0 { ss := findEntCodesByLabel(orgTags, classifications) newTags = append(newTags, ss...) } } else { //其它单独标签 for prefix, tag := range categoryMap { if orgTags == prefix { newTags = append(newTags, tag) } } } return } // 根据label返回对应的EntCode数组 func findEntCodesByLabel(label string, classifications []IndustryClassification) []uint64 { // 将label分割成多个层级 labels := strings.Split(label, "-") var result []uint64 if len(labels) > 0 { for k, v := range labels { rs := findIndustryClassification(k+1, v, classifications) if rs != nil { result = append(result, rs.EntCode) } } } return result } // 根据level和name查找对应的IndustryClassification func findIndustryClassification(level int, name string, classifications []IndustryClassification) *IndustryClassification { // 遍历每个分类 for _, classification := range classifications { // 如果当前分类的level和name匹配,返回当前分类 if classification.Level == level && classification.Name == name { return &classification } // 如果当前分类有子分类,则递归查找子分类 if len(classification.Children) > 0 { if result := findIndustryClassification(level, name, classification.Children); result != nil { return result } } } // 如果没有找到匹配的分类,返回nil return nil } // getQyxyStd 获取qyxy_std 数据 func getQyxyStd(companyName string) map[string]interface{} { qyxy_info := map[string]interface{}{} where := map[string]interface{}{ "company_name": companyName, } dataArr, _ := Mgo.FindOne("qyxy_std", where) if len(*dataArr) > 0 { qyxy_info = *dataArr return qyxy_info } else { where2 := map[string]interface{}{ "history_name": companyName, } hisData, _ := MgoQY.FindOne("company_history_name", where2) if len(*hisData) > 0 { where3 := map[string]interface{}{ "company_name": companyName, } dataArr, _ = Mgo.FindOne("qyxy_std", where3) if len(*dataArr) > 0 { qyxy_info = *dataArr return qyxy_info } } } return qyxy_info }