123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493 |
- package main
- import (
- "context"
- "encoding/json"
- "fmt"
- ckgo "github.com/ClickHouse/clickhouse-go/v2"
- "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
- "io/ioutil"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "log"
- "os"
- "strings"
- "time"
- )
- // connectClickhouse 连接Clickhouse,其它方式连接;bitmap字段无法处理,需要使用下面方法
- func connectClickhouse(host, username, password, dbname string) (driver.Conn, error) {
- //host := "192.168.3.207:19000"
- //username := "jytop"
- //password := "pwdTopJy123"
- //dbname := "pub_tags"
- var (
- ctx = context.Background()
- conn, err = ckgo.Open(&ckgo.Options{
- Addr: []string{host},
- DialTimeout: 10 * time.Second,
- MaxIdleConns: 3,
- MaxOpenConns: 30,
- Auth: ckgo.Auth{
- Database: dbname,
- Username: username,
- Password: password,
- },
- Debugf: func(format string, v ...interface{}) {
- log.Println(format, v)
- },
- })
- )
- if err != nil {
- return nil, err
- }
- if err := conn.Ping(ctx); err != nil {
- if exception, ok := err.(*ckgo.Exception); ok {
- log.Println("Exception [%d] %s \n%s\n", exception.Code, exception.Message, exception.StackTrace)
- }
- return nil, err
- }
- return conn, nil
- }
// buildToUInt64Array renders labels as an array literal in which every
// element is wrapped in toUInt64(...), e.g. "[toUInt64(1), toUInt64(2)]".
// A nil or empty slice yields "[]".
func buildToUInt64Array(labels []uint64) string {
	var sb strings.Builder
	sb.WriteByte('[')
	for idx, value := range labels {
		if idx > 0 {
			sb.WriteString(", ")
		}
		fmt.Fprintf(&sb, "toUInt64(%d)", value)
	}
	sb.WriteByte(']')
	return sb.String()
}
// IndustryClassification is one node of the national-standard industry
// classification tree as decoded from JSON. Nodes nest through Children.
type IndustryClassification struct {
	// Code is the classification code read from the "code" JSON key.
	Code string `json:"code"`
	// EntCode is the bitmap value used for the legal-entity library.
	EntCode uint64 `json:"ent_code"`
	// Name is the classification's display name.
	Name string `json:"name"`
	// Children holds the direct sub-classifications, if any.
	Children []IndustryClassification `json:"children"`
	// Level is the node's depth in the tree (a field added after the
	// original schema).
	Level int `json:"level"`
}
// seen is a package-level map intended to record, per classification Name,
// the EntCode already assigned to it. The writes to it in assignEntCode are
// currently commented out.
var seen = map[string]uint64{}
- // assignLevel 为每个分类分配层级
- func assignLevel(classifications []IndustryClassification, level int) {
- for i := range classifications {
- classifications[i].Level = level
- // 递归处理子分类
- if len(classifications[i].Children) > 0 {
- assignLevel(classifications[i].Children, level+1)
- }
- }
- }
- // assignEntCode 为每个分类分配 EntCode
- func assignEntCode(classifications []IndustryClassification, entCode *uint64) {
- for i := range classifications {
- // 如果是第三级或更高层级,且只有一个子分类且名称相同,父子共享EntCode
- if len(classifications[i].Children) == 1 && classifications[i].Level == 3 && classifications[i].Name == classifications[i].Children[0].Name {
- classifications[i].EntCode = *entCode
- classifications[i].Children[0].EntCode = *entCode
- //seen[classifications[i].Name] = *entCode
- (*entCode)++
- continue
- } else {
- classifications[i].EntCode = *entCode
- //seen[classifications[i].Name] = *entCode
- (*entCode)++
- }
- // 如果有子分类,则递归处理
- if len(classifications[i].Children) > 0 {
- assignEntCode(classifications[i].Children, entCode)
- }
- }
- }
- // readIndustryClassifications 读取行业分类数据
- func readIndustryClassifications(filePath string) ([]IndustryClassification, error) {
- // 打开文件
- file, err := os.Open(filePath)
- if err != nil {
- return nil, err
- }
- defer file.Close()
- // 读取文件内容
- bytes, err := ioutil.ReadAll(file)
- if err != nil {
- return nil, err
- }
- // 解析 JSON
- var classifications []IndustryClassification
- err = json.Unmarshal(bytes, &classifications)
- if err != nil {
- return nil, err
- }
- // 先分配层级
- assignLevel(classifications, 1)
- // 然后分配EntCode
- startEntCode := uint64(192)
- assignEntCode(classifications, &startEntCode)
- return classifications, nil
- }
- // readFile d读取文件
- func readFile() []IndustryClassification {
- // 读取 JSON 文件
- filePath := "./国民经济行业分类_2017.json"
- //var err error
- classifications, err := readIndustryClassifications(filePath)
- if err != nil {
- fmt.Println("Error reading industry classifications:", err)
- return []IndustryClassification{}
- }
- return classifications
- //// 打印结果(调试用)
- //output, _ := json.MarshalIndent(classifications, "", " ")
- //fmt.Println(string(output))
- }
// removeDuplicates returns the distinct strings of arr in order of first
// appearance. The result is nil when arr has no elements.
func removeDuplicates(arr []string) []string {
	var unique []string
	visited := make(map[string]struct{})
	for _, item := range arr {
		if _, dup := visited[item]; dup {
			continue
		}
		visited[item] = struct{}{}
		unique = append(unique, item)
	}
	return unique
}
// getMondayOfCurrentWeek returns 00:00:00 on the Monday of the week that
// contains the current local time. Weeks run Monday through Sunday, so on a
// Sunday the result is the Monday six days earlier.
func getMondayOfCurrentWeek() time.Time {
	now := time.Now()

	// time.Weekday counts from Sunday (0). Adding 6 modulo 7 converts it to
	// the number of days elapsed since Monday: Monday -> 0, ..., Sunday -> 6.
	daysSinceMonday := (int(now.Weekday()) + 6) % 7

	day := now.AddDate(0, 0, -daysSinceMonday)
	year, month, dom := day.Date()

	// Rebuild the timestamp to drop the time-of-day part.
	return time.Date(year, month, dom, 0, 0, 0, 0, day.Location())
}
- // CalculateRegionCode 处理地域代码
- func CalculateRegionCode(area string, city string, district string) (area_code string, city_code string, district_code string) {
- area_code, city_code, district_code = "000000", "", ""
- if district != "" {
- key := area + "~" + city + "~" + district + "~"
- code := RegionCodeData[key]
- if code != "" {
- district_code = code
- city_code = code[:4] + "00"
- area_code = code[:2] + "0000"
- return
- }
- }
- if city != "" {
- key := area + "~" + city + "~" + "" + "~"
- code := RegionCodeData[key]
- if code != "" {
- city_code = code
- area_code = city_code[:2] + "0000"
- return
- }
- }
- if area != "" {
- key := area + "~" + "" + "~" + "" + "~"
- code := RegionCodeData[key]
- if code != "" {
- area_code = code
- return
- }
- }
- return
- }
- // getCompanyLabelBitmap 根据统一信用代码和ID获取bitmap 和对应标签字符串
- func getCompanyLabelBitmap(creditNo, companyID string) (bitLabels []uint64, nameLabels []string) {
- //var bitLabels = make([]uint64, 0) //bitmap数组
- //var nameLabels = make([]string, 0) //标签数组
- //1 dealCompanyNo
- tags1 := dealCompanyNo(creditNo)
- if len(tags1) > 0 {
- bitLabels = append(bitLabels, tags1...)
- }
- //获取对应Bitmap的标签字符串
- for _, v := range tags1 {
- if la, ok := entLabelMap[v]; ok {
- nameLabels = append(nameLabels, la)
- }
- }
- //2.获取凭安库里的国标经济行业分类
- label := getLabelsByID(companyID)
- if label != "" {
- labels := strings.Split(label, "-")
- for _, v := range labels {
- if bit, ok := nameBitMap[v]; ok {
- bitLabels = append(bitLabels, bit)
- nameLabels = append(nameLabels, v)
- }
- }
- }
- return
- }
- // getLabelsByID 根据名称,获取凭安库的国标经济行业分类
- func getLabelsByID(companyID string) string {
- whereIndustry := map[string]interface{}{
- "company_id": companyID,
- }
- industry, _ := MgoQY.FindOne("company_industry", whereIndustry)
- if industry != nil && len(*industry) > 0 {
- label := ""
- if util.ObjToString((*industry)["industry_l1_name"]) != "" {
- label = util.ObjToString((*industry)["industry_l1_name"])
- } else {
- return ""
- }
- //二级名称
- if util.ObjToString((*industry)["industry_l2_name"]) != "" {
- label = label + "-" + util.ObjToString((*industry)["industry_l2_name"])
- }
- if util.ObjToString((*industry)["industry_l3_name"]) != "" {
- label = label + "-" + util.ObjToString((*industry)["industry_l3_name"])
- }
- if util.ObjToString((*industry)["industry_l4_name"]) != "" {
- label = label + "-" + util.ObjToString((*industry)["industry_l4_name"])
- }
- return label
- }
- return ""
- }
// companyNoPrefixTags maps the first two characters of a unified credit code
// to the tag value assigned to that prefix. Every key is exactly two bytes
// long, which is what lets dealCompanyNo use a direct lookup.
var companyNoPrefixTags = map[string]uint64{
	"11": 151, "12": 152, "13": 153, "19": 154,
	"21": 155, "29": 156, "31": 157, "32": 158,
	"33": 159, "34": 160, "35": 161, "39": 162,
	"41": 163, "49": 164, "51": 165, "52": 166,
	"53": 167, "59": 168, "61": 169, "62": 170,
	"69": 171, "71": 172, "72": 173, "79": 174,
	"81": 175, "89": 176, "91": 177, "92": 178,
	"93": 179, "A1": 180, "A2": 181, "N1": 182,
	"N2": 183, "N3": 184, "N9": 185, "Y1": 186,
}

// dealCompanyNo maps a unified credit code to its tag by the code's two-
// character prefix.
//
// It returns a one-element slice holding the tag when the prefix is known,
// and nil when companyNo is shorter than two bytes or the prefix is not in
// the table. Matching is case-sensitive.
func dealCompanyNo(companyNo string) (newTags []uint64) {
	if len(companyNo) < 2 {
		return nil
	}
	// A single keyed lookup replaces scanning the whole table with HasPrefix.
	if tag, ok := companyNoPrefixTags[companyNo[:2]]; ok {
		newTags = append(newTags, tag)
	}
	return newTags
}
- // dealOrgTags 处理国民经济行业分类
- func dealOrgTags(org_tags string) (newTags []uint64) {
- var categoryMap = map[string]uint64{
- "外交": 2193,
- "发展和改革": 2194,
- "科学技术/科技": 2195,
- "民族事务": 2196,
- "保密局": 2197,
- "国安局": 2198,
- "司法": 2199,
- "法院": 2200,
- "检察院": 2201,
- "人力资源和社会保障": 2202,
- "生态环境": 2203,
- "交通运输": 2204,
- "农业农村": 2205,
- "退役军人事务": 2206,
- "人民银行": 2207,
- "国防": 2208,
- "教育": 2209,
- "党校": 2210,
- "工业和信息化": 2211,
- "公安": 2212,
- "民政": 2213,
- "财政": 2214,
- "自然资源(包含规划)": 2215,
- "住建": 2216,
- "水利": 2217,
- "商务": 2218,
- "卫生健康": 2219,
- "应急管理": 2220,
- "审计": 2221,
- "国有资产监督管理": 2222,
- "海关": 2223,
- "市场监督": 2224,
- "证券监督管理": 2225,
- "体育": 2226,
- "统计": 2227,
- "国际发展合作": 2228,
- "税务": 2229,
- "金融": 2230,
- "广播电视": 2231,
- "信访": 2232,
- "知识产权": 2233,
- "医疗保障": 2234,
- "新华通讯社": 2235,
- "气象": 2236,
- "科学院": 2237,
- "工程院": 2238,
- "粮食和物资储备": 2239,
- "数据": 2240,
- "烟草专卖": 2241,
- "林业和草原": 2242,
- "民用航空": 2243,
- "文物": 2244,
- "疾病预防控制": 2245,
- "消防救援": 2246,
- "药品监督": 2247,
- "能源": 2248,
- "移民": 2249,
- "铁路": 2250,
- "邮政": 2251,
- "中医药": 2252,
- "外汇": 2253,
- "供销合作社": 2254,
- "公共资源交易中心": 2255,
- "监狱": 2256,
- "城乡建设": 2257,
- "文旅": 2258,
- "人民防空": 2259,
- "园林": 2260,
- "物流口岸": 2261,
- "大数据": 2262,
- "政务服务": 2263,
- "地方史志": 2264,
- "住房公积金管理中心": 2265,
- "仲裁": 2266,
- "招商": 2267,
- "社保中心": 2268,
- "管委会": 2269,
- "人民政府": 2270,
- "工商联": 2271,
- "残联": 2272,
- "妇联": 2273,
- "艺术联": 2274,
- "侨联": 2275,
- "台联": 2276,
- "城管": 2277,
- "编办": 2278,
- "政协": 2279,
- "民主党派": 2280,
- "党委": 2281,
- "团委": 2282,
- "人大": 2283,
- "档案局": 2284,
- "武装": 2285,
- "医院": 2286,
- "渔业": 2287,
- "学校": 2288,
- }
- orgTags := strings.TrimSpace(org_tags)
- if strings.Contains(orgTags, "-") {
- classifications := readFile() //读取国标行业分类
- if len(classifications) > 0 {
- ss := findEntCodesByLabel(orgTags, classifications)
- newTags = append(newTags, ss...)
- }
- } else {
- //其它单独标签
- for prefix, tag := range categoryMap {
- if orgTags == prefix {
- newTags = append(newTags, tag)
- }
- }
- }
- return
- }
- // 根据label返回对应的EntCode数组
- func findEntCodesByLabel(label string, classifications []IndustryClassification) []uint64 {
- // 将label分割成多个层级
- labels := strings.Split(label, "-")
- var result []uint64
- if len(labels) > 0 {
- for k, v := range labels {
- rs := findIndustryClassification(k+1, v, classifications)
- if rs != nil {
- result = append(result, rs.EntCode)
- }
- }
- }
- return result
- }
- // 根据level和name查找对应的IndustryClassification
- func findIndustryClassification(level int, name string, classifications []IndustryClassification) *IndustryClassification {
- // 遍历每个分类
- for _, classification := range classifications {
- // 如果当前分类的level和name匹配,返回当前分类
- if classification.Level == level && classification.Name == name {
- return &classification
- }
- // 如果当前分类有子分类,则递归查找子分类
- if len(classification.Children) > 0 {
- if result := findIndustryClassification(level, name, classification.Children); result != nil {
- return result
- }
- }
- }
- // 如果没有找到匹配的分类,返回nil
- return nil
- }
- // getQyxyStd 获取qyxy_std 数据
- func getQyxyStd(companyName string) map[string]interface{} {
- qyxy_info := map[string]interface{}{}
- where := map[string]interface{}{
- "company_name": companyName,
- }
- dataArr, _ := Mgo.FindOne("qyxy_std", where)
- if len(*dataArr) > 0 {
- qyxy_info = *dataArr
- return qyxy_info
- } else {
- where2 := map[string]interface{}{
- "history_name": companyName,
- }
- hisData, _ := MgoQY.FindOne("company_history_name", where2)
- if len(*hisData) > 0 {
- where3 := map[string]interface{}{
- "company_name": companyName,
- }
- dataArr, _ = Mgo.FindOne("qyxy_std", where3)
- if len(*dataArr) > 0 {
- qyxy_info = *dataArr
- return qyxy_info
- }
- }
- }
- return qyxy_info
- }
|