tools.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. ckgo "github.com/ClickHouse/clickhouse-go/v2"
  7. "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
  8. "io/ioutil"
  9. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "log"
  11. "os"
  12. "strings"
  13. "time"
  14. )
  15. // connectClickhouse 连接Clickhouse,其它方式连接;bitmap字段无法处理,需要使用下面方法
  16. func connectClickhouse(host, username, password, dbname string) (driver.Conn, error) {
  17. //host := "192.168.3.207:19000"
  18. //username := "jytop"
  19. //password := "pwdTopJy123"
  20. //dbname := "pub_tags"
  21. var (
  22. ctx = context.Background()
  23. conn, err = ckgo.Open(&ckgo.Options{
  24. Addr: []string{host},
  25. DialTimeout: 10 * time.Second,
  26. MaxIdleConns: 3,
  27. MaxOpenConns: 30,
  28. Auth: ckgo.Auth{
  29. Database: dbname,
  30. Username: username,
  31. Password: password,
  32. },
  33. Debugf: func(format string, v ...interface{}) {
  34. log.Println(format, v)
  35. },
  36. })
  37. )
  38. if err != nil {
  39. return nil, err
  40. }
  41. if err := conn.Ping(ctx); err != nil {
  42. if exception, ok := err.(*ckgo.Exception); ok {
  43. log.Println("Exception [%d] %s \n%s\n", exception.Code, exception.Message, exception.StackTrace)
  44. }
  45. return nil, err
  46. }
  47. return conn, nil
  48. }
  49. // 动态构建 toUInt64 数组字符串
  50. func buildToUInt64Array(labels []uint64) string {
  51. if len(labels) == 0 {
  52. return "[]"
  53. }
  54. toUInt64Labels := make([]string, len(labels))
  55. for i, label := range labels {
  56. toUInt64Labels[i] = fmt.Sprintf("toUInt64(%d)", label)
  57. }
  58. return fmt.Sprintf("[%s]", strings.Join(toUInt64Labels, ", "))
  59. }
  60. // IndustryClassification 国标行业分类的结构体
  61. type IndustryClassification struct {
  62. Code string `json:"code"`
  63. EntCode uint64 `json:"ent_code"` // 法人库bitmap
  64. Name string `json:"name"`
  65. Children []IndustryClassification `json:"children"`
  66. Level int `json:"level"` // 新增字段 Level
  67. }
  68. // 全局变量 seen 用于记录已经分配的 Name 和 EntCode
  69. var seen = make(map[string]uint64)
  70. // assignLevel 为每个分类分配层级
  71. func assignLevel(classifications []IndustryClassification, level int) {
  72. for i := range classifications {
  73. classifications[i].Level = level
  74. // 递归处理子分类
  75. if len(classifications[i].Children) > 0 {
  76. assignLevel(classifications[i].Children, level+1)
  77. }
  78. }
  79. }
  80. // assignEntCode 为每个分类分配 EntCode
  81. func assignEntCode(classifications []IndustryClassification, entCode *uint64) {
  82. for i := range classifications {
  83. // 如果是第三级或更高层级,且只有一个子分类且名称相同,父子共享EntCode
  84. if len(classifications[i].Children) == 1 && classifications[i].Level == 3 && classifications[i].Name == classifications[i].Children[0].Name {
  85. classifications[i].EntCode = *entCode
  86. classifications[i].Children[0].EntCode = *entCode
  87. //seen[classifications[i].Name] = *entCode
  88. (*entCode)++
  89. continue
  90. } else {
  91. classifications[i].EntCode = *entCode
  92. //seen[classifications[i].Name] = *entCode
  93. (*entCode)++
  94. }
  95. // 如果有子分类,则递归处理
  96. if len(classifications[i].Children) > 0 {
  97. assignEntCode(classifications[i].Children, entCode)
  98. }
  99. }
  100. }
  101. // readIndustryClassifications 读取行业分类数据
  102. func readIndustryClassifications(filePath string) ([]IndustryClassification, error) {
  103. // 打开文件
  104. file, err := os.Open(filePath)
  105. if err != nil {
  106. return nil, err
  107. }
  108. defer file.Close()
  109. // 读取文件内容
  110. bytes, err := ioutil.ReadAll(file)
  111. if err != nil {
  112. return nil, err
  113. }
  114. // 解析 JSON
  115. var classifications []IndustryClassification
  116. err = json.Unmarshal(bytes, &classifications)
  117. if err != nil {
  118. return nil, err
  119. }
  120. // 先分配层级
  121. assignLevel(classifications, 1)
  122. // 然后分配EntCode
  123. startEntCode := uint64(192)
  124. assignEntCode(classifications, &startEntCode)
  125. return classifications, nil
  126. }
  127. // readFile d读取文件
  128. func readFile() []IndustryClassification {
  129. // 读取 JSON 文件
  130. filePath := "./国民经济行业分类_2017.json"
  131. //var err error
  132. classifications, err := readIndustryClassifications(filePath)
  133. if err != nil {
  134. fmt.Println("Error reading industry classifications:", err)
  135. return []IndustryClassification{}
  136. }
  137. return classifications
  138. //// 打印结果(调试用)
  139. //output, _ := json.MarshalIndent(classifications, "", " ")
  140. //fmt.Println(string(output))
  141. }
  142. // removeDuplicates 去除重复字符串
  143. func removeDuplicates(arr []string) []string {
  144. uniqueMap := make(map[string]bool)
  145. var result []string
  146. for _, str := range arr {
  147. if !uniqueMap[str] {
  148. uniqueMap[str] = true
  149. result = append(result, str)
  150. }
  151. }
  152. return result
  153. }
  154. // getMondayOfCurrentWeek 获取本周周一
  155. func getMondayOfCurrentWeek() time.Time {
  156. // 获取当前时间
  157. now := time.Now()
  158. // 获取今天是本周的第几天(0 表示周日,1 表示周一,...,6 表示周六)
  159. weekday := int(now.Weekday())
  160. // 如果是周日,转换为 7(以便计算为上一周的最后一天)
  161. if weekday == 0 {
  162. weekday = 7
  163. }
  164. // 计算周一日期
  165. monday := now.AddDate(0, 0, -weekday+1)
  166. // 清除时间部分,保留日期
  167. monday = time.Date(monday.Year(), monday.Month(), monday.Day(), 0, 0, 0, 0, monday.Location())
  168. return monday
  169. }
  170. // CalculateRegionCode 处理地域代码
  171. func CalculateRegionCode(area string, city string, district string) (area_code string, city_code string, district_code string) {
  172. area_code, city_code, district_code = "000000", "", ""
  173. if district != "" {
  174. key := area + "~" + city + "~" + district + "~"
  175. code := RegionCodeData[key]
  176. if code != "" {
  177. district_code = code
  178. city_code = code[:4] + "00"
  179. area_code = code[:2] + "0000"
  180. return
  181. }
  182. }
  183. if city != "" {
  184. key := area + "~" + city + "~" + "" + "~"
  185. code := RegionCodeData[key]
  186. if code != "" {
  187. city_code = code
  188. area_code = city_code[:2] + "0000"
  189. return
  190. }
  191. }
  192. if area != "" {
  193. key := area + "~" + "" + "~" + "" + "~"
  194. code := RegionCodeData[key]
  195. if code != "" {
  196. area_code = code
  197. return
  198. }
  199. }
  200. return
  201. }
  202. // getCompanyLabelBitmap 根据统一信用代码和ID获取bitmap 和对应标签字符串
  203. func getCompanyLabelBitmap(creditNo, companyID string) (bitLabels []uint64, nameLabels []string) {
  204. //var bitLabels = make([]uint64, 0) //bitmap数组
  205. //var nameLabels = make([]string, 0) //标签数组
  206. //1 dealCompanyNo
  207. tags1 := dealCompanyNo(creditNo)
  208. if len(tags1) > 0 {
  209. bitLabels = append(bitLabels, tags1...)
  210. }
  211. //获取对应Bitmap的标签字符串
  212. for _, v := range tags1 {
  213. if la, ok := entLabelMap[v]; ok {
  214. nameLabels = append(nameLabels, la)
  215. }
  216. }
  217. //2.获取凭安库里的国标经济行业分类
  218. label := getLabelsByID(companyID)
  219. if label != "" {
  220. labels := strings.Split(label, "-")
  221. for _, v := range labels {
  222. if bit, ok := nameBitMap[v]; ok {
  223. bitLabels = append(bitLabels, bit)
  224. nameLabels = append(nameLabels, v)
  225. }
  226. }
  227. }
  228. return
  229. }
  230. // getLabelsByID 根据名称,获取凭安库的国标经济行业分类
  231. func getLabelsByID(companyID string) string {
  232. whereIndustry := map[string]interface{}{
  233. "company_id": companyID,
  234. }
  235. industry, _ := MgoQY.FindOne("company_industry", whereIndustry)
  236. if industry != nil && len(*industry) > 0 {
  237. label := ""
  238. if util.ObjToString((*industry)["industry_l1_name"]) != "" {
  239. label = util.ObjToString((*industry)["industry_l1_name"])
  240. } else {
  241. return ""
  242. }
  243. //二级名称
  244. if util.ObjToString((*industry)["industry_l2_name"]) != "" {
  245. label = label + "-" + util.ObjToString((*industry)["industry_l2_name"])
  246. }
  247. if util.ObjToString((*industry)["industry_l3_name"]) != "" {
  248. label = label + "-" + util.ObjToString((*industry)["industry_l3_name"])
  249. }
  250. if util.ObjToString((*industry)["industry_l4_name"]) != "" {
  251. label = label + "-" + util.ObjToString((*industry)["industry_l4_name"])
  252. }
  253. return label
  254. }
  255. return ""
  256. }