tools.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. ckgo "github.com/ClickHouse/clickhouse-go/v2"
  7. "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
  8. "io/ioutil"
  9. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "log"
  11. "os"
  12. "strings"
  13. "time"
  14. )
  15. // connectClickhouse 连接Clickhouse,其它方式连接;bitmap字段无法处理,需要使用下面方法
  16. func connectClickhouse(host, username, password, dbname string) (driver.Conn, error) {
  17. //host := "192.168.3.207:19000"
  18. //username := "jytop"
  19. //password := "pwdTopJy123"
  20. //dbname := "pub_tags"
  21. var (
  22. ctx = context.Background()
  23. conn, err = ckgo.Open(&ckgo.Options{
  24. Addr: []string{host},
  25. DialTimeout: 10 * time.Second,
  26. MaxIdleConns: 3,
  27. MaxOpenConns: 30,
  28. Auth: ckgo.Auth{
  29. Database: dbname,
  30. Username: username,
  31. Password: password,
  32. },
  33. Debugf: func(format string, v ...interface{}) {
  34. log.Println(format, v)
  35. },
  36. })
  37. )
  38. if err != nil {
  39. return nil, err
  40. }
  41. if err := conn.Ping(ctx); err != nil {
  42. if exception, ok := err.(*ckgo.Exception); ok {
  43. log.Println("Exception [%d] %s \n%s\n", exception.Code, exception.Message, exception.StackTrace)
  44. }
  45. return nil, err
  46. }
  47. return conn, nil
  48. }
  49. // 动态构建 toUInt64 数组字符串
  50. func buildToUInt64Array(labels []uint64) string {
  51. if len(labels) == 0 {
  52. return "[]"
  53. }
  54. toUInt64Labels := make([]string, len(labels))
  55. for i, label := range labels {
  56. toUInt64Labels[i] = fmt.Sprintf("toUInt64(%d)", label)
  57. }
  58. return fmt.Sprintf("[%s]", strings.Join(toUInt64Labels, ", "))
  59. }
  60. // IndustryClassification 国标行业分类的结构体
  61. type IndustryClassification struct {
  62. Code string `json:"code"`
  63. EntCode uint64 `json:"ent_code"` // 法人库bitmap
  64. Name string `json:"name"`
  65. Children []IndustryClassification `json:"children"`
  66. Level int `json:"level"` // 新增字段 Level
  67. }
  68. // 全局变量 seen 用于记录已经分配的 Name 和 EntCode
  69. var seen = make(map[string]uint64)
  70. // assignLevel 为每个分类分配层级
  71. func assignLevel(classifications []IndustryClassification, level int) {
  72. for i := range classifications {
  73. classifications[i].Level = level
  74. // 递归处理子分类
  75. if len(classifications[i].Children) > 0 {
  76. assignLevel(classifications[i].Children, level+1)
  77. }
  78. }
  79. }
  80. // assignEntCode 为每个分类分配 EntCode
  81. func assignEntCode(classifications []IndustryClassification, entCode *uint64) {
  82. for i := range classifications {
  83. // 如果是第三级或更高层级,且只有一个子分类且名称相同,父子共享EntCode
  84. if len(classifications[i].Children) == 1 && classifications[i].Level == 3 && classifications[i].Name == classifications[i].Children[0].Name {
  85. classifications[i].EntCode = *entCode
  86. classifications[i].Children[0].EntCode = *entCode
  87. //seen[classifications[i].Name] = *entCode
  88. (*entCode)++
  89. continue
  90. } else {
  91. classifications[i].EntCode = *entCode
  92. //seen[classifications[i].Name] = *entCode
  93. (*entCode)++
  94. }
  95. // 如果有子分类,则递归处理
  96. if len(classifications[i].Children) > 0 {
  97. assignEntCode(classifications[i].Children, entCode)
  98. }
  99. }
  100. }
  101. // readIndustryClassifications 读取行业分类数据
  102. func readIndustryClassifications(filePath string) ([]IndustryClassification, error) {
  103. // 打开文件
  104. file, err := os.Open(filePath)
  105. if err != nil {
  106. return nil, err
  107. }
  108. defer file.Close()
  109. // 读取文件内容
  110. bytes, err := ioutil.ReadAll(file)
  111. if err != nil {
  112. return nil, err
  113. }
  114. // 解析 JSON
  115. var classifications []IndustryClassification
  116. err = json.Unmarshal(bytes, &classifications)
  117. if err != nil {
  118. return nil, err
  119. }
  120. // 先分配层级
  121. assignLevel(classifications, 1)
  122. // 然后分配EntCode
  123. startEntCode := uint64(192)
  124. assignEntCode(classifications, &startEntCode)
  125. return classifications, nil
  126. }
  127. // readFile d读取文件
  128. func readFile() []IndustryClassification {
  129. // 读取 JSON 文件
  130. filePath := "./国民经济行业分类_2017.json"
  131. //var err error
  132. classifications, err := readIndustryClassifications(filePath)
  133. if err != nil {
  134. fmt.Println("Error reading industry classifications:", err)
  135. return []IndustryClassification{}
  136. }
  137. return classifications
  138. //// 打印结果(调试用)
  139. //output, _ := json.MarshalIndent(classifications, "", " ")
  140. //fmt.Println(string(output))
  141. }
  142. // removeDuplicates 去除重复字符串
  143. func removeDuplicates(arr []string) []string {
  144. uniqueMap := make(map[string]bool)
  145. var result []string
  146. for _, str := range arr {
  147. if !uniqueMap[str] {
  148. uniqueMap[str] = true
  149. result = append(result, str)
  150. }
  151. }
  152. return result
  153. }
  154. // getMondayOfCurrentWeek 获取本周周一
  155. func getMondayOfCurrentWeek() time.Time {
  156. // 获取当前时间
  157. now := time.Now()
  158. // 获取今天是本周的第几天(0 表示周日,1 表示周一,...,6 表示周六)
  159. weekday := int(now.Weekday())
  160. // 如果是周日,转换为 7(以便计算为上一周的最后一天)
  161. if weekday == 0 {
  162. weekday = 7
  163. }
  164. // 计算周一日期
  165. monday := now.AddDate(0, 0, -weekday+1)
  166. // 清除时间部分,保留日期
  167. monday = time.Date(monday.Year(), monday.Month(), monday.Day(), 0, 0, 0, 0, monday.Location())
  168. return monday
  169. }
  170. // CalculateRegionCode 处理地域代码
  171. func CalculateRegionCode(area string, city string, district string) (area_code string, city_code string, district_code string) {
  172. area_code, city_code, district_code = "000000", "", ""
  173. if district != "" {
  174. key := area + "~" + city + "~" + district + "~"
  175. code := RegionCodeData[key]
  176. if code != "" {
  177. district_code = code
  178. city_code = code[:4] + "00"
  179. area_code = code[:2] + "0000"
  180. return
  181. }
  182. }
  183. if city != "" {
  184. key := area + "~" + city + "~" + "" + "~"
  185. code := RegionCodeData[key]
  186. if code != "" {
  187. city_code = code
  188. area_code = city_code[:2] + "0000"
  189. return
  190. }
  191. }
  192. if area != "" {
  193. key := area + "~" + "" + "~" + "" + "~"
  194. code := RegionCodeData[key]
  195. if code != "" {
  196. area_code = code
  197. return
  198. }
  199. }
  200. return
  201. }
  202. // getCompanyLabelBitmap 根据统一信用代码和ID获取bitmap 和对应标签字符串
  203. func getCompanyLabelBitmap(creditNo, companyID string) (bitLabels []uint64, nameLabels []string) {
  204. //var bitLabels = make([]uint64, 0) //bitmap数组
  205. //var nameLabels = make([]string, 0) //标签数组
  206. //1 dealCompanyNo
  207. tags1 := dealCompanyNo(creditNo)
  208. if len(tags1) > 0 {
  209. bitLabels = append(bitLabels, tags1...)
  210. }
  211. //获取对应Bitmap的标签字符串
  212. for _, v := range tags1 {
  213. if la, ok := entLabelMap[v]; ok {
  214. nameLabels = append(nameLabels, la)
  215. }
  216. }
  217. //2.获取凭安库里的国标经济行业分类
  218. label := getLabelsByID(companyID)
  219. if label != "" {
  220. labels := strings.Split(label, "-")
  221. for _, v := range labels {
  222. if bit, ok := nameBitMap[v]; ok {
  223. bitLabels = append(bitLabels, bit)
  224. nameLabels = append(nameLabels, v)
  225. }
  226. }
  227. }
  228. return
  229. }
  230. // getLabelsByID 根据名称,获取凭安库的国标经济行业分类
  231. func getLabelsByID(companyID string) string {
  232. whereIndustry := map[string]interface{}{
  233. "company_id": companyID,
  234. }
  235. industry, _ := MgoQY.FindOne("company_industry", whereIndustry)
  236. if industry != nil && len(*industry) > 0 {
  237. label := ""
  238. if util.ObjToString((*industry)["industry_l1_name"]) != "" {
  239. label = util.ObjToString((*industry)["industry_l1_name"])
  240. } else {
  241. return ""
  242. }
  243. //二级名称
  244. if util.ObjToString((*industry)["industry_l2_name"]) != "" {
  245. label = label + "-" + util.ObjToString((*industry)["industry_l2_name"])
  246. }
  247. if util.ObjToString((*industry)["industry_l3_name"]) != "" {
  248. label = label + "-" + util.ObjToString((*industry)["industry_l3_name"])
  249. }
  250. if util.ObjToString((*industry)["industry_l4_name"]) != "" {
  251. label = label + "-" + util.ObjToString((*industry)["industry_l4_name"])
  252. }
  253. return label
  254. }
  255. return ""
  256. }
  257. // dealCompanyNo dealCompanyNo 统一信用代码
  258. func dealCompanyNo(companyNo string) (newTags []uint64) {
  259. // 前缀与标签映射表
  260. prefixTagMap := map[string]uint64{
  261. "11": 151, "12": 152, "13": 153, "19": 154,
  262. "21": 155, "29": 156, "31": 157, "32": 158,
  263. "33": 159, "34": 160, "35": 161, "39": 162,
  264. "41": 163, "49": 164, "51": 165, "52": 166,
  265. "53": 167, "59": 168, "61": 169, "62": 170,
  266. "69": 171, "71": 172, "72": 173, "79": 174,
  267. "81": 175, "89": 176, "91": 177, "92": 178,
  268. "93": 179, "A1": 180, "A2": 181, "N1": 182,
  269. "N2": 183, "N3": 184, "N9": 185, "Y1": 186,
  270. }
  271. // 遍历映射表进行前缀匹配
  272. for prefix, tag := range prefixTagMap {
  273. if strings.HasPrefix(companyNo, prefix) {
  274. newTags = append(newTags, tag)
  275. }
  276. }
  277. return
  278. }
  279. // dealOrgTags 处理国民经济行业分类
  280. func dealOrgTags(org_tags string) (newTags []uint64) {
  281. var categoryMap = map[string]uint64{
  282. "外交": 2193,
  283. "发展和改革": 2194,
  284. "科学技术/科技": 2195,
  285. "民族事务": 2196,
  286. "保密局": 2197,
  287. "国安局": 2198,
  288. "司法": 2199,
  289. "法院": 2200,
  290. "检察院": 2201,
  291. "人力资源和社会保障": 2202,
  292. "生态环境": 2203,
  293. "交通运输": 2204,
  294. "农业农村": 2205,
  295. "退役军人事务": 2206,
  296. "人民银行": 2207,
  297. "国防": 2208,
  298. "教育": 2209,
  299. "党校": 2210,
  300. "工业和信息化": 2211,
  301. "公安": 2212,
  302. "民政": 2213,
  303. "财政": 2214,
  304. "自然资源(包含规划)": 2215,
  305. "住建": 2216,
  306. "水利": 2217,
  307. "商务": 2218,
  308. "卫生健康": 2219,
  309. "应急管理": 2220,
  310. "审计": 2221,
  311. "国有资产监督管理": 2222,
  312. "海关": 2223,
  313. "市场监督": 2224,
  314. "证券监督管理": 2225,
  315. "体育": 2226,
  316. "统计": 2227,
  317. "国际发展合作": 2228,
  318. "税务": 2229,
  319. "金融": 2230,
  320. "广播电视": 2231,
  321. "信访": 2232,
  322. "知识产权": 2233,
  323. "医疗保障": 2234,
  324. "新华通讯社": 2235,
  325. "气象": 2236,
  326. "科学院": 2237,
  327. "工程院": 2238,
  328. "粮食和物资储备": 2239,
  329. "数据": 2240,
  330. "烟草专卖": 2241,
  331. "林业和草原": 2242,
  332. "民用航空": 2243,
  333. "文物": 2244,
  334. "疾病预防控制": 2245,
  335. "消防救援": 2246,
  336. "药品监督": 2247,
  337. "能源": 2248,
  338. "移民": 2249,
  339. "铁路": 2250,
  340. "邮政": 2251,
  341. "中医药": 2252,
  342. "外汇": 2253,
  343. "供销合作社": 2254,
  344. "公共资源交易中心": 2255,
  345. "监狱": 2256,
  346. "城乡建设": 2257,
  347. "文旅": 2258,
  348. "人民防空": 2259,
  349. "园林": 2260,
  350. "物流口岸": 2261,
  351. "大数据": 2262,
  352. "政务服务": 2263,
  353. "地方史志": 2264,
  354. "住房公积金管理中心": 2265,
  355. "仲裁": 2266,
  356. "招商": 2267,
  357. "社保中心": 2268,
  358. "管委会": 2269,
  359. "人民政府": 2270,
  360. "工商联": 2271,
  361. "残联": 2272,
  362. "妇联": 2273,
  363. "艺术联": 2274,
  364. "侨联": 2275,
  365. "台联": 2276,
  366. "城管": 2277,
  367. "编办": 2278,
  368. "政协": 2279,
  369. "民主党派": 2280,
  370. "党委": 2281,
  371. "团委": 2282,
  372. "人大": 2283,
  373. "档案局": 2284,
  374. "武装": 2285,
  375. "医院": 2286,
  376. "渔业": 2287,
  377. "学校": 2288,
  378. }
  379. orgTags := strings.TrimSpace(org_tags)
  380. if strings.Contains(orgTags, "-") {
  381. classifications := readFile() //读取国标行业分类
  382. if len(classifications) > 0 {
  383. ss := findEntCodesByLabel(orgTags, classifications)
  384. newTags = append(newTags, ss...)
  385. }
  386. } else {
  387. //其它单独标签
  388. for prefix, tag := range categoryMap {
  389. if orgTags == prefix {
  390. newTags = append(newTags, tag)
  391. }
  392. }
  393. }
  394. return
  395. }
  396. // 根据label返回对应的EntCode数组
  397. func findEntCodesByLabel(label string, classifications []IndustryClassification) []uint64 {
  398. // 将label分割成多个层级
  399. labels := strings.Split(label, "-")
  400. var result []uint64
  401. if len(labels) > 0 {
  402. for k, v := range labels {
  403. rs := findIndustryClassification(k+1, v, classifications)
  404. if rs != nil {
  405. result = append(result, rs.EntCode)
  406. }
  407. }
  408. }
  409. return result
  410. }
  411. // 根据level和name查找对应的IndustryClassification
  412. func findIndustryClassification(level int, name string, classifications []IndustryClassification) *IndustryClassification {
  413. // 遍历每个分类
  414. for _, classification := range classifications {
  415. // 如果当前分类的level和name匹配,返回当前分类
  416. if classification.Level == level && classification.Name == name {
  417. return &classification
  418. }
  419. // 如果当前分类有子分类,则递归查找子分类
  420. if len(classification.Children) > 0 {
  421. if result := findIndustryClassification(level, name, classification.Children); result != nil {
  422. return result
  423. }
  424. }
  425. }
  426. // 如果没有找到匹配的分类,返回nil
  427. return nil
  428. }
  429. // getQyxyStd 获取qyxy_std 数据
  430. func getQyxyStd(companyName string) map[string]interface{} {
  431. qyxy_info := map[string]interface{}{}
  432. where := map[string]interface{}{
  433. "company_name": companyName,
  434. }
  435. dataArr, _ := Mgo.FindOne("qyxy_std", where)
  436. if len(*dataArr) > 0 {
  437. qyxy_info = *dataArr
  438. return qyxy_info
  439. } else {
  440. where2 := map[string]interface{}{
  441. "history_name": companyName,
  442. }
  443. hisData, _ := MgoQY.FindOne("company_history_name", where2)
  444. if len(*hisData) > 0 {
  445. where3 := map[string]interface{}{
  446. "company_name": companyName,
  447. }
  448. dataArr, _ = Mgo.FindOne("qyxy_std", where3)
  449. if len(*dataArr) > 0 {
  450. qyxy_info = *dataArr
  451. return qyxy_info
  452. }
  453. }
  454. }
  455. return qyxy_info
  456. }