task.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. package main
  2. import (
  3. "context"
  4. "fmt"
  5. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  6. //"github.com/cron"
  7. "github.com/wangbin/jiebago"
  8. "go.mongodb.org/mongo-driver/bson"
  9. "go.mongodb.org/mongo-driver/mongo/options"
  10. "log"
  11. "math/rand"
  12. "regexp"
  13. "strconv"
  14. "strings"
  15. "time"
  16. )
  17. var (
  18. AreaFiled = []string{"credit_no", "company_code"}
  19. WordsArr = []string{"研发", "研制", "开发", "生产", "制造", "制作", "加工", "种植"}
  20. seg jiebago.Segmenter
  21. regPre, _ = regexp.Compile(`^(.+[省|市|区|县|州])?(.+)`)
  22. seed = 188
  23. //startSeoId int64
  24. )
  // company_base lists the field names that make up a company's basic
  // information record, in a fixed order.
  // Note that "company_status" occurs twice; both occurrences are kept so the
  // slice length and element positions stay exactly as they were.
  var company_base = []string{
  	"company_name", "company_code", "credit_no", "org_code",
  	"legal_person", "company_status", "authority",
  	"establish_date", "issue_date", "operation_startdate", "operation_enddate",
  	"capital", "company_type", "company_status",
  	"company_address", "business_scope",
  	"cancel_date", "cancel_reason", "revoke_date", "revoke_reason",
  	"legal_person_type", "real_capital", "en_name", "list_code", "tax_code", "use_flag",
  }
  // province_map translates an upper-case province abbreviation into the
  // province's Chinese name.
  var province_map = map[string]string{
  	// Municipalities.
  	"BJ": "北京",
  	"TJ": "天津",
  	"SH": "上海",
  	"CQ": "重庆",

  	// North.
  	"HB":  "河北",
  	"SX":  "山西",
  	"NMG": "内蒙古",

  	// Northeast.
  	"LN":  "辽宁",
  	"JL":  "吉林",
  	"HLJ": "黑龙江",

  	// East.
  	"JS": "江苏",
  	"ZJ": "浙江",
  	"AH": "安徽",
  	"FJ": "福建",
  	"JX": "江西",
  	"SD": "山东",

  	// Central and south.
  	"HEN":  "河南",
  	"HUB":  "湖北",
  	"HUN":  "湖南",
  	"GD":   "广东",
  	"GX":   "广西",
  	"HAIN": "海南",

  	// Southwest.
  	"SC": "四川",
  	"GZ": "贵州",
  	"YN": "云南",
  	"XZ": "西藏",

  	// Northwest.
  	"SAX": "陕西",
  	"GS":  "甘肃",
  	"QH":  "青海",
  	"NX":  "宁夏",
  	"XJ":  "新疆",
  }
  37. // company_shortname 企业简称
  38. func getStName(name string) string {
  39. regnames := regPre.FindStringSubmatch(name)
  40. lenth := len(regnames)
  41. if lenth < 1 {
  42. return ""
  43. }
  44. newstr := regnames[lenth-1]
  45. ch := seg.Cut(newstr, false)
  46. val := []string{}
  47. for word := range ch {
  48. val = append(val, word)
  49. }
  50. name2 := ""
  51. for _, v := range val {
  52. name2 = name2 + v
  53. if len([]rune(name2)) >= 4 {
  54. break
  55. }
  56. }
  57. return name2
  58. }
  // DealMemberNo converts a head-count string into an integer.
  //
  // Leading and trailing whitespace of any kind is ignored. The empty string
  // and the placeholder "企业选择不公示" ("company chose not to disclose") give 0.
  // When the value ends in the unit "人", only the text before the first "人"
  // is parsed, with whitespace between the digits and the unit tolerated
  // (e.g. "12 人" gives 12). Anything else is parsed as a plain integer.
  // Text that is not an integer gives 0.
  func DealMemberNo(memberNo string) (no int) {
  	memberNo = strings.TrimSpace(memberNo)
  	switch memberNo {
  	case "", "企业选择不公示":
  		return 0
  	}
  	if strings.HasSuffix(memberNo, "人") {
  		// HasSuffix guarantees Index finds the unit, so the slice is in range.
  		memberNo = strings.TrimSpace(memberNo[:strings.Index(memberNo, "人")])
  	}
  	// The Atoi error is dropped on purpose: unparsable input is reported as
  	// whatever Atoi returns alongside the error (0 for non-numeric text).
  	no, _ = strconv.Atoi(memberNo)
  	return no
  }
  75. // updateStd 根据qyxy_std 表,更新 nseo_id
  76. func updateStd() {
  77. db := DB()
  78. defer db.Close()
  79. // 创建数据库操作对象
  80. badgerDB := &bdb{db}
  81. var id int64
  82. var startSeoId int64
  83. //获取 seo_id
  84. seoIdOld := badgerDB.Get("seoid")
  85. if seoIdOld == "" {
  86. startSeoId = int64(GF.Env.Seoid) //起始 nseo_id
  87. badgerDB.Set("seoid", strconv.Itoa(GF.Env.Seoid))
  88. } else {
  89. seoid, _ := strconv.Atoi(seoIdOld)
  90. startSeoId = int64(seoid)
  91. }
  92. log.Println("seoid--------", startSeoId)
  93. if startSeoId == 0 {
  94. log.Fatalln("seoid 为 0,请检查配置")
  95. }
  96. rand.Seed(time.Now().UnixNano())
  97. count := int64(0)
  98. //var wg sync.WaitGroup
  99. sess := MongoTool.GetMgoConn()
  100. defer MongoTool.DestoryMongoConn(sess)
  101. ctx, _ := context.WithTimeout(context.Background(), 99999*time.Hour)
  102. coll := sess.M.C.Database("mixdata").Collection("qyxy_std")
  103. find := options.Find().SetBatchSize(200).SetSort(bson.D{bson.E{"updatetime", 1}}).SetProjection(bson.M{"_id": 1, "autoid": 1, "establish_date": 1, "create_time_msql": 1, "nseo_id": 1})
  104. cur, err := coll.Find(ctx, bson.M{"updatetime": bson.M{"$gt": startTime}}, find)
  105. if err != nil {
  106. log.Println("mgo find err", err.Error())
  107. return
  108. }
  109. for tmp := make(map[string]interface{}); cur.Next(ctx); {
  110. count++
  111. if cur != nil {
  112. cur.Decode(&tmp)
  113. id = util.Int64All(tmp["autoid"])
  114. //存在就不处理
  115. if seoid, ok := tmp["nseo_id"]; ok && seoid != nil {
  116. continue
  117. }
  118. ed, _ := tmp["establish_date"].(string)
  119. pre := ""
  120. if len(ed) == 10 {
  121. pre = ed[2:4] + ed[5:7] + ed[8:10]
  122. } else {
  123. cd, _ := tmp["create_time_msql"].(string)
  124. if len(cd) > 9 {
  125. pre = cd[2:4] + cd[5:7] + cd[8:10]
  126. }
  127. }
  128. startSeoId += int64(rand.Intn(seed) + 2)
  129. nseo_id := fmt.Sprintf("%s%d", pre, startSeoId)
  130. update := make(map[string]interface{})
  131. update["$set"] = bson.M{
  132. "nseo_id": nseo_id,
  133. }
  134. where := map[string]interface{}{
  135. "_id": tmp["_id"],
  136. }
  137. res := MongoTool.Update("qyxy_std", where, update, true, false)
  138. if !res {
  139. log.Println(tmp["_id"], "数据更新失败,nseo_id", nseo_id)
  140. }
  141. if count%10000 == 0 {
  142. log.Println("current,autoid,startSeoId,nseo_id", count, id, startSeoId, nseo_id)
  143. }
  144. tmp = make(map[string]interface{})
  145. } else {
  146. cur.Close(ctx)
  147. break
  148. }
  149. }
  150. badgerDB.Set("autoid", strconv.Itoa(int(id)))
  151. badgerDB.Set("seoid", strconv.Itoa(int(startSeoId)))
  152. log.Println("over ----;", "autoid", id, "seoid:", startSeoId)
  153. }