main.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. package main
  2. import (
  3. "bufio"
  4. "compress/gzip"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "go.uber.org/zap"
  9. "io"
  10. "io/ioutil"
  11. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  12. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  13. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  14. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  15. "net"
  16. "os"
  17. "runtime"
  18. "strings"
  19. "sync"
  20. "time"
  21. )
var (
	// MongoTool is the MongoDB handle that task and hookfn use to save and upsert documents.
	MongoTool *mongodb.MongodbSim
	//updatePool chan []map[string]interface{}
	//updateSp chan bool
	//saveSize int
	// CurrentColl is the name of the collection directory task is currently
	// importing; hookfn reads it to choose the target collection.
	CurrentColl string
	//collCount int
	// saveLog holds, per collection directory name, the record count and
	// duration that task stores in the "save_log" collection.
	saveLog = make(map[string]interface{})
	// UdpClient is the UDP endpoint set up in main; it receives commands and
	// sends acknowledgements and notifications.
	UdpClient udp.UdpClient
	// changeAddr is the UDP address, built in init from GF.Env.TargetIp and
	// GF.Env.ChangePort, that task notifies when a run finishes.
	changeAddr *net.UDPAddr
	readPath   string // folder directory; set from GF.Env.Path in init, may be replaced by a "path" value received over UDP
	// jyUpdatetime is the Unix time recorded when task starts; it is sent in
	// the completion notification.
	jyUpdatetime int64
)
  35. func init() {
  36. InitLog()
  37. err := InitConfig()
  38. if err != nil {
  39. log.Info("init", zap.Any("InitConfig", err))
  40. }
  41. InitMgo()
  42. readPath = GF.Env.Path
  43. changeAddr = &net.UDPAddr{
  44. Port: GF.Env.ChangePort,
  45. IP: net.ParseIP(GF.Env.TargetIp),
  46. }
  47. log.Info("init", zap.Any("changeAddr", changeAddr))
  48. }
  49. func main() {
  50. UdpClient = udp.UdpClient{Local: GF.Env.LocalPort, BufSize: 1024}
  51. UdpClient.Listen(processUdpMsg)
  52. log.Info("main", zap.String("Udp服务监听======= port:", GF.Env.LocalPort))
  53. ch := make(chan bool, 1)
  54. <-ch
  55. }
  56. func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
  57. switch act {
  58. case udp.OP_TYPE_DATA:
  59. var mapInfo map[string]interface{}
  60. err := json.Unmarshal(data, &mapInfo)
  61. if err != nil {
  62. log.Info("processUdpMsg", zap.Any("Unmarshal err", err))
  63. } else if mapInfo != nil {
  64. log.Info("processUdpMsg", zap.Any("mapInfo", mapInfo))
  65. key, _ := mapInfo["key"].(string)
  66. if key == "" {
  67. key = "udpok"
  68. }
  69. go UdpClient.WriteUdp([]byte(key), udp.OP_NOOP, ra)
  70. //拿到同步信号,开始同步数据
  71. if _, ok := mapInfo["start"]; ok {
  72. if _, okk := mapInfo["path"]; okk {
  73. path := util.ObjToString(mapInfo["path"]) //udp 传递的路径
  74. //没有指定配置文件的指定目录,就使用udp 传递目录
  75. if path != "" {
  76. readPath = path
  77. }
  78. }
  79. //开始同步
  80. go task(readPath)
  81. }
  82. }
  83. default:
  84. log.Info("processUdpMsg", zap.String("qyxy_listen_data_new", "======"))
  85. }
  86. }
  87. func task(path string) {
  88. files, _ := ioutil.ReadDir(path)
  89. jyUpdatetime = time.Now().Unix() //数据更新时间,更新quxy_change 使用
  90. //annual_report_base/20221122/split.json.gz
  91. for _, f := range files {
  92. if f.IsDir() {
  93. start := time.Now()
  94. CurrentColl = f.Name() //annual_report_base
  95. collCount := 0 // 当前表的数据数量
  96. log.Info("task", zap.String("collection name", f.Name())) //annual_report_base
  97. //util.Debug("collection name:---", f.Name())
  98. if !strings.HasSuffix(path, "/") {
  99. path = path + "/"
  100. }
  101. subPath := path + f.Name() + "/"
  102. subFiles, _ := ioutil.ReadDir(subPath)
  103. for _, s := range subFiles {
  104. log.Info("task ", zap.String("当前文件:", s.Name()))
  105. if s.IsDir() {
  106. collCount = taskinfo(subPath+s.Name(), collCount) //annual_report_base/20221122
  107. //// 增加WaitGroup计数
  108. //go func(dirPath string, collCount int) {
  109. // collCount = taskinfo(dirPath, collCount)
  110. //}(subPath+s.Name(), collCount)
  111. //taskinfo(subPath + s.Name()) //annual_report_base/20221122
  112. }
  113. }
  114. // 判断最后的数据不足500条时 执行
  115. //if len(saveArr) > 0 {
  116. // tmps := saveArr
  117. // MongoTool.UpSertBulk(CurrentColl, tmps...)
  118. // saveArr = [][]map[string]interface{}{}
  119. //}
  120. duration := time.Since(start)
  121. result := map[string]interface{}{
  122. "count": collCount,
  123. "duration": duration.Minutes(),
  124. }
  125. saveLog[f.Name()] = result
  126. //sendMsg += f.Name() + ":" + strconv.Itoa(collCount) + ";"
  127. }
  128. }
  129. //执行完毕,通知qyxy_change,更新企业变更信息
  130. data := map[string]interface{}{
  131. "start": GF.Env.ChangeUdp,
  132. "jy_updatetime": jyUpdatetime,
  133. }
  134. if GF.Env.ChangeUdp {
  135. SendUdpMsg(data, changeAddr)
  136. }
  137. log.Info("task", zap.String("执行完毕", path))
  138. MongoTool.Save("save_log", map[string]interface{}{"createtime": time.Now().String(), "result": saveLog})
  139. //SendMail(sendMsg)
  140. }
  141. // taskinfo 读取压缩包文件
  142. func taskinfo(path string, collCount int) int {
  143. count := 0 //读取的数量
  144. file := path + "/split.json.gz"
  145. log.Info("taskinfo", zap.Any("current file", file))
  146. // 检查文件是否存在
  147. fileInfo, err := os.Stat(file)
  148. if err != nil {
  149. log.Error("Error opening file:", zap.Error(err))
  150. return collCount
  151. }
  152. // 检查文件大小是否为0
  153. if fileInfo.Size() == 0 {
  154. log.Warn(file, zap.Error(errors.New("文件大小为0")))
  155. return collCount
  156. }
  157. // 打开本地gz格式压缩包
  158. fr, err := os.Open(file)
  159. if err != nil {
  160. log.Info("taskinfo", zap.Any("err", err))
  161. } else {
  162. fmt.Println("open file success!", file)
  163. }
  164. // defer: 在函数退出时,执行关闭文件
  165. defer fr.Close()
  166. // 创建gzip文件读取对象
  167. gr, err := gzip.NewReader(fr)
  168. if err != nil {
  169. log.Info("taskinfo", zap.Any("err", err))
  170. }
  171. // defer: 在函数退出时,执行关闭gzip对象
  172. defer gr.Close()
  173. bfRd := bufio.NewReader(gr)
  174. wg := sync.WaitGroup{}
  175. ch := make(chan bool, 5)
  176. for {
  177. line, err := bfRd.ReadBytes('\n')
  178. if err != nil {
  179. if err == io.EOF {
  180. log.Info("taskinfo", zap.String("EOF", "read gzip data finish! "))
  181. break
  182. } else {
  183. log.Info("taskinfo", zap.Any("[read gzip data err]:", err))
  184. }
  185. } else {
  186. count++
  187. if count%5000 == 0 {
  188. printMemoryUsage()
  189. log.Info("taskinfo", zap.Int("current count:"+file, count))
  190. }
  191. ch <- true
  192. wg.Add(1)
  193. go func(line []byte) {
  194. defer func() {
  195. <-ch
  196. wg.Done()
  197. }()
  198. hookfn(line)
  199. }(line)
  200. }
  201. }
  202. wg.Wait()
  203. collCount += count
  204. return collCount
  205. }
  206. // hookfn 处理数据,500条处理一次
  207. func hookfn(line []byte) {
  208. tmp := make(map[string]interface{})
  209. err := json.Unmarshal(line, &tmp)
  210. if err != nil {
  211. log.Info("hookfn", zap.Any("Unmarshal err", err))
  212. }
  213. if len(tmp) == 0 {
  214. return
  215. }
  216. if util.IntAll(tmp["id"]) == 0 {
  217. MongoTool.Save("wcc"+CurrentColl, tmp)
  218. } else {
  219. tmp["_id"] = util.IntAll(tmp["id"])
  220. tmp["id"] = fmt.Sprintf("%d", util.IntAll(tmp["id"]))
  221. tmp["jy_updatetime"] = time.Now().Unix()
  222. //if CurrentColl == "company_change" {
  223. // tmp["jy_updatetime"] = jyUpdatetime
  224. //}
  225. saveInfo := []map[string]interface{}{map[string]interface{}{"_id": tmp["_id"]}, map[string]interface{}{"$set": tmp}}
  226. tmpArr := [][]map[string]interface{}{saveInfo}
  227. MongoTool.UpSertBulk(CurrentColl, tmpArr...)
  228. }
  229. }
  230. // SendUdpMsg 通知处理企业新增数据
  231. func SendUdpMsg(data map[string]interface{}, target *net.UDPAddr) {
  232. bytes, _ := json.Marshal(data)
  233. UdpClient.WriteUdp(bytes, udp.OP_TYPE_DATA, target)
  234. log.Info("SendUdpMsg", zap.Any("data", data), zap.Any("target", target))
  235. }
  236. func printMemoryUsage() {
  237. var memStats runtime.MemStats
  238. runtime.ReadMemStats(&memStats)
  239. // 将字节转换为兆字节(MB)
  240. allocatedMB := float64(memStats.Alloc) / 1024 / 1024
  241. totalAllocatedMB := float64(memStats.TotalAlloc) / 1024 / 1024
  242. heapAllocMB := float64(memStats.HeapAlloc) / 1024 / 1024
  243. log.Info("printMemoryUsage", zap.Any("当前程序已分配的内存大小", allocatedMB))
  244. log.Info("printMemoryUsage", zap.Any("程序自启动以来总共分配的内存大小", totalAllocatedMB))
  245. log.Info("printMemoryUsage", zap.Any("堆上当前已分配但尚未释放的内存", heapAllocMB))
  246. log.Info("printMemoryUsage", zap.Any("堆上分配的对象数", memStats.HeapObjects))
  247. }