main.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. package main
  2. import (
  3. "bufio"
  4. "compress/gzip"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "go.uber.org/zap"
  9. "io"
  10. "io/ioutil"
  11. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  12. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  13. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  14. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  15. "net"
  16. "net/http"
  17. "os"
  18. "runtime"
  19. "strings"
  20. "sync"
  21. "time"
  22. )
  23. var (
  24. MongoTool *mongodb.MongodbSim
  25. //updatePool chan []map[string]interface{}
  26. //updateSp chan bool
  27. //saveSize int
  28. CurrentColl string
  29. //collCount int
  30. saveLog = make(map[string]interface{})
  31. UdpClient udp.UdpClient
  32. changeAddr *net.UDPAddr
  33. readPath string //文件夹目录
  34. jyUpdatetime int64
  35. )
  36. func init() {
  37. InitLog()
  38. err := InitConfig()
  39. if err != nil {
  40. log.Info("init", zap.Any("InitConfig", err))
  41. }
  42. InitMgo()
  43. readPath = GF.Env.Path
  44. changeAddr = &net.UDPAddr{
  45. Port: GF.Env.ChangePort,
  46. IP: net.ParseIP(GF.Env.TargetIp),
  47. }
  48. log.Info("init", zap.Any("changeAddr", changeAddr))
  49. }
  50. func main() {
  51. UdpClient = udp.UdpClient{Local: GF.Env.LocalPort, BufSize: 1024}
  52. UdpClient.Listen(processUdpMsg)
  53. log.Info("main", zap.String("Udp服务监听======= port:", GF.Env.LocalPort))
  54. ch := make(chan bool, 1)
  55. <-ch
  56. }
  57. func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
  58. switch act {
  59. case udp.OP_TYPE_DATA:
  60. var mapInfo map[string]interface{}
  61. err := json.Unmarshal(data, &mapInfo)
  62. if err != nil {
  63. log.Info("processUdpMsg", zap.Any("Unmarshal err", err))
  64. } else if mapInfo != nil {
  65. log.Info("processUdpMsg", zap.Any("mapInfo", mapInfo))
  66. key, _ := mapInfo["key"].(string)
  67. if key == "" {
  68. key = "udpok"
  69. }
  70. go UdpClient.WriteUdp([]byte(key), udp.OP_NOOP, ra)
  71. //拿到同步信号,开始同步数据
  72. if _, ok := mapInfo["start"]; ok {
  73. if _, okk := mapInfo["path"]; okk {
  74. path := util.ObjToString(mapInfo["path"]) //udp 传递的路径
  75. //没有指定配置文件的指定目录,就使用udp 传递目录
  76. if path != "" {
  77. readPath = path
  78. }
  79. }
  80. //开始同步
  81. go task(readPath)
  82. }
  83. }
  84. default:
  85. log.Info("processUdpMsg", zap.String("qyxy_listen_data_new", "======"))
  86. }
  87. }
  88. func task(path string) {
  89. files, _ := ioutil.ReadDir(path)
  90. jyUpdatetime = time.Now().Unix() //数据更新时间,更新quxy_change 使用
  91. //annual_report_base/20221122/split.json.gz
  92. for _, f := range files {
  93. if f.IsDir() {
  94. start := time.Now()
  95. CurrentColl = f.Name() //annual_report_base
  96. collCount := 0 // 当前表的数据数量
  97. log.Info("task", zap.String("collection name", f.Name())) //annual_report_base
  98. //util.Debug("collection name:---", f.Name())
  99. if !strings.HasSuffix(path, "/") {
  100. path = path + "/"
  101. }
  102. subPath := path + f.Name() + "/"
  103. subFiles, _ := ioutil.ReadDir(subPath)
  104. for _, s := range subFiles {
  105. log.Info("task ", zap.String("当前文件:", s.Name()))
  106. if s.IsDir() {
  107. collCount = taskinfo(subPath+s.Name(), collCount) //annual_report_base/20221122
  108. //// 增加WaitGroup计数
  109. //go func(dirPath string, collCount int) {
  110. // collCount = taskinfo(dirPath, collCount)
  111. //}(subPath+s.Name(), collCount)
  112. //taskinfo(subPath + s.Name()) //annual_report_base/20221122
  113. }
  114. }
  115. // 判断最后的数据不足500条时 执行
  116. //if len(saveArr) > 0 {
  117. // tmps := saveArr
  118. // MongoTool.UpSertBulk(CurrentColl, tmps...)
  119. // saveArr = [][]map[string]interface{}{}
  120. //}
  121. duration := time.Since(start)
  122. result := map[string]interface{}{
  123. "count": collCount,
  124. "duration": duration.Minutes(),
  125. }
  126. saveLog[f.Name()] = result
  127. //sendMsg += f.Name() + ":" + strconv.Itoa(collCount) + ";"
  128. }
  129. }
  130. //执行完毕,通知qyxy_change,更新企业变更信息
  131. data := map[string]interface{}{
  132. "start": GF.Env.ChangeUdp,
  133. "jy_updatetime": jyUpdatetime,
  134. }
  135. if GF.Env.ChangeUdp {
  136. SendUdpMsg(data, changeAddr)
  137. }
  138. log.Info("task", zap.String("执行完毕", path))
  139. MongoTool.Save("save_log", map[string]interface{}{"createtime": time.Now().String(), "result": saveLog})
  140. //邮件通知
  141. title := fmt.Sprintf("%s 数据处理完毕", path)
  142. // 将 map 转换为 JSON 字符串
  143. jsonBytes, err := json.Marshal(saveLog)
  144. if err != nil {
  145. log.Info("task", zap.Any("json 转换失败", saveLog))
  146. }
  147. // 转换为字符串并输出
  148. jsonStr := string(jsonBytes)
  149. SendMail(title, jsonStr)
  150. }
  151. // taskinfo 读取压缩包文件
  152. func taskinfo(path string, collCount int) int {
  153. count := 0 //读取的数量
  154. file := path + "/split.json.gz"
  155. log.Info("taskinfo", zap.Any("current file", file))
  156. // 检查文件是否存在
  157. fileInfo, err := os.Stat(file)
  158. if err != nil {
  159. log.Error("Error opening file:", zap.Error(err))
  160. return collCount
  161. }
  162. // 检查文件大小是否为0
  163. if fileInfo.Size() == 0 {
  164. log.Warn(file, zap.Error(errors.New("文件大小为0")))
  165. return collCount
  166. }
  167. // 打开本地gz格式压缩包
  168. fr, err := os.Open(file)
  169. if err != nil {
  170. log.Info("taskinfo", zap.Any("err", err))
  171. } else {
  172. fmt.Println("open file success!", file)
  173. }
  174. // defer: 在函数退出时,执行关闭文件
  175. defer fr.Close()
  176. // 创建gzip文件读取对象
  177. gr, err := gzip.NewReader(fr)
  178. if err != nil {
  179. log.Info("taskinfo", zap.Any("err", err))
  180. }
  181. // defer: 在函数退出时,执行关闭gzip对象
  182. defer gr.Close()
  183. bfRd := bufio.NewReader(gr)
  184. wg := sync.WaitGroup{}
  185. ch := make(chan bool, 5)
  186. for {
  187. line, err := bfRd.ReadBytes('\n')
  188. if err != nil {
  189. if err == io.EOF {
  190. log.Info("taskinfo", zap.String("EOF", "read gzip data finish! "))
  191. break
  192. } else {
  193. log.Info("taskinfo", zap.Any("[read gzip data err]:", err))
  194. }
  195. } else {
  196. count++
  197. if count%5000 == 0 {
  198. printMemoryUsage()
  199. log.Info("taskinfo", zap.Int("current count:"+file, count))
  200. }
  201. ch <- true
  202. wg.Add(1)
  203. go func(line []byte) {
  204. defer func() {
  205. <-ch
  206. wg.Done()
  207. }()
  208. hookfn(line)
  209. }(line)
  210. }
  211. }
  212. wg.Wait()
  213. collCount += count
  214. return collCount
  215. }
  216. // hookfn 处理数据,500条处理一次
  217. func hookfn(line []byte) {
  218. tmp := make(map[string]interface{})
  219. err := json.Unmarshal(line, &tmp)
  220. if err != nil {
  221. log.Info("hookfn", zap.Any("Unmarshal err", err))
  222. }
  223. if len(tmp) == 0 {
  224. return
  225. }
  226. if util.IntAll(tmp["id"]) == 0 {
  227. MongoTool.Save("wcc"+CurrentColl, tmp)
  228. } else {
  229. tmp["_id"] = util.IntAll(tmp["id"])
  230. tmp["id"] = fmt.Sprintf("%d", util.IntAll(tmp["id"]))
  231. tmp["jy_updatetime"] = time.Now().Unix()
  232. //if CurrentColl == "company_change" {
  233. // tmp["jy_updatetime"] = jyUpdatetime
  234. //}
  235. saveInfo := []map[string]interface{}{map[string]interface{}{"_id": tmp["_id"]}, map[string]interface{}{"$set": tmp}}
  236. tmpArr := [][]map[string]interface{}{saveInfo}
  237. MongoTool.UpSertBulk(CurrentColl, tmpArr...)
  238. }
  239. }
  240. // SendUdpMsg 通知处理企业新增数据
  241. func SendUdpMsg(data map[string]interface{}, target *net.UDPAddr) {
  242. bytes, _ := json.Marshal(data)
  243. UdpClient.WriteUdp(bytes, udp.OP_TYPE_DATA, target)
  244. log.Info("SendUdpMsg", zap.Any("data", data), zap.Any("target", target))
  245. }
  246. func printMemoryUsage() {
  247. var memStats runtime.MemStats
  248. runtime.ReadMemStats(&memStats)
  249. // 将字节转换为兆字节(MB)
  250. allocatedMB := float64(memStats.Alloc) / 1024 / 1024
  251. totalAllocatedMB := float64(memStats.TotalAlloc) / 1024 / 1024
  252. heapAllocMB := float64(memStats.HeapAlloc) / 1024 / 1024
  253. log.Info("printMemoryUsage", zap.Any("当前程序已分配的内存大小", allocatedMB))
  254. log.Info("printMemoryUsage", zap.Any("程序自启动以来总共分配的内存大小", totalAllocatedMB))
  255. log.Info("printMemoryUsage", zap.Any("堆上当前已分配但尚未释放的内存", heapAllocMB))
  256. log.Info("printMemoryUsage", zap.Any("堆上分配的对象数", memStats.HeapObjects))
  257. }
  258. // SendMail 发送邮件
  259. func SendMail(title, content string) {
  260. url := fmt.Sprintf("%s?to=%s&title=%s&body=%s", GF.Email.Api, GF.Email.To, title, content)
  261. fmt.Println("url=>", url)
  262. res, err := http.Get(url)
  263. if err != nil {
  264. log.Info("SendMail", zap.Any("err", err))
  265. } else {
  266. log.Info("SendMail", zap.Any("res", res))
  267. }
  268. }