main.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. package main
  2. /**
  3. 招标信息判重
  4. **/
  5. import (
  6. "encoding/json"
  7. "flag"
  8. "flow_repeat/nsqdata"
  9. "fmt"
  10. "jygit.jydev.jianyu360.cn/BP/jynats/jnats"
  11. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  12. mu "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  13. "log"
  14. "net"
  15. "regexp"
  16. "sync"
  17. "time"
  18. )
  19. var (
  20. Sysconfig map[string]interface{} //配置文件
  21. data_mgo, task_mgo, spider_mgo *MongodbSim
  22. task_coll, task_bidding, spider_coll string
  23. extract, extract_back, extract_log string
  24. udpclient mu.UdpClient
  25. nextNode []map[string]interface{}
  26. dupdays = 7
  27. DM, FullDM *datamap
  28. Update *updateInfo
  29. AddGroupPool *addGroupInfo
  30. //正则筛选相关
  31. FilterRegTitle = regexp.MustCompile("^_$")
  32. FilterRegTitle_0 = regexp.MustCompile("^_$")
  33. FilterRegTitle_1 = regexp.MustCompile("^_$")
  34. FilterRegTitle_2 = regexp.MustCompile("^_$")
  35. threadNum int
  36. SiteMap map[string]map[string]interface{}
  37. LowHeavy, TimingTask, IsFull, isUpdateSite bool
  38. timingSpanDay, timingPubScope int64
  39. gtid, lastid, sec_gtid, sec_lteid, lteid string
  40. updatelock, datalock, numlock, cronlock sync.Mutex
  41. jyfb_data map[string]string
  42. taskList []map[string]interface{}
  43. nspdata_1, nspdata_2 *nsqdata.Producer
  44. responselock sync.Mutex
  45. lastNodeResponse int64
  46. jn *jnats.Jnats
  47. )
  48. // 初始化加载
  49. func init() {
  50. flag.StringVar(&lastid, "id", "", "增量加载的lastid") //增量
  51. flag.StringVar(&gtid, "gtid", "", "历史增量的起始id") //历史
  52. flag.StringVar(&sec_gtid, "sec_gtid", "", "全量分段起始id")
  53. flag.StringVar(&sec_lteid, "sec_lteid", "", "全量分段结束id")
  54. flag.Parse()
  55. qu.ReadConfig(&Sysconfig)
  56. InitAllInfos() //加载所有信息...
  57. }
  58. func mainT() {
  59. if TimingTask {
  60. log.Println("正常历史部署...组装...")
  61. go historyFlowRepeat()
  62. } else {
  63. log.Println("正常增量部署...流式...")
  64. go initRepeatNats()
  65. }
  66. time.Sleep(99999 * time.Hour)
  67. }
  68. func main() {
  69. IsFull = true
  70. increaseRepeat(map[string]interface{}{
  71. "gtid": "12ec61170ae152a3c2310f02",
  72. "lteid": "92ec61170ae152a3c2310f02",
  73. })
  74. time.Sleep(99999 * time.Hour)
  75. }
  76. // 主函数
  77. func mainTestTest() {
  78. go checkMailJob()
  79. lastNodeResponse = time.Now().Unix()
  80. updport := Sysconfig["udpport"].(string)
  81. udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
  82. udpclient.Listen(processUdpMsg)
  83. log.Println("Udp服务监听", updport)
  84. if TimingTask {
  85. log.Println("正常历史部署...")
  86. go historyRepeat()
  87. } else {
  88. if !IsFull {
  89. log.Println("正常增量部署与监控机制...")
  90. go lastUdpJob()
  91. go getRepeatTask()
  92. }
  93. }
  94. time.Sleep(99999 * time.Hour)
  95. }
  96. // udp接收
  97. func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
  98. switch act {
  99. case mu.OP_TYPE_DATA:
  100. var mapInfo map[string]interface{}
  101. err := json.Unmarshal(data, &mapInfo)
  102. if err != nil {
  103. udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
  104. } else if mapInfo != nil {
  105. sid, eid := qu.ObjToString(mapInfo["gtid"]), qu.ObjToString(mapInfo["lteid"])
  106. stype := qu.ObjToString(mapInfo["stype"])
  107. if stype == "monitor" {
  108. log.Println("收到监测......")
  109. key := qu.ObjToString(mapInfo["key"])
  110. udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
  111. return
  112. }
  113. if sid == "" || eid == "" {
  114. log.Println("接收id段异常-err ", "sid=", sid, ",eid=", eid)
  115. } else {
  116. lastNodeResponse = time.Now().Unix()
  117. key := sid + "-" + eid + "-" + qu.ObjToString(mapInfo["stype"])
  118. udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
  119. //计算是否需要加载站点~每天加载一次
  120. if isUpdateSite {
  121. initSite()
  122. }
  123. //插入任务-判断任务-是否存在
  124. updatelock.Lock()
  125. taskList = append(taskList, mapInfo)
  126. log.Println("udp收到任务...数量:", len(taskList), "具体任务:", taskList)
  127. updatelock.Unlock()
  128. }
  129. }
  130. case mu.OP_NOOP: //下个节点回应
  131. log.Println("下节点回应:", string(data))
  132. udptaskmap.Delete(string(data))
  133. }
  134. }
  135. // 监听-获取-分发判重任务
  136. func getRepeatTask() {
  137. for {
  138. if len(taskList) > 0 {
  139. updatelock.Lock()
  140. len_list := len(taskList)
  141. if len_list > 1 {
  142. first_id := taskList[0]["gtid"]
  143. end_id := taskList[len_list-1]["lteid"]
  144. if first_id != "" && end_id != "" {
  145. log.Println("合并段落~正常~", first_id, "~", end_id)
  146. increaseRepeat(map[string]interface{}{
  147. "gtid": first_id,
  148. "lteid": end_id,
  149. })
  150. taskList = taskList[len_list:]
  151. log.Println("此段落结束当前任务池...", len(taskList), taskList)
  152. } else {
  153. log.Println("合并段落~错误~正常取段落~~~")
  154. mapInfo := taskList[0]
  155. if mapInfo != nil {
  156. increaseRepeat(mapInfo) //判重方法
  157. }
  158. taskList = taskList[1:]
  159. log.Println("此段落结束当前任务池...", len(taskList), taskList)
  160. }
  161. } else {
  162. mapInfo := taskList[0]
  163. if mapInfo != nil {
  164. increaseRepeat(mapInfo) //判重方法
  165. }
  166. taskList = taskList[1:]
  167. log.Println("此段落结束当前任务池...", len(taskList), taskList)
  168. }
  169. updatelock.Unlock()
  170. } else {
  171. time.Sleep(15 * time.Second)
  172. }
  173. }
  174. }
  175. func lastUdpJob() {
  176. for {
  177. responselock.Lock()
  178. if time.Now().Unix()-lastNodeResponse >= 1800 {
  179. lastNodeResponse = time.Now().Unix() //重置时间
  180. sendErrMailApi("判重增量~发现处理流程超时~给予告警", fmt.Sprintf("半小时左右~无新段落数据进入判重增量流程...相关人员检查..."))
  181. }
  182. responselock.Unlock()
  183. time.Sleep(300 * time.Second)
  184. }
  185. }