main.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. package main
  2. /**
  3. 招标信息判重
  4. **/
  5. import (
  6. "encoding/json"
  7. "flag"
  8. "flow_repeat/nsqdata"
  9. "fmt"
  10. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  11. mu "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  12. "log"
  13. "net"
  14. "regexp"
  15. "sync"
  16. "time"
  17. )
  18. var (
  19. Sysconfig map[string]interface{} //配置文件
  20. data_mgo, task_mgo, spider_mgo *MongodbSim
  21. task_coll, task_bidding, spider_coll string
  22. extract, extract_back, extract_log string
  23. udpclient mu.UdpClient
  24. nextNode []map[string]interface{}
  25. dupdays = 7
  26. DM, FullDM *datamap
  27. Update *updateInfo
  28. AddGroupPool *addGroupInfo
  29. //正则筛选相关
  30. FilterRegTitle = regexp.MustCompile("^_$")
  31. FilterRegTitle_0 = regexp.MustCompile("^_$")
  32. FilterRegTitle_1 = regexp.MustCompile("^_$")
  33. FilterRegTitle_2 = regexp.MustCompile("^_$")
  34. threadNum int
  35. SiteMap map[string]map[string]interface{}
  36. LowHeavy, TimingTask, IsFull, isUpdateSite bool
  37. timingSpanDay, timingPubScope int64
  38. gtid, lastid, sec_gtid, sec_lteid, lteid string
  39. updatelock, datalock, numlock, cronlock sync.Mutex
  40. jyfb_data map[string]string
  41. taskList []map[string]interface{}
  42. nspdata_1, nspdata_2 *nsqdata.Producer
  43. responselock sync.Mutex
  44. lastNodeResponse int64
  45. )
  46. // 初始化加载
  47. func init() {
  48. flag.StringVar(&lastid, "id", "", "增量加载的lastid") //增量
  49. flag.StringVar(&gtid, "gtid", "", "历史增量的起始id") //历史
  50. flag.StringVar(&sec_gtid, "sec_gtid", "", "全量分段起始id")
  51. flag.StringVar(&sec_lteid, "sec_lteid", "", "全量分段结束id")
  52. flag.Parse()
  53. qu.ReadConfig(&Sysconfig)
  54. InitAllInfos() //加载所有信息...
  55. }
  56. func main() {
  57. if TimingTask {
  58. log.Println("正常历史部署...组装...")
  59. go historyRepeat()
  60. } else {
  61. log.Println("正常增量部署...流式...")
  62. //jn := jnats.NewJnats("")
  63. //
  64. ////先消费,带zip压缩,用于跨网传输节省流量
  65. //jn.SubZip("test", func(msg *nats.Msg) {
  66. // log.Println(string(msg.Data))
  67. // //回执消息
  68. // msg.Respond([]byte("receive msg:" + string(msg.Data)))
  69. //})
  70. }
  71. time.Sleep(99999 * time.Hour)
  72. }
  73. func mainTest() {
  74. increaseRepeat(map[string]interface{}{
  75. "gtid": "12ec61170ae152a3c2310f02",
  76. "lteid": "92ec61170ae152a3c2310f02",
  77. })
  78. time.Sleep(99999 * time.Hour)
  79. }
  80. // 主函数
  81. func mainTestTest() {
  82. go checkMailJob()
  83. lastNodeResponse = time.Now().Unix()
  84. updport := Sysconfig["udpport"].(string)
  85. udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
  86. udpclient.Listen(processUdpMsg)
  87. log.Println("Udp服务监听", updport)
  88. if TimingTask {
  89. log.Println("正常历史部署...")
  90. go historyRepeat()
  91. } else {
  92. if !IsFull {
  93. log.Println("正常增量部署与监控机制...")
  94. go lastUdpJob()
  95. go getRepeatTask()
  96. }
  97. }
  98. time.Sleep(99999 * time.Hour)
  99. }
  100. // udp接收
  101. func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
  102. switch act {
  103. case mu.OP_TYPE_DATA:
  104. var mapInfo map[string]interface{}
  105. err := json.Unmarshal(data, &mapInfo)
  106. if err != nil {
  107. udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
  108. } else if mapInfo != nil {
  109. sid, eid := qu.ObjToString(mapInfo["gtid"]), qu.ObjToString(mapInfo["lteid"])
  110. stype := qu.ObjToString(mapInfo["stype"])
  111. if stype == "monitor" {
  112. log.Println("收到监测......")
  113. key := qu.ObjToString(mapInfo["key"])
  114. udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
  115. return
  116. }
  117. if sid == "" || eid == "" {
  118. log.Println("接收id段异常-err ", "sid=", sid, ",eid=", eid)
  119. } else {
  120. lastNodeResponse = time.Now().Unix()
  121. key := sid + "-" + eid + "-" + qu.ObjToString(mapInfo["stype"])
  122. udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
  123. //计算是否需要加载站点~每天加载一次
  124. if isUpdateSite {
  125. initSite()
  126. }
  127. //插入任务-判断任务-是否存在
  128. updatelock.Lock()
  129. taskList = append(taskList, mapInfo)
  130. log.Println("udp收到任务...数量:", len(taskList), "具体任务:", taskList)
  131. updatelock.Unlock()
  132. }
  133. }
  134. case mu.OP_NOOP: //下个节点回应
  135. log.Println("下节点回应:", string(data))
  136. udptaskmap.Delete(string(data))
  137. }
  138. }
  139. // 监听-获取-分发判重任务
  140. func getRepeatTask() {
  141. for {
  142. if len(taskList) > 0 {
  143. updatelock.Lock()
  144. len_list := len(taskList)
  145. if len_list > 1 {
  146. first_id := taskList[0]["gtid"]
  147. end_id := taskList[len_list-1]["lteid"]
  148. if first_id != "" && end_id != "" {
  149. log.Println("合并段落~正常~", first_id, "~", end_id)
  150. increaseRepeat(map[string]interface{}{
  151. "gtid": first_id,
  152. "lteid": end_id,
  153. })
  154. taskList = taskList[len_list:]
  155. log.Println("此段落结束当前任务池...", len(taskList), taskList)
  156. } else {
  157. log.Println("合并段落~错误~正常取段落~~~")
  158. mapInfo := taskList[0]
  159. if mapInfo != nil {
  160. increaseRepeat(mapInfo) //判重方法
  161. }
  162. taskList = taskList[1:]
  163. log.Println("此段落结束当前任务池...", len(taskList), taskList)
  164. }
  165. } else {
  166. mapInfo := taskList[0]
  167. if mapInfo != nil {
  168. increaseRepeat(mapInfo) //判重方法
  169. }
  170. taskList = taskList[1:]
  171. log.Println("此段落结束当前任务池...", len(taskList), taskList)
  172. }
  173. updatelock.Unlock()
  174. } else {
  175. time.Sleep(15 * time.Second)
  176. }
  177. }
  178. }
  179. func lastUdpJob() {
  180. for {
  181. responselock.Lock()
  182. if time.Now().Unix()-lastNodeResponse >= 1800 {
  183. lastNodeResponse = time.Now().Unix() //重置时间
  184. sendErrMailApi("判重增量~发现处理流程超时~给予告警", fmt.Sprintf("半小时左右~无新段落数据进入判重增量流程...相关人员检查..."))
  185. }
  186. responselock.Unlock()
  187. time.Sleep(300 * time.Second)
  188. }
  189. }