123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528 |
- package main
import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"net/http"
	"net/url"
	"sync"
	"time"

	"esindex/config"
	"esindex/oss"

	"github.com/robfig/cron"
	"go.uber.org/zap"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mysqldb"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
)
- var (
- MgoB *mongodb.MongodbSim
- MgoP *mongodb.MongodbSim
- MgoQ *mongodb.MongodbSim
- MgoS *mongodb.MongodbSim
- Mysql *mysqldb.Mysql
- Es, Es1 *elastic.Elastic
- UdpClient udp.UdpClient
- UdpTaskMap = &sync.Map{}
- JyUdpAddr *net.UDPAddr
- EsBulkSize = 100 // es批量保存大小
- updateBiddingPool = make(chan []map[string]interface{}, 5000) //更新bingding数据
- updateBiddingSp = make(chan bool, 5)
- saveEsPool = make(chan map[string]interface{}, 5000) //保存binding数据到es
- saveEsSp = make(chan bool, 5)
- saveProjectEsPool = make(chan map[string]interface{}, 5000) //保存project数据到es
- saveProjectSp = make(chan bool, 5)
- saveEsAllPool = make(chan map[string]interface{}, 5000) //存储单机版es,爬虫采集判重使用
- saveEsAllSp = make(chan bool, 5)
- saveErrBidPool = make(chan map[string]interface{}, 5000)
- saveBidSp = make(chan bool, 5)
- //detailLength = 50000 // es保存detail长度
- fileLength = 200000 // es保存附件文本长度,大于20万 时做一个日志记录
- //pscopeLength = 32766 // projectscope长度
- specialNames = make(map[string]bool, 0) //存储181 凭安提供的特企,爬虫采购单位
- responselock sync.Mutex
- BiddingLastNodeResponse int64 //上次节点接受数据时间
- ProjectLastNodeResponse int64 //上次节点接受数据时间
- )
- func init() {
- config.Init("./common.toml")
- InitLog()
- InitMgo()
- InitEs()
- InitField()
- InitEsBiddingField()
- oss.InitOss()
- verifyESFields() //检测es 字段类型
- JyUdpAddr = &net.UDPAddr{
- IP: net.ParseIP(config.Conf.Udp.JyAddr),
- Port: util.IntAll(config.Conf.Udp.JyPort),
- }
- BiddingLastNodeResponse = time.Now().Unix()
- ProjectLastNodeResponse = time.Now().Unix()
- log.Info("init success")
- }
- func main() {
- go checkMapJob() //udp 发送邮件
- go task_index() //定时同步更新winner_enterprise、buyer_enterprise ES索引;这个功能很少变动,几乎不需要维护
- go UpdateBidding() //更新bidding表数据
- go SaveEsMethod()
- go SaveAllEsMethod()
- go SaveProjectEs()
- go SaveBidErr()
- go LastUdpJob() //监听半小时内有无数据
- UdpClient = udp.UdpClient{Local: config.Conf.Udp.LocPort, BufSize: 1024}
- UdpClient.Listen(processUdpMsg)
- log.Info("Udp服务监听", zap.String("port:", config.Conf.Udp.LocPort))
- ch := make(chan bool, 1)
- <-ch
- }
- var pool = make(chan bool, 20)
- func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
- switch act {
- case udp.OP_TYPE_DATA:
- var mapInfo map[string]interface{}
- err := json.Unmarshal(data, &mapInfo)
- log.Info("processUdpMsg", zap.Any("mapInfo:", mapInfo))
- if err != nil {
- UdpClient.WriteUdp([]byte("err:"+err.Error()), udp.OP_NOOP, ra)
- } else if mapInfo != nil {
- key, _ := mapInfo["key"].(string)
- if key == "" {
- key = "udpok"
- }
- go UdpClient.WriteUdp([]byte(key), udp.OP_NOOP, ra)
- tasktype, _ := mapInfo["stype"].(string)
- switch tasktype {
- case "index-by-id": //单个索引
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingTaskById(mapInfo)
- }()
- case "bidding":
- BiddingLastNodeResponse = time.Now().Unix()
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingTask(mapInfo)
- }()
- case "biddingall": //补充存量数据
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingAllTask(mapInfo)
- }()
- case "bidding_all_data": //根据biddingall配置文件,存量迁移数据
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingAllDataTask()
- }()
- case "bidding_history":
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingTask(mapInfo)
- }()
- case "project":
- ProjectLastNodeResponse = time.Now().Unix()
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- projectTask(data, mapInfo)
- }()
- case "project_all_data": //存量 projectset 数据
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- projectAllData()
- }()
- case "biddingdata": //es 单机版,采集判重
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingDataTask(data, mapInfo)
- }()
- case "biddingdelbyextracttype": //根据bidding表extracttype=-1,删除es中重复数据
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- biddingDelByExtracttype(data, mapInfo)
- }()
- case "buyer_once": // 采购单位昨天增量数据
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- buyerOnce()
- }()
- default:
- pool <- true
- go func() {
- defer func() {
- <-pool
- }()
- log.Info("err", zap.Any("mapInfo", mapInfo))
- }()
- }
- }
- case udp.OP_NOOP: //下个节点回应
- ok := string(data)
- if ok != "" {
- log.Info("udp re", zap.String("data:", ok))
- UdpTaskMap.Delete(ok)
- }
- }
- }
- func task_index() {
- c := cron.New()
- _ = c.AddFunc("0 0 0 * * ?", func() { task_winneres() }) //每天凌晨执行一次winner生索引
- _ = c.AddFunc("0 0 1 * * ?", func() { task_buyeres() }) //每天1点执行一次buyer生索引
- c.Start()
- }
- func task_winneres() {
- log.Info("定时任务,winneres")
- winnerEsTaskOnce()
- }
- func task_buyeres() {
- log.Info("定时任务,buyeres")
- buyerOnce()
- }
// UdpNode is a UDP message kept in UdpTaskMap until the receiver
// acknowledges it; checkMapJob uses it to re-send or give up.
type UdpNode struct {
	// data is the payload that is re-sent on retry.
	data []byte
	// addr is the destination the payload is re-sent to.
	addr *net.UDPAddr
	// timestamp is compared against the current Unix time (seconds) to
	// detect a timeout.
	timestamp int64
	// retry counts timed-out checks; the node is dropped once it exceeds 5.
	retry int
}
- //UpdateBidding 更新bidding表数据
- func UpdateBidding() {
- arru := make([][]map[string]interface{}, 200)
- indexu := 0
- for {
- select {
- case v := <-updateBiddingPool:
- arru[indexu] = v
- indexu++
- if indexu == 200 {
- updateBiddingSp <- true
- go func(arru [][]map[string]interface{}) {
- defer func() {
- <-updateBiddingSp
- }()
- MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, arru...)
- }(arru)
- arru = make([][]map[string]interface{}, 200)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- updateBiddingSp <- true
- go func(arru [][]map[string]interface{}) {
- defer func() {
- <-updateBiddingSp
- }()
- MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, arru...)
- }(arru[:indexu])
- arru = make([][]map[string]interface{}, 200)
- indexu = 0
- }
- }
- }
- }
- //SaveBidErr 记录错误信息,暂时记录 附件过长的
- func SaveBidErr() {
- arru := make([]map[string]interface{}, 200)
- indexu := 0
- for {
- select {
- case v := <-saveErrBidPool:
- arru[indexu] = v
- indexu++
- if indexu == 200 {
- saveBidSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveBidSp
- }()
- MgoB.SaveBulk("bidding_es_err_record", arru...)
- }(arru)
- arru = make([]map[string]interface{}, 200)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- saveBidSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveBidSp
- }()
- MgoB.SaveBulk("bidding_es_err_record", arru...)
- }(arru[:indexu])
- arru = make([]map[string]interface{}, 200)
- indexu = 0
- }
- }
- }
- }
- //SaveEsMethod 保存到es
- func SaveEsMethod() {
- arru := make([]map[string]interface{}, EsBulkSize)
- indexu := 0
- for {
- select {
- case v := <-saveEsPool:
- arru[indexu] = v
- indexu++
- if indexu == EsBulkSize {
- saveEsSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveEsSp
- }()
- Es.BulkSave(config.Conf.DB.Es.IndexB, arru)
- if config.Conf.DB.Es.IndexTmp != "" {
- Es.BulkSave(config.Conf.DB.Es.IndexTmp, arru)
- }
- }(arru)
- arru = make([]map[string]interface{}, EsBulkSize)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- saveEsSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveEsSp
- }()
- Es.BulkSave(config.Conf.DB.Es.IndexB, arru)
- if config.Conf.DB.Es.IndexTmp != "" {
- Es.BulkSave(config.Conf.DB.Es.IndexTmp, arru)
- }
- }(arru[:indexu])
- arru = make([]map[string]interface{}, EsBulkSize)
- indexu = 0
- }
- }
- }
- }
- func SaveAllEsMethod() {
- arru := make([]map[string]interface{}, EsBulkSize)
- indexu := 0
- for {
- select {
- case v := <-saveEsAllPool:
- arru[indexu] = v
- indexu++
- if indexu == EsBulkSize {
- saveEsAllSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveEsAllSp
- }()
- Es1.BulkSave("biddingall", arru)
- }(arru)
- arru = make([]map[string]interface{}, EsBulkSize)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- saveEsAllSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveEsAllSp
- }()
- Es1.BulkSave("biddingall", arru)
- }(arru[:indexu])
- arru = make([]map[string]interface{}, EsBulkSize)
- indexu = 0
- }
- }
- }
- }
- func SaveProjectEs() {
- arru := make([]map[string]interface{}, EsBulkSize)
- indexu := 0
- for {
- select {
- case v := <-saveProjectEsPool:
- arru[indexu] = v
- indexu++
- if indexu == EsBulkSize {
- saveProjectSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveProjectSp
- }()
- Es.BulkSave(config.Conf.DB.Es.IndexP, arru)
- }(arru)
- arru = make([]map[string]interface{}, EsBulkSize)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- saveProjectSp <- true
- go func(arru []map[string]interface{}) {
- defer func() {
- <-saveProjectSp
- }()
- Es.BulkSave(config.Conf.DB.Es.IndexP, arru)
- }(arru[:indexu])
- arru = make([]map[string]interface{}, EsBulkSize)
- indexu = 0
- }
- }
- }
- }
- func checkMapJob() {
- if config.Conf.Mail.Send {
- log.Info("checkMapJob", zap.String("to:", config.Conf.Mail.To))
- for {
- UdpTaskMap.Range(func(k, v interface{}) bool {
- now := time.Now().Unix()
- node, _ := v.(*UdpNode)
- if now-node.timestamp > 120 {
- node.retry++
- if node.retry > 5 {
- UdpTaskMap.Delete(k)
- res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", config.Conf.Mail.Api, config.Conf.Mail.To, "field-sync-send-fail", k.(string)))
- if err == nil {
- defer res.Body.Close()
- read, err := ioutil.ReadAll(res.Body)
- log.Info("send mail ...", zap.String("r:", string(read)), zap.Any("err:", err))
- }
- } else {
- log.Info("udp重发", zap.Any("k:", k))
- UdpClient.WriteUdp(node.data, udp.OP_TYPE_DATA, node.addr)
- }
- } else if now-node.timestamp > 10 {
- log.Info("udp任务超时中..", zap.Any("k:", k))
- }
- return true
- })
- time.Sleep(60 * time.Second)
- }
- }
- }
- func task() {
- sess := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(sess)
- ch := make(chan bool, 10)
- wg := &sync.WaitGroup{}
- query := sess.DB("qfw").C("result_replace_repair_log").Find(nil).Iter()
- count := 0
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%1000 == 0 {
- util.Debug("current ---", count)
- }
- ch <- true
- wg.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- if id := util.ObjToString(tmp["replace_id"]); mongodb.IsObjectIdHex(id) {
- biddingTaskById(map[string]interface{}{"infoid": id, "stype": "bidding"})
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg.Wait()
- util.Debug("over ---", count)
- }
- //LastUdpJob 处理UDP 没有接受数据
- func LastUdpJob() {
- for {
- responselock.Lock()
- if time.Now().Unix()-BiddingLastNodeResponse >= 1800 {
- BiddingLastNodeResponse = time.Now().Unix() //重置时间
- sendErrMailApi("索引程序异常", fmt.Sprintf("半小时左右 无bidding据进入 ...相关人员检查..."))
- }
- if time.Now().Unix()-ProjectLastNodeResponse >= 1800 {
- ProjectLastNodeResponse = time.Now().Unix() //重置时间
- sendErrMailApi("索引程序异常", fmt.Sprintf("半小时左右 无project数据进入 ...相关人员检查..."))
- }
- responselock.Unlock()
- time.Sleep(300 * time.Second)
- }
- }
- //sendErrMailApi 发送邮件
- func sendErrMailApi(title, body string) {
- var tomail, api string
- if config.Conf.Mail.Send {
- tomail = config.Conf.Mail.To
- api = config.Conf.Mail.Api
- }
- log.Info("sendErrMailApi", zap.Any(tomail, api))
- res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", api, tomail, title, body))
- if err == nil {
- defer res.Body.Close()
- read, err := ioutil.ReadAll(res.Body)
- if err != nil {
- log.Info("邮件发送成功", zap.String("read", string(read)))
- }
- } else {
- log.Info("sendErrMailApi", zap.String("邮件发送失败", err.Error()))
- }
- }
|