package main import ( "encoding/json" "esindex/config" "esindex/oss" "fmt" "github.com/robfig/cron" "go.uber.org/zap" "io/ioutil" util "jygit.jydev.jianyu360.cn/data_processing/common_utils" "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic" "jygit.jydev.jianyu360.cn/data_processing/common_utils/log" "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb" "jygit.jydev.jianyu360.cn/data_processing/common_utils/mysqldb" "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp" "net" "net/http" "os" "os/signal" "sync" "syscall" "time" ) var ( MgoB *mongodb.MongodbSim MgoBOld *mongodb.MongodbSim MgoP *mongodb.MongodbSim MgoQ *mongodb.MongodbSim MgoS *mongodb.MongodbSim Mysql *mysqldb.Mysql Es, Es2, Es3 *elastic.Elastic //Es3 迁移华为云新集群地址 //PreEs = make(map[string]*elastic.Elastic, 0) //预处理 索引客户端 UdpClient udp.UdpClient UdpTaskMap = &sync.Map{} JyUdpAddr *net.UDPAddr NeUdpAddr *net.UDPAddr EsBulkSize = 50 // es批量保存大小 updateBiddingPool = make(chan map[string]interface{}, 5000) //更新bingding数据 updateBiddingSp = make(chan bool, 5) saveEsPool = make(chan map[string]interface{}, 5000) //保存binding数据到es saveEsSp = make(chan bool, 5) saveDetailEsSp = make(chan bool, 5) saveDetailEsPool = make(chan map[string]interface{}, 5000) //保存binding detail,contenthtml 二个字段数据到es saveProjectEsPool = make(chan map[string]interface{}, 5000) //保存project数据到es saveProjectSp = make(chan bool, 5) saveProjectDetailEsPool = make(chan map[string]interface{}, 5000) //保存project detail 数据到es saveProjectDetailSp = make(chan bool, 5) saveEsAllPool = make(chan map[string]interface{}, 5000) //存储单机版es,爬虫采集判重使用 saveEsAllSp = make(chan bool, 5) saveBiddingAllPool = make(chan map[string]interface{}, 5000) //保存binding数据到es,stype=bidding_all_data saveBiddingAllBEsSp = make(chan bool, 5) saveErrBidPool = make(chan map[string]interface{}, 5000) saveBidSp = make(chan bool, 5) //detailLength = 50000 // es保存detail长度 fileLength = 200000 // es保存附件文本长度,大于20万 时做一个日志记录 //pscopeLength = 32766 // projectscope长度 specialNames = make(map[string]bool, 0) //存储181 凭安提供的特企,爬虫采购单位 responselock sync.Mutex BiddingLastNodeResponse int64 //上次节点接受数据时间 ProjectLastNodeResponse int64 //上次节点接受数据时间 ) func init() { config.InitConf("./common.toml") InitLog() InitMysql() InitMgo() InitEs() InitField() //初始化项目数据升索引字段 InitBitmap() //初始化项目名称副标题 bitmap InitRule() //初始化中国移动定制标签规则 InitEsBiddingField() //初始bidding数据升索引字段 oss.InitOss() // 初始化oss 存储 verifyESFields() //检测es 字段类型 JyUdpAddr = &net.UDPAddr{ IP: net.ParseIP(config.Conf.Udp.JyAddr), Port: util.IntAll(config.Conf.Udp.JyPort), } if config.Conf.Udp.NeAddr != "" { NeUdpAddr = &net.UDPAddr{ IP: net.ParseIP(config.Conf.Udp.NeAddr), Port: util.IntAll(config.Conf.Udp.NePort), } } BiddingLastNodeResponse = time.Now().Unix() ProjectLastNodeResponse = time.Now().Unix() log.Info("init success") } func main() { //正式环境才执行定时任务 if config.Conf.Env.Stype == 0 { go LastUdpJob() //监听半小时内有无数据 go checkMapJob() //udp 发送邮件 go task_index() //定时同步更新winner_enterprise、buyer_enterprise ES索引;这个功能很少变动,几乎不需要维护 } //go SaveDetailEsMethod() //保存 bidding_detail 索引 go BatchSaveBiddingDetailEsMethod() //保存 bidding_detail 索引 go UpdateBidding() //更新bidding表数据 go SaveEsMethod() //保存es bidding数据 //go SaveBiddingEsMethod() //保存es bidding数据 go SaveAllEsMethod() // 保存爬虫采集临时数据 go SaveProjectEs() //保存项目索引数据 go SaveProjectDetailEs() //保存项目索引,添加了详情字段的新索引 go SaveBiddingAllDataEs() //保存stype=bidding_all_data 数据 go SaveBidErr() //添加预处理函数 //if config.Conf.Env.OpenPre { // go SavePreEsMethod() // go dealPreProcess() //} UdpClient = udp.UdpClient{Local: config.Conf.Udp.LocPort, BufSize: 1024} UdpClient.Listen(processUdpMsg) log.Info("Udp服务监听", zap.String("port:", config.Conf.Udp.LocPort)) //监听异常退出信号;及时保存项目名称副标题数据 signalChan := make(chan os.Signal, 1) signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) <-signalChan saveDb() //ch := make(chan bool, 1) //<-ch } var pool = make(chan bool, 20) func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) { switch act { case udp.OP_TYPE_DATA: var mapInfo map[string]interface{} err := json.Unmarshal(data, &mapInfo) log.Info("processUdpMsg", zap.Any("mapInfo:", mapInfo)) if err != nil { UdpClient.WriteUdp([]byte("err:"+err.Error()), udp.OP_NOOP, ra) } else if mapInfo != nil { key, _ := mapInfo["key"].(string) if key == "" { key = "udpok" } go UdpClient.WriteUdp([]byte(key), udp.OP_NOOP, ra) //有udp 转发时 if config.Conf.Udp.NeAddr != "" { go SendUdpMsg(mapInfo, NeUdpAddr) } tasktype, _ := mapInfo["stype"].(string) switch tasktype { case "index-by-id": //单个索引,更新pici pool <- true go func() { defer func() { <-pool }() biddingTaskById(mapInfo) }() case "index_by_id": //单个索引,不更新pici pool <- true go func() { defer func() { <-pool }() biddingTaskById(mapInfo) }() case "bidding": BiddingLastNodeResponse = time.Now().Unix() pool <- true go func() { defer func() { <-pool }() biddingTask(mapInfo) }() case "bidding_detail": //bidding_detail 标讯详情索引;历史数据需要读取oss pool <- true go func() { defer func() { <-pool }() biddingDetailTask(mapInfo) }() case "biddingall": //补充存量数据 pool <- true go func() { defer func() { <-pool }() biddingAllTask(mapInfo) }() case "bidding_all_data": //根据biddingall配置文件,存量迁移数据 pool <- true go func() { defer func() { <-pool }() biddingAllDataTask() }() case "bidding_history": pool <- true go func() { defer func() { <-pool }() biddingTask(mapInfo) }() case "project": ProjectLastNodeResponse = time.Now().Unix() pool <- true go func() { defer func() { <-pool }() projectTask(data, mapInfo) // 配置项目详情索引 if config.Conf.DB.Es.IndexPD != "" { projectDetailTask(data, mapInfo) } }() case "project_detail": //添加了详情字段的项目索引 if config.Conf.DB.Es.IndexPD != "" { ProjectLastNodeResponse = time.Now().Unix() pool <- true go func() { defer func() { <-pool }() projectDetailTask(data, mapInfo) }() } else { log.Info("升级项目索引", zap.String("项目详情索引 ", "缺少项目详情索引配置,请检查配置文件")) } case "project_all_data": //存量 projectset 数据 pool <- true go func() { defer func() { <-pool }() projectAllData() }() case "biddingdata": //es 单机版,采集判重,对应索引 biddingalll; 172.17.4.184:19905 pool <- true go func() { defer func() { <-pool }() biddingDataTask(data, mapInfo) }() case "biddingdelbyextracttype": //根据bidding表extracttype=-1,删除es中重复数据 pool <- true go func() { defer func() { <-pool }() biddingDelByExtracttype(data, mapInfo) }() case "buyer_once": // 采购单位昨天增量数据 pool <- true go func() { defer func() { <-pool }() buyerOnce() }() case "buyer_all": // 采购单位全量数据 pool <- true go func() { defer func() { <-pool }() buyerAll() }() case "winner_once": // 中标单位昨天增量数据 pool <- true go func() { defer func() { <-pool }() winnerEsTaskOnce() }() case "winner_all": // 中标单位存量数据 pool <- true go func() { defer func() { <-pool }() winnerEsAll() }() case "attachment": // 补充附件采集,对应bidding为bidding_downloadfile_log pool <- true go func() { defer func() { <-pool }() //有单独配置其他操作 if len(config.Conf.Others) > 0 { if v, ok := config.Conf.Others[tasktype]; ok { attachmentBiddingTask(mapInfo, v) } } }() default: pool <- true go func() { defer func() { <-pool }() log.Info("err", zap.Any("mapInfo", mapInfo)) }() } } case udp.OP_NOOP: //下个节点回应 ok := string(data) if ok != "" { log.Info("udp re", zap.String("data:", ok)) UdpTaskMap.Delete(ok) } } } func task_index() { c := cron.New() _ = c.AddFunc("0 00 00 * * *", func() { task_winneres() }) //每天凌晨执行一次winner生索引 _ = c.AddFunc("0 00 01 * * *", func() { task_buyeres() }) //每天1点执行一次buyer生索引 c.Start() } func task_winneres() { log.Info("定时任务,winneres") winnerEsTaskOnce() } func task_buyeres() { log.Info("定时任务,buyeres") buyerOnce() } type UdpNode struct { data []byte addr *net.UDPAddr timestamp int64 retry int } // UpdateBidding 更新bidding表数据 func UpdateBidding() { //arru := make([][]map[string]interface{}, 10) //indexu := 0 for { select { case v := <-updateBiddingPool: MgoB.UpdateById(config.Conf.DB.MongoB.Coll, v["_id"], map[string]interface{}{"$set": v["set"]}) //arru[indexu] = v //indexu++ //if indexu == 10 { // updateBiddingSp <- true //go func(arru [][]map[string]interface{}) { // defer func() { // <-updateBiddingSp //}() //MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, arru...) //}(arru) //arru = make([][]map[string]interface{}, 10) //indexu = 0 //} //case <-time.After(1000 * time.Millisecond): // if indexu > 0 { // updateBiddingSp <- true // go func(arru [][]map[string]interface{}) { // defer func() { // <-updateBiddingSp // }() // MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, arru...) // }(arru[:indexu]) // arru = make([][]map[string]interface{}, 200) // indexu = 0 // } } } } // SaveBidErr 记录错误信息,暂时记录 附件过长的 func SaveBidErr() { arru := make([]map[string]interface{}, 200) indexu := 0 for { select { case v := <-saveErrBidPool: arru[indexu] = v indexu++ if indexu == 200 { saveBidSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveBidSp }() MgoB.SaveBulk("bidding_es_err_record", arru...) }(arru) arru = make([]map[string]interface{}, 200) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { saveBidSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveBidSp }() MgoB.SaveBulk("bidding_es_err_record", arru...) }(arru[:indexu]) arru = make([]map[string]interface{}, 200) indexu = 0 } } } } // SaveEsMethod 保存bidding数据到es;单携程保存 func SaveEsMethod() { for { select { case v := <-saveEsPool: id := v["id"] ids := v["_id"] //如果优选,存在上次的优选ID,需要删除上次的ID索引 oid := util.ObjToString(v["old_preferid"]) delete(v, "old_preferid") if oid != "" { err := Es.DeleteByID(config.Conf.DB.Es.IndexB, oid) if err != nil { log.Info("SaveEsMethod", zap.String("上次优选ID删除失败", oid)) } } stype := util.ObjToString(v["stype"]) delete(v, "stype") mgoUpdate := make(map[string]interface{}) if stype == "bidding" || stype == "bidding_history" || stype == "index-by-id" { //之前存在pici,就不在添加 if pici, ok := v["pici"]; ok { v["pici"] = pici } else { picc := time.Now().Unix() v["pici"] = picc mgoUpdate["pici"] = picc } } if len(mgoUpdate) > 0 { updateBiddingPool <- map[string]interface{}{ "_id": mongodb.StringTOBsonId(util.ObjToString(v["_id"])), "set": mgoUpdate, } if util.ObjToString(v["spidercode"]) == "a_jyxxfbpt_gg" { // 剑鱼信息发布数据 通过udp通知信息发布程序 go UdpMethod(mongodb.BsonIdToSId(v["_id"])) } } Es.Save(config.Conf.DB.Es.IndexB, v) // 华为云集群1 if config.Conf.DB.Es.Addr2 != "" { v["id"] = id v["_id"] = ids delete(v, "old_preferid") if oid != "" { err := Es2.DeleteByID(config.Conf.DB.Es.IndexB, oid) if err != nil { log.Info("SaveEsMethod", zap.String("上次优选ID删除失败", oid)) } } Es2.Save(config.Conf.DB.Es.Indexb2, v) } // 华为云新集群2,迁移原来阿里云数据 if config.Conf.DB.Es.Addr3 != "" { v["id"] = id v["_id"] = ids delete(v, "old_preferid") if oid != "" { err := Es3.DeleteByID(config.Conf.DB.Es.IndexB, oid) if err != nil { log.Info("SaveEsMethod", zap.String("上次优选ID删除失败", oid)) } } Es3.Save(config.Conf.DB.Es.Indexb3, v) } } } } // SaveDetailEsMethod 保存bidding detail、contenghtml 字段的详情索引 func SaveDetailEsMethod() { if config.Conf.DB.Es.BiddingDetail == "" { config.Conf.DB.Es.BiddingDetail = "bidding_detail" log.Info("SaveDetailEsMethod", zap.String("config.Conf.DB.Es.BiddingDetail", "配置文件中,biddingdetail 没有配置,默认初始化为 bidding_detail")) } for { select { case v := <-saveDetailEsPool: Es.Save(config.Conf.DB.Es.BiddingDetail, v) } } } // BatchSaveBiddingDetailEsMethod 批量保存bidding_detail func BatchSaveBiddingDetailEsMethod() { arru := make([]map[string]interface{}, EsBulkSize) indexu := 0 for { select { case v := <-saveDetailEsPool: arru[indexu] = v indexu++ if indexu == EsBulkSize { saveDetailEsSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveDetailEsSp }() if config.Conf.DB.Es.Addr != "" { Es.BulkSave(config.Conf.DB.Es.BiddingDetail, arru) } }(arru) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { saveDetailEsSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveDetailEsSp }() if config.Conf.DB.Es.Addr != "" { Es.BulkSave(config.Conf.DB.Es.BiddingDetail, arru) } }(arru[:indexu]) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } } } } // SaveBiddingEsMethod 批量保存bidding数据 func SaveBiddingEsMethod() { arru := make([]map[string]interface{}, EsBulkSize) indexu := 0 for { select { case v := <-saveEsPool: arru[indexu] = v indexu++ if indexu == EsBulkSize { saveEsSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveEsSp }() if config.Conf.DB.Es.Addr != "" { Es.BulkSave(config.Conf.DB.Es.IndexB, arru) } // 集群地址2 if config.Conf.DB.Es.Addr2 != "" { Es2.BulkSave(config.Conf.DB.Es.Indexb2, arru) } // 集群地址3 if config.Conf.DB.Es.Addr3 != "" { Es3.BulkSave(config.Conf.DB.Es.Indexb3, arru) } }(arru) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { saveEsSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveEsSp }() if config.Conf.DB.Es.Addr != "" { Es.BulkSave(config.Conf.DB.Es.IndexB, arru) } // 集群地址2 if config.Conf.DB.Es.Addr2 != "" { Es2.BulkSave(config.Conf.DB.Es.Indexb2, arru) } // 集群地址3 if config.Conf.DB.Es.Addr3 != "" { Es3.BulkSave(config.Conf.DB.Es.Indexb3, arru) } }(arru[:indexu]) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } } } } // SaveAllEsMethod 保存爬虫采集临时数据,保存在华为云上 func SaveAllEsMethod() { arru := make([]map[string]interface{}, EsBulkSize) indexu := 0 for { select { case v := <-saveEsAllPool: arru[indexu] = v indexu++ if indexu == EsBulkSize { saveEsAllSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveEsAllSp }() if config.Conf.DB.Es.Addr2 != "" { Es2.BulkSave("biddingall", arru) } }(arru) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { saveEsAllSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveEsAllSp }() if config.Conf.DB.Es.Addr2 != "" { Es2.BulkSave("biddingall", arru) } }(arru[:indexu]) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } } } } // SaveBiddingAllDataEs 保存bidding数据到Es,stype=bidding_all_data 数据 func SaveBiddingAllDataEs() { arru := make([]map[string]interface{}, EsBulkSize) indexu := 0 for { select { case v := <-saveBiddingAllPool: arru[indexu] = v indexu++ if indexu == EsBulkSize { saveBiddingAllBEsSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveBiddingAllBEsSp }() //1.阿里云集群 Es.BulkSave(config.Conf.DB.Es.IndexB, arru) }(arru) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { saveBiddingAllBEsSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveBiddingAllBEsSp }() //1.阿里云集群 Es.BulkSave(config.Conf.DB.Es.IndexB, arru) }(arru[:indexu]) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } } } } // SaveProjectEsMethod 保存项目索引数据 func SaveProjectEsMethod() { arru := make([]map[string]interface{}, EsBulkSize) indexu := 0 for { select { case v := <-saveProjectEsPool: arru[indexu] = v indexu++ if indexu == EsBulkSize { saveProjectSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveProjectSp }() Es.BulkSave(config.Conf.DB.Es.IndexP, arru) // 华为云新集群,存储标讯、项目、凭安数据 if config.Conf.DB.Es.Addr3 != "" { Es3.BulkSave(config.Conf.DB.Es.IndexP, arru) } }(arru) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } case <-time.After(1000 * time.Millisecond): if indexu > 0 { saveProjectSp <- true go func(arru []map[string]interface{}) { defer func() { <-saveProjectSp }() Es.BulkSave(config.Conf.DB.Es.IndexP, arru) // 华为云新集群,存储标讯、项目、凭安数据 if config.Conf.DB.Es.Addr3 != "" { Es3.BulkSave(config.Conf.DB.Es.IndexP, arru) } }(arru[:indexu]) arru = make([]map[string]interface{}, EsBulkSize) indexu = 0 } } } } // SaveProjectEs 保存项目索引数据,但携程保存 func SaveProjectEs() { for { select { case v := <-saveProjectEsPool: id := v["id"] ids := v["_id"] Es.Save(config.Conf.DB.Es.IndexP, v) // 华为云新集群,存储标讯、项目、凭安数据 if config.Conf.DB.Es.Addr3 != "" { v["id"] = id v["_id"] = ids Es3.Save(config.Conf.DB.Es.IndexP, v) } } } } // SaveProjectDetailEs 保存项目索引,支持详情字段 func SaveProjectDetailEs() { for { select { case v := <-saveProjectDetailEsPool: id := v["id"] ids := v["_id"] Es.Save(config.Conf.DB.Es.IndexPD, v) // 华为云新集群,存储标讯、项目、凭安数据 if config.Conf.DB.Es.Addr3 != "" { v["id"] = id v["_id"] = ids Es3.Save(config.Conf.DB.Es.IndexPD, v) } } } } func checkMapJob() { if config.Conf.Mail.Send { log.Info("checkMapJob", zap.String("to:", config.Conf.Mail.To)) for { UdpTaskMap.Range(func(k, v interface{}) bool { now := time.Now().Unix() node, _ := v.(*UdpNode) if now-node.timestamp > 120 { node.retry++ if node.retry > 5 { UdpTaskMap.Delete(k) res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", config.Conf.Mail.Api, config.Conf.Mail.To, "field-sync-send-fail", k.(string))) if err == nil { defer res.Body.Close() read, err := ioutil.ReadAll(res.Body) log.Info("send mail ...", zap.String("r:", string(read)), zap.Any("err:", err)) } } else { log.Info("udp重发", zap.Any("k:", k)) UdpClient.WriteUdp(node.data, udp.OP_TYPE_DATA, node.addr) } } else if now-node.timestamp > 10 { log.Info("udp任务超时中..", zap.Any("k:", k)) } return true }) time.Sleep(60 * time.Second) } } } func task() { sess := MgoB.GetMgoConn() defer MgoB.DestoryMongoConn(sess) ch := make(chan bool, 10) wg := &sync.WaitGroup{} query := sess.DB("qfw").C("result_replace_repair_log").Find(nil).Iter() count := 0 for tmp := make(map[string]interface{}); query.Next(tmp); count++ { if count%1000 == 0 { util.Debug("current ---", count) } ch <- true wg.Add(1) go func(tmp map[string]interface{}) { defer func() { <-ch wg.Done() }() if id := util.ObjToString(tmp["replace_id"]); mongodb.IsObjectIdHex(id) { biddingTaskById(map[string]interface{}{"infoid": id, "stype": "bidding"}) } }(tmp) tmp = make(map[string]interface{}) } wg.Wait() util.Debug("over ---", count) } // LastUdpJob 处理UDP 没有接受数据 func LastUdpJob() { for { responselock.Lock() if time.Now().Unix()-BiddingLastNodeResponse >= 1800 { BiddingLastNodeResponse = time.Now().Unix() //重置时间 sendErrMailApi("索引程序异常", fmt.Sprintf("半小时左右 无bidding据进入 ...相关人员检查...")) } if time.Now().Unix()-ProjectLastNodeResponse >= 1800 { ProjectLastNodeResponse = time.Now().Unix() //重置时间 sendErrMailApi("索引程序异常", fmt.Sprintf("半小时左右 无project数据进入 ...相关人员检查...")) } responselock.Unlock() time.Sleep(300 * time.Second) } } // sendErrMailApi 发送邮件 func sendErrMailApi(title, body string) { var tomail, api string if config.Conf.Mail.Send { tomail = config.Conf.Mail.To api = config.Conf.Mail.Api } log.Info("sendErrMailApi", zap.Any(tomail, api)) res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", api, tomail, title, body)) if err == nil { defer res.Body.Close() read, err := ioutil.ReadAll(res.Body) if err != nil { log.Info("邮件发送成功", zap.String("read", string(read))) } } else { log.Info("sendErrMailApi", zap.String("邮件发送失败", err.Error())) } }