|
@@ -5,10 +5,7 @@ import (
|
|
|
"app.yhyue.com/data_processing/common_utils/log"
|
|
|
"app.yhyue.com/data_processing/common_utils/mongodb"
|
|
|
"app.yhyue.com/data_processing/common_utils/mysqldb"
|
|
|
- "context"
|
|
|
- "encoding/json"
|
|
|
"fmt"
|
|
|
- es "github.com/olivere/elastic/v7"
|
|
|
"github.com/robfig/cron/v3"
|
|
|
"go.uber.org/zap"
|
|
|
"sync"
|
|
@@ -16,9 +13,12 @@ import (
|
|
|
)
|
|
|
|
|
|
var (
|
|
|
- Mgo *mongodb.MongodbSim
|
|
|
- Mysql *mysqldb.Mysql
|
|
|
- EsClient *es.Client
|
|
|
+ Mgo *mongodb.MongodbSim
|
|
|
+ MgoP *mongodb.MongodbSim
|
|
|
+ MgoSpider *mongodb.MongodbSim
|
|
|
+ Mysql *mysqldb.Mysql
|
|
|
+ //EsClient *es.Client
|
|
|
+ SpiderSites = make([]string, 0) //排除,竞品站点
|
|
|
)
|
|
|
|
|
|
func main() {
|
|
@@ -47,10 +47,14 @@ func specData() {
|
|
|
func dealBidding() {
|
|
|
sess := Mgo.GetMgoConn()
|
|
|
defer Mgo.DestoryMongoConn(sess)
|
|
|
+ // 指定对应的时间格式
|
|
|
+ //layout := "2006-01-02 15:04:05"
|
|
|
// 获取当前时间
|
|
|
now := time.Now()
|
|
|
- targetTime := time.Date(now.Year(), now.Month(), now.Day()+GF.Cron.Day, 0, 0, 0, 0, now.Location())
|
|
|
- todayTime := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
|
|
|
+
|
|
|
+ //targetTime := time.Date(now.Year(), now.Month(), now.Day()+GF.Cron.Start, 04, 20, 0, 0, now.Location())
|
|
|
+ targetTime := time.Date(now.Year(), now.Month(), now.Day()+GF.Cron.Start, 0, 0, 0, 0, now.Location())
|
|
|
+ todayTime := time.Date(now.Year(), now.Month(), now.Day()+GF.Cron.End, 0, 0, 0, 0, now.Location())
|
|
|
|
|
|
q := map[string]interface{}{
|
|
|
"comeintime": map[string]interface{}{
|
|
@@ -58,6 +62,7 @@ func dealBidding() {
|
|
|
"$lte": todayTime.Unix(),
|
|
|
},
|
|
|
}
|
|
|
+ log.Info("dealBidding", zap.Any("q", q))
|
|
|
query := sess.DB(GF.Mongob.DB).C(GF.Mongob.Coll).Find(q).Select(map[string]interface{}{
|
|
|
"contenthtml": 0}).Iter()
|
|
|
count := 0
|
|
@@ -89,6 +94,9 @@ func dealBidding() {
|
|
|
|
|
|
//saveBidding 保存bidding数据
|
|
|
func saveBidding(tmp map[string]interface{}) {
|
|
|
+ sess := MgoP.GetMgoConn()
|
|
|
+ defer MgoP.DestoryMongoConn(sess)
|
|
|
+
|
|
|
//针对产权数据,暂时不入es 索引库
|
|
|
if utils.IntAll(tmp["infoformat"]) == 3 {
|
|
|
return
|
|
@@ -103,25 +111,25 @@ func saveBidding(tmp map[string]interface{}) {
|
|
|
insert["bidding_id"] = id
|
|
|
insert["spidercode"] = tmp["spidercode"]
|
|
|
insert["site"] = tmp["site"]
|
|
|
- if comeintime, ok := tmp["comeintime"].(int64); ok {
|
|
|
- timeString := time.Unix(comeintime, 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["comeintime"] = timeString
|
|
|
+ site := tmp["site"].(string)
|
|
|
+ //竞品网站数据,直接过滤
|
|
|
+ if IsInStringArray(site, SpiderSites) {
|
|
|
+ return
|
|
|
}
|
|
|
|
|
|
- if publishtime, ok := tmp["publishtime"].(int64); ok {
|
|
|
- timeString := time.Unix(publishtime, 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["publishtime"] = timeString
|
|
|
- } else if publishtime, ok := tmp["publishtime"].(float64); ok {
|
|
|
- timeString := time.Unix(int64(publishtime), 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["publishtime"] = timeString
|
|
|
+ if tmp["comeintime"] != nil {
|
|
|
+ time, _ := convertToTime(tmp["comeintime"])
|
|
|
+ insert["comeintime"] = time.Format("2006-01-02 15:04:05")
|
|
|
}
|
|
|
|
|
|
- if pici, ok := tmp["pici"].(int64); ok {
|
|
|
- timeString := time.Unix(pici, 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["es_bidding_pici"] = timeString
|
|
|
- } else if pici, ok := tmp["pici"].(float64); ok {
|
|
|
- timeString := time.Unix(int64(pici), 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["es_bidding_pici"] = timeString
|
|
|
+ if tmp["publishtime"] != nil {
|
|
|
+ time, _ := convertToTime(tmp["publishtime"])
|
|
|
+ insert["publishtime"] = time.Format("2006-01-02 15:04:05")
|
|
|
+ }
|
|
|
+
|
|
|
+ if tmp["pici"] != nil {
|
|
|
+ time, _ := convertToTime(tmp["pici"])
|
|
|
+ insert["es_bidding_pici"] = time.Format("2006-01-02 15:04:05")
|
|
|
}
|
|
|
|
|
|
if utils.IntAll(tmp["extracttype"]) == -1 || utils.IntAll(tmp["dataprocess"]) == 7 {
|
|
@@ -187,50 +195,72 @@ func saveBidding(tmp map[string]interface{}) {
|
|
|
insert["district_code"] = ""
|
|
|
}
|
|
|
|
|
|
- esquery := es.NewBoolQuery().Filter(es.NewTermQuery("ids", id))
|
|
|
- // 执行查询
|
|
|
- searchResult, err := EsClient.Search().
|
|
|
- Index(GF.EsP.Index).
|
|
|
- Query(esquery).
|
|
|
- Do(context.Background())
|
|
|
+ insert["project_id"] = ""
|
|
|
|
|
|
- if err != nil {
|
|
|
- log.Info("saveBidding", zap.Any("EsClient.Search.Error", err))
|
|
|
+ //查询MongoDB project 信息
|
|
|
+ wherep := map[string]interface{}{
|
|
|
+ "ids": id,
|
|
|
}
|
|
|
-
|
|
|
- if searchResult.Hits.TotalHits.Value > 0 {
|
|
|
- // 处理查询结果
|
|
|
- // 处理查询结果并转换为map
|
|
|
- for _, hit := range searchResult.Hits.Hits {
|
|
|
- //fmt.Printf("Found document with id %s\n", hit.Id)
|
|
|
- // 处理你的文档数据...
|
|
|
- result := make(map[string]interface{})
|
|
|
- err = json.Unmarshal(hit.Source, &result)
|
|
|
- if err != nil {
|
|
|
- log.Info("dealBidding", zap.Any("Unmarshal err", err))
|
|
|
- }
|
|
|
- if len(result) > 0 {
|
|
|
- insert["project_id"] = result["id"]
|
|
|
- if project_pici, ok := result["createtime"].(float64); ok {
|
|
|
- timeString := time.Unix(int64(project_pici), 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["project_pici"] = timeString
|
|
|
- }
|
|
|
- if project_pici, ok := result["pici"].(float64); ok {
|
|
|
- timeString := time.Unix(int64(project_pici), 0).Format("2006-01-02 15:04:05")
|
|
|
- insert["es_project_pici"] = timeString
|
|
|
- }
|
|
|
- }
|
|
|
+ count := 0
|
|
|
+ query := sess.DB(GF.Mongop.DB).C(GF.Mongop.Coll).Find(wherep).Limit(1).Iter()
|
|
|
+ for p := make(map[string]interface{}); query.Next(p); count++ {
|
|
|
+ projectId := mongodb.BsonIdToSId(p["_id"])
|
|
|
+ insert["project_id"] = projectId
|
|
|
+ if p["pici"] != nil {
|
|
|
+ time, _ := convertToTime(p["pici"])
|
|
|
+ insert["project_pici"] = time.Format("2006-01-02 15:04:05")
|
|
|
+ insert["es_project_pici"] = time.Format("2006-01-02 15:04:05")
|
|
|
}
|
|
|
- } else {
|
|
|
- insert["project_id"] = ""
|
|
|
}
|
|
|
|
|
|
+ //
|
|
|
+ //esquery := es.NewBoolQuery().Filter(es.NewTermQuery("ids", id))
|
|
|
+ //// 执行查询
|
|
|
+ //searchResult, err := EsClient.Search().
|
|
|
+ // Index(GF.EsP.Index).
|
|
|
+ // Query(esquery).
|
|
|
+ // Source(es.NewFetchSourceContext(true).Include("id", "pici")).
|
|
|
+ // Do(context.Background())
|
|
|
+ //
|
|
|
+ //if err != nil {
|
|
|
+ // log.Info("saveBidding", zap.Any("EsClient.Search.Error", err))
|
|
|
+ //}
|
|
|
+ //
|
|
|
+ //if searchResult.Hits.TotalHits.Value > 0 {
|
|
|
+ // // 处理查询结果
|
|
|
+ // // 处理查询结果并转换为map
|
|
|
+ // for _, hit := range searchResult.Hits.Hits {
|
|
|
+ // //fmt.Printf("Found document with id %s\n", hit.Id)
|
|
|
+ // // 处理你的文档数据...
|
|
|
+ // result := make(map[string]interface{})
|
|
|
+ // err = json.Unmarshal(hit.Source, &result)
|
|
|
+ // if err != nil {
|
|
|
+ // log.Info("dealBidding", zap.Any("Unmarshal err", err))
|
|
|
+ // }
|
|
|
+ // if len(result) > 0 {
|
|
|
+ // insert["project_id"] = result["id"]
|
|
|
+ //
|
|
|
+ // if project_pici, ok := result["pici"].(float64); ok {
|
|
|
+ // timeString := time.Unix(int64(project_pici), 0).Format("2006-01-02 15:04:05")
|
|
|
+ // insert["project_pici"] = timeString
|
|
|
+ // }
|
|
|
+ // if project_pici, ok := result["pici"].(float64); ok {
|
|
|
+ // timeString := time.Unix(int64(project_pici), 0).Format("2006-01-02 15:04:05")
|
|
|
+ // insert["es_project_pici"] = timeString
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //} else {
|
|
|
+ // insert["project_id"] = ""
|
|
|
+ //}
|
|
|
+
|
|
|
if len(insert) > 0 {
|
|
|
insertId := Mysql.Insert(GF.Mysql.Table, insert)
|
|
|
if insertId <= 0 {
|
|
|
log.Info("saveBidding", zap.Any("insertId", insertId), zap.Any("insert", insert))
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
}
|
|
|
|
|
|
func dealProject() {
|
|
@@ -298,47 +328,80 @@ func dealProject() {
|
|
|
}
|
|
|
|
|
|
func updateProject(tmp map[string]interface{}) {
|
|
|
- esquery := es.NewBoolQuery().Filter(es.NewTermQuery("ids", tmp["bidding_id"]))
|
|
|
- // 执行查询
|
|
|
- searchResult, err := EsClient.Search().
|
|
|
- Index(GF.EsP.Index).
|
|
|
- Query(esquery).
|
|
|
- Do(context.Background())
|
|
|
+ sess := MgoP.GetMgoConn()
|
|
|
+ defer MgoP.DestoryMongoConn(sess)
|
|
|
+
|
|
|
+ update := map[string]interface{}{}
|
|
|
+ id := mongodb.BsonIdToSId(tmp["bidding_id"])
|
|
|
+ //更新MySQL where
|
|
|
+ where := map[string]interface{}{
|
|
|
+ "id": tmp["id"],
|
|
|
+ }
|
|
|
|
|
|
- if err != nil {
|
|
|
- log.Info("updateProject", zap.Any("EsClient.Search.Error", err))
|
|
|
+ //查询MongoDB project 信息
|
|
|
+ wherep := map[string]interface{}{
|
|
|
+ "ids": id,
|
|
|
+ }
|
|
|
+ count := 0
|
|
|
+ query := sess.DB(GF.Mongop.DB).C(GF.Mongop.Coll).Find(wherep).Limit(1).Iter()
|
|
|
+ for p := make(map[string]interface{}); query.Next(p); count++ {
|
|
|
+ projectId := mongodb.BsonIdToSId(p["_id"])
|
|
|
+ update["project_id"] = projectId
|
|
|
+ if p["pici"] != nil {
|
|
|
+ time, _ := convertToTime(p["pici"])
|
|
|
+ update["project_pici"] = time.Format("2006-01-02 15:04:05")
|
|
|
+ update["es_project_pici"] = time.Format("2006-01-02 15:04:05")
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- if searchResult.Hits.TotalHits.Value > 0 {
|
|
|
- // 处理查询结果
|
|
|
- // 处理查询结果并转换为map
|
|
|
- for _, hit := range searchResult.Hits.Hits {
|
|
|
- //fmt.Printf("Found document with id %s\n", hit.Id)
|
|
|
- // 处理你的文档数据...
|
|
|
- result := make(map[string]interface{})
|
|
|
- err = json.Unmarshal(hit.Source, &result)
|
|
|
- if err != nil {
|
|
|
- log.Info("dealBidding", zap.Any("Unmarshal err", err))
|
|
|
- }
|
|
|
- if len(result) > 0 {
|
|
|
- update := map[string]interface{}{
|
|
|
- "project_id": result["id"],
|
|
|
- }
|
|
|
- log.Info("updateProject", zap.Any("bidding_id", tmp["bidding_id"]))
|
|
|
- if project_pici, ok := result["pici"].(float64); ok {
|
|
|
- timeString := time.Unix(int64(project_pici), 0).Format("2006-01-02 15:04:05")
|
|
|
- update["project_pici"] = timeString
|
|
|
- update["es_project_pici"] = timeString
|
|
|
- }
|
|
|
-
|
|
|
- where := map[string]interface{}{
|
|
|
- "id": tmp["id"],
|
|
|
- }
|
|
|
- res := Mysql.Update(GF.Mysql.Table, where, update)
|
|
|
- if !res {
|
|
|
- log.Info("updateProject", zap.Any("update", update), zap.Any("where", where))
|
|
|
- }
|
|
|
- }
|
|
|
+ if len(update) > 0 {
|
|
|
+ res := Mysql.Update(GF.Mysql.Table, where, update)
|
|
|
+ if !res {
|
|
|
+ log.Info("updateProject", zap.Any("update", update), zap.Any("where", where))
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ //esquery := es.NewBoolQuery().Filter(es.NewTermQuery("ids", tmp["bidding_id"]))
|
|
|
+ //// 执行查询
|
|
|
+ //searchResult, err := EsClient.Search().
|
|
|
+ // Index(GF.EsP.Index).
|
|
|
+ // Query(esquery).
|
|
|
+ // Do(context.Background())
|
|
|
+ //
|
|
|
+ //if err != nil {
|
|
|
+ // log.Info("updateProject", zap.Any("EsClient.Search.Error", err))
|
|
|
+ //}
|
|
|
+ //
|
|
|
+ //if searchResult.Hits.TotalHits.Value > 0 {
|
|
|
+ // // 处理查询结果
|
|
|
+ // // 处理查询结果并转换为map
|
|
|
+ // for _, hit := range searchResult.Hits.Hits {
|
|
|
+ // //fmt.Printf("Found document with id %s\n", hit.Id)
|
|
|
+ // // 处理你的文档数据...
|
|
|
+ // result := make(map[string]interface{})
|
|
|
+ // err = json.Unmarshal(hit.Source, &result)
|
|
|
+ // if err != nil {
|
|
|
+ // log.Info("dealBidding", zap.Any("Unmarshal err", err))
|
|
|
+ // }
|
|
|
+ // if len(result) > 0 {
|
|
|
+ // update := map[string]interface{}{
|
|
|
+ // "project_id": result["id"],
|
|
|
+ // }
|
|
|
+ // log.Info("updateProject", zap.Any("bidding_id", tmp["bidding_id"]))
|
|
|
+ // if project_pici, ok := result["pici"].(float64); ok {
|
|
|
+ // timeString := time.Unix(int64(project_pici), 0).Format("2006-01-02 15:04:05")
|
|
|
+ // update["project_pici"] = timeString
|
|
|
+ // update["es_project_pici"] = timeString
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // where := map[string]interface{}{
|
|
|
+ // "id": tmp["id"],
|
|
|
+ // }
|
|
|
+ // res := Mysql.Update(GF.Mysql.Table, where, update)
|
|
|
+ // if !res {
|
|
|
+ // log.Info("updateProject", zap.Any("update", update), zap.Any("where", where))
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //}
|
|
|
}
|