|
@@ -15,6 +15,8 @@ import (
|
|
|
"time"
|
|
|
)
|
|
|
|
|
|
+var piciLock sync.Mutex // guards the compare-and-update of config.Conf.Serve.Pici performed in taskInfoA
|
|
|
+
|
|
|
func task() {
|
|
|
|
|
|
client := Es.GetEsConn()
|
|
@@ -23,10 +25,11 @@ func task() {
|
|
|
wg := &sync.WaitGroup{}
|
|
|
|
|
|
tf := []int64{1577808000, 1609430400, 1640966400, 1672502400, 1712851200}
|
|
|
- fsc := es.NewFetchSourceContext(true).Include("tag_topinformation", "tag_subinformation", "tag_set") // 查询字段
|
|
|
+ fsc := es.NewFetchSourceContext(true).Include("tag_topinformation", "tag_subinformation", "tag_set", "tag_topinformation_ai", "tag_subinformation_ai", "property_form") // 查询字段
|
|
|
for i, tm := range tf {
|
|
|
query := es.NewBoolQuery().
|
|
|
- Must(es.NewMatchQuery("tag_topinformation", "情报_物业"))
|
|
|
+ //Must(es.NewMatchQuery("tag_topinformation", "情报_物业"))
|
|
|
+ Must(es.NewExistsQuery("property_form"))
|
|
|
if i == 0 {
|
|
|
query.Must(es.NewRangeQuery("comeintime").Lte(tm))
|
|
|
} else if i == 1 {
|
|
@@ -63,6 +66,47 @@ func task() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+func taskAdd() {
|
|
|
+
|
|
|
+ client := Es.GetEsConn()
|
|
|
+ defer Es.DestoryEsConn(client)
|
|
|
+
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+
|
|
|
+ fsc := es.NewFetchSourceContext(true).Include("tag_topinformation", "tag_subinformation", "tag_set", "tag_topinformation_ai", "tag_subinformation_ai", "property_form", "pici") // 查询字段
|
|
|
+ query := es.NewBoolQuery().
|
|
|
+ Should(es.NewTermQuery("tag_topinformation", "情报_物业")).
|
|
|
+ Should(es.NewTermQuery("tag_topinformation_ai", "情报_物业")).MinimumShouldMatch("1")
|
|
|
+ //Must(es.NewMatchQuery("tag_topinformation", "情报_物业"))
|
|
|
+ //Must(es.NewExistsQuery("property_form"))
|
|
|
+ if config.Conf.Serve.Pici > 0 {
|
|
|
+ query.Must(es.NewRangeQuery("pici").Gte(config.Conf.Serve.Pici))
|
|
|
+ }
|
|
|
+
|
|
|
+ util.Debug(fmt.Sprintf("数据量为:%d", Es.Count("bidding", query)))
|
|
|
+ countDocs := 0
|
|
|
+ res, err := client.Scroll().Index("bidding").Query(query).FetchSourceContext(fsc).Scroll("5m").Size(2000).Do(context.Background()) //查询一条获取游标
|
|
|
+ if err == nil {
|
|
|
+ taskInfoA(res, wg, &countDocs)
|
|
|
+ scrollId := res.ScrollId
|
|
|
+ for {
|
|
|
+ searchResult, err := client.Scroll("1m").Index("bidding").ScrollId(scrollId).Size(2000).Do(context.TODO()) //查询
|
|
|
+ if err != nil {
|
|
|
+ util.Debug("Es Search Data Error:", err.Error())
|
|
|
+ break
|
|
|
+ }
|
|
|
+ taskInfoA(searchResult, wg, &countDocs)
|
|
|
+ scrollId = searchResult.ScrollId
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ util.Debug(fmt.Sprintf("处理结束,处理文档%d条", countDocs))
|
|
|
+ util.Debug(config.Conf.Serve.Pici)
|
|
|
+ _, _ = client.ClearScroll().ScrollId(scrollId).Do(context.Background()) //清理游标
|
|
|
+ } else {
|
|
|
+ util.Debug(err)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
func taskInfoA(searchResult *es.SearchResult, wg *sync.WaitGroup, countDocs *int) {
|
|
|
chd := make(chan bool, 5)
|
|
|
for _, hit := range searchResult.Hits.Hits {
|
|
@@ -76,20 +120,46 @@ func taskInfoA(searchResult *es.SearchResult, wg *sync.WaitGroup, countDocs *int
|
|
|
}()
|
|
|
tmp := make(map[string]interface{})
|
|
|
if json.Unmarshal(tmpHit.Source, &tmp) == nil {
|
|
|
+ piciLock.Lock()
|
|
|
+ if util.Int64All(tmp["pici"]) > config.Conf.Serve.Pici {
|
|
|
+ config.Conf.Serve.Pici = util.Int64All(tmp["pici"])
|
|
|
+ }
|
|
|
+ piciLock.Unlock()
|
|
|
update := make(map[string]interface{})
|
|
|
- update["tag_topinformation"] = tmp["tag_topinformation"]
|
|
|
- update["tag_subinformation"] = tmp["tag_subinformation"]
|
|
|
+ if tmp["tag_topinformation"] != nil {
|
|
|
+ update["tag_topinformation"] = tmp["tag_topinformation"]
|
|
|
+ }
|
|
|
+ if tmp["tag_subinformation"] != nil {
|
|
|
+ update["tag_subinformation"] = tmp["tag_subinformation"]
|
|
|
+ }
|
|
|
+ if tmp["tag_topinformation_ai"] != nil {
|
|
|
+ update["tag_topinformation"] = tmp["tag_topinformation_ai"]
|
|
|
+ }
|
|
|
+ if tmp["tag_subinformation_ai"] != nil {
|
|
|
+ update["tag_subinformation_ai"] = tmp["tag_subinformation_ai"]
|
|
|
+ }
|
|
|
if tmp["tag_set"] != nil {
|
|
|
update["tag_set"] = tmp["tag_set"]
|
|
|
}
|
|
|
+ if tmp["property_form"] != nil {
|
|
|
+ update["property_form"] = tmp["property_form"]
|
|
|
+ }
|
|
|
//updatePool <- []map[string]interface{}{
|
|
|
// {"ids": tmpHit.Id},
|
|
|
- // {"$set": update, "$push": bson.M{"tag_information_ids": tmpHit.Id}},
|
|
|
+ // {"$set": update},
|
|
|
+ //}
|
|
|
+ //if tmp["tag_topinformation"] == nil {
|
|
|
+ // updatePool <- []map[string]interface{}{
|
|
|
+ // {"ids": tmpHit.Id},
|
|
|
+ // {"$set": update, "$addToSet": bson.M{"tag_information_ids": tmpHit.Id},
|
|
|
+ // "$unset": bson.M{"tag_topinformation": 1, "tag_subinformation": 1}},
|
|
|
+ // }
|
|
|
+ //} else {
|
|
|
+ // updatePool <- []map[string]interface{}{
|
|
|
+ // {"ids": tmpHit.Id},
|
|
|
+ // {"$set": update, "$addToSet": bson.M{"tag_information_ids": tmpHit.Id}},
|
|
|
+ // }
|
|
|
//}
|
|
|
- updatePool <- []map[string]interface{}{
|
|
|
- {"ids": tmpHit.Id},
|
|
|
- {"$set": update},
|
|
|
- }
|
|
|
|
|
|
}
|
|
|
}(hit)
|
|
@@ -108,7 +178,7 @@ func taskT() {
|
|
|
ch := make(chan bool, 5)
|
|
|
wg := &sync.WaitGroup{}
|
|
|
|
|
|
- q := bson.M{"firsttime": bson.M{"$gte": 1577808000}}
|
|
|
+ q := bson.M{"firsttime": bson.M{"$gte": 1711360551}}
|
|
|
query := sess.DB(config.Conf.DB.Mongo.Dbname).C(config.Conf.DB.Mongo.Coll).Find(q).Iter()
|
|
|
count := 0
|
|
|
for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
|
|
@@ -307,10 +377,10 @@ func getStr(b string) string {
|
|
|
if b == "" {
|
|
|
return ""
|
|
|
}
|
|
|
- a1 := "(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然\n资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)"
|
|
|
+ a1 := "(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)"
|
|
|
a2 := "(卫健委|医疗)"
|
|
|
a3 := "(教育|学校)"
|
|
|
- a4 := "(人行l金融业)"
|
|
|
+ a4 := "(人行|金融业)"
|
|
|
a5 := "(信息技术|电信行业|农林牧渔|建筑业|传媒|制造业|住宿餐饮|采矿业|能源化工|批发零售)"
|
|
|
if strings.Contains(a1, b) {
|
|
|
return "政府机构"
|