|
@@ -111,14 +111,12 @@ func getDataExportSql(scd *SieveCondition) string {
|
|
|
return fmt.Sprintf(query, doSearchSql)
|
|
|
}
|
|
|
multi_match := `{"multi_match": {"query": %s,"type": "phrase", "fields": [%s]}}`
|
|
|
- query := `{"query":{"bool":{"must":[%s],"must_not":[%s],"should":[%s],"minimum_should_match": %d}}}`
|
|
|
+ query := `{"query":{"bool":{"filter":[%s],"must_not":[%s],"should":[%s],"minimum_should_match": %d}}}`
|
|
|
//query := `{"query": {"function_score": {"query": {"bool": {"must": [%s],"must_not": [%s],"should": [%s],"minimum_should_match": %d}},"field_value_factor": {"field": "dataweight","modifier": "ln1p","missing": 0}}}}`
|
|
|
query_bool_should := `{"bool":{"should":[%s],"minimum_should_match": 1}}`
|
|
|
query_bool_should_and := `{"bool":{"should":[%s],"minimum_should_match": 1 %s}}`
|
|
|
query_price := `{"bool":{"must":[{"range":{"bidamount":{%s}}}]}},{"bool":{"must":[{"range":{"budget":{%s}}}],"must_not":[{"range":{"bidamount":{"gte":-1}}}]}}`
|
|
|
query_bool_must := `{"terms":{"%s":[%s]}}`
|
|
|
- query_bool_should_wildcard := `{"bool":{"should":[%s]}}`
|
|
|
- query_wildcard := `{"wildcard":{"%s":"*%s*"}}`
|
|
|
query_bool_must_and := `{"bool":{"must":[%s]%s}}`
|
|
|
query_exists := `{"constant_score":{"filter":{"exists":{"field":"%s"}}}}`
|
|
|
query_bool_must_term := `{"bool": {"must": [{ "term": {"isValidFile": %t }}]}}`
|
|
@@ -227,29 +225,13 @@ func getDataExportSql(scd *SieveCondition) string {
|
|
|
musts = append(musts, fmt.Sprintf(query_bool_must, "s_subscopeclass", `"`+strings.Join(scd.Industry, `","`)+`"`))
|
|
|
}
|
|
|
if len(scd.Buyer) > 0 {
|
|
|
- str := ""
|
|
|
- for i, v := range scd.Buyer {
|
|
|
- if i < len(scd.Buyer)-1 {
|
|
|
- str += fmt.Sprintf(query_wildcard, "buyer", v) + ","
|
|
|
- } else {
|
|
|
- str += fmt.Sprintf(query_wildcard, "buyer", v)
|
|
|
- }
|
|
|
- }
|
|
|
- musts = append(musts, fmt.Sprintf(query_bool_should_wildcard, str))
|
|
|
+ musts = append(musts, fmt.Sprintf(query_bool_must, "buyer", `"`+strings.Join(scd.Buyer, `","`)+`"`))
|
|
|
}
|
|
|
if len(scd.Buyerclass) > 0 {
|
|
|
musts = append(musts, fmt.Sprintf(query_bool_must, "buyerclass", `"`+strings.Join(scd.Buyerclass, `","`)+`"`))
|
|
|
}
|
|
|
if len(scd.Winner) > 0 {
|
|
|
- str := ""
|
|
|
- for i, v := range scd.Winner {
|
|
|
- if i < len(scd.Winner)-1 {
|
|
|
- str += fmt.Sprintf(query_wildcard, "s_winner", v) + ","
|
|
|
- } else {
|
|
|
- str += fmt.Sprintf(query_wildcard, "s_winner", v)
|
|
|
- }
|
|
|
- }
|
|
|
- musts = append(musts, fmt.Sprintf(query_bool_should_wildcard, str))
|
|
|
+ musts = append(musts, fmt.Sprintf(query_bool_must, "s_winner", `"`+strings.Join(scd.Winner, `","`)+`"`))
|
|
|
}
|
|
|
_minPrice := ""
|
|
|
_maxPrice := ""
|
|
@@ -519,7 +501,7 @@ func GetDataExportSearchCountByScdId(sim, bid mg.MongodbSim, biddingName, elasti
|
|
|
if scd.SelectIds != nil {
|
|
|
//部分数据可能已删除、不存在;此处需要统计返回实际数量
|
|
|
//return len(scd.SelectIds)
|
|
|
- return int(GetDataExportSelectReallyCount(bid, biddingName, scd.SelectIds))
|
|
|
+ return int(GetDataExportSelectReallyCountFromMongo(bid, biddingName, scd.SelectIds))
|
|
|
}
|
|
|
return GetDataExportSearchCountBySieveCondition(scd, elasticAddress)
|
|
|
}
|
|
@@ -622,8 +604,37 @@ func GetDataExportIdArrByScdId(sim mg.MongodbSim, elasticAddress, id string, che
|
|
|
// 收藏导出
|
|
|
var contentfilterReg = regexp.MustCompile("<[^>]+>")
|
|
|
|
|
|
-// GetDataExportSelectReallyCount 查询实际可调导出数量
|
|
|
-func GetDataExportSelectReallyCount(bid mg.MongodbSim, biddingName string, ids []string) int64 {
|
|
|
+// GetDataExportSelectReallyCountFromEs counts, via Elasticsearch, how many of the given ids can actually be exported; ids are queried in the batches returned by SplitArray(ids, 200), at most 10 batches at a time, and the positive per-batch counts are summed and returned.
|
|
|
+func GetDataExportSelectReallyCountFromEs(ids []string) int64 {
|
|
|
+ pool := make(chan bool, 10) // semaphore: at most 10 count requests run concurrently
|
|
|
+ wait := &sync.WaitGroup{}
|
|
|
+ var total int64 // sum of the per-batch counts, guarded by lock
|
|
|
+ var lock sync.Mutex
|
|
|
+ for _, v := range SplitArray(ids, 200) { // presumably chunks of up to 200 ids each - confirm against SplitArray
|
|
|
+ pool <- true // blocks while 10 workers are already running
|
|
|
+ wait.Add(1)
|
|
|
+ go func(arr []string) {
|
|
|
+ defer func() {
|
|
|
+ wait.Done()
|
|
|
+ <-pool // release the semaphore slot
|
|
|
+ }()
|
|
|
+ query := fmt.Sprintf(`{"query":{"bool":{"must":[{"terms":{"id":["%s"]}}]}}}`, strings.Join(arr, "\",\"")) // NOTE(review): ids are spliced into the JSON unescaped; assumes they never contain quotes or backslashes
|
|
|
+ tCount := elastic.Count(INDEX, TYPE, query)
|
|
|
+ if tCount > 0 { // non-positive results are ignored; presumably they mean no hits or a failed request - confirm against elastic.Count
|
|
|
+ lock.Lock()
|
|
|
+ total += tCount
|
|
|
+ lock.Unlock()
|
|
|
+ }
|
|
|
+ return
|
|
|
+ }(v)
|
|
|
+ }
|
|
|
+ wait.Wait()
|
|
|
+ log.Printf("GetDataExportSelectReallyCountFromEs 选择数据共%d条记录,实际查询%d条\n", len(ids), total)
|
|
|
+ return total
|
|
|
+}
|
|
|
+
|
|
|
+// GetDataExportSelectReallyCountFromMongo queries MongoDB for the number of selected records that can actually be exported.
|
|
|
+func GetDataExportSelectReallyCountFromMongo(bid mg.MongodbSim, biddingName string, ids []string) int64 {
|
|
|
sess := bid.GetMgoConn()
|
|
|
defer bid.DestoryMongoConn(sess)
|
|
|
if ids == nil || len(ids) == 0 {
|
|
@@ -657,9 +668,7 @@ func GetDataExportSelectReallyCount(bid mg.MongodbSim, biddingName string, ids [
|
|
|
return -2
|
|
|
}
|
|
|
|
|
|
-func GetDataExportSelectResult(bidding mg.MongodbSim, biddingName string, scd *SieveCondition, dataType string, checkCount int) (*[]map[string]interface{}, error) {
|
|
|
- //sess := bidding.GetMgoConn()
|
|
|
- //defer bidding.DestoryMongoConn(sess)
|
|
|
+func GetDataExportSelectResultFromEs(bidding mg.MongodbSim, biddingName string, scd *SieveCondition, dataType string, checkCount int) (*[]map[string]interface{}, error) {
|
|
|
bidField := `"_id", "title", "detail", "area", "city", "publishtime", "projectname", "buyer", "s_winner", "bidamount", "subtype", "toptype", "filetext", "purchasing"`
|
|
|
//selectMap := map[string]interface{}{
|
|
|
// "_id": 1, "title": 1, "detail": 1, "area": 1, "city": 1, "publishtime": 1, "projectname": 1, "buyer": 1, "s_winner": 1, "bidamount": 1, "subtype": 1, "toptype": 1, "filetext": 1, "purchasing": 1,
|
|
@@ -685,12 +694,8 @@ func GetDataExportSelectResult(bidding mg.MongodbSim, biddingName string, scd *S
|
|
|
wait.Done()
|
|
|
<-pool
|
|
|
}()
|
|
|
- //var queryIds []interface{}
|
|
|
- //for _, idStr := range arr {
|
|
|
- // queryIds = append(queryIds, mg.StringTOBsonId(idStr))
|
|
|
- //}
|
|
|
query := fmt.Sprintf(`{"query":{"bool":{"must":[{"terms":{"id":["%s"]}}]}},"_source": [%s],"size":%d}`, strings.Join(arr, "\",\""), bidField, len(arr))
|
|
|
- log.Println("数据流量包 es count 信息查询:", query)
|
|
|
+ log.Println("GetDataExportSelectResultFromEs 数据流量包 es count 信息查询:", query)
|
|
|
data := *elastic.Get(INDEX, TYPE, query)
|
|
|
if data != nil && len(data) > 0 {
|
|
|
for _, bv := range data {
|
|
@@ -704,34 +709,6 @@ func GetDataExportSelectResult(bidding mg.MongodbSim, biddingName string, scd *S
|
|
|
lock.Unlock()
|
|
|
}
|
|
|
}
|
|
|
- //iter := sess.DB(biddingName).C("bidding").Select(selectMap).Find(map[string]interface{}{"_id": map[string]interface{}{
|
|
|
- // "$in": queryIds,
|
|
|
- //}}).Iter()
|
|
|
- //for m := make(map[string]interface{}); iter.Next(&m); {
|
|
|
- // m["_id"] = mg.BsonIdToSId(m["_id"])
|
|
|
- // detail, _ := m["detail"].(string)
|
|
|
- // if detail != "" {
|
|
|
- // m["detail"] = contentfilterReg.ReplaceAllString(detail, "")
|
|
|
- // }
|
|
|
- // lock.Lock()
|
|
|
- // returnLsit = append(returnLsit, m)
|
|
|
- // lock.Unlock()
|
|
|
- // m = make(map[string]interface{})
|
|
|
- //}
|
|
|
- //iter_back := sess.DB(biddingName).C("bidding_back").Select(selectMap).Find(map[string]interface{}{"_id": map[string]interface{}{
|
|
|
- // "$in": queryIds,
|
|
|
- //}}).Iter()
|
|
|
- //for m := make(map[string]interface{}); iter_back.Next(&m); {
|
|
|
- // m["_id"] = mg.BsonIdToSId(m["_id"])
|
|
|
- // detail, _ := m["detail"].(string)
|
|
|
- // if detail != "" {
|
|
|
- // m["detail"] = contentfilterReg.ReplaceAllString(detail, "")
|
|
|
- // }
|
|
|
- // lock.Lock()
|
|
|
- // returnLsit = append(returnLsit, m)
|
|
|
- // lock.Unlock()
|
|
|
- // m = make(map[string]interface{})
|
|
|
- //}
|
|
|
return nil
|
|
|
}(v)
|
|
|
}
|
|
@@ -739,7 +716,76 @@ func GetDataExportSelectResult(bidding mg.MongodbSim, biddingName string, scd *S
|
|
|
if len(returnLsit) == checkCount || checkCount == -1 {
|
|
|
return &returnLsit, nil
|
|
|
} else {
|
|
|
- return nil, fmt.Errorf("选择数据导出异常 数据量期望%d条,实际查询%d条", checkCount, len(returnLsit))
|
|
|
+ return nil, fmt.Errorf("GetDataExportSelectResultFromEs 选择数据导出异常 数据量期望%d条,实际查询%d条", checkCount, len(returnLsit))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func GetDataExportSelectResultFromMongoDb(bidding mg.MongodbSim, biddingName string, scd *SieveCondition, dataType string, checkCount int) (*[]map[string]interface{}, error) { // Loads the documents whose ids are in scd.SelectIds from the "bidding" and "bidding_back" collections of database biddingName; returns an error when the number found differs from checkCount, unless checkCount is -1.
|
|
|
+ sess := bidding.GetMgoConn()
|
|
|
+ defer bidding.DestoryMongoConn(sess)
|
|
|
+ selectMap := map[string]interface{}{ // projection: base set of fields to fetch
|
|
|
+ "_id": 1, "title": 1, "detail": 1, "area": 1, "city": 1, "publishtime": 1, "projectname": 1, "buyer": 1, "s_winner": 1, "bidamount": 1, "subtype": 1, "toptype": 1, "filetext": 1, "purchasing": 1,
|
|
|
+ }
|
|
|
+ if dataType == "2" { // dataType "2" adds the extra fields listed below to the projection
|
|
|
+ for _, key := range []string{"href", "projectcode", "buyerperson", "buyertel", "budget", "bidopentime", "agency", "projectscope", "winnerperson", "winnertel", "bidendtime", "district", "signendtime", "buyeraddr", "filetext", "buyerclass", "s_topscopeclass", "entidlist"} {
|
|
|
+ selectMap[key] = 1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if checkCount == -1 && len(scd.SelectIds) > 500 { // with the count check disabled, only the first 500 ids are looked up
|
|
|
+ scd.SelectIds = scd.SelectIds[:500] // NOTE(review): this truncates the caller's scd.SelectIds in place
|
|
|
+ }
|
|
|
+ pool := make(chan bool, 10) // semaphore: at most 10 batches are queried concurrently
|
|
|
+ wait := &sync.WaitGroup{}
|
|
|
+ var lock sync.Mutex // guards returnLsit
|
|
|
+ returnLsit := make([]map[string]interface{}, 0, len(scd.SelectIds))
|
|
|
+ for _, v := range SplitArray(scd.SelectIds, 200) { // presumably chunks of up to 200 ids each - confirm against SplitArray
|
|
|
+ pool <- true // blocks while 10 workers are already running
|
|
|
+ wait.Add(1)
|
|
|
+ go func(arr []string) error { // NOTE(review): the error result is discarded by the go statement
|
|
|
+ defer func() {
|
|
|
+ wait.Done()
|
|
|
+ <-pool // release the semaphore slot
|
|
|
+ }()
|
|
|
+ var queryIds []interface{}
|
|
|
+ for _, idStr := range arr {
|
|
|
+ queryIds = append(queryIds, mg.StringTOBsonId(idStr)) // presumably converts the hex string to a BSON ObjectId - confirm against mg
|
|
|
+ }
|
|
|
+ iter := sess.DB(biddingName).C("bidding").Select(selectMap).Find(map[string]interface{}{"_id": map[string]interface{}{ // first lookup: the "bidding" collection
|
|
|
+ "$in": queryIds,
|
|
|
+ }}).Iter()
|
|
|
+ for m := make(map[string]interface{}); iter.Next(&m); { // NOTE(review): iter is never closed and no iteration error is checked here
|
|
|
+ m["_id"] = mg.BsonIdToSId(m["_id"])
|
|
|
+ detail, _ := m["detail"].(string)
|
|
|
+ if detail != "" {
|
|
|
+ m["detail"] = contentfilterReg.ReplaceAllString(detail, "") // remove everything matched by contentfilterReg from detail
|
|
|
+ }
|
|
|
+ lock.Lock()
|
|
|
+ returnLsit = append(returnLsit, m)
|
|
|
+ lock.Unlock()
|
|
|
+ m = make(map[string]interface{}) // fresh map, presumably so the next Next call does not overwrite the one just appended
|
|
|
+ }
|
|
|
+ iter_back := sess.DB(biddingName).C("bidding_back").Select(selectMap).Find(map[string]interface{}{"_id": map[string]interface{}{ // second lookup: same ids against the "bidding_back" collection
|
|
|
+ "$in": queryIds,
|
|
|
+ }}).Iter()
|
|
|
+ for m := make(map[string]interface{}); iter_back.Next(&m); { // NOTE(review): iter_back is never closed and no iteration error is checked here
|
|
|
+ m["_id"] = mg.BsonIdToSId(m["_id"])
|
|
|
+ detail, _ := m["detail"].(string)
|
|
|
+ if detail != "" {
|
|
|
+ m["detail"] = contentfilterReg.ReplaceAllString(detail, "") // remove everything matched by contentfilterReg from detail
|
|
|
+ }
|
|
|
+ lock.Lock()
|
|
|
+ returnLsit = append(returnLsit, m)
|
|
|
+ lock.Unlock()
|
|
|
+ m = make(map[string]interface{}) // fresh map, presumably so the next Next call does not overwrite the one just appended
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+ }(v)
|
|
|
+ }
|
|
|
+ wait.Wait() // all batches have finished before the result size is checked
|
|
|
+ if len(returnLsit) == checkCount || checkCount == -1 { // checkCount == -1 disables the size check
|
|
|
+ return &returnLsit, nil
|
|
|
+ } else {
|
|
|
+ return nil, fmt.Errorf("GetDataExportSelectResultFromMongoDb 选择数据导出异常 数据量期望%d条,实际查询%d条", checkCount, len(returnLsit))
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -841,7 +887,7 @@ func GetDataExportSearchResult(bid mg.MongodbSim, bidMgoDBName, elasticAddress s
|
|
|
return nil, errors.New("GetDataExportSearchResult-未获取到查询信息")
|
|
|
}
|
|
|
if scd.SelectIds != nil {
|
|
|
- idSelectDates, idSelectErr := GetDataExportSelectResult(bid, bidMgoDBName, scd, dataType, checkCount)
|
|
|
+ idSelectDates, idSelectErr := GetDataExportSelectResultFromMongoDb(bid, bidMgoDBName, scd, dataType, checkCount)
|
|
|
if idSelectErr != nil {
|
|
|
return nil, idSelectErr
|
|
|
}
|
|
@@ -1087,7 +1133,7 @@ func FormatExportData(entmg mg.MongodbSim, data *[]map[string]interface{}, webdo
|
|
|
date := v["bidendtime"]
|
|
|
v["bidendtime"] = FormatDateWithObj(&date, Date_Short_Layout)
|
|
|
}
|
|
|
- if v["_id"] != nil && !isEncry {
|
|
|
+ if v["_id"] != nil {
|
|
|
encodeId := CommonEncodeArticle("content", v["_id"].(string))
|
|
|
v["url"] = webdomain + "/article/content/" + encodeId + ".html"
|
|
|
v["url_jump"] = webdomain + "/front/reloadTo/article/content/" + encodeId + ".html"
|