|
@@ -38,22 +38,21 @@ func (service *DeduplicationService) DataDeduplicateInsert(data *deduplication.R
|
|
|
} else {
|
|
|
valueList = append(valueList, data.PersonId, data.EntId)
|
|
|
}
|
|
|
-
|
|
|
+ mapData := map[string]bool{} // 本批次去重后的id
|
|
|
for _, v := range strings.Split(data.InfoId, ",") {
|
|
|
tmpList = append(tmpList, "?")
|
|
|
valueList = append(valueList, v)
|
|
|
+ mapData[v] = true // 本批次去重
|
|
|
}
|
|
|
+ totalInfoCount := len(mapData) // 本批次总数
|
|
|
if data.IsEnt {
|
|
|
selectSql = fmt.Sprintf("ent_id=? and info_id in (%s)", strings.Join(tmpList, ","))
|
|
|
} else {
|
|
|
selectSql = fmt.Sprintf("person_id = ? and ent_id=? and info_id in (%s)", strings.Join(tmpList, ","))
|
|
|
}
|
|
|
log.Println(selectSql)
|
|
|
- infoIdList := strings.Split(data.InfoId, ",")
|
|
|
- totalInfoCount := len(infoIdList)
|
|
|
err := orm.Table(tableName).Distinct("info_id").Where(selectSql, valueList...).Find(&rs)
|
|
|
- totalExist := len(rs)
|
|
|
- log.Println(totalExist, "已存在")
|
|
|
+
|
|
|
if err != nil {
|
|
|
log.Println(err, "判重查询失败")
|
|
|
return &deduplication.Info{
|
|
@@ -63,9 +62,11 @@ func (service *DeduplicationService) DataDeduplicateInsert(data *deduplication.R
|
|
|
IsInsert: false,
|
|
|
}, "判重查询失败"
|
|
|
}
|
|
|
-
|
|
|
+ totalExist := len(rs)
|
|
|
+ log.Println(totalExist, "已存在")
|
|
|
+ log.Println(totalInfoCount, "本批次去重后总量")
|
|
|
+ log.Println(totalInfoCount-totalExist, "newCount")
|
|
|
if data.IsInsert {
|
|
|
-
|
|
|
existIdMap := map[string]bool{}
|
|
|
for _, v := range rs {
|
|
|
existIdMap[v.InfoId] = true
|
|
@@ -74,7 +75,7 @@ func (service *DeduplicationService) DataDeduplicateInsert(data *deduplication.R
|
|
|
orm.Begin()
|
|
|
// 新增
|
|
|
var insertList []entity.Deduplication
|
|
|
- for _, id := range infoIdList {
|
|
|
+ for id, _ := range mapData {
|
|
|
if existIdMap[id] {
|
|
|
log.Println("id已存在", id)
|
|
|
continue
|
|
@@ -151,7 +152,7 @@ func (service *DeduplicationService) DataDeduplicateByAccountId(data *deduplicat
|
|
|
// 模运算取企业id todo 需要区分是int类型还是mongodb objectid类型 看一下咋转
|
|
|
//var num01 int
|
|
|
number, errConv := strconv.Atoi(data.AccountId)
|
|
|
- log.Println(55555, number, errConv)
|
|
|
+ log.Println("账户id转换:", number, errConv)
|
|
|
if errConv != nil {
|
|
|
log.Println("不是int类型的,hash后再取模寻表")
|
|
|
b := a(data.AccountId)
|
|
@@ -169,17 +170,17 @@ func (service *DeduplicationService) DataDeduplicateByAccountId(data *deduplicat
|
|
|
var selectSql string
|
|
|
valueList = append(valueList, data.AccountId)
|
|
|
valueList = append(valueList, data.DataDesc)
|
|
|
+ mapData := map[string]bool{} // 本批次去重后的id
|
|
|
for _, v := range strings.Split(data.InfoId, ",") {
|
|
|
if strings.TrimSpace(v) != "" {
|
|
|
tmpList = append(tmpList, "?")
|
|
|
valueList = append(valueList, v)
|
|
|
+ mapData[v] = true // 本批次去重
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
+ totalInfoCount := len(mapData) // 本批次总数
|
|
|
var rs []*entity.Deduplication
|
|
|
selectSql = fmt.Sprintf("account_id=? and data_desc=? and info_id in (%s)", strings.Join(tmpList, ","))
|
|
|
- infoIdList := strings.Split(data.InfoId, ",")
|
|
|
- totalInfoCount := len(infoIdList)
|
|
|
err := orm.Table(tableName).Distinct("info_id").Where(selectSql, valueList...).Find(&rs)
|
|
|
|
|
|
if err != nil {
|
|
@@ -191,16 +192,10 @@ func (service *DeduplicationService) DataDeduplicateByAccountId(data *deduplicat
|
|
|
IsInsert: false,
|
|
|
}, "判重查询失败"
|
|
|
}
|
|
|
- existInfoIdMap := map[string]bool{}
|
|
|
- existIdList := []string{}
|
|
|
- for _, v := range rs {
|
|
|
- if existInfoIdMap[v.InfoId] {
|
|
|
- continue
|
|
|
- } else {
|
|
|
- existIdList = append(existIdList, v.InfoId)
|
|
|
- }
|
|
|
- }
|
|
|
- count := int64(len(existIdList))
|
|
|
+ count := int64(len(rs))
|
|
|
+ log.Println(count, "已存在数据量")
|
|
|
+ log.Println(totalInfoCount, "本批次内部去重后总数量")
|
|
|
+ log.Println(int64(totalInfoCount)-count, "newCount")
|
|
|
return &deduplication.Info{
|
|
|
TotalCount: int64(totalInfoCount),
|
|
|
ExistCount: count,
|
|
@@ -234,19 +229,17 @@ func (service *DeduplicationService) DataDeduplicateAndSave(data *deduplication.
|
|
|
var selectSql string
|
|
|
valueList = append(valueList, data.AccountId)
|
|
|
valueList = append(valueList, data.DataDesc)
|
|
|
+ mapData := map[string]bool{} // 本批次去重后的id
|
|
|
for _, v := range strings.Split(data.InfoId, ",") {
|
|
|
if strings.TrimSpace(v) != "" {
|
|
|
tmpList = append(tmpList, "?")
|
|
|
valueList = append(valueList, v)
|
|
|
+ mapData[v] = true // 本批次去重
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
-
|
|
|
+ totalInfoCount := len(mapData) // 本批次总数
|
|
|
selectSql = fmt.Sprintf("account_id=? and data_desc=? and info_id in (%s)", strings.Join(tmpList, ","))
|
|
|
- infoIdList := strings.Split(data.InfoId, ",")
|
|
|
- totalInfoCount := len(infoIdList)
|
|
|
err := orm.Table(tableName).Distinct("info_id").Where(selectSql, valueList...).Find(&rs)
|
|
|
- //existIdList := []string{}
|
|
|
|
|
|
if err != nil {
|
|
|
log.Println(err, "判重查询失败")
|
|
@@ -256,18 +249,18 @@ func (service *DeduplicationService) DataDeduplicateAndSave(data *deduplication.
|
|
|
NewCount: 0,
|
|
|
}, "判重查询失败"
|
|
|
}
|
|
|
-
|
|
|
+ count := len(rs)
|
|
|
+ log.Println(count, "已存在数据量")
|
|
|
+ log.Println(totalInfoCount, "本批次内部去重后总数量")
|
|
|
+ log.Println(totalInfoCount-count, "newCount")
|
|
|
existIdMap := map[string]bool{}
|
|
|
for _, v := range rs {
|
|
|
existIdMap[v.InfoId] = true
|
|
|
}
|
|
|
|
|
|
- count := len(existIdMap)
|
|
|
- log.Println(count, "已存在数据量")
|
|
|
-
|
|
|
// 新增
|
|
|
var insertList []entity.Deduplication
|
|
|
- for _, id := range infoIdList {
|
|
|
+ for id, _ := range mapData {
|
|
|
if existIdMap[id] {
|
|
|
log.Println("id已存在", id)
|
|
|
continue
|
|
@@ -280,7 +273,6 @@ func (service *DeduplicationService) DataDeduplicateAndSave(data *deduplication.
|
|
|
AccountId: data.AccountId,
|
|
|
DataDesc: data.DataDesc,
|
|
|
}
|
|
|
- existIdMap[id] = true
|
|
|
insertList = append(insertList, temData)
|
|
|
|
|
|
}
|