浏览代码

fix:对该批次数据进行内部去重

fuwencai 3 年之前
父节点
当前提交
8ee469a1e1
共有 1 个文件被更改,包括 25 次插入和 33 次删除
  1. 25 33
      service/deduplication.go

+ 25 - 33
service/deduplication.go

@@ -38,22 +38,21 @@ func (service *DeduplicationService) DataDeduplicateInsert(data *deduplication.R
 	} else {
 		valueList = append(valueList, data.PersonId, data.EntId)
 	}
-
+	mapData := map[string]bool{} // 本批次去重后的id
 	for _, v := range strings.Split(data.InfoId, ",") {
 		tmpList = append(tmpList, "?")
 		valueList = append(valueList, v)
+		mapData[v] = true // 本批次去重
 	}
+	totalInfoCount := len(mapData) // 本批次总数
 	if data.IsEnt {
 		selectSql = fmt.Sprintf("ent_id=? and info_id in (%s)", strings.Join(tmpList, ","))
 	} else {
 		selectSql = fmt.Sprintf("person_id = ? and ent_id=? and info_id in (%s)", strings.Join(tmpList, ","))
 	}
 	log.Println(selectSql)
-	infoIdList := strings.Split(data.InfoId, ",")
-	totalInfoCount := len(infoIdList)
 	err := orm.Table(tableName).Distinct("info_id").Where(selectSql, valueList...).Find(&rs)
-	totalExist := len(rs)
-	log.Println(totalExist, "已存在")
+
 	if err != nil {
 		log.Println(err, "判重查询失败")
 		return &deduplication.Info{
@@ -63,9 +62,11 @@ func (service *DeduplicationService) DataDeduplicateInsert(data *deduplication.R
 			IsInsert:   false,
 		}, "判重查询失败"
 	}
-
+	totalExist := len(rs)
+	log.Println(totalExist, "已存在")
+	log.Println(totalInfoCount, "本批次去重后总量")
+	log.Println(totalInfoCount-totalExist, "newCount")
 	if data.IsInsert {
-
 		existIdMap := map[string]bool{}
 		for _, v := range rs {
 			existIdMap[v.InfoId] = true
@@ -74,7 +75,7 @@ func (service *DeduplicationService) DataDeduplicateInsert(data *deduplication.R
 		orm.Begin()
 		// 新增
 		var insertList []entity.Deduplication
-		for _, id := range infoIdList {
+		for id, _ := range mapData {
 			if existIdMap[id] {
 				log.Println("id已存在", id)
 				continue
@@ -151,7 +152,7 @@ func (service *DeduplicationService) DataDeduplicateByAccountId(data *deduplicat
 	// 模运算取企业id  todo  需要区分是int类型还是mongodb objectid类型   看一下咋转
 	//var num01 int
 	number, errConv := strconv.Atoi(data.AccountId)
-	log.Println(55555, number, errConv)
+	log.Println("账户id转换:", number, errConv)
 	if errConv != nil {
 		log.Println("不是int类型的,hash后再取模寻表")
 		b := a(data.AccountId)
@@ -169,17 +170,17 @@ func (service *DeduplicationService) DataDeduplicateByAccountId(data *deduplicat
 	var selectSql string
 	valueList = append(valueList, data.AccountId)
 	valueList = append(valueList, data.DataDesc)
+	mapData := map[string]bool{} // 本批次去重后的id
 	for _, v := range strings.Split(data.InfoId, ",") {
 		if strings.TrimSpace(v) != "" {
 			tmpList = append(tmpList, "?")
 			valueList = append(valueList, v)
+			mapData[v] = true // 本批次去重
 		}
-
 	}
+	totalInfoCount := len(mapData) // 本批次总数
 	var rs []*entity.Deduplication
 	selectSql = fmt.Sprintf("account_id=? and data_desc=? and info_id in (%s)", strings.Join(tmpList, ","))
-	infoIdList := strings.Split(data.InfoId, ",")
-	totalInfoCount := len(infoIdList)
 	err := orm.Table(tableName).Distinct("info_id").Where(selectSql, valueList...).Find(&rs)
 
 	if err != nil {
@@ -191,16 +192,10 @@ func (service *DeduplicationService) DataDeduplicateByAccountId(data *deduplicat
 			IsInsert:   false,
 		}, "判重查询失败"
 	}
-	existInfoIdMap := map[string]bool{}
-	existIdList := []string{}
-	for _, v := range rs {
-		if existInfoIdMap[v.InfoId] {
-			continue
-		} else {
-			existIdList = append(existIdList, v.InfoId)
-		}
-	}
-	count := int64(len(existIdList))
+	count := int64(len(rs))
+	log.Println(count, "已存在数据量")
+	log.Println(totalInfoCount, "本批次内部去重后总数量")
+	log.Println(int64(totalInfoCount)-count, "newCount")
 	return &deduplication.Info{
 		TotalCount: int64(totalInfoCount),
 		ExistCount: count,
@@ -234,19 +229,17 @@ func (service *DeduplicationService) DataDeduplicateAndSave(data *deduplication.
 	var selectSql string
 	valueList = append(valueList, data.AccountId)
 	valueList = append(valueList, data.DataDesc)
+	mapData := map[string]bool{} // 本批次去重后的id
 	for _, v := range strings.Split(data.InfoId, ",") {
 		if strings.TrimSpace(v) != "" {
 			tmpList = append(tmpList, "?")
 			valueList = append(valueList, v)
+			mapData[v] = true // 本批次去重
 		}
-
 	}
-
+	totalInfoCount := len(mapData) // 本批次总数
 	selectSql = fmt.Sprintf("account_id=? and data_desc=? and info_id in (%s)", strings.Join(tmpList, ","))
-	infoIdList := strings.Split(data.InfoId, ",")
-	totalInfoCount := len(infoIdList)
 	err := orm.Table(tableName).Distinct("info_id").Where(selectSql, valueList...).Find(&rs)
-	//existIdList := []string{}
 
 	if err != nil {
 		log.Println(err, "判重查询失败")
@@ -256,18 +249,18 @@ func (service *DeduplicationService) DataDeduplicateAndSave(data *deduplication.
 			NewCount:   0,
 		}, "判重查询失败"
 	}
-
+	count := len(rs)
+	log.Println(count, "已存在数据量")
+	log.Println(totalInfoCount, "本批次内部去重后总数量")
+	log.Println(totalInfoCount-count, "newCount")
 	existIdMap := map[string]bool{}
 	for _, v := range rs {
 		existIdMap[v.InfoId] = true
 	}
 
-	count := len(existIdMap)
-	log.Println(count, "已存在数据量")
-
 	// 新增
 	var insertList []entity.Deduplication
-	for _, id := range infoIdList {
+	for id, _ := range mapData {
 		if existIdMap[id] {
 			log.Println("id已存在", id)
 			continue
@@ -280,7 +273,6 @@ func (service *DeduplicationService) DataDeduplicateAndSave(data *deduplication.
 			AccountId: data.AccountId,
 			DataDesc:  data.DataDesc,
 		}
-		existIdMap[id] = true
 		insertList = append(insertList, temData)
 
 	}