浏览代码

合并最新修改

apple 5 年之前
父节点
当前提交
76a467d57b
共有 2 个文件被更改,包括 194 次插入 和 204 次删除
  1. 193 203
      udpfilterdup/src/main.go
  2. 1 1
      udps/main.go

+ 193 - 203
udpfilterdup/src/main.go

@@ -89,7 +89,7 @@ func init() {
 		}
 		SiteMap[util.ObjToString(site_dict["site"])] = data_map
 	}
-	fmt.Printf("站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(SiteMap))
+	log.Printf("站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(SiteMap))
 }
 
 func main() {
@@ -223,84 +223,110 @@ func task(data []byte, mapInfo map[string]interface{}) {
 				b, source, reason := DM.check(info)
 				if b { //有重复,生成更新语句,更新抽取和更新招标
 					repeateN++
+					var is_replace  = false
 					var mergeArr = []int64{} //更改合并数组记录
 					var newData = &Info{}    //更换新的数据池数据
-					var id_map = map[string]interface{}{}
-					repeat_id := source.id
-					if idtype == "1" {
-						id_map["_id"] = info.id
+					var repeat_idMap = map[string]interface{}{} //记录判重的
+					var merge_idMap = map[string]interface{}{} //记录合并的
+					if idtype == "1" { //先临时决定一个id
+						repeat_idMap["_id"] = info.id
+						merge_idMap["_id"] = source.id
 					} else {
-						id_map["_id"] = util.StringTOBsonId(info.id)
+						repeat_idMap["_id"] = util.StringTOBsonId(info.id)
+						merge_idMap["_id"] = util.StringTOBsonId(source.id)
 					}
+					repeat_id:=source.id
+					//以下合并相关
 					if isMerger {
-						//需要合并相关操作-合并操作--评功权重打分-合并完替换原始数据池
 						basic_bool := basicDataScore(source, info)
 						if basic_bool {
-							//已原始数据为标准-对比数据打判重标签
-							newData, mergeArr = mergeDataFields(source, info)
+							//以原始数据为标准 - 对比数据打判重标签-
+							newData, mergeArr,is_replace = mergeDataFields(source, info)
 							DM.replaceSourceData(newData, source.id) //替换
+							//对比数据打重复标签的id,原始数据id的记录
 							if idtype == "1" {
-								id_map["_id"] = info.id
+								repeat_idMap["_id"] = info.id
+								merge_idMap["_id"] = source.id
 							} else {
-								id_map["_id"] = util.StringTOBsonId(info.id)
+								repeat_idMap["_id"] = util.StringTOBsonId(info.id)
+								merge_idMap["_id"] = util.StringTOBsonId(source.id)
 							}
-
 							repeat_id = source.id
 						} else {
 							//以对比数据为标准,数据池的数据打判重标签
-							newData, mergeArr = mergeDataFields(info, source)
+							newData, mergeArr,is_replace = mergeDataFields(info, source)
 							DM.replaceSourceData(newData, source.id) //替换
+
+							//原始数据打重复标签的id,   对比数据id的记录
 							if idtype == "1" {
-								id_map["_id"] = source.id
+								repeat_idMap["_id"] = source.id
+								merge_idMap["_id"] = info.id
 							} else {
-								id_map["_id"] = util.StringTOBsonId(source.id)
+								repeat_idMap["_id"] = util.StringTOBsonId(source.id)
+								merge_idMap["_id"] = util.StringTOBsonId(info.id)
 							}
-
 							repeat_id = info.id
 						}
-					}
 
-					var update_map = map[string]interface{}{
-						"$set": map[string]interface{}{
-							"repeat_reason": reason,
-							"repeat":        1,
-							"repeatid":      repeat_id,
-						},
-					}
-					if isMerger {
-						if len(newData.mergemap) > 0 {
-							update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
-						}
-						//更新合并后的数据
-						for _, value := range mergeArr {
-							if value == 1 {
-								update_map["$set"].(map[string]interface{})["area"] = newData.area
-								update_map["$set"].(map[string]interface{})["city"] = newData.city
-							} else if value == 2 {
-								update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
-							} else if value == 3 {
-								update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
-							} else if value == 4 {
-								update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
-							} else if value == 5 {
-								update_map["$set"].(map[string]interface{})["budget"] = newData.budget
-							} else if value == 6 {
-								update_map["$set"].(map[string]interface{})["winner"] = newData.winner
-							} else if value == 7 {
-								update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
-							} else if value == 8 {
-								update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
-							} else if value == 9 {
-								update_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
-							}else {
+
+						merge_map := make(map[string]interface{},0)
+						if is_replace {//有过合并-更新数据
+
+							merge_map = map[string]interface{}{
+								"$set": map[string]interface{}{
+									"merge":newData.mergemap,
+								},
+							}
+
+							//更新合并后的数据
+							for _, value := range mergeArr {
+								if value == 1 {
+									merge_map["$set"].(map[string]interface{})["area"] = newData.area
+									merge_map["$set"].(map[string]interface{})["city"] = newData.city
+								} else if value == 2 {
+									merge_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
+								} else if value == 3 {
+									merge_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
+								} else if value == 4 {
+									merge_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
+								} else if value == 5 {
+									merge_map["$set"].(map[string]interface{})["budget"] = newData.budget
+								} else if value == 6 {
+									merge_map["$set"].(map[string]interface{})["winner"] = newData.winner
+								} else if value == 7 {
+									merge_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
+								} else if value == 8 {
+									merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
+								} else if value == 9 {
+									merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
+								}else {
+								}
+
+								if value==0 {
+
+								}
 							}
+							//模板数据更新
+							updateExtract = append(updateExtract, []map[string]interface{}{
+								merge_idMap,
+								merge_map,
+							})
 						}
 					}
-					//构建数据库更新用到的
+
+
+					//重复数据打标签
 					updateExtract = append(updateExtract, []map[string]interface{}{
-						id_map,
-						update_map,
+						repeat_idMap,
+						map[string]interface{}{
+							"$set": map[string]interface{}{
+								"repeat": 1,
+								"repeat_reason": reason,
+								"repeat_id":repeat_id,
+							},
+						},
 					})
+
 				}
 			}
 		}(tmp)
@@ -434,7 +460,6 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 			}()
 			info := NewInfo(tmp)
 			if invalidData(info.buyer, info.projectname, info.projectcode,info.contractnumber) {
-				//mapLock.Lock()
 				updateExtract = append(updateExtract, []map[string]interface{}{
 					map[string]interface{}{
 						"_id": tmp["_id"],
@@ -449,7 +474,6 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 					mgo.UpdateBulk(extract, updateExtract...)
 					updateExtract = [][]map[string]interface{}{}
 				}
-				//mapLock.Unlock()
 			} else {
 				b, source, reason := HM.checkHistory(info)
 				if b { //有重复,生成更新语句,更新抽取和更新招标
@@ -470,86 +494,110 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 						})
 					} else {
 						repeateN++
+						var is_replace  = false
 						var mergeArr = []int64{} //更改合并数组记录
 						var newData = &Info{}    //更换新的数据池数据
-						var id_map = map[string]interface{}{}
-						repeat_id := source.id
-						if idtype == "1" {
-							id_map["_id"] = info.id
+						var repeat_idMap = map[string]interface{}{} //记录判重的
+						var merge_idMap = map[string]interface{}{} //记录合并的
+						if idtype == "1" { //先临时决定一个id
+							repeat_idMap["_id"] = info.id
+							merge_idMap["_id"] = source.id
 						} else {
-							id_map["_id"] = util.StringTOBsonId(info.id)
+							repeat_idMap["_id"] = util.StringTOBsonId(info.id)
+							merge_idMap["_id"] = util.StringTOBsonId(source.id)
 						}
+						repeat_id:=source.id
+						//以下合并相关
 						if isMerger {
-							//需要合并相关操作-合并操作--评功权重打分-合并完替换原始数据池
 							basic_bool := basicDataScore(source, info)
 							if basic_bool {
-								//已原始数据为标准-对比数据打判重标签
-								newData, mergeArr = mergeDataFields(source, info)
+								//以原始数据为标准 - 对比数据打判重标签-
+								newData, mergeArr,is_replace = mergeDataFields(source, info)
 								DM.replaceSourceData(newData, source.id) //替换
+								//对比数据打重复标签的id,原始数据id的记录
 								if idtype == "1" {
-									id_map["_id"] = info.id
+									repeat_idMap["_id"] = info.id
+									merge_idMap["_id"] = source.id
 								} else {
-									id_map["_id"] = util.StringTOBsonId(info.id)
+									repeat_idMap["_id"] = util.StringTOBsonId(info.id)
+									merge_idMap["_id"] = util.StringTOBsonId(source.id)
 								}
-
 								repeat_id = source.id
 							} else {
 								//以对比数据为标准,数据池的数据打判重标签
-								newData, mergeArr = mergeDataFields(info, source)
+								newData, mergeArr,is_replace = mergeDataFields(info, source)
 								DM.replaceSourceData(newData, source.id) //替换
+
+								//原始数据打重复标签的id,   对比数据id的记录
 								if idtype == "1" {
-									id_map["_id"] = source.id
+									repeat_idMap["_id"] = source.id
+									merge_idMap["_id"] = info.id
 								} else {
-									id_map["_id"] = util.StringTOBsonId(source.id)
+									repeat_idMap["_id"] = util.StringTOBsonId(source.id)
+									merge_idMap["_id"] = util.StringTOBsonId(info.id)
 								}
-
 								repeat_id = info.id
 							}
-						}
 
-						var update_map = map[string]interface{}{
-							"$set": map[string]interface{}{
-								"repeat_reason": reason,
-								"repeat":        1,
-								"repeatid":      repeat_id,
-							},
-						}
-						if isMerger {
-							//合并记录
-							if len(newData.mergemap) > 0 {
-								update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
-							}
-							//更新合并后的数据
-							for _, value := range mergeArr {
-								if value == 1 {
-									update_map["$set"].(map[string]interface{})["area"] = newData.area
-									update_map["$set"].(map[string]interface{})["city"] = newData.city
-								} else if value == 2 {
-									update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
-								} else if value == 3 {
-									update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
-								} else if value == 4 {
-									update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
-								} else if value == 5 {
-									update_map["$set"].(map[string]interface{})["budget"] = newData.budget
-								} else if value == 6 {
-									update_map["$set"].(map[string]interface{})["winner"] = newData.winner
-								} else if value == 7 {
-									update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
-								} else if value == 8 {
-									update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
-								} else if value == 9 {
-									update_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
-								}else {
 
+							merge_map := make(map[string]interface{},0)
+							if is_replace {//有过合并-更新数据
+
+								merge_map = map[string]interface{}{
+									"$set": map[string]interface{}{
+										"merge":newData.mergemap,
+									},
+								}
+
+								//更新合并后的数据
+								for _, value := range mergeArr {
+									if value == 1 {
+										merge_map["$set"].(map[string]interface{})["area"] = newData.area
+										merge_map["$set"].(map[string]interface{})["city"] = newData.city
+									} else if value == 2 {
+										merge_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
+									} else if value == 3 {
+										merge_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
+									} else if value == 4 {
+										merge_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
+									} else if value == 5 {
+										merge_map["$set"].(map[string]interface{})["budget"] = newData.budget
+									} else if value == 6 {
+										merge_map["$set"].(map[string]interface{})["winner"] = newData.winner
+									} else if value == 7 {
+										merge_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
+									} else if value == 8 {
+										merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
+									} else if value == 9 {
+										merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
+									}else {
+									}
+
+									if value==0 {
+
+									}
 								}
+								//模板数据更新
+								updateExtract = append(updateExtract, []map[string]interface{}{
+									merge_idMap,
+									merge_map,
+								})
 							}
 						}
-						//构建数据库更新用到的
+
+
+						//重复数据打标签
 						updateExtract = append(updateExtract, []map[string]interface{}{
-							id_map,
-							update_map,
+							repeat_idMap,
+							map[string]interface{}{
+								"$set": map[string]interface{}{
+									"repeat": 1,
+									"repeat_reason": reason,
+									"repeat_id":repeat_id,
+								},
+							},
 						})
+
 					}
 				}
 			}
@@ -590,151 +638,93 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 	}
 }
 
-//合并字段
-func mergeDataFields(source *Info, info *Info) (*Info, []int64) {
+//合并字段-并更新merge字段的值
+func mergeDataFields(source *Info, info *Info) (*Info, []int64,bool) {
 
-	var mergeArr []int64
-	mergeArr = make([]int64, 0)
+	//定义一个新的map[string]interface{}{}
+	merge_recordMap := make(map[string]interface{},0)
+	mergeArr := make([]int64, 0)
+	//是否替换数据了-记录原始的数据
+	is_replace :=false
 	//1、城市
 	if (source.area == "" || source.area == "全国") && info.area != "全国" && info.area != "" {
-		var arrA []string
-		if source.mergemap["area"] == nil {
-			arrA = make([]string, 0)
-		} else {
-			arrA = source.mergemap["area"].([]string)
-		}
-		arrA = append(arrA, source.area)
-		source.mergemap["area"] = arrA
-
-		var arrC []string
-		if source.mergemap["city"] == nil {
-			arrC = make([]string, 0)
-		} else {
-			arrC = source.mergemap["city"].([]string)
-		}
-		arrC = append(arrC, source.city)
-		source.mergemap["city"] = arrC
-
+		merge_recordMap["area"] = info.area
+		merge_recordMap["city"] = info.city
 		source.area = info.area
 		source.city = info.city
 		mergeArr = append(mergeArr, 1)
+		is_replace = true
 	}
 	//2、项目名称
 	if source.projectname == "" && info.projectname != "" {
-		var arr []string
-		if source.mergemap["projectname"] == nil {
-			arr = make([]string, 0)
-		} else {
-			arr = source.mergemap["projectname"].([]string)
-		}
-		arr = append(arr, source.projectname)
-		source.mergemap["projectname"] = arr
-
+		merge_recordMap["projectname"] = info.projectname
 		source.projectname = info.projectname
 		mergeArr = append(mergeArr, 2)
+		is_replace = true
 	}
 	//3、项目编号
 	if source.projectcode == "" && info.projectcode != "" {
-		var arr []string
-		if source.mergemap["projectcode"] == nil {
-			arr = make([]string, 0)
-		} else {
-			arr = source.mergemap["projectcode"].([]string)
-		}
-		arr = append(arr, source.projectcode)
-		source.mergemap["projectcode"] = arr
-
+		merge_recordMap["projectcode"] = info.projectcode
 		source.projectcode = info.projectcode
 		mergeArr = append(mergeArr, 3)
+		is_replace = true
 	}
 	//4、采购单位
 	if source.buyer == "" && info.buyer != "" {
-		var arr []string
-		if source.mergemap["buyer"] == nil {
-			arr = make([]string, 0)
-		} else {
-			arr = source.mergemap["buyer"].([]string)
-		}
-		arr = append(arr, source.buyer)
-		source.mergemap["buyer"] = arr
-
+		merge_recordMap["buyer"] = info.buyer
 		source.buyer = info.buyer
 		mergeArr = append(mergeArr, 4)
+		is_replace = true
 	}
 	//5、预算
 	if source.budget == 0 && info.budget != 0 {
-		var arr []float64
-		if source.mergemap["budget"] == nil {
-			arr = make([]float64, 0)
-		} else {
-			arr = source.mergemap["budget"].([]float64)
-		}
-		arr = append(arr, source.budget)
-		source.mergemap["budget"] = arr
-
+		merge_recordMap["budget"] = info.budget
 		source.budget = info.budget
 		mergeArr = append(mergeArr, 5)
+		is_replace = true
 	}
 	//6、中标单位
 	if source.winner == "" && info.winner != "" {
-		var arr []string
-		if source.mergemap["winner"] == nil {
-			arr = make([]string, 0)
-		} else {
-			arr = source.mergemap["winner"].([]string)
-		}
-		arr = append(arr, source.winner)
-		source.mergemap["winner"] = arr
-
+		merge_recordMap["winner"] = info.winner
 		source.winner = info.winner
 		mergeArr = append(mergeArr, 6)
+		is_replace = true
 	}
 	//7、中标金额
 	if source.bidamount == 0 && info.bidamount != 0 {
-		var arr []float64
-		if source.mergemap["bidamount"] == nil {
-			arr = make([]float64, 0)
-		} else {
-			arr = source.mergemap["bidamount"].([]float64)
-		}
-		arr = append(arr, source.bidamount)
-		source.mergemap["bidamount"] = arr
-
+		merge_recordMap["bidamount"] = info.bidamount
 		source.bidamount = info.bidamount
 		mergeArr = append(mergeArr, 7)
+		is_replace = true
 	}
 	//8、开标时间-地点
 	if source.bidopentime == 0 && info.bidopentime != 0 {
-		var arr []int64
-		if source.mergemap["bidopentime"] == nil {
-			arr = make([]int64, 0)
-		} else {
-			arr = source.mergemap["bidopentime"].([]int64)
-		}
-		arr = append(arr, source.bidopentime)
-		source.mergemap["bidopentime"] = arr
-
+		merge_recordMap["bidopentime"] = info.bidopentime
 		source.bidopentime = info.bidopentime
 		mergeArr = append(mergeArr, 8)
+		is_replace = true
 	}
 
 	//9、合同编号
 	if source.contractnumber == "" && info.contractnumber != "" {
-		var arr []string
-		if source.mergemap["contractnumber"] == nil {
-			arr = make([]string, 0)
-		} else {
-			arr = source.mergemap["contractnumber"].([]string)
-		}
-		arr = append(arr, source.contractnumber)
-		source.mergemap["contractnumber"] = arr
-
+		merge_recordMap["contractnumber"] = info.contractnumber
 		source.contractnumber = info.contractnumber
 		mergeArr = append(mergeArr, 9)
+		is_replace = true
 	}
 
+	if is_replace {//有过替换更新
+		//总次数+1
+		source.mergemap["total_num"] = util.Int64All(source.mergemap["total_num"])+1
+		merge_recordMap["num"] = util.Int64All(source.mergemap["total_num"])
+		//和哪一个数据id进行非空替换的-记录
+		key:=info.id
+		source.mergemap[key] = merge_recordMap
+	}
+
+
 	//以上合并过于简单,待进一步优化
-	return source, mergeArr
+	return source, mergeArr,is_replace
 }
 
 //权重评估

+ 1 - 1
udps/main.go

@@ -24,7 +24,7 @@ func main() {
 	//2018-06-01,2019-02-20
 	/*
 ObjectId("5da3f31aa5cb26b9b798d3aa")
-ObjectId("5da422fba5cb26b9b706984b")
+ObjectId("5da418c4a5cb26b9b7e3e9a6")
 */
 
 	flag.StringVar(&sid, "sid", "", "开始id")