Explorar el Código

优化判重文件

apple hace 5 años
padre
commit
17d1b5e522

+ 117 - 0
udpfilterdup/go_build_main_go

@@ -101,5 +101,122 @@
 
 
 
+//basic_bool := basicDataScore(source, info)
+					//if basic_bool {
+					//	//已原始数据为标准 - 对比数据打判重标签-
+					//	newData, mergeArr, is_replace = mergeDataFields(source, info)
+					//	//对比数据打重复标签的id,原始数据id的记录
+					//	repeat_idMap["_id"] = StringTOBsonId(info.id)
+					//	merge_idMap["_id"] = StringTOBsonId(source.id)
+					//
+					//	if IdType {
+					//		repeat_idMap["_id"] = info.id
+					//		merge_idMap["_id"] = source.id
+					//	}
+					//	repeat_id = source.id
+					//} else {
+					//	//已对比数据为标准 ,数据池的数据打判重标签
+					//	newData, mergeArr, is_replace = mergeDataFields(info, source)
+					//	DM.replaceSourceData(newData, source) //替换
+					//	//原始数据打重复标签的id,   对比数据id的记录
+					//	repeat_idMap["_id"] = StringTOBsonId(source.id)
+					//	merge_idMap["_id"] = StringTOBsonId(info.id)
+					//	if IdType {
+					//		repeat_idMap["_id"] = source.id
+					//		merge_idMap["_id"] = info.id
+					//	}
+					//	repeat_id = info.id
+					//}
 
 
+
+
+//basic_bool := basicDataScore(source, info)
+					//if !basic_bool {
+					//	DM.replaceSourceData(info, source) //替换
+					//	repeat_idMap["_id"] = StringTOBsonId(source.id)
+					//	if IdType {
+					//		repeat_idMap["_id"] = source.id
+					//	}
+					//	repeat_id = info.id
+					//	if len(ids)>=9 {
+					//		ids=append(ids,source.id)
+					//
+					//
+					//		for _, to := range nextNode {
+					//
+					//			key := source.id + "-" + source.id + "-" + util.ObjToString(to["stype"])
+					//			by, _ := json.Marshal(map[string]interface{}{
+					//				"gtid":  source.id,
+					//				"lteid": source.id,
+					//				"stype": util.ObjToString(to["stype"]),
+					//				"key":   key,
+					//				"ids":   strings.Join(ids, ","),
+					//			})
+					//			addr := &net.UDPAddr{
+					//				IP:   net.ParseIP(to["addr"].(string)),
+					//				Port: util.IntAll(to["port"]),
+					//			}
+					//			node := &udpNode{by, addr, time.Now().Unix(), 0}
+					//			udptaskmap.Store(key, node)
+					//			udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
+					//		}
+					//
+					//		//
+					//		ids = []string{}
+					//	}else {
+					//		ids=append(ids,source.id)
+					//	}
+					//
+					//}
+		
+		
+		
+					
+if isMerger { //合并相关
+					newData, mergeArr, is_replace := mergeDataFields(source, info)
+					merge_map := make(map[string]interface{}, 0)
+					if is_replace { //支持合并-更新数据
+						merge_map = map[string]interface{}{
+							"$set": map[string]interface{}{
+								"merge": newData.mergemap,
+							},
+						}
+						//更新合并后的数据
+						for _, value := range mergeArr {
+							if value == 0 {
+								merge_map["$set"].(map[string]interface{})["area"] = newData.area
+								merge_map["$set"].(map[string]interface{})["city"] = newData.city
+							} else if value == 1 {
+								merge_map["$set"].(map[string]interface{})["area"] = newData.area
+								merge_map["$set"].(map[string]interface{})["city"] = newData.city
+							} else if value == 2 {
+								merge_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
+							} else if value == 3 {
+								merge_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
+							} else if value == 4 {
+								merge_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
+							} else if value == 5 {
+								merge_map["$set"].(map[string]interface{})["budget"] = newData.budget
+							} else if value == 6 {
+								merge_map["$set"].(map[string]interface{})["winner"] = newData.winner
+							} else if value == 7 {
+								merge_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
+							} else if value == 8 {
+								merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
+							} else if value == 9 {
+								merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
+							} else if value == 10 {
+								merge_map["$set"].(map[string]interface{})["publishtime"] = newData.publishtime
+							} else if value == 11 {
+								merge_map["$set"].(map[string]interface{})["agency"] = newData.agency
+							} else {
+							}
+						}
+						//模板数据更新
+						updateExtract = append(updateExtract, []map[string]interface{}{
+							merge_idMap,
+							merge_map,
+						})
+					}
+				}					

+ 0 - 1
udpfilterdup/src/config.json

@@ -20,7 +20,6 @@
     ],
     "threads": 1,
     "isMerger": false,
-    "isSort":true,
     "lowHeavy":true,
     "timingTask":false,
     "timingSpanDay": 3,

+ 227 - 0
udpfilterdup/src/dataMethod.go

@@ -0,0 +1,227 @@
+package main
+
+import (
+	"math"
+	"regexp"
+	"strings"
+	qutil "qfw/util"
+)
+
+
+
// convertArabicNumeralsAndLetters normalizes text before duplicate detection:
// ASCII letters are upper-cased and ASCII digits are replaced by the matching
// Chinese numeral (0 -> 零 ... 9 -> 九). Every other byte is copied through
// unchanged, so multi-byte UTF-8 characters are never altered.
//
// The conversion is a single pass over the input; no regular expression is
// compiled per call.
func convertArabicNumeralsAndLetters(data string) string {
	chineseDigits := [10]string{"零", "一", "二", "三", "四", "五", "六", "七", "八", "九"}

	var out strings.Builder
	out.Grow(len(data))
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch {
		case 'a' <= c && c <= 'z':
			// ASCII lower -> upper; bytes inside multi-byte runes are >= 0x80
			// and therefore never reach this case.
			out.WriteByte(c - ('a' - 'A'))
		case '0' <= c && c <= '9':
			out.WriteString(chineseDigits[c-'0'])
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
+
// dealWithSpecialPhrases rewrites every occurrence of the phrase "重新招标"
// to its short form "重招" in both inputs, so that the two spellings compare
// equal later on. The results are returned in the same order as the arguments.
//
// The phrase is a plain literal, so a string replacement is used instead of
// compiling a regular expression on every call.
func dealWithSpecialPhrases(str1 string, str2 string) (string, string) {
	const longForm, shortForm = "重新招标", "重招"
	return strings.Replace(str1, longForm, shortForm, -1),
		strings.Replace(str2, longForm, shortForm, -1)
}
+//关键词数量v
+func dealWithSpecialWordNumber(info*Info,v*Info) int {
+	okNum:=0
+	if  info.titleSpecialWord || info.specialWord {
+		okNum++
+	}
+	if  v.titleSpecialWord || v.specialWord {
+		okNum++
+	}
+	return okNum
+}
+
+//关键词再次判断
+func againRepeat(v *Info, info *Info) bool {
+	if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
+		return true
+	}
+	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+		return true
+	}
+	if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
+		return true
+	}
+	if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
+		return true
+	}
+	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
+		return true
+	}
+	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
+		return true
+	}
+
+	return false
+}
+
// extraSpaceRun matches a run of two or more whitespace characters. It is
// compiled once instead of on every deleteExtraSpace call.
var extraSpaceRun = regexp.MustCompile(`\s{2,}`)

// deleteExtraSpace collapses redundant whitespace in s (used on winner names
// before they are compared).
//
// Step 1 replaces every pair of spaces with a single space. Step 2 shrinks
// each remaining run of two or more whitespace characters to the first
// character of that run. A lone whitespace character is left as it is.
//
// NOTE(review): the previous comment said step 1 turns tabs into spaces, but
// the literal being replaced is two spaces, so a lone tab is kept. That
// behavior is preserved here; confirm whether tab normalization was intended.
func deleteExtraSpace(s string) string {
	collapsed := strings.Replace(s, "  ", " ", -1)
	// \s only matches single-byte ASCII whitespace, so run[:1] is exactly the
	// first whitespace character of the run.
	return extraSpaceRun.ReplaceAllStringFunc(collapsed, func(run string) string {
		return run[:1]
	})
}
+
// isBidWinningAmount reports whether two bid amounts are DIFFERENT.
//
// It returns false when the amounts are equal, or when one is 10000 times the
// other (multiplier 10000, per the original note), and true otherwise.
//
// The comparison tolerates floating-point rounding: an exact == after
// multiplying by 10000 misses values that are mathematically equal, e.g.
// 0.07*10000 is not exactly 700 in float64.
func isBidWinningAmount(f1 float64, f2 float64) bool {
	if amountsAlmostEqual(f1, f2) ||
		amountsAlmostEqual(f1*10000, f2) ||
		amountsAlmostEqual(f2*10000, f1) {
		return false
	}
	return true
}

// amountsAlmostEqual reports whether a and b are equal up to a relative error
// of 1e-9.
func amountsAlmostEqual(a, b float64) bool {
	if a == b { // exact match; also covers equal infinities
		return true
	}
	return math.Abs(a-b) <= 1e-9*math.Max(math.Abs(a), math.Abs(b))
}
+
+
+//开标时间区间为一天
+func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
+	if i1==0||i2==0 {
+		return false
+	}
+	//不在同一天-或者同一天间隔超过六小时,属于不相等返回true
+	timeOne,timeTwo:=i1,i2
+	day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
+	day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
+	if day1==day2 {
+		//是否间隔超过六小时
+		if math.Abs(float64(i1-i2)) >21600.0 {
+			return true
+		}else {
+			return false
+		}
+	}else {
+		return true
+	}
+}
+
+//开标时间区间为一天
+func isTheSameDay(i1 int64 ,i2 int64) bool {
+	if i1==0||i2==0 {
+		return false
+	}
+	timeOne,timeTwo:=i1,i2
+	day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
+	day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
+	if day1==day2 {
+		return true
+	}
+	return false
+}
+
+
+
+//前置0 五要素均相等认为重复
+func leadingElementSame(v *Info, info *Info) bool {
+
+	isok:= 0
+	if info.projectname != "" && v.projectname == info.projectname {
+		isok++
+	}
+	if info.buyer != "" && v.buyer == info.buyer {
+		isok++
+	}
+	if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
+		if info.contractnumber != "" && v.contractnumber == info.contractnumber {
+			isok++
+		}
+	}else {
+		if info.projectcode != "" && v.projectcode == info.projectcode {
+			isok++
+		}
+	}
+	if info.title != "" && v.title == info.title {
+		isok++
+	}
+	if v.agency == info.agency {
+		isok++
+	}
+
+	if isok==5 {
+		return true
+	}
+
+
+	return false
+}
+
+//buyer的优先级
+func buyerIsContinue(v *Info, info *Info) bool {
+	if !isTheSameDay(info.publishtime,v.publishtime) {
+		return true
+	}
+	if v.title != info.title && v.title != "" && info.title != ""{
+		return true
+	}
+	if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
+		return true
+	}
+	//if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
+	//	return true
+	//}
+	//if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
+	//	return true
+	//}
+	//if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
+	//	return true
+	//}
+	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
+		return true
+	}
+	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
+		return true
+	}
+
+	return false
+}
+
+
+
// invalidData reports whether a record is invalid, which is the case only
// when all four given values are empty strings.
func invalidData(d1 string, d2 string, d3 string, d4 string) bool {
	for _, field := range [...]string{d1, d2, d3, d4} {
		if field != "" {
			return false
		}
	}
	return true
}

+ 519 - 0
udpfilterdup/src/dataMethodHeavy.go

@@ -0,0 +1,519 @@
+package main
+
+import "strings"
+
+//判重方法1
+func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
+
+	isMeet := false
+	if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
+		info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
+		info.subtype == "变更" || info.subtype == "其他" {
+		//招标结果
+		if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
+			if tenderRepeat_C(v, info) {
+				return false, reason
+			} else {
+				reason = reason + "---招标类"
+				return true, reason
+			}
+		} else {
+			return false, reason
+		}
+
+	} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
+		//中标结果
+		if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
+			if winningRepeat_C(v, info) {
+				return false, reason
+			} else {
+				reason = reason + "---中标类"
+				return true, reason
+			}
+		} else {
+			return false, reason
+		}
+
+	} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
+		//合同
+		if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
+			if contractRepeat_C(v, info) {
+				return false, reason
+			} else {
+				reason = reason + "---合同类"
+				return true, reason
+			}
+		} else {
+			return false, reason
+		}
+	} else {
+		//招标结果
+		if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
+			if tenderRepeat_C(v, info) {
+				return false, reason
+			} else {
+				reason = reason + "---类别空-招标类"
+				return true, reason
+			}
+		} else {
+			return false, reason
+		}
+	}
+
+	return false, reason
+}
+
+//判重方法2
+func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
+	isMeet := false
+	if v.agency == info.agency && v.agency != "" && info.agency != "" {
+		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
+			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
+			info.subtype == "变更" || info.subtype == "其他" {
+			//招标结果
+			if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
+				if tenderRepeat_C(v, info) { //有不同
+					return false, reason
+				} else {
+					reason = reason + "---招标类"
+					return true, reason
+				}
+			} else {
+				return false, reason
+			}
+
+		} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
+			//中标结果
+			if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
+				if winningRepeat_C(v, info) { //有不同
+					return false, reason
+				} else {
+					reason = reason + "---中标类"
+					return true, reason
+				}
+			} else {
+				return false, reason
+			}
+
+		} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
+			//合同
+			if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
+				if contractRepeat_C(v, info) { //有不同
+					return false, reason
+				} else {
+					reason = reason + "---合同类"
+					return true, reason
+				}
+			} else {
+				return false, reason
+			}
+		} else {
+			//招标结果
+			if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
+				if tenderRepeat_C(v, info) { //有不同
+					return false, reason
+				} else {
+					reason = reason + "---类别空-招标类"
+					return true, reason
+				}
+			} else {
+				return false, reason
+			}
+		}
+	}
+
+	//不同
+	if v.agency != info.agency && v.agency != "" && info.agency != "" {
+		return false, reason
+	}
+	//机构最少一个为空
+	if v.agency == "" || info.agency == "" {
+		var repeat = false
+		if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
+			reason = reason + "---机构最少一个空"
+			return true, reason
+		} else {
+			return false, reason
+		}
+	}
+
+	return false, reason
+}
+
+//招标_A
+func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
+
+	var ss string
+	p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
+	if v.projectname != "" && v.projectname == info.projectname {
+		ss = ss + "p1-名称-"
+		p1 = true
+	}
+	if v.buyer != "" && v.buyer == info.buyer {
+		ss = ss + "p2-单位-"
+		p2 = true
+	}
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
+		ss = ss + "p3-编号组-"
+		p3 = true
+	}
+	if v.budget != 0 && v.budget == info.budget {
+		ss = ss + "p4-预算-"
+		p4 = true
+	}
+	if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
+		ss = ss + "p9-开标时间相同-"
+		p9 = true
+	}
+	if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
+		ss = ss + "p10-开标地点-"
+		p10 = true
+	}
+	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
+		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
+		ss = ss + "p11-标题-"
+		p11 = true
+	}
+
+	if  (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) || (p1 && p2 && p10) ||
+		(p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
+		(p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
+		(p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
+		(p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
+		(p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
+		(p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
+		reason = reason + "满足招标A,3要素组合-" + ss + ","
+		return true, reason
+	}
+	return false, reason
+}
+
+//招标_B
+func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
+
+	m, n := 0, 0
+	if v.projectname != "" && v.projectname == info.projectname {
+		m++
+		n++
+	}
+	if v.buyer != "" && v.buyer == info.buyer {
+		m++
+	}
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
+		m++
+	}
+	if v.budget != 0 && v.budget == info.budget {
+		m++
+	}
+	if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
+		m++
+	}
+	//if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
+	//	m++
+	//}
+	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
+		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
+		m++
+		n++
+	}
+	if m >= 2 {
+		if n == 2 && m == 2 {
+			return false, reason
+		} else {
+			reason = reason + "满足招标B,六选二,"
+			return true, reason
+		}
+	}
+	return false, reason
+}
+
+//招标_C
+func tenderRepeat_C(v *Info, info *Info) bool {
+
+	if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
+		return true
+	}
+	//原始地址...
+	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
+	//	return true
+	//}
+
+	if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
+		return true
+	}
+	//if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
+	//	return true
+	//}
+
+	return false
+}
+
+//中标_A
+func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
+
+	var ss string
+	p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
+	if v.projectname != "" && v.projectname == info.projectname {
+		ss = ss + "p1-项目名称-"
+		p1 = true
+	}
+	if v.buyer != "" && v.buyer == info.buyer {
+		ss = ss + "p2-单位-"
+		p2 = true
+	}
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
+		ss = ss + "p3-编号组--"
+		p3 = true
+	}
+	if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
+		ss = ss + "p5-中标金-"
+		p5 = true
+	}
+	if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
+		ss = ss + "p6-中标人-"
+		p6 = true
+	}
+
+
+	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
+		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
+		ss = ss + "p11-标题-"
+		p11 = true
+	}
+
+	if 	(p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
+		(p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
+		(p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
+		(p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
+		(p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
+		(p5 && p6 && p11) {
+		reason = reason + "满足中标A,3要素组合-" + ss + ","
+		return true, reason
+	}
+
+	return false, reason
+}
+
+//中标_B
+func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
+
+	m, n := 0, 0
+	if v.projectname != "" && v.projectname == info.projectname {
+		m++
+		n++
+	}
+	if v.buyer != "" && v.buyer == info.buyer {
+		m++
+	}
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
+		m++
+	}
+	if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
+		m++
+	}
+	if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
+		m++
+	}
+	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
+		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
+		m++
+		n++
+	}
+	if m >= 2 {
+		if n == 2 && m == 2 {
+			return false, reason
+		} else {
+			reason = reason + "满足中标B.六选二,"
+			return true, reason
+		}
+	}
+	return false, reason
+}
+
+//中标_C
+func winningRepeat_C(v *Info, info *Info) bool {
+
+	if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
+		return true
+	}
+	//
+	if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
+		return true
+	}
+	//原始地址...
+	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
+	//	return true
+	//}
+	return false
+}
+
+//合同_A
+func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
+
+	isMeet_1 := false
+	if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
+		return true, reason
+	}
+
+	isMeet_2 := false
+	if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
+		return true, reason
+	}
+	return false, reason
+}
+
+//合同_B
+func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
+
+	isMeet_1 := false
+	if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
+		return true, reason
+	}
+	isMeet_2 := false
+	if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
+		return true, reason
+	}
+	return false, reason
+}
+
+//合同_C
+func contractRepeat_C(v *Info, info *Info) bool {
+
+	if tenderRepeat_C(v, info) {
+		return true
+	}
+	if winningRepeat_C(v, info) {
+		return true
+	}
+
+	//合同类 - 新增编号
+	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
+		return true
+	}
+	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
+		return true
+	}
+
+	return false
+}
+
+
+
+
+
+
+
+
+
+//快速低质量数据判重
+func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
+	if !isTheSameDay(v.publishtime,info.publishtime) {
+		return false,reason
+	}
+	//首先判定是否为低质量数据    info目标数据
+	if info.agency==v.agency&&info.title!=""&&
+		info.title==v.title &&
+		info.projectname==""&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
+		isValue:=0//五要素判断
+		if info.budget != 0 {//预算
+			isValue++
+		}
+		if info.bidopentime != 0{//开标时间
+			isValue++
+		}
+		if info.bidopenaddress!=""{//开标地点
+			isValue++
+		}
+		if info.winner != ""{//中标单位
+			isValue++
+		}
+		if info.bidamount != 0 {//中标金额
+			isValue++
+		}
+		if isValue==0 {
+			reason = reason + "---低质量-要素均为空-标题包含关系"
+			return true, reason
+		}else if isValue==1 {
+			isMeet := false
+			if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
+				reason = reason + "---低质量-有且一个要素组合"
+				return true, reason
+			}
+		}else {
+
+		}
+	}
+	return false,reason
+}
+
+//类别细节原因记录
+func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
+	if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
+		info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
+		info.subtype == "变更" || info.subtype == "其他" {
+		//招标结果
+		if info.budget != 0 && info.budget == v.budget{//预算
+			reason = reason + "---招标类:预算"
+			return true,reason
+		}
+		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
+			reason = reason + "---招标类:开标时间"
+			return true,reason
+		}
+		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
+			reason = reason + "---招标类:开标地点"
+			return true,reason
+		}
+	} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
+		//中标结果
+		if v.winner != "" && info.winner == v.winner{//中标单位
+			reason = reason + "---中标类:中标单位"
+			return true,reason
+		}
+		if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
+			reason = reason + "---中标类:中标金额"
+			return true,reason
+		}
+	} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
+		//合同
+		if info.budget != 0 && info.budget == v.budget{//预算
+			reason = reason + "---合同类:预算"
+			return true,reason
+		}
+		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
+			reason = reason + "---合同类:开标时间"
+			return true,reason
+		}
+		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
+			reason = reason + "---合同类:开标地点"
+			return true,reason
+		}
+		if v.winner != "" && info.winner == v.winner{//中标单位
+			reason = reason + "---合同类:中标单位"
+			return true,reason
+		}
+		if v.bidamount != 0 && info.bidamount == v.bidamount{//中标金额
+			reason = reason + "---合同类:中标金额"
+			return true,reason
+		}
+	} else {
+		//招标结果
+		if info.budget != 0 && info.budget == v.budget{//预算
+			reason = reason + "---类别空-招标类:预算"
+			return true,reason
+		}
+		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
+			reason = reason + "---类别空-招标类:开标时间"
+			return true,reason
+		}
+		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//开标地点
+			reason = reason + "---类别空-招标类:开标地点"
+			return true,reason
+		}
+	}
+	return false,reason
+}

+ 302 - 0
udpfilterdup/src/dataMethodMerge.go

@@ -0,0 +1,302 @@
+package main
+
+import "qfw/util"
+
+//合并字段-并更新merge字段的值
+func mergeDataFields(source *Info, info *Info) (*Info, []int64, bool) {
+
+	merge_recordMap := make(map[string]interface{}, 0)
+	mergeArr := make([]int64, 0)
+	//是否替换数据了-记录原始的数据
+	is_replace := false
+	//1、城市
+	if source.area == "" || source.area == "全国" {
+		//为空
+		if info.area != "全国" && info.area != "" {
+			merge_recordMap["area"] = source.area
+			merge_recordMap["city"] = source.city
+			source.area = info.area
+			source.city = info.city
+			mergeArr = append(mergeArr, 1)
+			is_replace = true
+		}
+	} else {
+		//不为空-查看站点相关-有值必替换
+		if source.is_site {
+			//是站点替换的城市
+			merge_recordMap["site_area"] = source.area
+			merge_recordMap["site_city"] = source.city
+			mergeArr = append(mergeArr, 0)
+			is_replace = true
+			source.is_site = false
+
+		}
+	}
+	//2、项目名称
+	if source.projectname == "" && info.projectname != "" {
+		merge_recordMap["projectname"] = source.projectname
+		source.projectname = info.projectname
+		mergeArr = append(mergeArr, 2)
+		is_replace = true
+	}
+	//3、项目编号
+	if source.projectcode == "" && info.projectcode != "" {
+		merge_recordMap["projectcode"] = source.projectcode
+		source.projectcode = info.projectcode
+		mergeArr = append(mergeArr, 3)
+		is_replace = true
+	}
+	//4、采购单位
+	if source.buyer == "" && info.buyer != "" {
+		merge_recordMap["buyer"] = source.buyer
+		source.buyer = info.buyer
+		mergeArr = append(mergeArr, 4)
+		is_replace = true
+	}
+	//5、预算
+	if source.budget == 0 && info.budget != 0 {
+		merge_recordMap["budget"] = source.budget
+		source.budget = info.budget
+		mergeArr = append(mergeArr, 5)
+		is_replace = true
+	}
+	//6、中标单位
+	if source.winner == "" && info.winner != "" {
+		merge_recordMap["winner"] = source.winner
+		source.winner = info.winner
+		mergeArr = append(mergeArr, 6)
+		is_replace = true
+	}
+	//7、中标金额
+	if source.bidamount == 0 && info.bidamount != 0 {
+		merge_recordMap["bidamount"] = source.bidamount
+		source.bidamount = info.bidamount
+		mergeArr = append(mergeArr, 7)
+		is_replace = true
+	}
+	//8、开标时间-地点
+	if source.bidopentime == 0 && info.bidopentime != 0 {
+		merge_recordMap["bidopentime"] = source.bidopentime
+		source.bidopentime = info.bidopentime
+		mergeArr = append(mergeArr, 8)
+		is_replace = true
+	}
+
+	//9、合同编号
+	if source.contractnumber == "" && info.contractnumber != "" {
+		merge_recordMap["contractnumber"] = source.contractnumber
+		source.contractnumber = info.contractnumber
+		mergeArr = append(mergeArr, 9)
+		is_replace = true
+	}
+
+	//10、发布时间
+	if source.publishtime == 0 && info.publishtime != 0 {
+		merge_recordMap["publishtime"] = source.publishtime
+		source.publishtime = info.publishtime
+		mergeArr = append(mergeArr, 10)
+		is_replace = true
+	}
+	//11、代理机构
+	if source.agency == "" && info.agency != "" {
+		merge_recordMap["agency"] = source.agency
+		source.agency = info.agency
+		mergeArr = append(mergeArr, 11)
+		is_replace = true
+	}
+
+	if is_replace { //有过替换更新
+		//总次数+1
+		source.mergemap["total_num"] = util.Int64All(source.mergemap["total_num"]) + 1
+		merge_recordMap["num"] = util.Int64All(source.mergemap["total_num"])
+		//和哪一个数据id进行非空替换的-记录
+		key := info.id
+		source.mergemap[key] = merge_recordMap
+	}
+
+	//待进一步优化
+	return source, mergeArr, is_replace
+}
+
+//权重评估
+func basicDataScore(v *Info, info *Info) bool {
+
+	/*
+	  权重评估
+	  网站优先级判定规则:
+	  1、国家>省级>市级>县区
+	  2、政府采购>公共资源>官方网站|政府门户>社会公共招标平台|企业招标平台
+	  3、同sitetype-分析weight
+	  4、要素打分-分析
+	*/
+	v_score, info_score := -1, -1
+	dict_v := SiteMap[v.site]
+	dict_info := SiteMap[info.site]
+	//先判断level
+	if dict_v != nil {
+		v_level := util.ObjToString(dict_v["level"])
+		if v_level == "国家" {
+			v_score = 4
+		} else if v_level == "省级" {
+			v_score = 3
+		} else if v_level == "市级" {
+			v_score = 2
+		} else if v_level == "县区" {
+			v_score = 1
+		} else if v_level == "" {
+		} else {
+			v_score = 0
+		}
+	}
+
+	if dict_info != nil {
+		info_level := util.ObjToString(dict_info["level"])
+		if info_level == "国家" {
+			info_score = 4
+		} else if info_level == "省级" {
+			info_score = 3
+		} else if info_level == "市级" {
+			info_score = 2
+		} else if info_level == "县区" {
+			info_score = 1
+		} else if info_level == "" {
+
+		} else {
+			v_score = 0
+		}
+	}
+
+	if v_score > info_score {
+		return true
+	}
+	if v_score < info_score {
+		return false
+	}
+
+	//判断sitetype
+	if dict_v != nil {
+		v_sitetype := util.ObjToString(dict_v["sitetype"])
+		if v_sitetype == "政府采购" {
+			v_score = 4
+		} else if v_sitetype == "公共资源" {
+			v_score = 3
+		} else if v_sitetype == "官方网站"|| v_sitetype == "政府门户" {
+			v_score = 2
+		} else if v_sitetype == "社会公共招标平台" || v_sitetype == "企业招标平台" {
+			v_score = 1
+		} else if v_sitetype == "" {
+		} else {
+			v_score = 0
+		}
+	}
+
+	if dict_info != nil {
+		info_sitetype := util.ObjToString(dict_info["sitetype"])
+		if info_sitetype == "政府采购" {
+			info_score = 4
+		} else if info_sitetype == "公共资源" {
+			info_score = 3
+		} else if info_sitetype == "官方网站"|| info_sitetype == "政府门户" {
+			info_score = 2
+		} else if info_sitetype == "社会公共招标平台" || info_sitetype == "企业招标平台" {
+			info_score = 1
+		} else if info_sitetype == "" {
+		} else {
+			info_score = 0
+		}
+	}
+
+	if v_score > info_score {
+		return true
+	}
+	if v_score < info_score {
+		return false
+	}
+
+	if v_score == info_score {//同sitetype 情况下   分析weight
+		v_weight := util.IntAll(dict_v["weight"])
+		info_weight := util.IntAll(dict_info["weight"])
+		if v_weight>info_weight {
+			return true
+		}
+		if info_weight>v_weight {
+			return false
+		}
+	}
+
+	//网站评估
+	m, n := 0, 0
+	if v.projectname != "" {
+		m++
+	}
+	if v.buyer != "" {
+		m++
+	}
+	if v.projectcode != "" || v.contractnumber != "" {
+		m++
+	}
+	if v.budget != 0 {
+		m++
+	}
+	if v.bidamount != 0 {
+		m++
+	}
+	if v.winner != "" {
+		m++
+	}
+	if v.bidopentime != 0 {
+		m++
+	}
+	if v.bidopenaddress != "" {
+		m++
+	}
+	if v.agency != "" {
+		m = m + 2
+	}
+	if v.city != "" {
+		m = m + 2
+	}
+
+	if info.projectname != "" {
+		n++
+	}
+	if info.buyer != "" {
+		n++
+	}
+	if info.projectcode != "" || info.contractnumber != "" {
+		n++
+	}
+	if info.budget != 0 {
+		n++
+	}
+	if info.bidamount != 0 {
+		n++
+	}
+	if info.winner != "" {
+		n++
+	}
+	if info.bidopentime != 0 {
+		n++
+	}
+	if info.bidopenaddress != "" {
+		n++
+	}
+	if info.agency != "" {
+		n = n + 2
+	}
+	if info.city != "" {
+		n = n + 2
+	}
+
+	if m > n {
+		return true
+	} else if m == n {
+		if v.publishtime >= info.publishtime {
+			return true
+		} else {
+			return false
+		}
+	} else {
+		return false
+	}
+}

+ 52 - 861
udpfilterdup/src/datamap.go

@@ -3,7 +3,6 @@ package main
 import (
 	"fmt"
 	"log"
-	"math"
 	qutil "qfw/util"
 	"regexp"
 	"strings"
@@ -53,7 +52,8 @@ type datamap struct {
 	keys   map[string]bool
 }
 
-func TimedTaskDatamap(days int,lasttime int64 ,coll string) *datamap {
+//历史
+func TimedTaskDatamap(days int,lasttime int64) *datamap {
 	log.Println("数据池开始重新构建")
 	datelimit = qutil.Float64All(days * 86400)
 	dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{},map[string]bool{}}
@@ -61,7 +61,6 @@ func TimedTaskDatamap(days int,lasttime int64 ,coll string) *datamap {
 		log.Println("数据池空数据")
 		return dm
 	}
-
 	start := int(time.Now().Unix())
 	sess := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess)
@@ -69,7 +68,7 @@ func TimedTaskDatamap(days int,lasttime int64 ,coll string) *datamap {
 		"$lt": lasttime,
 	}}
 	log.Println("query", query)
-	it := sess.DB(mgo.DbName).C(coll).Find(query).Sort("-publishtime").Iter()
+	it := sess.DB(mgo.DbName).C(extract_back).Find(query).Sort("-publishtime").Iter()
 	n, continuSum := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
 		//qutil.IntAll(tmp["dataging"]) == 1
@@ -79,6 +78,9 @@ func TimedTaskDatamap(days int,lasttime int64 ,coll string) *datamap {
 		} else {
 			pt := tmp["publishtime"]
 			pt_time := qutil.Int64All(pt)
+			if pt_time <= 0 {
+				break
+			}
 			if qutil.Float64All(lasttime-pt_time) < datelimit {
 				continuSum++
 				info := NewInfo(tmp)
@@ -113,15 +115,13 @@ func TimedTaskDatamap(days int,lasttime int64 ,coll string) *datamap {
 		tmp = make(map[string]interface{})
 	}
 
-
 	log.Printf("数据池构建完成:%d秒,%d个\n", int(time.Now().Unix())-start, n)
 
-
 	return dm
 }
 
 
-
+//增量
 func NewDatamap(days int, lastid string) *datamap {
 	datelimit = qutil.Float64All(days * 86400 * 2)
 	dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
@@ -135,24 +135,17 @@ func NewDatamap(days int, lastid string) *datamap {
 		"$lte": StringTOBsonId(lastid),
 	}}
 	log.Println("query", query)
-	sortName := "-_id"
-	if Is_Sort {
-		sortName = "-publishtime"
-	}
-	it := sess.DB(mgo.DbName).C(extract).Find(query).Sort(sortName).Iter()
+	it := sess.DB(mgo.DbName).C(extract).Find(query).Sort("-publishtime").Iter()
 	now1 := int64(0)
 	n, continuSum := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
 		if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1{
 
 		} else {
-			pt := tmp["comeintime"]
-			if Is_Sort {
-				pt = tmp["publishtime"]
-			}
+			pt := tmp["publishtime"]
 			pt_time := qutil.Int64All(pt)
 			if pt_time <= 0 {
-				continue
+				break
 			}
 			if now1 == 0 {
 				now1 = pt_time
@@ -185,7 +178,7 @@ func NewDatamap(days int, lastid string) *datamap {
 				break
 			}
 		}
-		if n%5000 == 0 {
+		if n%10000 == 0 {
 			log.Println("当前 n:", n,"数量:" ,continuSum)
 		}
 		tmp = make(map[string]interface{})
@@ -194,6 +187,7 @@ func NewDatamap(days int, lastid string) *datamap {
 	return dm
 }
 
+//数据构建
 func NewInfo(tmp map[string]interface{}) *Info {
 	subtype := qutil.ObjToString(tmp["subtype"])
 	area := qutil.ObjToString(tmp["area"])
@@ -239,6 +233,8 @@ func NewInfo(tmp map[string]interface{}) *Info {
 	return info
 }
 
+//判重方法
+//判重方法
 //判重方法
 func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
 	reason := ""
@@ -306,7 +302,7 @@ L:
 					}
 					if info.href != "" && info.href != v.href {
 						if v.title==info.title&&len([]rune(info.title)) >10 && isTheSameDay(info.publishtime,v.publishtime){
-							if !againHrefRepeat(v, info) {//进行同站点二次判断
+							if !againRepeat(v, info) {//进行同站点二次判断
 								reason = "同站点-href不同-标题相同等"
 								b = true
 								source = v
@@ -420,10 +416,7 @@ L:
 
 	//往预存数据 d 添加
 	if !b {
-		ct := info.comeintime
-		if Is_Sort ||TimingTask{
-			ct = info.publishtime
-		}
+		ct := info.publishtime
 		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
 		k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
 		d.lock.Lock()
@@ -458,61 +451,6 @@ L:
 
 	return
 }
-//replaceSourceData swaps oldData for newData in the pool: it removes oldData from its "date_subtype_area" bucket, appends newData to its own bucket and records newData.area in d.areakeys.
-func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
-	//Remove the old record; the bucket date is comeintime, or publishtime when Is_Sort or TimingTask is set. NOTE(review): this removal runs before d.lock.Lock() below - confirm callers serialize access.
-	ct_old := oldData.comeintime
-	if Is_Sort||TimingTask {
-		ct_old = oldData.publishtime
-	}
-	dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
-	k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
-	data_old := d.data[k_old]
-	for k, v := range data_old {
-		if v.id == oldData.id {//drop the entry whose id matches the old record
-			data_old = append(data_old[:k], data_old[k+1:]...)
-			break
-		}
-	}
-	d.data[k_old] = data_old
-
-	//Add the new record under its own key, built the same way as above
-	ct := newData.comeintime
-	if Is_Sort ||TimingTask{
-		ct = newData.publishtime
-	}
-	dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
-	k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
-	d.lock.Lock()
-	data := d.data[k]
-	if data == nil {
-		data = []*Info{newData}
-		d.data[k] = data
-		if !d.keys[dkey] {
-			d.keys[dkey] = true
-			d.update(ct)
-		}
-	} else {
-		data = append(data, newData)
-		d.data[k] = data
-	}
-	//Record the area (province) in d.areakeys if it is not there yet
-	isAreaExist :=false
-	for _,v:= range d.areakeys {
-		if v==newData.area {
-			isAreaExist = true
-		}
-	}
-	if !isAreaExist {
-		areaArr := d.areakeys
-		areaArr = append(areaArr,newData.area)
-		d.areakeys = areaArr
-	}
-
-
-	d.lock.Unlock()
-}
-
 
 func (d *datamap) update(t int64) {
 
@@ -565,802 +503,55 @@ func (d *datamap) GetLatelyFiveDayDouble(t int64) []string  {//增量-两倍
 	return array
 }
 
-/*
-**************************
-******** 以下为判重 ********
-**************************
- */
- 
- //Precondition helper for duplicate detection: convertArabicNumeralsAndLetters upper-cases every run of ASCII letters, then replaces each digit 0-9 with the matching Chinese numeral and returns the result.
-func convertArabicNumeralsAndLetters(data string) string {
-	newData :=data
-	res1, _ := regexp.Compile("[a-zA-Z]+");
-	if res1.MatchString(data) {
-		newData = res1.ReplaceAllStringFunc(data, strings.ToUpper);
-	}
-	res2, _ := regexp.Compile("[0-9]+");
-	if res2.MatchString(newData) {
-		arr1:=[]string {"0","1","2","3","4","5","6","7","8","9"}
-		arr2:=[]string {"零","一","二","三","四","五","六","七","八","九"}
-		for i:=0 ;i<len(arr1) ;i++  {
-			resTemp ,_:=regexp.Compile(arr1[i])
-			newData= resTemp.ReplaceAllString(newData, arr2[i]);
-		}
-	}
-	return newData
-}
-//dealWithSpecialPhrases returns both inputs with every occurrence of "重新招标" replaced by "重招"; strings without the phrase are returned unchanged.
-func dealWithSpecialPhrases(str1 string,str2 string) (string,string) {
-	newStr1:=str1
-	newStr2:=str2
-	res, _ := regexp.Compile("重新招标");
-	if res.MatchString(newStr1) {
-		newStr1 = res.ReplaceAllString(newStr1,"重招");
-	}
-	if res.MatchString(newStr2) {
-		newStr2 = res.ReplaceAllString(newStr2,"重招");
-	}
-	return newStr1,newStr2
-}
-//dealWithSpecialWordNumber counts how many of the two records (info, v) have titleSpecialWord or specialWord set; the result is 0, 1 or 2.
-func dealWithSpecialWordNumber(info*Info,v*Info) int {
-	okNum:=0
-	if  info.titleSpecialWord || info.specialWord {
-		okNum++
-	}
-	if  v.titleSpecialWord || v.specialWord {
-		okNum++
-	}
-	return okNum
-}
-
- 
 
- //快速低质量数据判重
-func fastLowQualityHeavy(v *Info, info *Info, reason string) (bool, string) {
-	if !isTheSameDay(v.publishtime,info.publishtime) {
-		return false,reason
-	}
-	//首先判定是否为低质量数据    info目标数据
-	if info.agency==v.agency&&info.title!=""&&
-		info.title==v.title &&
-		info.projectname==""&&info.projectcode==""&&info.contractnumber==""&&info.buyer=="" {
-		isValue:=0//五要素判断
-		if info.budget != 0 {//预算
-			isValue++
-		}
-		if info.bidopentime != 0{//开标时间
-			isValue++
-		}
-		if info.bidopenaddress!=""{//开标地点
-			isValue++
-		}
-		if info.winner != ""{//中标单位
-			isValue++
-		}
-		if info.bidamount != 0 {//中标金额
-			isValue++
-		}
-		if isValue==0 {
-			reason = reason + "---低质量-要素均为空-标题包含关系"
-			return true, reason
-		}else if isValue==1 {
-			isMeet := false
-			if isMeet, reason = judgeLowQualityData(v, info, reason); isMeet {
-				reason = reason + "---低质量-有且一个要素组合"
-				return true, reason
-			}
-		}else {
 
+//替换原始数据池
+func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
+	//删除数据池的老数据
+	ct_old := oldData.publishtime
+	dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
+	k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
+	data_old := d.data[k_old]
+	for k, v := range data_old {
+		if v.id == oldData.id {//删除对应当前的老数据
+			data_old = append(data_old[:k], data_old[k+1:]...)
+			break
 		}
 	}
-	return false,reason
-}
-
-//judgeLowQualityData records the per-category reason: it picks the field group for info.subtype and returns true, with a label appended to reason, on the first listed field that is set and equal in v and info; otherwise it returns false with reason unchanged.
-func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
-	if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-		info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-		info.subtype == "变更" || info.subtype == "其他" {
-		//tender category
-		if info.budget != 0 && info.budget == v.budget{//budget
-			reason = reason + "---招标类:预算"
-			return true,reason
-		}
-		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//bid-open time
-			reason = reason + "---招标类:开标时间"
-			return true,reason
-		}
-		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//bid-open address
-			reason = reason + "---招标类:开标地点"
-			return true,reason
-		}
-	} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
-		//award category
-		if v.winner != "" && info.winner == v.winner{//winning bidder
-			reason = reason + "---中标类:中标单位"
-			return true,reason
-		}
-		if v.bidamount != 0 && info.bidamount == v.bidamount{//winning bid amount
-			reason = reason + "---中标类:中标金额"
-			return true,reason
-		}
-	} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-		//contract category: tender fields first, then award fields
-		if info.budget != 0 && info.budget == v.budget{//budget
-			reason = reason + "---合同类:预算"
-			return true,reason
-		}
-		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//bid-open time
-			reason = reason + "---合同类:开标时间"
-			return true,reason
-		}
-		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//bid-open address
-			reason = reason + "---合同类:开标地点"
-			return true,reason
-		}
-		if v.winner != "" && info.winner == v.winner{//winning bidder
-			reason = reason + "---合同类:中标单位"
-			return true,reason
-		}
-		if v.bidamount != 0 && info.bidamount == v.bidamount{//winning bid amount
-			reason = reason + "---合同类:中标金额"
-			return true,reason
-		}
-	} else {
-		//any other (or empty) subtype is treated like the tender category
-		if info.budget != 0 && info.budget == v.budget{//budget
-			reason = reason + "---类别空-招标类:预算"
-			return true,reason
-		}
-		if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//bid-open time
-			reason = reason + "---类别空-招标类:开标时间"
-			return true,reason
-		}
-		if info.bidopenaddress!="" && info.bidopenaddress == v.bidopenaddress{//bid-open address
-			reason = reason + "---类别空-招标类:开标地点"
-			return true,reason
-		}
-	}
-	return false,reason
-}
-
-//判重方法1
-func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
-
-	isMeet := false
-	if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-		info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-		info.subtype == "变更" || info.subtype == "其他" {
-		//招标结果
-		if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
-			if tenderRepeat_C(v, info) {
-				return false, reason
-			} else {
-				reason = reason + "---招标类"
-				return true, reason
-			}
-		} else {
-			return false, reason
-		}
-
-	} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
-		//中标结果
-		if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
-			if winningRepeat_C(v, info) {
-				return false, reason
-			} else {
-				reason = reason + "---中标类"
-				return true, reason
-			}
-		} else {
-			return false, reason
-		}
+	d.data[k_old] = data_old
 
-	} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-		//合同
-		if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
-			if contractRepeat_C(v, info) {
-				return false, reason
-			} else {
-				reason = reason + "---合同类"
-				return true, reason
-			}
-		} else {
-			return false, reason
+	//添加新的
+	ct := newData.publishtime
+	dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
+	k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
+	d.lock.Lock()
+	data := d.data[k]
+	if data == nil {
+		data = []*Info{newData}
+		d.data[k] = data
+		if !d.keys[dkey] {
+			d.keys[dkey] = true
+			d.update(ct)
 		}
 	} else {
-		//招标结果
-		if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
-			if tenderRepeat_C(v, info) {
-				return false, reason
-			} else {
-				reason = reason + "---类别空-招标类"
-				return true, reason
-			}
-		} else {
-			return false, reason
-		}
-	}
-
-	return false, reason
-}
-
-//Duplicate-check method 2: when both agencies are non-empty and equal, quickHeavyMethodTwo runs the subtype's *_B match and rejects it if the matching *_C check finds a conflict; different non-empty agencies never match; if either agency is empty it defers to quickHeavyMethodOne.
-func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
-	isMeet := false
-	if v.agency == info.agency && v.agency != "" && info.agency != "" {
-		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-			info.subtype == "变更" || info.subtype == "其他" {
-			//tender category
-			if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
-				if tenderRepeat_C(v, info) { //a conflicting field exists
-					return false, reason
-				} else {
-					reason = reason + "---招标类"
-					return true, reason
-				}
-			} else {
-				return false, reason
-			}
-
-		} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
-			//award category
-			if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
-				if winningRepeat_C(v, info) { //a conflicting field exists
-					return false, reason
-				} else {
-					reason = reason + "---中标类"
-					return true, reason
-				}
-			} else {
-				return false, reason
-			}
-
-		} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-			//contract category
-			if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
-				if contractRepeat_C(v, info) { //a conflicting field exists
-					return false, reason
-				} else {
-					reason = reason + "---合同类"
-					return true, reason
-				}
-			} else {
-				return false, reason
-			}
-		} else {
-			//any other (or empty) subtype is treated like the tender category
-			if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
-				if tenderRepeat_C(v, info) { //a conflicting field exists
-					return false, reason
-				} else {
-					reason = reason + "---类别空-招标类"
-					return true, reason
-				}
-			} else {
-				return false, reason
-			}
-		}
-	}
-
-	//both agencies are set but differ: not a duplicate
-	if v.agency != info.agency && v.agency != "" && info.agency != "" {
-		return false, reason
-	}
-	//at least one agency is empty: fall back to method one
-	if v.agency == "" || info.agency == "" {
-		var repeat = false
-		if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
-			reason = reason + "---机构最少一个空"
-			return true, reason
-		} else {
-			return false, reason
-		}
-	}
-
-	return false, reason
-}
-
-//Tender rule A: tenderRepeat_A flags which fields match between v and info (p1 project name, p2 buyer, p3 project code or contract number of at least 5 bytes, p4 budget, p9 bid-open time, p10 bid-open address, p11 title containment) and returns true, appending the matched flags to reason, when one of the listed three-flag combinations holds.
-func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
-
-	var ss string
-	p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
-	if v.projectname != "" && v.projectname == info.projectname {
-		ss = ss + "p1-名称-"
-		p1 = true
-	}
-	if v.buyer != "" && v.buyer == info.buyer {
-		ss = ss + "p2-单位-"
-		p2 = true
-	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
-		ss = ss + "p3-编号组-"
-		p3 = true
-	}
-	if v.budget != 0 && v.budget == info.budget {
-		ss = ss + "p4-预算-"
-		p4 = true
-	}
-	if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
-		ss = ss + "p9-开标时间相同-"
-		p9 = true
-	}
-	if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
-		ss = ss + "p10-开标地点-"
-		p10 = true
-	}
-	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
-		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
-		ss = ss + "p11-标题-"
-		p11 = true
-	}
-
-	if  (p1 && p2 && p3) || (p1 && p2 && p4) || (p1 && p2 && p9) || (p1 && p2 && p10) ||
-		(p1 && p2 && p11) || (p1 && p3 && p9) || (p1 && p3 && p10) ||
-		(p1 && p4 && p9) || (p1 && p4 && p10) || (p2 && p3 && p4) ||
-		(p2 && p3 && p9) || (p2 && p3 && p10) || (p2 && p3 && p11) ||
-		(p2 && p4 && p9) || (p2 && p4 && p10) || (p2 && p4 && p11) ||
-		(p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
-		(p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
-		reason = reason + "满足招标A,3要素组合-" + ss + ","
-		return true, reason
-	}
-	return false, reason
-}
-
-//Tender rule B: tenderRepeat_B counts matching fields in m (project name, buyer, code group, budget, bid-open time, title containment) and in n only the project-name and title matches; it returns true when m >= 2, except when the only two matches are project name and title.
-func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
-
-	m, n := 0, 0
-	if v.projectname != "" && v.projectname == info.projectname {
-		m++
-		n++
-	}
-	if v.buyer != "" && v.buyer == info.buyer {
-		m++
-	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
-		m++
-	}
-	if v.budget != 0 && v.budget == info.budget {
-		m++
-	}
-	if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
-		m++
-	}
-	//if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
-	//	m++
-	//}
-	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
-		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
-		m++
-		n++
-	}
-	if m >= 2 {
-		if n == 2 && m == 2 {
-			return false, reason
-		} else {
-			reason = reason + "满足招标B,六选二,"
-			return true, reason
-		}
-	}
-	return false, reason
-}
-
-//Tender rule C: tenderRepeat_C returns true when v and info conflict, i.e. both budgets are set and differ, or both bid-open times are set and isBidopentimeInterval reports them as different.
-func tenderRepeat_C(v *Info, info *Info) bool {
-
-	if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
-		return true
-	}
-	//original address...
-	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
-	//	return true
-	//}
-
-	if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
-		return true
-	}
-	//if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
-	//	return true
-	//}
-
-	return false
-}
-
-//Award rule A: winningRepeat_A flags which fields match between v and info (p1 project name, p2 buyer, p3 project code or contract number of at least 5 bytes, p5 bid amount per isBidWinningAmount, p6 winner after deleteExtraSpace, p11 title containment) and returns true, appending the matched flags to reason, when one of the listed three-flag combinations holds.
-func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
-
-	var ss string
-	p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
-	if v.projectname != "" && v.projectname == info.projectname {
-		ss = ss + "p1-项目名称-"
-		p1 = true
-	}
-	if v.buyer != "" && v.buyer == info.buyer {
-		ss = ss + "p2-单位-"
-		p2 = true
-	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
-		ss = ss + "p3-编号组--"
-		p3 = true
-	}
-	if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
-		ss = ss + "p5-中标金-"
-		p5 = true
-	}
-	if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
-		ss = ss + "p6-中标人-"
-		p6 = true
-	}
-
-
-	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
-		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
-		ss = ss + "p11-标题-"
-		p11 = true
-	}
-
-	if 	(p1 && p2 && p3) || (p1 && p2 && p5) || (p1 && p2 && p6) ||
-		(p1 && p3 && p5) || (p1 && p3 && p6) || (p1 && p5 && p6) ||
-		(p2 && p3 && p5) || (p2 && p3 && p6) || (p2 && p3 && p11) ||
-		(p2 && p5 && p6) || (p2 && p5 && p11) || (p2 && p6 && p11) ||
-		(p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
-		(p5 && p6 && p11) {
-		reason = reason + "满足中标A,3要素组合-" + ss + ","
-		return true, reason
-	}
-
-	return false, reason
-}
-
-//Award rule B: winningRepeat_B counts matching fields in m (project name, buyer, code group, bid amount, winner, title containment) and in n only the project-name and title matches; it returns true when m >= 2, except when the only two matches are project name and title.
-func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
-
-	m, n := 0, 0
-	if v.projectname != "" && v.projectname == info.projectname {
-		m++
-		n++
-	}
-	if v.buyer != "" && v.buyer == info.buyer {
-		m++
-	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
-		m++
-	}
-	if v.bidamount != 0 && !isBidWinningAmount(v.bidamount,info.bidamount) {
-		m++
-	}
-	if v.winner != "" && deleteExtraSpace(v.winner) == deleteExtraSpace(info.winner) {
-		m++
-	}
-	if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
-		(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
-		m++
-		n++
-	}
-	if m >= 2 {
-		if n == 2 && m == 2 {
-			return false, reason
-		} else {
-			reason = reason + "满足中标B.六选二,"
-			return true, reason
-		}
-	}
-	return false, reason
-}
-
-//Award rule C: winningRepeat_C returns true when v and info conflict, i.e. both bid amounts are set and isBidWinningAmount reports them as different, or both winners are set and differ after deleteExtraSpace.
-func winningRepeat_C(v *Info, info *Info) bool {
-
-	if v.bidamount != 0 && info.bidamount != 0 && isBidWinningAmount(v.bidamount,info.bidamount) {
-		return true
-	}
-	//winner comparison ignores extra whitespace
-	if v.winner != "" && info.winner != "" && deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) {
-		return true
-	}
-	//original address...
-	//if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
-	//	return true
-	//}
-	return false
-}
-
-//Contract rule A: contractRepeat_A returns true if either tenderRepeat_A or winningRepeat_A matches (tried in that order); reason accumulates whatever those checks append.
-func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
-
-	isMeet_1 := false
-	if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
-		return true, reason
-	}
-
-	isMeet_2 := false
-	if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
-		return true, reason
-	}
-	return false, reason
-}
-
-//Contract rule B: contractRepeat_B returns true if either tenderRepeat_B or winningRepeat_B matches (tried in that order); reason accumulates whatever those checks append.
-func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
-
-	isMeet_1 := false
-	if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
-		return true, reason
-	}
-	isMeet_2 := false
-	if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
-		return true, reason
-	}
-	return false, reason
-}
-
-//Contract rule C: contractRepeat_C returns true when tenderRepeat_C or winningRepeat_C finds a conflict, or when both contract numbers or both project codes are set and differ.
-func contractRepeat_C(v *Info, info *Info) bool {
-
-	if tenderRepeat_C(v, info) {
-		return true
-	}
-	if winningRepeat_C(v, info) {
-		return true
-	}
-
-	//contract category - additional number checks
-	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
-		return true
-	}
-	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
-		return true
-	}
-
-	return false
-}
-
-//Second check for same-site records: againHrefRepeat returns true when v and info conflict on bid-open time, budget, bid amount, winner, contract number or project code (each compared only when set on both sides), and false otherwise.
-func againHrefRepeat(v *Info, info *Info) bool {
-	//if v.buyer == info.buyer {
-	//
-	//}
-	if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
-		return true
-	}
-	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-		return true
-	}
-	if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
-		return true
-	}
-	if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
-		return true
-	}
-	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
-		return true
-	}
-	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
-		return true
-	}
-
-	//if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-	//	info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-	//	info.subtype == "变更" || info.subtype == "其他" {
-	//	//tender category
-	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-	//		return true
-	//	}
-	//} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
-	//	//award category
-	//	if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
-	//		(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
-	//		return true
-	//	}
-	//} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-	//	//contract category
-	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-	//		return true
-	//	}
-	//	if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
-	//		(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
-	//		return true
-	//	}
-	//
-	//} else {
-	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-	//		return true
-	//	}
-	//}
-
-	return false
-}
-
-
-
-
-//Keyword re-check: againRepeat returns true when v and info conflict on bid-open time, budget, bid amount, winner, contract number or project code (each compared only when set on both sides), and false otherwise.
-func againRepeat(v *Info, info *Info) bool {
-	//under the same buyer
-	//if info.buyer != "" && v.buyer == info.buyer {
-	//if v.buyer == info.buyer {
-	//
-	//}
-
-	//if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
-	//	info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
-	//	info.subtype == "其他" || info.subtype == "变更" {
-	//	//budget condition
-	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-	//		return true
-	//	}
-	//} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
-	//	info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
-	//	info.subtype == "违规" {
-	//	//bid amount / winner condition
-	//	if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
-	//		(deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
-	//		return true
-	//	}
-	//} else {
-	//	//budget condition
-	//	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-	//		return true
-	//	}
-	//}
-
-	if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
-		return true
-	}
-	if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-		return true
-	}
-	if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
-		return true
-	}
-	if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
-		return true
-	}
-	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
-		return true
-	}
-	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
-		return true
-	}
-
-	return false
-}
-
-//deleteExtraSpace removes redundant whitespace (tabs included) from a winner name: every run of two or more whitespace characters is cut down to its first character.
-func deleteExtraSpace(s string) string {
-	//collapse repeated whitespace so that only one character of each run is kept
-	s1 := strings.Replace(s, "  ", " ", -1)      //original note: replace tab with a space - NOTE(review): confirm what the first literal actually contains
-	regstr := "\\s{2,}"                          //pattern for two or more whitespace characters
-	reg, _ := regexp.Compile(regstr)             //compile the pattern
-	s2 := make([]byte, len(s1))                  //byte slice to edit in place
-	copy(s2, s1)                                 //copy the string into the slice
-	spc_index := reg.FindStringIndex(string(s2)) //search for the first run
-	for len(spc_index) > 0 {                     //a run was found
-		s2 = append(s2[:spc_index[0]+1], s2[spc_index[1]:]...) //keep the run's first byte, drop the rest
-		spc_index = reg.FindStringIndex(string(s2))            //search again
-	}
-	return string(s2)
-}
-
-//Bid amount ratio 10000: isBidWinningAmount returns false when f1 and f2 are equal or one is exactly 10000 times the other, and true otherwise (true means the amounts differ).
-func isBidWinningAmount(f1 float64 ,f2 float64) bool {
-
-	if f1==f2||f1*10000==f2||f2*10000==f1 {
-		return false
-	}
-	return true
-}
-
-
-//isBidopentimeInterval reports whether two bid-open times count as different: false if either is 0, true if their yyyyMMdd dates differ or they are more than 21600 seconds apart on the same date.
-func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
-	if i1==0||i2==0 {
-		return false
-	}
-	//different day, or same day but more than six hours apart: treated as not equal, return true
-	timeOne,timeTwo:=i1,i2
-	day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
-	day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
-	if day1==day2 {
-		//is the gap larger than six hours (21600 seconds)?
-		if math.Abs(float64(i1-i2)) >21600.0 {
-			return true
-		}else {
-			return false
-		}
-	}else {
-		return true
-	}
-}
-
-//isTheSameDay returns true when both timestamps are non-zero and format to the same yyyyMMdd date; it returns false if either is 0.
-func isTheSameDay(i1 int64 ,i2 int64) bool {
-	if i1==0||i2==0 {
-		return false
-	}
-	timeOne,timeTwo:=i1,i2
-	day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
-	day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
-	if day1==day2 {
-		return true
-	}
-	return false
-}
-
-
-
-//前置0 五要素均相等认为重复
-func leadingElementSame(v *Info, info *Info) bool {
-
-	isok:= 0
-	if info.projectname != "" && v.projectname == info.projectname {
-		isok++
-	}
-	if info.buyer != "" && v.buyer == info.buyer {
-		isok++
+		data = append(data, newData)
+		d.data[k] = data
 	}
-	if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
-		if info.contractnumber != "" && v.contractnumber == info.contractnumber {
-			isok++
-		}
-	}else {
-		if info.projectcode != "" && v.projectcode == info.projectcode {
-			isok++
+	//添加省
+	isAreaExist :=false
+	for _,v:= range d.areakeys {
+		if v==newData.area {
+			isAreaExist = true
 		}
 	}
-	if info.title != "" && v.title == info.title {
-		isok++
-	}
-	if v.agency == info.agency {
-		isok++
-	}
-
-	if isok==5 {
-		return true
+	if !isAreaExist {
+		areaArr := d.areakeys
+		areaArr = append(areaArr,newData.area)
+		d.areakeys = areaArr
 	}
 
 
-	return false
-}
-
-
-func buyerIsContinue(v *Info, info *Info) bool {
-	//不同采购单位下
-	if !isTheSameDay(info.publishtime,v.publishtime) {
-		return true
-	}
-	if v.title != info.title && v.title != "" && info.title != ""{
-		return true
-	}
-	if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
-		return true
-	}
-	//if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
-	//	return true
-	//}
-	//if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
-	//	return true
-	//}
-	//if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
-	//	return true
-	//}
-	if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
-		return true
-	}
-	if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
-		return true
-	}
-
-	return false
+	d.lock.Unlock()
 }
 
 

+ 0 - 51
udpfilterdup/src/datamonitor.go

@@ -1,51 +0,0 @@
-package main
-
-import (
-	"sync"
-	"time"
-)
-
-/**
-Data monitoring service, meant to save the duplicate statistics every 15 minutes. IS is the shared InfoStatus, created with Starttime set to the current Unix time, Endtime 0 and an empty counter map.
-**/
-var IS = &InfoStatus{
-	time.Now().Unix(), 0, map[string]int{}, &sync.Mutex{}}
-//init currently does nothing: the call that would start the periodic IS.Save() loop is commented out.
-func init() {
-	//go IS.Save()
-}
-//InfoStatus accumulates per-key counters for one reporting window.
-type InfoStatus struct {
-	Starttime int64 //window start, Unix seconds
-	Endtime   int64 //window end, set by Save to the current Unix time
-	Val       map[string]int //counter per key, incremented by Add and reset by Save
-	Lock      *sync.Mutex //held by Add and Save while they touch the fields above
-}
-//Add increments the counter stored under key t while holding is.Lock.
-func (is *InfoStatus) Add(t string) {
-	is.Lock.Lock()
-	is.Val[t]++
-	is.Lock.Unlock()
-}
-//Save closes the current window under is.Lock: it copies the counters into a document, and if their sum is positive adds receive/starttime/endtime/flag, resets Val and hands the document to mgo.Save("datamonitor", ...) in a new goroutine; it then starts the next window and schedules itself again in 15 minutes.
-func (is *InfoStatus) Save() {
-	is.Lock.Lock()
-	is.Endtime = time.Now().Unix()
-	save := map[string]interface{}{}
-	all := 0
-	for k, v := range is.Val {
-		all += v
-		save[k] = v
-	}
-	if all > 0 {
-		save["receive"] = all
-		save["starttime"] = is.Starttime
-		save["endtime"] = is.Endtime
-		save["flag"] = "dup"
-		is.Val = map[string]int{}
-		go mgo.Save("datamonitor", save)
-	}
-	is.Starttime = is.Endtime
-	is.Lock.Unlock()
-	time.AfterFunc(15*time.Minute, is.Save)
-}

+ 18 - 518
udpfilterdup/src/main.go

@@ -40,7 +40,6 @@ var (
 	FilterRegTitle_2 = regexp.MustCompile("^_$")
 
 	isMerger       bool                              //是否合并
-	Is_Sort        bool                              //是否排序
 	threadNum      int                               //线程数量
 	SiteMap        map[string]map[string]interface{} //站点map
 	LowHeavy       bool                              //低质量数据判重
@@ -78,7 +77,6 @@ func init() {
 	FilterRegTitle_1 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_1"]))
 	FilterRegTitle_2 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_2"]))
 	isMerger = Sysconfig["isMerger"].(bool)
-	Is_Sort = Sysconfig["isSort"].(bool)
 	threadNum = util.IntAllDef(Sysconfig["threads"], 1)
 	LowHeavy = Sysconfig["lowHeavy"].(bool)
 	TimingTask = Sysconfig["timingTask"].(bool)
@@ -106,6 +104,7 @@ func init() {
 	log.Printf("new站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(SiteMap))
 }
 
+
 func main() {
 
 	go checkMapJob()
@@ -116,7 +115,6 @@ func main() {
 	if TimingTask {
 		go timedTaskDay()
 	}
-
 	time.Sleep(99999 * time.Hour)
 }
 
@@ -124,21 +122,12 @@ func main() {
 
 //测试组人员使用
 func mainT() {
-
-	//analysNoRepeatDataTest()
-	//return
-
 	if TimingTask {
 		log.Println("定时任务测试开始")
 		go timedTaskDay()
 		time.Sleep(99999 * time.Hour)
 	} else {
-
-		/*
-		5ef01220801f744d045f51f1
-		5ef61eb3801f744d046402dd
-		*/
-		//IdType = true
+		//IdType = true  //打开id字符串模式
 		sid = "5ef01220801f744d045f51f1"
 		eid = "5ef61eb3801f744d046402dd"
 		log.Println("正常判重测试开始")
@@ -156,8 +145,6 @@ func mainT() {
 	}
 }
 
-
-
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 	fmt.Println("接受的段数据")
 	switch act {
@@ -214,17 +201,9 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	log.Println(mgo.DbName, extract, q)
 	sess := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess)
-
-	//是否排序
-	sortName :="_id"
-	if Is_Sort {
-		sortName = "publishtime"
-		log.Println("排序")
-	}
-	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort(sortName).Iter()
+	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
 	updateExtract := [][]map[string]interface{}{}
 	ids:=[]string{}
-	log.Println("线程数:", threadNum)
 	pool := make(chan bool, threadNum)
 	wg := &sync.WaitGroup{}
 	n, repeateN := 0, 0
@@ -232,8 +211,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 		if n%10000 == 0 {
 			log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
 		}
-
-		source := util.ObjToMap(tmp["jsondata"])
+		source := util.ObjToMap(tmp["jsondata"]) //前置-jsondata判重
 		if util.IntAll((*source)["sourcewebsite"]) == 1 {
 			repeateN++
 			updateExtract = append(updateExtract, []map[string]interface{}{
@@ -252,8 +230,6 @@ func task(data []byte, mapInfo map[string]interface{}) {
 				mgo.UpSertBulk(extract, updateExtract...)
 				updateExtract = [][]map[string]interface{}{}
 			}
-
-
 			tmp = make(map[string]interface{})
 			continue
 		}
@@ -274,7 +250,6 @@ func task(data []byte, mapInfo map[string]interface{}) {
 				wg.Done()
 			}()
 			info := NewInfo(tmp)
-
 			if !LowHeavy { //是否进行低质量数据判重
 				if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
 					updateExtract = append(updateExtract, []map[string]interface{}{
@@ -284,7 +259,6 @@ func task(data []byte, mapInfo map[string]interface{}) {
 						map[string]interface{}{
 							"$set": map[string]interface{}{
 								"repeat": -1, //无效数据标签
-
 							},
 						},
 					})
@@ -295,161 +269,25 @@ func task(data []byte, mapInfo map[string]interface{}) {
 					return
 				}
 			}
-
-
+			//正常判重
 			b, source, reason := DM.check(info)
 			if b { //有重复,生成更新语句,更新抽取和更新招标
 				repeateN++
-				var is_replace = false
-				var mergeArr = []int64{}                    //更改合并数组记录
-				var newData = &Info{}                       //更换新的数据池数据
-				var repeat_idMap = map[string]interface{}{} //记录判重的
-				var merge_idMap = map[string]interface{}{}  //记录合并的
-				repeat_idMap["_id"] = StringTOBsonId(info.id)
+				var updateID = map[string]interface{}{} //记录更新判重的
+				updateID["_id"] = StringTOBsonId(info.id)
 				if IdType {
-					repeat_idMap["_id"] = info.id
+					updateID["_id"] = info.id
 				}
-				merge_idMap["_id"] = StringTOBsonId(source.id)
-				repeat_id := source.id //初始化一个数据
-
-				if isMerger { //合并相关
-
-					//已原始数据为标准 - 对比数据打判重标签-
-					newData, mergeArr, is_replace = mergeDataFields(source, info)
-					//对比数据打重复标签的id,原始数据id的记录
-					repeat_idMap["_id"] = StringTOBsonId(info.id)
-					merge_idMap["_id"] = StringTOBsonId(source.id)
-
-					if IdType {
-						repeat_idMap["_id"] = info.id
-						merge_idMap["_id"] = source.id
-					}
-					repeat_id = source.id
-					//basic_bool := basicDataScore(source, info)
-					//if basic_bool {
-					//	//已原始数据为标准 - 对比数据打判重标签-
-					//	newData, mergeArr, is_replace = mergeDataFields(source, info)
-					//	//对比数据打重复标签的id,原始数据id的记录
-					//	repeat_idMap["_id"] = StringTOBsonId(info.id)
-					//	merge_idMap["_id"] = StringTOBsonId(source.id)
-					//
-					//	if IdType {
-					//		repeat_idMap["_id"] = info.id
-					//		merge_idMap["_id"] = source.id
-					//	}
-					//	repeat_id = source.id
-					//} else {
-					//	//已对比数据为标准 ,数据池的数据打判重标签
-					//	newData, mergeArr, is_replace = mergeDataFields(info, source)
-					//	DM.replaceSourceData(newData, source) //替换
-					//	//原始数据打重复标签的id,   对比数据id的记录
-					//	repeat_idMap["_id"] = StringTOBsonId(source.id)
-					//	merge_idMap["_id"] = StringTOBsonId(info.id)
-					//	if IdType {
-					//		repeat_idMap["_id"] = source.id
-					//		merge_idMap["_id"] = info.id
-					//	}
-					//	repeat_id = info.id
-					//}
-
-					merge_map := make(map[string]interface{}, 0)
-					if is_replace { //有过合并-更新数据
-						merge_map = map[string]interface{}{
-							"$set": map[string]interface{}{
-								"merge": newData.mergemap,
-							},
-						}
-						//更新合并后的数据
-						for _, value := range mergeArr {
-							if value == 0 {
-								merge_map["$set"].(map[string]interface{})["area"] = newData.area
-								merge_map["$set"].(map[string]interface{})["city"] = newData.city
-							} else if value == 1 {
-								merge_map["$set"].(map[string]interface{})["area"] = newData.area
-								merge_map["$set"].(map[string]interface{})["city"] = newData.city
-							} else if value == 2 {
-								merge_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
-							} else if value == 3 {
-								merge_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
-							} else if value == 4 {
-								merge_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
-							} else if value == 5 {
-								merge_map["$set"].(map[string]interface{})["budget"] = newData.budget
-							} else if value == 6 {
-								merge_map["$set"].(map[string]interface{})["winner"] = newData.winner
-							} else if value == 7 {
-								merge_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
-							} else if value == 8 {
-								merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
-							} else if value == 9 {
-								merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
-							} else if value == 10 {
-								merge_map["$set"].(map[string]interface{})["publishtime"] = newData.publishtime
-							} else if value == 11 {
-								merge_map["$set"].(map[string]interface{})["agency"] = newData.agency
-							} else {
-							}
-						}
-						//模板数据更新
-						updateExtract = append(updateExtract, []map[string]interface{}{
-							merge_idMap,
-							merge_map,
-						})
-					}
-				} else { //高质量数据-备份
-
-					//basic_bool := basicDataScore(source, info)
-					//if !basic_bool {
-					//	DM.replaceSourceData(info, source) //替换
-					//	repeat_idMap["_id"] = StringTOBsonId(source.id)
-					//	if IdType {
-					//		repeat_idMap["_id"] = source.id
-					//	}
-					//	repeat_id = info.id
-					//	if len(ids)>=9 {
-					//		ids=append(ids,source.id)
-					//
-					//
-					//		for _, to := range nextNode {
-					//
-					//			key := source.id + "-" + source.id + "-" + util.ObjToString(to["stype"])
-					//			by, _ := json.Marshal(map[string]interface{}{
-					//				"gtid":  source.id,
-					//				"lteid": source.id,
-					//				"stype": util.ObjToString(to["stype"]),
-					//				"key":   key,
-					//				"ids":   strings.Join(ids, ","),
-					//			})
-					//			addr := &net.UDPAddr{
-					//				IP:   net.ParseIP(to["addr"].(string)),
-					//				Port: util.IntAll(to["port"]),
-					//			}
-					//			node := &udpNode{by, addr, time.Now().Unix(), 0}
-					//			udptaskmap.Store(key, node)
-					//			udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
-					//		}
-					//
-					//		//
-					//		ids = []string{}
-					//	}else {
-					//		ids=append(ids,source.id)
-					//	}
-					//
-					//}
-				}
-
-				//重复数据打标签
-				updateExtract = append(updateExtract, []map[string]interface{}{
-					repeat_idMap,
+				updateExtract = append(updateExtract, []map[string]interface{}{//重复数据打标签
+					updateID,
 					map[string]interface{}{
 						"$set": map[string]interface{}{
 							"repeat":        1,
 							"repeat_reason": reason,
-							"repeat_id":     repeat_id,
+							"repeat_id":     source.id,
 						},
 					},
 				})
-
 			}
 		}(tmp)
 		if len(updateExtract) >= 200 {
@@ -489,11 +327,13 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	}
 }
 
-//定时任务
+
+
+
+// timedTaskDay creates a cron scheduler, registers timedTaskOnce under the spec "0 0 */4 * * ?", and starts it.
 func timedTaskDay() {
 	log.Println("部署定时任务")
 	c := cron.New()
-	//c.AddFunc("0 0 */4 * * ?", func() { movedata() })
 	c.AddFunc("0 0 */4 * * ?", func() { timedTaskOnce() })
 	c.Start()
 }
@@ -530,7 +370,6 @@ func timedTaskOnce() {
 		if num%10000 == 0 {
 			log.Println("正序遍历:", num)
 		}
-
 		source := util.ObjToMap(tmp["jsondata"])
 		if util.IntAll((*source)["sourcewebsite"]) == 1 {
 			updateExtract = append(updateExtract, []map[string]interface{}{
@@ -632,8 +471,7 @@ func timedTaskOnce() {
 		log.Println("构建第",k,"组---(数据池)")
 		//当前组的第一个发布时间
 		first_pt :=util.Int64All(v[0]["publishtime"])
-		coll :=extract_back
-		DM = TimedTaskDatamap(dupdays, first_pt,coll)
+		DM = TimedTaskDatamap(dupdays, first_pt)
 		log.Println("开始遍历判重第",k,"组  共计数量:",len(v))
 		n = n+len(v)
 		log.Println("统计目前总数量:",n,"重复数量:",repeateN)
@@ -734,348 +572,8 @@ func timedTaskOnce() {
 		}
 	}
 }
-// isTaskTimeCycle reports whether pt is at or after local midnight today minus dupdays days, compared as Unix seconds.
-func isTaskTimeCycle(pt int64) bool {
-
-	year, month, day := time.Now().Date()
-	predur_pt:=time.Date(year, month, day, 0, 0, 0, 0, time.Local).Add(-time.Duration(dupdays) * 24 * time.Hour).Unix()
-	log.Println(predur_pt)
-
-	if pt >= predur_pt {
-		return true
-	}else  {
-		return false
-	}
-
-}
 
 
-
-
-
-
-
-
-// mergeDataFields fills empty fields of source from info, records the replaced original values in source.mergemap under info.id, and returns source, the codes of the merged fields, and whether anything was replaced.
-func mergeDataFields(source *Info, info *Info) (*Info, []int64, bool) {
-
-	merge_recordMap := make(map[string]interface{}, 0)
-	mergeArr := make([]int64, 0)
-	// Whether any data was replaced; the original values are kept in merge_recordMap.
-	is_replace := false
-	// 1. Area / city
-	if source.area == "" || source.area == "全国" {
-		// source has no specific area: take it from info when info has one.
-		if info.area != "全国" && info.area != "" {
-			merge_recordMap["area"] = source.area
-			merge_recordMap["city"] = source.city
-			source.area = info.area
-			source.city = info.city
-			mergeArr = append(mergeArr, 1)
-			is_replace = true
-		}
-	} else {
-		// Area is set: if it is flagged as site-derived (is_site), record it and clear the flag.
-		if source.is_site {
-			// The area/city came from the site; note that source.area/city themselves are not changed here.
-			merge_recordMap["site_area"] = source.area
-			merge_recordMap["site_city"] = source.city
-			mergeArr = append(mergeArr, 0)
-			is_replace = true
-			source.is_site = false
-
-		}
-	}
-	// 2. Project name
-	if source.projectname == "" && info.projectname != "" {
-		merge_recordMap["projectname"] = source.projectname
-		source.projectname = info.projectname
-		mergeArr = append(mergeArr, 2)
-		is_replace = true
-	}
-	// 3. Project code
-	if source.projectcode == "" && info.projectcode != "" {
-		merge_recordMap["projectcode"] = source.projectcode
-		source.projectcode = info.projectcode
-		mergeArr = append(mergeArr, 3)
-		is_replace = true
-	}
-	// 4. Buyer
-	if source.buyer == "" && info.buyer != "" {
-		merge_recordMap["buyer"] = source.buyer
-		source.buyer = info.buyer
-		mergeArr = append(mergeArr, 4)
-		is_replace = true
-	}
-	// 5. Budget
-	if source.budget == 0 && info.budget != 0 {
-		merge_recordMap["budget"] = source.budget
-		source.budget = info.budget
-		mergeArr = append(mergeArr, 5)
-		is_replace = true
-	}
-	// 6. Winner
-	if source.winner == "" && info.winner != "" {
-		merge_recordMap["winner"] = source.winner
-		source.winner = info.winner
-		mergeArr = append(mergeArr, 6)
-		is_replace = true
-	}
-	// 7. Bid amount
-	if source.bidamount == 0 && info.bidamount != 0 {
-		merge_recordMap["bidamount"] = source.bidamount
-		source.bidamount = info.bidamount
-		mergeArr = append(mergeArr, 7)
-		is_replace = true
-	}
-	// 8. Bid opening time (only the time field is merged here)
-	if source.bidopentime == 0 && info.bidopentime != 0 {
-		merge_recordMap["bidopentime"] = source.bidopentime
-		source.bidopentime = info.bidopentime
-		mergeArr = append(mergeArr, 8)
-		is_replace = true
-	}
-
-	// 9. Contract number
-	if source.contractnumber == "" && info.contractnumber != "" {
-		merge_recordMap["contractnumber"] = source.contractnumber
-		source.contractnumber = info.contractnumber
-		mergeArr = append(mergeArr, 9)
-		is_replace = true
-	}
-
-	// 10. Publish time
-	if source.publishtime == 0 && info.publishtime != 0 {
-		merge_recordMap["publishtime"] = source.publishtime
-		source.publishtime = info.publishtime
-		mergeArr = append(mergeArr, 10)
-		is_replace = true
-	}
-	// 11. Agency
-	if source.agency == "" && info.agency != "" {
-		merge_recordMap["agency"] = source.agency
-		source.agency = info.agency
-		mergeArr = append(mergeArr, 11)
-		is_replace = true
-	}
-
-	if is_replace { // at least one field was replaced
-		// Increment the total merge count.
-		source.mergemap["total_num"] = util.Int64All(source.mergemap["total_num"]) + 1
-		merge_recordMap["num"] = util.Int64All(source.mergemap["total_num"])
-		// Record which record id supplied the non-empty replacements.
-		key := info.id
-		source.mergemap[key] = merge_recordMap
-	}
-
-	// TODO: to be optimized further.
-	return source, mergeArr, is_replace
-}
-
-// basicDataScore reports whether v outranks info, comparing in order: site level, site type, site weight, a field-completeness score, and finally publishtime (v wins ties).
-func basicDataScore(v *Info, info *Info) bool {
-
-	/*
-	  Weight evaluation.
-	  Site priority rules:
-	  1. national > provincial > municipal > county/district
-	  2. government procurement > public resources > official website | government portal > public bidding platform | enterprise bidding platform
-	  3. same sitetype: compare weight
-	  4. field-completeness score
-	*/
-	v_score, info_score := -1, -1
-	dict_v := SiteMap[v.site]
-	dict_info := SiteMap[info.site]
-	// Compare level first.
-	if dict_v != nil {
-		v_level := util.ObjToString(dict_v["level"])
-		if v_level == "国家" {
-			v_score = 4
-		} else if v_level == "省级" {
-			v_score = 3
-		} else if v_level == "市级" {
-			v_score = 2
-		} else if v_level == "县区" {
-			v_score = 1
-		} else if v_level == "" {
-		} else {
-			v_score = 0
-		}
-	}
-
-	if dict_info != nil {
-		info_level := util.ObjToString(dict_info["level"])
-		if info_level == "国家" {
-			info_score = 4
-		} else if info_level == "省级" {
-			info_score = 3
-		} else if info_level == "市级" {
-			info_score = 2
-		} else if info_level == "县区" {
-			info_score = 1
-		} else if info_level == "" {
-
-		} else {
-			v_score = 0 // NOTE(review): this branch evaluates info_level, so info_score was probably intended here — confirm
-		}
-	}
-
-	if v_score > info_score {
-		return true
-	}
-	if v_score < info_score {
-		return false
-	}
-
-	// Levels tie: compare sitetype next (scores are overwritten only when the type is non-empty).
-	if dict_v != nil {
-		v_sitetype := util.ObjToString(dict_v["sitetype"])
-		if v_sitetype == "政府采购" {
-			v_score = 4
-		} else if v_sitetype == "公共资源" {
-			v_score = 3
-		} else if v_sitetype == "官方网站"|| v_sitetype == "政府门户" {
-			v_score = 2
-		} else if v_sitetype == "社会公共招标平台" || v_sitetype == "企业招标平台" {
-			v_score = 1
-		} else if v_sitetype == "" {
-		} else {
-			v_score = 0
-		}
-	}
-
-	if dict_info != nil {
-		info_sitetype := util.ObjToString(dict_info["sitetype"])
-		if info_sitetype == "政府采购" {
-			info_score = 4
-		} else if info_sitetype == "公共资源" {
-			info_score = 3
-		} else if info_sitetype == "官方网站"|| info_sitetype == "政府门户" {
-			info_score = 2
-		} else if info_sitetype == "社会公共招标平台" || info_sitetype == "企业招标平台" {
-			info_score = 1
-		} else if info_sitetype == "" {
-		} else {
-			info_score = 0
-		}
-	}
-
-	if v_score > info_score {
-		return true
-	}
-	if v_score < info_score {
-		return false
-	}
-
-	if v_score == info_score {// Same sitetype score: compare the sites' weight values.
-		v_weight := util.IntAll(dict_v["weight"])
-		info_weight := util.IntAll(dict_info["weight"])
-		if v_weight>info_weight {
-			return true
-		}
-		if info_weight>v_weight {
-			return false
-		}
-	}
-
-	// Field-completeness score: m counts populated fields of v, n those of info (agency and city count double).
-	m, n := 0, 0
-	if v.projectname != "" {
-		m++
-	}
-	if v.buyer != "" {
-		m++
-	}
-	if v.projectcode != "" || v.contractnumber != "" {
-		m++
-	}
-	if v.budget != 0 {
-		m++
-	}
-	if v.bidamount != 0 {
-		m++
-	}
-	if v.winner != "" {
-		m++
-	}
-	if v.bidopentime != 0 {
-		m++
-	}
-	if v.bidopenaddress != "" {
-		m++
-	}
-	if v.agency != "" {
-		m = m + 2
-	}
-	if v.city != "" {
-		m = m + 2
-	}
-
-	if info.projectname != "" {
-		n++
-	}
-	if info.buyer != "" {
-		n++
-	}
-	if info.projectcode != "" || info.contractnumber != "" {
-		n++
-	}
-	if info.budget != 0 {
-		n++
-	}
-	if info.bidamount != 0 {
-		n++
-	}
-	if info.winner != "" {
-		n++
-	}
-	if info.bidopentime != 0 {
-		n++
-	}
-	if info.bidopenaddress != "" {
-		n++
-	}
-	if info.agency != "" {
-		n = n + 2
-	}
-	if info.city != "" {
-		n = n + 2
-	}
-
-	if m > n {
-		return true
-	} else if m == n {
-		if v.publishtime >= info.publishtime {
-			return true
-		} else {
-			return false
-		}
-	} else {
-		return false
-	}
-}
-
-// invalidData reports whether all four given strings are empty (n counts the non-empty ones).
-func invalidData(d1 string, d2 string, d3 string, d4 string) bool {
-	var n int
-	if d1 != "" {
-		n++
-	}
-	if d2 != "" {
-		n++
-	}
-	if d3 != "" {
-		n++
-	}
-	if d4 != "" {
-		n++
-	}
-	if n == 0 {
-		return true
-	}
-	return false
-}
-
 //迁移数据dupdays+5之前的数据
 func movedata() {
 	sess := mgo.GetMgoConn()
@@ -1107,3 +605,5 @@ func movedata() {
 	delnum := mgo.Delete(extract, qv)
 	log.Println("remove from ", extract, delnum)
 }
+
+