zhangjinkun cách đây 5 năm
mục cha
commit
14efc5352d
3 tập tin đã thay đổi với 207 bổ sung và 236 xóa
  1. 2 2
      udpfilterdup/src/config.json
  2. 104 124
      udpfilterdup/src/datamap.go
  3. 101 110
      udpfilterdup/src/main.go

+ 2 - 2
udpfilterdup/src/config.json

@@ -5,9 +5,9 @@
         "addr": "192.168.3.207:27092",
         "pool": 5,
         "db": "extract_kf",
-        "extract": "zk",
+        "extract": "demo_data3.2",
         "site": {
-            "dbname": "zhaolongyue",
+            "dbname": "extract_kf",
             "coll": "site"
         }
     },

+ 104 - 124
udpfilterdup/src/datamap.go

@@ -11,32 +11,33 @@ import (
 )
 
 type Info struct {
-	id          string	//id
-	title       string  //标题
-
-	area        string  //省份
-	city        string  //城市
-	subtype     string  //信息类型
-	buyer       string  //采购单位
-	agency      string  //代理机构
-	winner      string  //中标单位
-	budget      float64 //预算金额
-	bidamount   float64 //中标金额
-	projectname string  //项目名称
-	projectcode string  //项目编号
-	contractnumber string //合同编号
-	publishtime int64   //发布时间
-	bidopentime int64   //开标时间
-	agencyaddr  string  //开标地点
-
-	site        string  //站点
-	href        string  //正文的url
-
-	repeatid    string  //重复id
+	id    string //id
+	title string //标题
+
+	area           string  //省份
+	city           string  //城市
+	subtype        string  //信息类型
+	buyer          string  //采购单位
+	agency         string  //代理机构
+	winner         string  //中标单位
+	budget         float64 //预算金额
+	bidamount      float64 //中标金额
+	projectname    string  //项目名称
+	projectcode    string  //项目编号
+	contractnumber string  //合同编号
+	publishtime    int64   //发布时间
+	comeintime     int64   //入库时间
+	bidopentime    int64   //开标时间
+	agencyaddr     string  //开标地点
+
+	site string //站点
+	href string //正文的url
+
+	repeatid         string                 //重复id
 	titleSpecialWord bool                   //标题特殊词
 	specialWord      bool                   //再次判断的特殊词
 	mergemap         map[string]interface{} //合并记录
-	is_site     bool   //是否站点城市
+	is_site          bool                   //是否站点城市
 
 }
 
@@ -77,9 +78,9 @@ func NewDatamap(days int, lastid string) *datamap {
 		if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
 			continuSum++
 		} else {
-			pt:=tmp["publishtime"]
-			pt_time:=qutil.Int64All(pt)
-			if pt_time<=0 {
+			pt := tmp["publishtime"]
+			pt_time := qutil.Int64All(pt)
+			if pt_time <= 0 {
 				continue
 			}
 			if now1 == 0 {
@@ -123,9 +124,9 @@ func NewHistorymap(startid string, lastid string, startTime int64, lastTime int6
 		true)).Sort("-_id").Iter()
 	m, n := 0, 0
 	for tmp_start := make(map[string]interface{}); it_start.Next(&tmp_start); {
-		pt_s:=tmp_start["publishtime"]
-		pt_time:=qutil.Int64All(pt_s)
-		if pt_time<=0 {
+		pt_s := tmp_start["publishtime"]
+		pt_time := qutil.Int64All(pt_s)
+		if pt_time <= 0 {
 			continue
 		}
 		if qutil.Float64All(startTime-pt_time) <= datelimit {
@@ -154,9 +155,9 @@ func NewHistorymap(startid string, lastid string, startTime int64, lastTime int6
 		true)).Sort("_id").Iter()
 
 	for tmp_last := make(map[string]interface{}); it_last.Next(&tmp_last); {
-		pt_l:=tmp_last["publishtime"]
-		pt_time:=qutil.Int64All(pt_l)
-		if pt_time<=0 {
+		pt_l := tmp_last["publishtime"]
+		pt_time := qutil.Int64All(pt_l)
+		if pt_time <= 0 {
 			continue
 		}
 		if qutil.Float64All(pt_time-lastTime) <= datelimit {
@@ -188,7 +189,7 @@ func NewInfo(tmp map[string]interface{}) *Info {
 		area = "全国"
 	}
 	info := &Info{}
-	info.id = qutil.BsonIdToSId(tmp["_id"])
+	info.id = BsonTOStringId(tmp["_id"])
 	info.title = qutil.ObjToString(tmp["title"])
 	info.area = area
 	info.subtype = subtype
@@ -202,6 +203,7 @@ func NewInfo(tmp map[string]interface{}) *Info {
 	info.budget = qutil.Float64All(tmp["budget"])
 	info.bidamount = qutil.Float64All(tmp["bidamount"])
 	info.publishtime = qutil.Int64All(tmp["publishtime"])
+	info.comeintime = qutil.Int64All(tmp["comeintime"])
 	info.bidopentime = qutil.Int64All(tmp["bidopentime"])
 	info.agencyaddr = qutil.ObjToString(tmp["agencyaddr"])
 	info.site = qutil.ObjToString(tmp["site"])
@@ -217,13 +219,13 @@ func NewInfo(tmp map[string]interface{}) *Info {
 
 	info.is_site = false
 
-
 	return info
 }
+
 //判重方法
 func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
 
-	reason:=""
+	reason := ""
 	keys := []string{}
 	d.lock.Lock()
 	for k, _ := range d.keys { //不同时间段
@@ -290,9 +292,9 @@ L:
 							continue //无包含关系
 						}
 						if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
-							reason = reason+"标题关键词且包含关系"
+							reason = reason + "标题关键词且包含关系"
 							//继续二级金额判断
-							if !againRepeat(v,info){
+							if !againRepeat(v, info) {
 								b = true
 								source = v
 								reasons = reason
@@ -301,10 +303,6 @@ L:
 						}
 					}
 
-
-
-
-
 					//代理机构相同-非空相等
 					if v.agency != "" && info.agency != "" && v.agency == info.agency {
 						reason = reason + "同机构-"
@@ -344,7 +342,8 @@ L:
 
 	//往预存数据 d 添加
 	if !b {
-		ct := info.publishtime
+		//ct := info.publishtime
+		ct := info.comeintime
 		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
 		k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
 		d.lock.Lock()
@@ -367,7 +366,7 @@ L:
 }
 
 func (h *historymap) checkHistory(info *Info) (b bool, source *Info, reasons string) {
-	reason:=""
+	reason := ""
 	keys := []string{}
 	h.lock.Lock()
 	for k, _ := range h.keys { //不同时间段
@@ -433,9 +432,9 @@ L:
 							continue //无包含关系
 						}
 						if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
-							reason = reason+"标题关键词且包含关系"
+							reason = reason + "标题关键词且包含关系"
 							//继续二级金额判断
-							if !againRepeat(v,info){
+							if !againRepeat(v, info) {
 								b = true
 								source = v
 								reasons = reason
@@ -485,11 +484,11 @@ L:
 	//
 	if b {
 		if info.repeatid == source.id {
-			b = false//重复-无变化-不处理
+			b = false //重复-无变化-不处理
 		}
 	} else {
 		if source != nil {
-			if source.repeatid != "" {//未判重-有变化--记录
+			if source.repeatid != "" { //未判重-有变化--记录
 				b = true
 				reason = "未判重记录"
 				reasons = reason
@@ -498,7 +497,8 @@ L:
 	}
 	//往预存数据 d 添加
 	if !b {
-		ct := info.publishtime
+		//ct := info.publishtime
+		ct := info.comeintime
 		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
 		k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
 		data := h.data[k]
@@ -568,8 +568,6 @@ func (h *historymap) replaceSourceData(replaceData *Info, replaceId string) {
 	h.lock.Unlock()
 }
 
-
-
 func (d *datamap) update(t int64) {
 	//每天0点清除历史数据
 	d.keymap = d.GetLatelyFiveDay(t)
@@ -605,21 +603,15 @@ func (d *datamap) GetLatelyFiveDay(t int64) []string {
 	return array
 }
 
-
-
-
-
-
-
 /*
 **************************
 ******* 以下为判重 ********
 **************************
-*/
+ */
 //判重方法1
 func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
 
-	isMeet:=false
+	isMeet := false
 	if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
 		info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
 		info.subtype == "变更" || info.subtype == "其他" {
@@ -637,7 +629,7 @@ func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
 
 	} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
 		//中标结果
-		if isMeet, reason = winningRepeat_A(v, info, reason);isMeet {
+		if isMeet, reason = winningRepeat_A(v, info, reason); isMeet {
 			if winningRepeat_C(v, info) {
 				return false, reason
 			} else {
@@ -650,7 +642,7 @@ func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
 
 	} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
 		//合同
-		if isMeet, reason = contractRepeat_A(v, info, reason);isMeet {
+		if isMeet, reason = contractRepeat_A(v, info, reason); isMeet {
 			if contractRepeat_C(v, info) {
 				return false, reason
 			} else {
@@ -662,7 +654,7 @@ func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
 		}
 	} else {
 		//招标结果
-		if isMeet, reason = tenderRepeat_A(v, info, reason);isMeet {
+		if isMeet, reason = tenderRepeat_A(v, info, reason); isMeet {
 			if tenderRepeat_C(v, info) {
 				return false, reason
 			} else {
@@ -679,13 +671,13 @@ func quickHeavyMethodOne(v *Info, info *Info, reason string) (bool, string) {
 
 //判重方法2
 func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
-	isMeet:=false
+	isMeet := false
 	if v.agency == info.agency && v.agency != "" && info.agency != "" {
 		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
 			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
 			info.subtype == "变更" || info.subtype == "其他" {
 			//招标结果
-			if isMeet, reason =  tenderRepeat_B(v, info, reason);isMeet {
+			if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
 				if tenderRepeat_C(v, info) { //有不同
 					return false, reason
 				} else {
@@ -698,7 +690,7 @@ func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
 
 		} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
 			//中标结果
-			if isMeet, reason =  winningRepeat_B(v, info, reason);isMeet {
+			if isMeet, reason = winningRepeat_B(v, info, reason); isMeet {
 				if winningRepeat_C(v, info) { //有不同
 					return false, reason
 				} else {
@@ -711,7 +703,7 @@ func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
 
 		} else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
 			//合同
-			if isMeet, reason = contractRepeat_B(v, info, reason);isMeet {
+			if isMeet, reason = contractRepeat_B(v, info, reason); isMeet {
 				if contractRepeat_C(v, info) { //有不同
 					return false, reason
 				} else {
@@ -723,7 +715,7 @@ func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
 			}
 		} else {
 			//招标结果
-			if isMeet, reason = tenderRepeat_B(v, info, reason);isMeet {
+			if isMeet, reason = tenderRepeat_B(v, info, reason); isMeet {
 				if tenderRepeat_C(v, info) { //有不同
 					return false, reason
 				} else {
@@ -755,7 +747,7 @@ func quickHeavyMethodTwo(v *Info, info *Info, reason string) (bool, string) {
 }
 
 //招标_A
-func tenderRepeat_A(v *Info, info *Info, reason string) (bool ,string) {
+func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 
 	var ss string
 	p1, p2, p3, p4, p9, p10, p11 := false, false, false, false, false, false, false
@@ -767,8 +759,8 @@ func tenderRepeat_A(v *Info, info *Info, reason string) (bool ,string) {
 		ss = ss + "p2(单位)-"
 		p2 = true
 	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode)>=5)||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber)>=5){
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
 		ss = ss + "p3(编号组)-"
 		p3 = true
 	}
@@ -798,13 +790,13 @@ func tenderRepeat_A(v *Info, info *Info, reason string) (bool ,string) {
 		(p3 && p4 && p9) || (p3 && p4 && p10) || (p3 && p4 && p11) ||
 		(p4 && p9 && p10) || (p4 && p9 && p11) || (p9 && p10 && p11) {
 		reason = reason + "满足招标A,3要素组合-" + ss + ","
-		return true,reason
+		return true, reason
 	}
-	return false,reason
+	return false, reason
 }
 
 //招标_B
-func tenderRepeat_B(v *Info, info *Info, reason string) (bool,string) {
+func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
 
 	m, n := 0, 0
 	if v.projectname != "" && v.projectname == info.projectname {
@@ -814,8 +806,8 @@ func tenderRepeat_B(v *Info, info *Info, reason string) (bool,string) {
 	if v.buyer != "" && v.buyer == info.buyer {
 		m++
 	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode)>=5)||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber)>=5){
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
 		m++
 	}
 	if v.budget != 0 && v.budget == info.budget {
@@ -834,13 +826,13 @@ func tenderRepeat_B(v *Info, info *Info, reason string) (bool,string) {
 	}
 	if m >= 2 {
 		if n == 2 && m == 2 {
-			return false,reason
+			return false, reason
 		} else {
 			reason = reason + "满足招标B,七选二,"
-			return true,reason
+			return true, reason
 		}
 	}
-	return false,reason
+	return false, reason
 }
 
 //招标_C
@@ -865,7 +857,7 @@ func tenderRepeat_C(v *Info, info *Info) bool {
 }
 
 //中标_A
-func winningRepeat_A(v *Info, info *Info, reason string) (bool,string) {
+func winningRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 
 	var ss string
 	p1, p2, p3, p5, p6, p11 := false, false, false, false, false, false
@@ -877,8 +869,8 @@ func winningRepeat_A(v *Info, info *Info, reason string) (bool,string) {
 		ss = ss + "p2(单位)-"
 		p2 = true
 	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode)>=5)||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber)>=5){
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
 		ss = ss + "p3(编号组)-"
 		p3 = true
 	}
@@ -903,14 +895,14 @@ func winningRepeat_A(v *Info, info *Info, reason string) (bool,string) {
 		(p3 && p5 && p6) || (p3 && p5 && p11) || (p3 && p6 && p11) ||
 		(p5 && p6 && p11) {
 		reason = reason + "满足中标A,3要素组合-" + ss + ","
-		return true,reason
+		return true, reason
 	}
 
-	return false,reason
+	return false, reason
 }
 
 //中标_B
-func winningRepeat_B(v *Info, info *Info, reason string) (bool,string) {
+func winningRepeat_B(v *Info, info *Info, reason string) (bool, string) {
 
 	m, n := 0, 0
 	if v.projectname != "" && v.projectname == info.projectname {
@@ -920,8 +912,8 @@ func winningRepeat_B(v *Info, info *Info, reason string) (bool,string) {
 	if v.buyer != "" && v.buyer == info.buyer {
 		m++
 	}
-	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode)>=5)||
-		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber)>=5){
+	if (v.projectcode != "" && v.projectcode == info.projectcode && len(v.projectcode) >= 5) ||
+		(v.contractnumber != "" && v.contractnumber == info.contractnumber && len(v.contractnumber) >= 5) {
 		m++
 	}
 	if v.bidamount != 0 && v.bidamount == info.bidamount {
@@ -937,13 +929,13 @@ func winningRepeat_B(v *Info, info *Info, reason string) (bool,string) {
 	}
 	if m >= 2 {
 		if n == 2 && m == 2 {
-			return false,reason
+			return false, reason
 		} else {
 			reason = reason + "满足中标B.六选二,"
-			return true,reason
+			return true, reason
 		}
 	}
-	return false,reason
+	return false, reason
 }
 
 //中标_C
@@ -963,32 +955,32 @@ func winningRepeat_C(v *Info, info *Info) bool {
 }
 
 //合同_A
-func contractRepeat_A(v *Info, info *Info, reason string) (bool,string) {
+func contractRepeat_A(v *Info, info *Info, reason string) (bool, string) {
 
 	isMeet_1 := false
-	if isMeet_1, reason = tenderRepeat_A(v, info, reason);isMeet_1 {
-		return true,reason
+	if isMeet_1, reason = tenderRepeat_A(v, info, reason); isMeet_1 {
+		return true, reason
 	}
 
 	isMeet_2 := false
-	if isMeet_2, reason = winningRepeat_A(v, info, reason);isMeet_2 {
-		return true,reason
+	if isMeet_2, reason = winningRepeat_A(v, info, reason); isMeet_2 {
+		return true, reason
 	}
-	return false,reason
+	return false, reason
 }
 
 //合同_B
-func contractRepeat_B(v *Info, info *Info, reason string) (bool,string) {
+func contractRepeat_B(v *Info, info *Info, reason string) (bool, string) {
 
 	isMeet_1 := false
-	if isMeet_1, reason = tenderRepeat_B(v, info, reason);isMeet_1 {
-		return true,reason
+	if isMeet_1, reason = tenderRepeat_B(v, info, reason); isMeet_1 {
+		return true, reason
 	}
 	isMeet_2 := false
-	if isMeet_2, reason = winningRepeat_B(v, info, reason);isMeet_2 {
-		return true,reason
+	if isMeet_2, reason = winningRepeat_B(v, info, reason); isMeet_2 {
+		return true, reason
 	}
-	return false,reason
+	return false, reason
 }
 
 //合同_C
@@ -1003,40 +995,28 @@ func contractRepeat_C(v *Info, info *Info) bool {
 	return false
 }
 
-
-func againRepeat(v *Info ,info *Info) bool {
+func againRepeat(v *Info, info *Info) bool {
 	//相同采购单位下
-	if info.buyer != "" &&v.buyer == info.buyer {
-		if info.subtype=="招标"||info.subtype=="邀标"||info.subtype=="询价"||
-			info.subtype=="竞谈"||info.subtype=="单一"||info.subtype=="竞价"||
-			info.subtype=="其他"||info.subtype=="变更" {
+	if info.buyer != "" && v.buyer == info.buyer {
+		if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
+			info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
+			info.subtype == "其他" || info.subtype == "变更" {
 			//预算金额满足条件
-			if v.budget!=info.budget&&v.budget!=0&&info.budget!=0 {
+			if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
 				return true
 			}
-		}else if info.subtype=="中标"||info.subtype=="成交"||info.subtype=="废标"||
-			info.subtype=="流标"||info.subtype=="合同"||info.subtype=="验收"||
-			info.subtype=="违规"{
+		} else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" ||
+			info.subtype == "流标" || info.subtype == "合同" || info.subtype == "验收" ||
+			info.subtype == "违规" {
 			//中标金额单位满足条件
-			if (v.bidamount!=info.bidamount&&v.bidamount!=0&&info.bidamount!=0)||
-				(v.winner!=info.winner&&v.winner!=""&&info.winner!=""){
+			if (v.bidamount != info.bidamount && v.bidamount != 0 && info.bidamount != 0) ||
+				(v.winner != info.winner && v.winner != "" && info.winner != "") {
 				return true
 			}
-		}else {
+		} else {
 
 		}
 	}
 
 	return false
 }
-
-
-
-
-
-
-
-
-
-
-

+ 101 - 110
udpfilterdup/src/main.go

@@ -13,7 +13,6 @@ import (
 	"net"
 	"os"
 	"qfw/util"
-	"qfw/util/mongodb"
 	"regexp"
 	"sync"
 	"time"
@@ -22,7 +21,7 @@ import (
 var (
 	Sysconfig map[string]interface{} //配置文件
 	mconf     map[string]interface{} //mongodb配置信息
-	mgo       *mongodb.MongodbSim    //mongodb操作对象
+	mgo       *MongodbSim            //mongodb操作对象
 	extract   string
 	udpclient mu.UdpClient             //udp对象
 	nextNode  []map[string]interface{} //下节点数组
@@ -38,14 +37,13 @@ var (
 	FilterRegTitle_1 = regexp.MustCompile("^_$")
 	FilterRegTitle_2 = regexp.MustCompile("^_$")
 
-	isMerger bool                              //是否合并
-	threadNum int								   //线程数量
-	SiteMap  map[string]map[string]interface{} //站点map
-	idtype, sid, eid string //测试人员判重使用
+	isMerger         bool                              //是否合并
+	threadNum        int                               //线程数量
+	SiteMap          map[string]map[string]interface{} //站点map
+	idtype, sid, eid string                            //测试人员判重使用
 )
 
 func init() {
-
 	flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
 	flag.StringVar(&sid, "sid", "", "开始id")
 	flag.StringVar(&eid, "eid", "", "结束id")
@@ -55,13 +53,13 @@ func init() {
 	util.ReadConfig(&Sysconfig)
 	nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
 	mconf = Sysconfig["mongodb"].(map[string]interface{})
-	mgo = &mongodb.MongodbSim{
+	mgo = &MongodbSim{
 		MongodbAddr: mconf["addr"].(string),
 		DbName:      mconf["db"].(string),
 		Size:        util.IntAllDef(mconf["pool"], 10),
 	}
-	extract = mconf["extract"].(string)
 	mgo.InitPool()
+	extract = mconf["extract"].(string)
 
 	dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
 	//加载数据
@@ -77,8 +75,8 @@ func init() {
 	SiteMap = make(map[string]map[string]interface{}, 0)
 	start := int(time.Now().Unix())
 	sess_site := mgo.GetMgoConn()
-	defer sess_site.Close()
-	res_site := sess_site.DB(site["dbname"].(string)).C(site["coll"].(string)).Find(nil).Sort("_id").Iter()
+	defer mgo.DestoryMongoConn(sess_site)
+	res_site := sess_site.DB(site["dbname"].(string)).C(site["coll"].(string)).Find(map[string]interface{}{}).Sort("_id").Iter()
 	for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
 		data_map := map[string]interface{}{
 			"area":     util.ObjToString(site_dict["area"]),
@@ -102,15 +100,15 @@ func main() {
 }
 
 //测试组人员使用
-func mainTT() {
+func mainT() {
 	/*
-	ObjectId("5da3f31aa5cb26b9b798d3aa")
-	ObjectId("5da418c4a5cb26b9b7e3e9a6")
-	ObjectId("5df5071ce9d1f601e495fa54")
-	ObjectId("5e09c05f0cf41612e0626abc")
+		ObjectId("5da3f31aa5cb26b9b798d3aa")
+		ObjectId("5da418c4a5cb26b9b7e3e9a6")
+		ObjectId("5df5071ce9d1f601e495fa54")
+		ObjectId("5e09c05f0cf41612e0626abc")
 	*/
-	//sid = "5da3f31aa5cb26b9b798d3aa"
-	//eid = "5da418c4a5cb26b9b7e3e9a6"
+	sid = "5df5071ce9d1f601e495fa54"
+	eid = "5e09c05f0cf41612e0626abc"
 
 	mapinfo := map[string]interface{}{}
 	if sid == "" || eid == "" {
@@ -166,9 +164,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	fmt.Println("开始数据判重")
 	defer util.Catch()
 	//区间id
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	var q map[string]interface{}
+	q := map[string]interface{}{}
 	if idtype == "1" {
 		q = map[string]interface{}{
 			"_id": map[string]interface{}{
@@ -179,15 +175,18 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	} else {
 		q = map[string]interface{}{
 			"_id": map[string]interface{}{
-				"$gt":  util.StringTOBsonId(mapInfo["gtid"].(string)),
-				"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
+				"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
+				"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
 			},
 		}
 	}
-	log.Println(mgo.DbName,extract,q)
-	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
+	log.Println(mgo.DbName, extract, q)
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	//it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
+	it := sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
 	updateExtract := [][]map[string]interface{}{}
-	log.Println("线程数:",threadNum)
+	log.Println("线程数:", threadNum)
 	pool := make(chan bool, threadNum)
 
 	wg := &sync.WaitGroup{}
@@ -206,7 +205,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 			}()
 			info := NewInfo(tmp)
 			//是否为无效数据
-			if invalidData(info.buyer, info.projectname, info.projectcode,info.contractnumber) {
+			if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
 				updateExtract = append(updateExtract, []map[string]interface{}{
 					map[string]interface{}{
 						"_id": tmp["_id"],
@@ -218,45 +217,45 @@ func task(data []byte, mapInfo map[string]interface{}) {
 					},
 				})
 				if len(updateExtract) > 500 {
-					mgo.UpdateBulk(extract, updateExtract...)
+					mgo.UpSertBulk(extract, updateExtract...)
 					updateExtract = [][]map[string]interface{}{}
 				}
 			} else {
 				b, source, reason := DM.check(info)
 				if b { //有重复,生成更新语句,更新抽取和更新招标
 					repeateN++
-					var is_replace  = false
-					var mergeArr = []int64{} //更改合并数组记录
-					var newData = &Info{}    //更换新的数据池数据
+					var is_replace = false
+					var mergeArr = []int64{}                    //更改合并数组记录
+					var newData = &Info{}                       //更换新的数据池数据
 					var repeat_idMap = map[string]interface{}{} //记录判重的
-					var merge_idMap = map[string]interface{}{} //记录合并的
-					if idtype == "1" { //先临时决定一个id
+					var merge_idMap = map[string]interface{}{}  //记录合并的
+					if idtype == "1" {                          //先临时决定一个id
 						repeat_idMap["_id"] = info.id
 						merge_idMap["_id"] = source.id
 					} else {
-						repeat_idMap["_id"] = util.StringTOBsonId(info.id)
-						merge_idMap["_id"] = util.StringTOBsonId(source.id)
+						repeat_idMap["_id"] = StringTOBsonId(info.id)
+						merge_idMap["_id"] = StringTOBsonId(source.id)
 					}
-					repeat_id:=source.id
+					repeat_id := source.id
 					//以下合并相关
 					if isMerger {
 						basic_bool := basicDataScore(source, info)
 						if basic_bool {
 							//已原始数据为标准 - 对比数据打判重标签-
-							newData, mergeArr,is_replace = mergeDataFields(source, info)
+							newData, mergeArr, is_replace = mergeDataFields(source, info)
 							DM.replaceSourceData(newData, source.id) //替换
 							//对比数据打重复标签的id,原始数据id的记录
 							if idtype == "1" {
 								repeat_idMap["_id"] = info.id
 								merge_idMap["_id"] = source.id
 							} else {
-								repeat_idMap["_id"] = util.StringTOBsonId(info.id)
-								merge_idMap["_id"] = util.StringTOBsonId(source.id)
+								repeat_idMap["_id"] = StringTOBsonId(info.id)
+								merge_idMap["_id"] = StringTOBsonId(source.id)
 							}
 							repeat_id = source.id
 						} else {
 							//已对比数据为标准 ,数据池的数据打判重标签
-							newData, mergeArr,is_replace = mergeDataFields(info, source)
+							newData, mergeArr, is_replace = mergeDataFields(info, source)
 							DM.replaceSourceData(newData, source.id) //替换
 
 							//原始数据打重复标签的id,   对比数据id的记录
@@ -264,19 +263,18 @@ func task(data []byte, mapInfo map[string]interface{}) {
 								repeat_idMap["_id"] = source.id
 								merge_idMap["_id"] = info.id
 							} else {
-								repeat_idMap["_id"] = util.StringTOBsonId(source.id)
-								merge_idMap["_id"] = util.StringTOBsonId(info.id)
+								repeat_idMap["_id"] = StringTOBsonId(source.id)
+								merge_idMap["_id"] = StringTOBsonId(info.id)
 							}
 							repeat_id = info.id
 						}
 
-
-						merge_map := make(map[string]interface{},0)
-						if is_replace {//有过合并-更新数据
+						merge_map := make(map[string]interface{}, 0)
+						if is_replace { //有过合并-更新数据
 
 							merge_map = map[string]interface{}{
 								"$set": map[string]interface{}{
-									"merge":newData.mergemap,
+									"merge": newData.mergemap,
 								},
 							}
 
@@ -304,11 +302,11 @@ func task(data []byte, mapInfo map[string]interface{}) {
 									merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
 								} else if value == 9 {
 									merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
-								}else if value == 10 {
+								} else if value == 10 {
 									merge_map["$set"].(map[string]interface{})["publishtime"] = newData.publishtime
-								}else if value == 11 {
+								} else if value == 11 {
 									merge_map["$set"].(map[string]interface{})["agency"] = newData.agency
-								}else {
+								} else {
 								}
 							}
 							//模板数据更新
@@ -319,15 +317,14 @@ func task(data []byte, mapInfo map[string]interface{}) {
 						}
 					}
 
-
 					//重复数据打标签
 					updateExtract = append(updateExtract, []map[string]interface{}{
 						repeat_idMap,
 						map[string]interface{}{
 							"$set": map[string]interface{}{
-								"repeat": 1,
+								"repeat":        1,
 								"repeat_reason": reason,
-								"repeat_id":repeat_id,
+								"repeat_id":     repeat_id,
 							},
 						},
 					})
@@ -336,14 +333,14 @@ func task(data []byte, mapInfo map[string]interface{}) {
 			}
 		}(tmp)
 		if len(updateExtract) > 500 {
-			mgo.UpdateBulk(extract, updateExtract...)
+			mgo.UpSertBulk(extract, updateExtract...)
 			updateExtract = [][]map[string]interface{}{}
 		}
 		tmp = make(map[string]interface{})
 	}
 	wg.Wait()
 	if len(updateExtract) > 0 {
-		mgo.UpdateBulk(extract, updateExtract...)
+		mgo.UpSertBulk(extract, updateExtract...)
 		//mgo.UpdateBulk(bidding, updateBidding...)
 	}
 	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"])
@@ -390,8 +387,8 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 	} else {
 		q = map[string]interface{}{
 			"_id": map[string]interface{}{
-				"$gt":  util.StringTOBsonId(mapInfo["gtid"].(string)),
-				"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
+				"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
+				"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
 			},
 		}
 	}
@@ -400,7 +397,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 	minTime, maxTime := int64(0), int64(0)
 	for tmp := make(map[string]interface{}); it.Next(&tmp); {
 		//取出最大最小时间
-		if minTime == 0 || maxTime == 0 &&util.Int64All(tmp["publishtime"])!=0{
+		if minTime == 0 || maxTime == 0 && util.Int64All(tmp["publishtime"]) != 0 {
 			minTime = util.Int64All(tmp["publishtime"])
 			maxTime = util.Int64All(tmp["publishtime"])
 		} else {
@@ -414,19 +411,19 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 		}
 	}
 	//时间不正确时
-	if minTime==0&&maxTime==0 {
+	if minTime == 0 && maxTime == 0 {
 		log.Println("段数据区间 publishtime不符合")
 		return
 	}
 	fmt.Println("最小时间==", minTime, "最大时间==", maxTime)
-	gtid,lteid:= util.BsonIdToSId(mapInfo["gtid"].(string)), util.BsonIdToSId(mapInfo["lteid"].(string))
-	fmt.Println(gtid,lteid)
-	HM = NewHistorymap(gtid,lteid, minTime, maxTime)
+	gtid, lteid := util.BsonIdToSId(mapInfo["gtid"].(string)), util.BsonIdToSId(mapInfo["lteid"].(string))
+	fmt.Println(gtid, lteid)
+	HM = NewHistorymap(gtid, lteid, minTime, maxTime)
 	fmt.Println("开始历史数据判重")
 
 	defer util.Catch()
 	//区间id
-	sess_history:= mgo.GetMgoConn()
+	sess_history := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess_history)
 	var q_history map[string]interface{}
 	if idtype == "1" {
@@ -439,15 +436,15 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 	} else {
 		q_history = map[string]interface{}{
 			"_id": map[string]interface{}{
-				"$gt":  util.StringTOBsonId(mapInfo["gtid"].(string)),
-				"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
+				"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
+				"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
 			},
 		}
 	}
-	log.Println(mgo.DbName,extract,q_history)
+	log.Println(mgo.DbName, extract, q_history)
 	it_history := sess_history.DB(mgo.DbName).C(extract).Find(&q_history).Sort("publishtime").Iter()
 	updateExtract := [][]map[string]interface{}{}
-	log.Println("线程数:",threadNum)
+	log.Println("线程数:", threadNum)
 	pool := make(chan bool, threadNum)
 	wg := &sync.WaitGroup{}
 	//mapLock := &sync.Mutex{}
@@ -464,7 +461,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 				wg.Done()
 			}()
 			info := NewInfo(tmp)
-			if invalidData(info.buyer, info.projectname, info.projectcode,info.contractnumber) {
+			if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
 				updateExtract = append(updateExtract, []map[string]interface{}{
 					map[string]interface{}{
 						"_id": tmp["_id"],
@@ -476,7 +473,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 					},
 				})
 				if len(updateExtract) > 500 {
-					mgo.UpdateBulk(extract, updateExtract...)
+					mgo.UpSertBulk(extract, updateExtract...)
 					updateExtract = [][]map[string]interface{}{}
 				}
 			} else {
@@ -499,38 +496,38 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 						})
 					} else {
 						repeateN++
-						var is_replace  = false
-						var mergeArr = []int64{} //更改合并数组记录
-						var newData = &Info{}    //更换新的数据池数据
+						var is_replace = false
+						var mergeArr = []int64{}                    //更改合并数组记录
+						var newData = &Info{}                       //更换新的数据池数据
 						var repeat_idMap = map[string]interface{}{} //记录判重的
-						var merge_idMap = map[string]interface{}{} //记录合并的
-						if idtype == "1" { //先临时决定一个id
+						var merge_idMap = map[string]interface{}{}  //记录合并的
+						if idtype == "1" {                          //先临时决定一个id
 							repeat_idMap["_id"] = info.id
 							merge_idMap["_id"] = source.id
 						} else {
-							repeat_idMap["_id"] = util.StringTOBsonId(info.id)
-							merge_idMap["_id"] = util.StringTOBsonId(source.id)
+							repeat_idMap["_id"] = StringTOBsonId(info.id)
+							merge_idMap["_id"] = StringTOBsonId(source.id)
 						}
-						repeat_id:=source.id
+						repeat_id := source.id
 						//以下合并相关
 						if isMerger {
 							basic_bool := basicDataScore(source, info)
 							if basic_bool {
 								//已原始数据为标准 - 对比数据打判重标签-
-								newData, mergeArr,is_replace = mergeDataFields(source, info)
+								newData, mergeArr, is_replace = mergeDataFields(source, info)
 								DM.replaceSourceData(newData, source.id) //替换
 								//对比数据打重复标签的id,原始数据id的记录
 								if idtype == "1" {
 									repeat_idMap["_id"] = info.id
 									merge_idMap["_id"] = source.id
 								} else {
-									repeat_idMap["_id"] = util.StringTOBsonId(info.id)
-									merge_idMap["_id"] = util.StringTOBsonId(source.id)
+									repeat_idMap["_id"] = StringTOBsonId(info.id)
+									merge_idMap["_id"] = StringTOBsonId(source.id)
 								}
 								repeat_id = source.id
 							} else {
 								//已对比数据为标准 ,数据池的数据打判重标签
-								newData, mergeArr,is_replace = mergeDataFields(info, source)
+								newData, mergeArr, is_replace = mergeDataFields(info, source)
 								DM.replaceSourceData(newData, source.id) //替换
 
 								//原始数据打重复标签的id,   对比数据id的记录
@@ -538,19 +535,18 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 									repeat_idMap["_id"] = source.id
 									merge_idMap["_id"] = info.id
 								} else {
-									repeat_idMap["_id"] = util.StringTOBsonId(source.id)
-									merge_idMap["_id"] = util.StringTOBsonId(info.id)
+									repeat_idMap["_id"] = StringTOBsonId(source.id)
+									merge_idMap["_id"] = StringTOBsonId(info.id)
 								}
 								repeat_id = info.id
 							}
 
-
-							merge_map := make(map[string]interface{},0)
-							if is_replace {//有过合并-更新数据
+							merge_map := make(map[string]interface{}, 0)
+							if is_replace { //有过合并-更新数据
 
 								merge_map = map[string]interface{}{
 									"$set": map[string]interface{}{
-										"merge":newData.mergemap,
+										"merge": newData.mergemap,
 									},
 								}
 
@@ -578,11 +574,11 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 										merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
 									} else if value == 9 {
 										merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
-									}else if value == 10 {
+									} else if value == 10 {
 										merge_map["$set"].(map[string]interface{})["publishtime"] = newData.publishtime
-									}else if value == 11 {
+									} else if value == 11 {
 										merge_map["$set"].(map[string]interface{})["agency"] = newData.agency
-									}else {
+									} else {
 									}
 								}
 								//模板数据更新
@@ -593,15 +589,14 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 							}
 						}
 
-
 						//重复数据打标签
 						updateExtract = append(updateExtract, []map[string]interface{}{
 							repeat_idMap,
 							map[string]interface{}{
 								"$set": map[string]interface{}{
-									"repeat": 1,
+									"repeat":        1,
 									"repeat_reason": reason,
-									"repeat_id":repeat_id,
+									"repeat_id":     repeat_id,
 								},
 							},
 						})
@@ -611,14 +606,14 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 			}
 		}(tmp)
 		if len(updateExtract) > 500 {
-			mgo.UpdateBulk(extract, updateExtract...)
+			mgo.UpSertBulk(extract, updateExtract...)
 			updateExtract = [][]map[string]interface{}{}
 		}
 		tmp = make(map[string]interface{})
 	}
 	wg.Wait()
 	if len(updateExtract) > 0 {
-		mgo.UpdateBulk(extract, updateExtract...)
+		mgo.UpSertBulk(extract, updateExtract...)
 		//mgo.UpdateBulk(bidding, updateBidding...)
 	}
 	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"])
@@ -647,14 +642,14 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 }
 
 //合并字段-并更新merge字段的值
-func mergeDataFields(source *Info, info *Info) (*Info, []int64,bool) {
+func mergeDataFields(source *Info, info *Info) (*Info, []int64, bool) {
 
-	merge_recordMap := make(map[string]interface{},0)
+	merge_recordMap := make(map[string]interface{}, 0)
 	mergeArr := make([]int64, 0)
 	//是否替换数据了-记录原始的数据
-	is_replace :=false
+	is_replace := false
 	//1、城市
-	if source.area == "" || source.area == "全国"{
+	if source.area == "" || source.area == "全国" {
 		//为空
 		if info.area != "全国" && info.area != "" {
 			merge_recordMap["area"] = source.area
@@ -664,7 +659,7 @@ func mergeDataFields(source *Info, info *Info) (*Info, []int64,bool) {
 			mergeArr = append(mergeArr, 1)
 			is_replace = true
 		}
-	}else {
+	} else {
 		//不为空-查看站点相关-有值必替换
 		if source.is_site {
 			//是站点替换的城市
@@ -749,21 +744,17 @@ func mergeDataFields(source *Info, info *Info) (*Info, []int64,bool) {
 		is_replace = true
 	}
 
-
-
-
-	if is_replace {//有过替换更新
+	if is_replace { //有过替换更新
 		//总次数+1
-		source.mergemap["total_num"] = util.Int64All(source.mergemap["total_num"])+1
+		source.mergemap["total_num"] = util.Int64All(source.mergemap["total_num"]) + 1
 		merge_recordMap["num"] = util.Int64All(source.mergemap["total_num"])
 		//和哪一个数据id进行非空替换的-记录
-		key:=info.id
+		key := info.id
 		source.mergemap[key] = merge_recordMap
 	}
 
-
 	//以上合并过于简单,待进一步优化
-	return source, mergeArr,is_replace
+	return source, mergeArr, is_replace
 }
 
 //权重评估
@@ -867,7 +858,7 @@ func basicDataScore(v *Info, info *Info) bool {
 	if v.buyer != "" {
 		m++
 	}
-	if v.projectcode != ""||v.contractnumber != "" {
+	if v.projectcode != "" || v.contractnumber != "" {
 		m++
 	}
 	if v.budget != 0 {
@@ -898,7 +889,7 @@ func basicDataScore(v *Info, info *Info) bool {
 	if info.buyer != "" {
 		n++
 	}
-	if info.projectcode != "" || info.contractnumber != ""{
+	if info.projectcode != "" || info.contractnumber != "" {
 		n++
 	}
 	if info.budget != 0 {
@@ -951,7 +942,7 @@ func invalidData(d1 string, d2 string, d3 string, d4 string) bool {
 	if d4 != "" {
 		n++
 	}
- 	if n == 0 {
+	if n == 0 {
 		return true
 	}
 	return false