Browse Source

删除多余代码,并优化网站权重逻辑

apple 5 years ago
parent
commit
87f70f1168
3 changed files with 101 additions and 665 deletions
  1. 5 17
      udpfilterdup/src/config.json
  2. 43 352
      udpfilterdup/src/datamap.go
  3. 53 296
      udpfilterdup/src/main.go

+ 5 - 17
udpfilterdup/src/config.json

@@ -4,11 +4,11 @@
     "mongodb": {
         "addr": "192.168.3.207:27092",
         "pool": 5,
-        "db": "qfw",
-        "extract": "result_file_20200410",
-        "extract_back": "result_file_20200409",
+        "db": "extract_kf",
+        "extract": "zk_Copy_of_zk_bidding_0506",
+        "extract_back": "zk_Copy_of_zk_bidding_0506",
         "site": {
-            "dbname": "qfw",
+            "dbname": "extract_kf",
             "coll": "site"
         }
     },
@@ -17,24 +17,12 @@
         "api": "http://10.171.112.160:19281/_send/_mail"
     },
     "nextNode": [
-        {
-            "addr": "172.17.145.179",
-            "port": 1782,
-            "stype": "project",
-            "memo": "合并项目"
-        },
-        {
-            "addr": "127.0.0.1",
-            "port": 1783,
-            "stype": "bidding",
-            "memo": "创建招标数据索引new"
-        }
     ],
     "threads": 1,
     "isMerger": false,
     "isSort":true,
     "lowHeavy":false,
-    "timingTask":true,
+    "timingTask":false,
     "timingSpanDay": 3,
     "timingPubScope": 720,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",

+ 43 - 352
udpfilterdup/src/datamap.go

@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"log"
 	qutil "qfw/util"
-	"qfw/util/mongodb"
 	"regexp"
 	"strings"
 	"sync"
@@ -30,10 +29,8 @@ type Info struct {
 	comeintime     int64   //入库时间
 	bidopentime    int64   //开标时间
 	bidopenaddress string  //开标地点
-
-	site string //站点
-	href string //正文的url
-
+	site 		   string //站点
+	href 		     string //正文的url
 	repeatid         string                 //重复id
 	titleSpecialWord bool                   //标题特殊词
 	specialWord      bool                   //再次判断的特殊词
@@ -55,16 +52,6 @@ type datamap struct {
 	keys   map[string]bool
 }
 
-//历史更新数据
-type historymap struct {
-	lock   sync.Mutex //锁
-	days   int        //保留几天数据
-	data   map[string][]*Info
-	keymap []string
-	areakeys []string
-	keys   map[string]bool
-}
-
 func TimedTaskDatamap(days int,lasttime int64) *datamap {
 	log.Println("数据池开始重新构建")
 	datelimit = qutil.Float64All(days * 86400)
@@ -199,120 +186,6 @@ func NewDatamap(days int, lastid string) *datamap {
 	return dm
 }
 
-//构建新历史数据池
-func NewHistorymap(startid string, lastid string, startTime int64, lastTime int64) *historymap {
-	datelimit = qutil.Float64All(5 * 86400)
-	hm := &historymap{sync.Mutex{}, 5, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
-	if lastid == "" || startid == "" {
-		return hm
-	}
-	//取startid之前5天
-	sess_start := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess_start) //lte  gte
-	it_start := sess_start.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$lte":"`+startid+`"}}`,
-		true)).Sort("-_id").Iter()
-	m, n := 0, 0
-	for tmp_start := make(map[string]interface{}); it_start.Next(&tmp_start); {
-
-		if qutil.IntAll(tmp_start["repeat"]) == 1||qutil.IntAll(tmp_start["repeat"]) == -1 {
-			continue
-		}
-
-		pt_s := tmp_start["comeintime"]
-		if Is_Sort {
-			pt_s = tmp_start["publishtime"]
-		}
-		pt_time := qutil.Int64All(pt_s)
-		if pt_time <= 0 {
-			continue
-		}
-		if qutil.Float64All(startTime-pt_time) <= datelimit {
-			n++
-			info := NewInfo(tmp_start)
-			dkey := qutil.FormatDateWithObj(&pt_s, qutil.Date_yyyyMMdd)
-			k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
-			data := hm.data[k]
-			if data == nil {
-				data = []*Info{}
-			}
-			data = append(data, info)
-			hm.data[k] = data
-			hm.keys[dkey] = true
-
-			//添加省
-			isAreaExist :=false
-			for _,v:= range hm.areakeys {
-				if v==info.area {
-					isAreaExist = true
-				}
-			}
-			if !isAreaExist {
-				areaArr := hm.areakeys
-				areaArr = append(areaArr,info.area)
-				hm.areakeys = areaArr
-			}
-		} else {
-			break
-		}
-		tmp_start = make(map[string]interface{})
-	}
-
-	log.Println("load history 前:", n)
-	//取lastid之后5天
-	sess_last := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess_last) //lte  gte
-	it_last := sess_last.DB(mgo.DbName).C(extract).Find(mongodb.ObjToMQ(`{"_id":{"$gte":"`+lastid+`"}}`,
-		true)).Sort("_id").Iter()
-
-	for tmp_last := make(map[string]interface{}); it_last.Next(&tmp_last); {
-
-		if qutil.IntAll(tmp_last["repeat"]) == 1||qutil.IntAll(tmp_last["repeat"]) == -1 {
-			continue
-		}
-
-		pt_l := tmp_last["comeintime"]
-		if Is_Sort {
-			pt_l = tmp_last["publishtime"]
-		}
-		pt_time := qutil.Int64All(pt_l)
-		if pt_time <= 0 {
-			continue
-		}
-		if qutil.Float64All(pt_time-lastTime) <= datelimit {
-			m++
-			info := NewInfo(tmp_last)
-			dkey := qutil.FormatDateWithObj(&pt_l, qutil.Date_yyyyMMdd)
-			k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
-			data := hm.data[k]
-			if data == nil {
-				data = []*Info{}
-			}
-			data = append(data, info)
-			hm.data[k] = data
-			hm.keys[dkey] = true
-
-			//添加省
-			isAreaExist :=false
-			for _,v:= range hm.areakeys {
-				if v==info.area {
-					isAreaExist = true
-				}
-			}
-			if !isAreaExist {
-				areaArr := hm.areakeys
-				areaArr = append(areaArr,info.area)
-				hm.areakeys = areaArr
-			}
-		} else {
-			break
-		}
-		tmp_last = make(map[string]interface{})
-	}
-
-	log.Println("load history 后:", m)
-	return hm
-}
-
 func NewInfo(tmp map[string]interface{}) *Info {
 	subtype := qutil.ObjToString(tmp["subtype"])
 	area := qutil.ObjToString(tmp["area"])
@@ -320,7 +193,12 @@ func NewInfo(tmp map[string]interface{}) *Info {
 		area = "全国"
 	}
 	info := &Info{}
-	info.id = BsonTOStringId(tmp["_id"])
+	if IdType {
+		info.id = qutil.ObjToString(tmp["_id"])
+	}else  {
+		info.id = BsonTOStringId(tmp["_id"])
+	}
+
 	info.title = qutil.ObjToString(tmp["title"])
 	info.area = area
 	info.subtype = subtype
@@ -539,249 +417,62 @@ L:
 
 	return
 }
-
-func (h *historymap) checkHistory(info *Info) (b bool, source *Info, reasons string) {
-	reason := ""
-	keys := []string{}
-	h.lock.Lock()
-	for k, _ := range h.keys { //不同时间段
-		if info.area=="全国" {
-			//匹配所有省
-			for _,v := range h.areakeys{
-				keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
-			}
-		}else {
-			//匹配指定省
-			keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
-		}
-		keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
-
-	}
-	h.lock.Unlock()
-
-L:
-	for _, k := range keys {
-		h.lock.Lock()
-		data := h.data[k]
-		h.lock.Unlock()
-		if len(data) > 0 { //对比v   找到同类型,同省或全国的数据作对比
-			for _, v := range data {
-				reason = ""
-				if v.id == info.id { //正常重复
-					return false, v, ""
-				}
-				if info.subtype == v.subtype {
-					if info.site != "" {
-						sitelock.Lock()
-						dict := SiteMap[info.site]
-						sitelock.Unlock()
-						if dict != nil {
-							if info.area == "全国" && dict["area"] != "" {
-								info.area = qutil.ObjToString(dict["area"])
-								info.city = qutil.ObjToString(dict["city"])
-							} else {
-								if info.city == "" && dict["city"] != "" {
-									info.area = qutil.ObjToString(dict["area"])
-									info.city = qutil.ObjToString(dict["city"])
-								}
-							}
-						}
-					}
-					//前置条件1 - 站点相关
-					if info.site != "" && info.site == v.site {
-						if info.href != "" && info.href == v.href {
-							reason = "href相同"
-							b = true
-							source = v
-							reasons = reason
-							break L
-						}
-						if info.href != "" && info.href != v.href {
-							reason = "href不同-"
-						}
-					}
-
-					//前置条件2 - 标题相关,有且一个关键词
-					if ((info.titleSpecialWord && !v.titleSpecialWord) || (info.specialWord && !v.specialWord)) &&
-						info.title != v.title && v.title != "" && info.title != "" {
-						continue
-					}
-
-					//前置条件3 - 标题相关,均含有关键词
-					if ((info.titleSpecialWord && v.titleSpecialWord) || (info.specialWord && v.specialWord)) &&
-						len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 && v.title != "" && info.title != "" {
-						if !(strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title)) {
-							continue //无包含关系
-						}
-						if strings.Contains(v.title, info.title) || strings.Contains(info.title, v.title) {
-							reason = reason + "标题关键词且包含关系"
-							//继续二级金额判断
-							if !againRepeat(v, info) {
-								b = true
-								source = v
-								reasons = reason
-								break
-							}
-
-						}
-					}
-
-					//新增快速数据过少判重
-					if LowHeavy {
-						repeat := false
-						if repeat, reason = fastLowQualityHeavy(v, info, reason); repeat {
-							b = true
-							source = v
-							reasons = reason
-							break
-						}
-					}
-
-					//代理机构相同-非空相等
-					if v.agency != "" && info.agency != "" && v.agency == info.agency {
-						reason = reason + "同机构-"
-						repeat := false
-						if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
-							b = true
-							source = v
-							reasons = reason
-							break
-						}
-					} else {
-						reason = reason + "非同机构-"
-						if info.city != "" && info.city == v.city {
-							reason = reason + "同城-"
-							repeat := false
-							if repeat, reason = quickHeavyMethodTwo(v, info, reason); repeat {
-								b = true
-								source = v
-								reasons = reason
-								break
-							}
-						} else {
-							reason = reason + "不同城-"
-							repeat := false
-							if repeat, reason = quickHeavyMethodOne(v, info, reason); repeat {
-								b = true
-								source = v
-								reasons = reason
-								break
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-
-	//
-	if b {
-		if info.repeatid == source.id {
-			b = false //重复-无变化-不处理
-		}
-	} else {
-		if source != nil {
-			if source.repeatid != "" { //未判重-有变化--记录
-				b = true
-				reason = "未判重记录"
-				reasons = reason
-			}
-		}
-	}
-	//往预存数据 d 添加
-	if !b {
-		ct := info.comeintime
-		if Is_Sort {
-			ct = info.publishtime
-		}
-		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
-		k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
-		data := h.data[k]
-		if data == nil {
-			data = []*Info{info}
-			h.data[k] = data
-			if !h.keys[dkey] {
-				h.keys[dkey] = true
-				//h.update(ct)
-			}
-		} else {
-			data = append(data, info)
-			h.data[k] = data
-		}
-
-		//添加省
-		isAreaExist :=false
-		for _,v:= range h.areakeys {
-			if v==info.area {
-				isAreaExist = true
-			}
-		}
-		if !isAreaExist {
-			areaArr := h.areakeys
-			areaArr = append(areaArr,info.area)
-			h.areakeys = areaArr
-		}
-	}
-	return
-}
-
 //替换原始数据池
-func (d *datamap) replaceSourceData(replaceData *Info, replaceId string) {
-	ct := replaceData.comeintime
+func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
+	ct := newData.comeintime
 	if Is_Sort||TimingTask {
-		ct = replaceData.publishtime
+		ct = newData.publishtime
 	}
 	dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
-	k := fmt.Sprintf("%s_%s_%s", dkey, replaceData.subtype, replaceData.area)
+	k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
 	d.lock.Lock()
 	data := d.data[k]
 	if data == nil {
-		data = []*Info{replaceData}
+		data = []*Info{newData}
 		d.data[k] = data
 		if !d.keys[dkey] {
 			d.keys[dkey] = true
 		}
 	} else {
-		//遍历替换
+ 		//遍历替换
+ 		isReplace := false
 		for k, v := range data {
-			if v.id == replaceId {
-				data[k] = replaceData
+			if v.id == oldData.id {
+				data[k] = newData //同天_type_area 替换
+				isReplace = true
 				break
 			}
 		}
-		d.data[k] = data
-	}
-	d.lock.Unlock()
-}
-
-func (h *historymap) replaceSourceData(replaceData *Info, replaceId string) {
-	ct := replaceData.comeintime
-	if Is_Sort {
-		ct = replaceData.publishtime
-	}
-	dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
-	k := fmt.Sprintf("%s_%s_%s", dkey, replaceData.subtype, replaceData.area)
-	h.lock.Lock()
-	data := h.data[k]
-	if data == nil {
-		data = []*Info{replaceData}
-		h.data[k] = data
-		if !h.keys[dkey] {
-			h.keys[dkey] = true
-		}
-	} else {
-		//遍历替换
-		for k, v := range data {
-			if v.id == replaceId {
-				data[k] = replaceData
-				break
+		if !isReplace {
+			//添加新数据 删除老数据
+			data = append(data,newData)
+			ct_old := oldData.comeintime
+			if Is_Sort||TimingTask {
+				ct_old = oldData.publishtime
 			}
+			dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
+			k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
+			data_old := d.data[k_old]
+			if len(data_old)==1 {
+				delete(d.data ,k_old)
+			} else {
+				for k, v := range data_old {
+					if v.id == oldData.id {
+						//删除对应当前的
+						data_old = append(data_old[:k], data_old[k+1:]...)
+						break
+					}
+				}
+				d.data[k_old] = data_old
+			}
+		}else {
+			d.data[k] = data
 		}
-		h.data[k] = data
 	}
-	h.lock.Unlock()
+	d.lock.Unlock()
 }
 
+
 func (d *datamap) update(t int64) {
 	//每天0点清除历史数据
 	d.keymap = d.GetLatelyFiveDay(t)

+ 53 - 296
udpfilterdup/src/main.go

@@ -31,9 +31,6 @@ var (
 	nextNode     []map[string]interface{} //下节点数组
 	dupdays      = 5                      //初始化判重范围
 	DM           *datamap                 //
-	HM           *historymap              //判重数据
-
-	lastid = ""
 
 	//正则筛选相关
 	FilterRegTitle   = regexp.MustCompile("^_$")
@@ -49,7 +46,8 @@ var (
 	TimingTask     bool                              //是否定时任务
 	timingSpanDay  int64                             //时间跨度
 	timingPubScope int64                             //发布时间周期
-	sid, eid       string                            //测试人员判重使用
+	sid,eid,lastid       string                      //测试人员判重使用
+	IdType         bool    //默认object类型
 )
 
 func init() {
@@ -100,6 +98,7 @@ func init() {
 			"district": util.ObjToString(site_dict["district"]),
 			"sitetype": util.ObjToString(site_dict["sitetype"]),
 			"level":    util.ObjToString(site_dict["level"]),
+			"weight":   util.ObjToString(site_dict["weight"]),
 		}
 		SiteMap[util.ObjToString(site_dict["site"])] = data_map
 	}
@@ -107,10 +106,6 @@ func init() {
 }
 
 func main() {
-
-	//TestChuli()
-	//return
-
 	go checkMapJob()
 	updport := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
@@ -125,21 +120,16 @@ func main() {
 
 //测试组人员使用
 func mainT() {
-	/*
-		ObjectId("5da3f31aa5cb26b9b798d3aa")
-		ObjectId("5da418c4a5cb26b9b7e3e9a6")
 
-		ObjectId("5da3f2c5a5cb26b9b79847fc")
-		ObjectId("5db2735ba5cb26b9b7c99c6f")
-	*/
 	if TimingTask {
 		log.Println("定时任务测试开始")
 		go timedTaskDay()
 		time.Sleep(99999 * time.Hour)
 	} else {
-		//2020-4-24
-		sid = "5ea1bb800000000000000000"
-		eid = "5ea9a4800000000000000000"
+		//2019年8月1日-8月17日  712646
+		IdType = true
+		sid = "5d41607aa5cb26b9b734fe30"
+		eid = "5eb172e1f2c1a7850bad1c39"
 		log.Println("正常判重测试开始")
 		log.Println(sid, "---", eid)
 		mapinfo := map[string]interface{}{}
@@ -166,17 +156,13 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
 		} else if mapInfo != nil {
 			taskType := util.ObjToString(mapInfo["stype"])
-			if taskType == "historyTask" {
-				//历史更新流程
-				go historyTask(data, mapInfo)
-			} else if taskType == "normalTask" {
+			if taskType == "normalTask" {
 				//判重流程
 				go task(data, mapInfo)
 			} else {
 				//其他
 				go task(data, mapInfo)
 			}
-
 			key, _ := mapInfo["key"].(string)
 			if key == "" {
 				key = "udpok"
@@ -203,6 +189,14 @@ func task(data []byte, mapInfo map[string]interface{}) {
 			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
 		},
 	}
+	if IdType {
+		q = map[string]interface{}{
+			"_id": map[string]interface{}{
+				"$gt":  mapInfo["gtid"].(string),
+				"$lte": mapInfo["lteid"].(string),
+			},
+		}
+	}
 	log.Println(mgo.DbName, extract, q)
 	sess := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess)
@@ -213,7 +207,6 @@ func task(data []byte, mapInfo map[string]interface{}) {
 		log.Println("排序:publishtime")
 		it = sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
 	}
-	//it = sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
 	updateExtract := [][]map[string]interface{}{}
 	log.Println("线程数:", threadNum)
 	pool := make(chan bool, threadNum)
@@ -259,15 +252,16 @@ func task(data []byte, mapInfo map[string]interface{}) {
 
 			b, source, reason := DM.check(info)
 			if b { //有重复,生成更新语句,更新抽取和更新招标
-
 				repeateN++
-
 				var is_replace = false
 				var mergeArr = []int64{}                    //更改合并数组记录
 				var newData = &Info{}                       //更换新的数据池数据
 				var repeat_idMap = map[string]interface{}{} //记录判重的
 				var merge_idMap = map[string]interface{}{}  //记录合并的
 				repeat_idMap["_id"] = StringTOBsonId(info.id)
+				if IdType {
+					repeat_idMap["_id"] = info.id
+				}
 				merge_idMap["_id"] = StringTOBsonId(source.id)
 				repeat_id := source.id //初始化一个数据
 
@@ -276,18 +270,27 @@ func task(data []byte, mapInfo map[string]interface{}) {
 					if basic_bool {
 						//已原始数据为标准 - 对比数据打判重标签-
 						newData, mergeArr, is_replace = mergeDataFields(source, info)
-						DM.replaceSourceData(newData, source.id) //替换
+						DM.replaceSourceData(newData, source) //替换
 						//对比数据打重复标签的id,原始数据id的记录
 						repeat_idMap["_id"] = StringTOBsonId(info.id)
 						merge_idMap["_id"] = StringTOBsonId(source.id)
+
+						if IdType {
+							repeat_idMap["_id"] = info.id
+							merge_idMap["_id"] = source.id
+						}
 						repeat_id = source.id
 					} else {
 						//已对比数据为标准 ,数据池的数据打判重标签
 						newData, mergeArr, is_replace = mergeDataFields(info, source)
-						DM.replaceSourceData(newData, source.id) //替换
+						DM.replaceSourceData(newData, source) //替换
 						//原始数据打重复标签的id,   对比数据id的记录
 						repeat_idMap["_id"] = StringTOBsonId(source.id)
 						merge_idMap["_id"] = StringTOBsonId(info.id)
+						if IdType {
+							repeat_idMap["_id"] = source.id
+							merge_idMap["_id"] = info.id
+						}
 						repeat_id = info.id
 					}
 
@@ -338,14 +341,17 @@ func task(data []byte, mapInfo map[string]interface{}) {
 				} else { //高质量数据
 					basic_bool := basicDataScore(source, info)
 					if !basic_bool {
-						DM.replaceSourceData(info, source.id) //替换
+						DM.replaceSourceData(info, source) //替换
 						repeat_idMap["_id"] = StringTOBsonId(source.id)
+						if IdType {
+							repeat_idMap["_id"] = source.id
+						}
 						repeat_id = info.id
 					}
 				}
 
 
-				log.Println("最终结果","目标id:",info.id)
+				log.Println("最终结果","目标id:",repeat_idMap["_id"])
 
 
 				//重复数据打标签
@@ -398,266 +404,6 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	}
 }
 
-//支持历史更新
-func historyTask(data []byte, mapInfo map[string]interface{}) {
-
-	fmt.Println("开始取历史时间段")
-	defer util.Catch()
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
-			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
-		},
-	}
-	it := sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
-	minTime, maxTime := int64(0), int64(0)
-	for tmp := make(map[string]interface{}); it.Next(&tmp); {
-		//取出最大最小时间
-		info_time := tmp["comeintime"]
-		if Is_Sort {
-			info_time = tmp["publishtime"]
-		}
-		if minTime == 0 || maxTime == 0 && util.Int64All(info_time) != 0 {
-			minTime = util.Int64All(info_time)
-			maxTime = util.Int64All(info_time)
-		} else {
-			t := util.Int64All(info_time)
-			if t < minTime && t != 0 {
-				minTime = t
-			}
-			if t > maxTime && t != 0 {
-				maxTime = t
-			}
-		}
-	}
-	//时间不正确时
-	if minTime == 0 && maxTime == 0 {
-		log.Println("段数据区间 不符合")
-		return
-	}
-	fmt.Println("最小时间==", minTime, "最大时间==", maxTime)
-	gtid, lteid := util.BsonIdToSId(mapInfo["gtid"].(string)), util.BsonIdToSId(mapInfo["lteid"].(string))
-	fmt.Println(gtid, lteid)
-	HM = NewHistorymap(gtid, lteid, minTime, maxTime)
-
-	fmt.Println("开始历史数据判重")
-
-	defer util.Catch()
-	//区间id
-	sess_history := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess_history)
-	q_history := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
-			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
-		},
-	}
-	log.Println(mgo.DbName, extract, q_history)
-
-	//是否排序
-	it_history := sess_history.DB(mgo.DbName).C(extract).Find(&q_history).Iter()
-	if Is_Sort {
-		it_history = sess_history.DB(mgo.DbName).C(extract).Find(&q_history).Sort("publishtime").Iter()
-	}
-	updateExtract := [][]map[string]interface{}{}
-	log.Println("线程数:", threadNum)
-	pool := make(chan bool, threadNum)
-	wg := &sync.WaitGroup{}
-	n, repeateN := 0, 0
-	for tmp := make(map[string]interface{}); it_history.Next(&tmp); n++ {
-		if n%10000 == 0 {
-			log.Println("current:", n, tmp["_id"], "repeateN:", repeateN)
-		}
-		if util.IntAll(tmp["dataging"]) == 1 {
-			tmp = make(map[string]interface{})
-			continue
-		}
-
-		pool <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool
-				wg.Done()
-			}()
-			info := NewInfo(tmp)
-			if !LowHeavy { //是否进行低质量数据判重
-				if invalidData(info.buyer, info.projectname, info.projectcode, info.contractnumber) {
-					updateExtract = append(updateExtract, []map[string]interface{}{
-						map[string]interface{}{
-							"_id": tmp["_id"],
-						},
-						map[string]interface{}{
-							"$set": map[string]interface{}{
-								"repeat": -1, //无效数据标签
-							},
-						},
-					})
-					if len(updateExtract) > 500 {
-						mgo.UpSertBulk(extract, updateExtract...)
-						updateExtract = [][]map[string]interface{}{}
-					}
-					return
-				}
-			}
-			b, source, reason := HM.checkHistory(info)
-			if b { //有重复,生成更新语句,更新抽取和更新招标
-				if reason == "未判重记录" {
-					fmt.Println("未判重记录")
-					//把info的数据判重的标签更换,并新增字段
-					HM.replaceSourceData(info, info.id) //替换即添加
-					updateExtract = append(updateExtract, []map[string]interface{}{
-						map[string]interface{}{
-							"_id": tmp["_id"],
-						},
-						map[string]interface{}{
-							"$set": map[string]interface{}{
-								"repeat":   0,
-								"repeatid": -2,
-							},
-						},
-					})
-				} else {
-					repeateN++
-					var is_replace = false
-					var mergeArr = []int64{}                    //更改合并数组记录
-					var newData = &Info{}                       //更换新的数据池数据
-					var repeat_idMap = map[string]interface{}{} //记录判重的
-					var merge_idMap = map[string]interface{}{}  //记录合并的
-					repeat_idMap["_id"] = StringTOBsonId(info.id)
-					merge_idMap["_id"] = StringTOBsonId(source.id)
-					repeat_id := source.id
-					//以下合并相关
-					if isMerger {
-						basic_bool := basicDataScore(source, info)
-						if basic_bool {
-							//已原始数据为标准 - 对比数据打判重标签-
-							newData, mergeArr, is_replace = mergeDataFields(source, info)
-							HM.replaceSourceData(newData, source.id) //替换
-							//对比数据打重复标签的id,原始数据id的记录
-							repeat_idMap["_id"] = StringTOBsonId(info.id)
-							merge_idMap["_id"] = StringTOBsonId(source.id)
-							repeat_id = source.id
-						} else {
-							//已对比数据为标准 ,数据池的数据打判重标签
-							newData, mergeArr, is_replace = mergeDataFields(info, source)
-							HM.replaceSourceData(newData, source.id) //替换
-							//原始数据打重复标签的id,   对比数据id的记录
-							repeat_idMap["_id"] = StringTOBsonId(source.id)
-							merge_idMap["_id"] = StringTOBsonId(info.id)
-							repeat_id = info.id
-						}
-
-						merge_map := make(map[string]interface{}, 0)
-						if is_replace { //有过合并-更新数据
-							merge_map = map[string]interface{}{
-								"$set": map[string]interface{}{
-									"merge": newData.mergemap,
-								},
-							}
-
-							//更新合并后的数据
-							for _, value := range mergeArr {
-								if value == 0 {
-									merge_map["$set"].(map[string]interface{})["area"] = newData.area
-									merge_map["$set"].(map[string]interface{})["city"] = newData.city
-								} else if value == 1 {
-									merge_map["$set"].(map[string]interface{})["area"] = newData.area
-									merge_map["$set"].(map[string]interface{})["city"] = newData.city
-								} else if value == 2 {
-									merge_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
-								} else if value == 3 {
-									merge_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
-								} else if value == 4 {
-									merge_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
-								} else if value == 5 {
-									merge_map["$set"].(map[string]interface{})["budget"] = newData.budget
-								} else if value == 6 {
-									merge_map["$set"].(map[string]interface{})["winner"] = newData.winner
-								} else if value == 7 {
-									merge_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
-								} else if value == 8 {
-									merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
-								} else if value == 9 {
-									merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber
-								} else if value == 10 {
-									merge_map["$set"].(map[string]interface{})["publishtime"] = newData.publishtime
-								} else if value == 11 {
-									merge_map["$set"].(map[string]interface{})["agency"] = newData.agency
-								} else {
-								}
-							}
-							//模板数据更新
-							updateExtract = append(updateExtract, []map[string]interface{}{
-								merge_idMap,
-								merge_map,
-							})
-						}
-					} else { //高质量数据
-
-						basic_bool := basicDataScore(source, info)
-						if !basic_bool {
-
-							HM.replaceSourceData(info, source.id) //替换
-							repeat_idMap["_id"] = StringTOBsonId(source.id)
-							repeat_id = info.id
-						}
-					}
-
-					//重复数据打标签
-					updateExtract = append(updateExtract, []map[string]interface{}{
-						repeat_idMap,
-						map[string]interface{}{
-							"$set": map[string]interface{}{
-								"repeat":        1,
-								"repeat_reason": reason,
-								"repeat_id":     repeat_id,
-							},
-						},
-					})
-
-				}
-			}
-		}(tmp)
-		if len(updateExtract) > 500 {
-			mgo.UpSertBulk(extract, updateExtract...)
-			updateExtract = [][]map[string]interface{}{}
-		}
-		tmp = make(map[string]interface{})
-	}
-	wg.Wait()
-	if len(updateExtract) > 0 {
-		mgo.UpSertBulk(extract, updateExtract...)
-		//mgo.UpdateBulk(bidding, updateBidding...)
-	}
-	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"])
-
-	//任务完成,开始发送广播通知下面节点
-	if n > repeateN && mapInfo["stop"] == nil {
-		for _, to := range nextNode {
-			sid, _ := mapInfo["gtid"].(string)
-			eid, _ := mapInfo["lteid"].(string)
-			key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
-			by, _ := json.Marshal(map[string]interface{}{
-				"gtid":  sid,
-				"lteid": eid,
-				"stype": util.ObjToString(to["stype"]),
-				"key":   key,
-			})
-			addr := &net.UDPAddr{
-				IP:   net.ParseIP(to["addr"].(string)),
-				Port: util.IntAll(to["port"]),
-			}
-			node := &udpNode{by, addr, time.Now().Unix(), 0}
-			udptaskmap.Store(key, node)
-			udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
-		}
-	}
-}
-
 //定时任务
 func timedTaskDay() {
 	log.Println("部署定时任务")
@@ -816,7 +562,7 @@ func timedTaskOnce() {
 							"dataging":      0,
 						},
 					},
-			})
+				})
 			}else {
 				updateExtract = append(updateExtract, []map[string]interface{}{
 					map[string]interface{}{
@@ -998,7 +744,7 @@ func basicDataScore(v *Info, info *Info) bool {
 	//先判断level
 	if dict_v != nil {
 		v_level := util.ObjToString(dict_v["level"])
-		if v_level == "中央" {
+		if v_level == "国家" {
 			v_score = 4
 		} else if v_level == "省级" {
 			v_score = 3
@@ -1014,7 +760,7 @@ func basicDataScore(v *Info, info *Info) bool {
 
 	if dict_info != nil {
 		info_level := util.ObjToString(dict_info["level"])
-		if info_level == "中央" {
+		if info_level == "国家" {
 			info_score = 4
 		} else if info_level == "省级" {
 			info_score = 3
@@ -1039,11 +785,11 @@ func basicDataScore(v *Info, info *Info) bool {
 	//判断sitetype
 	if dict_v != nil {
 		v_sitetype := util.ObjToString(dict_v["sitetype"])
-		if v_sitetype == "政府采购" || v_sitetype == "政府门户" {
+		if v_sitetype == "政府采购" {
 			v_score = 4
 		} else if v_sitetype == "公共资源" {
 			v_score = 3
-		} else if v_sitetype == "官方网站" {
+		} else if v_sitetype == "官方网站"|| v_sitetype == "政府门户" {
 			v_score = 2
 		} else if v_sitetype == "社会公共招标平台" || v_sitetype == "企业招标平台" {
 			v_score = 1
@@ -1055,11 +801,11 @@ func basicDataScore(v *Info, info *Info) bool {
 
 	if dict_info != nil {
 		info_sitetype := util.ObjToString(dict_info["sitetype"])
-		if info_sitetype == "政府采购" || info_sitetype == "政府门户" {
+		if info_sitetype == "政府采购" {
 			info_score = 4
 		} else if info_sitetype == "公共资源" {
 			info_score = 3
-		} else if info_sitetype == "官方网站" {
+		} else if info_sitetype == "官方网站"|| info_sitetype == "政府门户" {
 			info_score = 2
 		} else if info_sitetype == "社会公共招标平台" || info_sitetype == "企业招标平台" {
 			info_score = 1
@@ -1076,6 +822,17 @@ func basicDataScore(v *Info, info *Info) bool {
 		return false
 	}
 
+	if v_score == info_score {//同sitetype 情况下   分析weight
+		v_weight := util.IntAll(dict_v["weight"])
+		info_weight := util.IntAll(dict_info["weight"])
+		if v_weight>info_weight {
+			return true
+		}
+		if info_weight>v_weight {
+			return false
+		}
+	}
+
 	//网站评估
 	m, n := 0, 0
 	if v.projectname != "" {