浏览代码

融合相关细节修改-性能-全量

apple 4 年之前
父节点
当前提交
cccb6b9b1f

+ 5 - 3
udpfilterdup/src/config.json

@@ -5,8 +5,8 @@
         "addr": "192.168.3.207:27092",
         "pool": 10,
         "db": "zhengkun",
-        "extract": "test",
-        "extract_back": "test",
+        "extract": "all_01_02_fusiontest",
+        "extract_back": "all_01_02_fusiontest",
         "site": {
             "dbname": "zhengkun",
             "coll": "site"
@@ -15,7 +15,7 @@
     "task_mongodb": {
         "task_addrName": "192.168.3.207:27092",
         "task_dbName": "zhengkun",
-        "task_collName": "test",
+        "task_collName": "zk_data",
         "pool": 10
     },
     "jkmail": {
@@ -24,6 +24,8 @@
     },
     "nextNode": [
     ],
+    "userName": "",
+    "passWord": "",
     "threads": 1,
     "isMerger": false,
     "lowHeavy":true,

+ 15 - 5
udpfilterdup/src/main.go

@@ -52,6 +52,7 @@ var (
 	lteid	string							//历史增量属性
 	IsFull		   bool								//是否全量
 	updatelock 		sync.Mutex         //锁4
+	userName,passWord 	string				//mongo -用户密码
 
 )
 
@@ -68,16 +69,23 @@ func init() {
 
 	util.ReadConfig(&Sysconfig)
 
+	userName = util.ObjToString(Sysconfig["userName"])
+	passWord = util.ObjToString(Sysconfig["passWord"])
+
+	log.Println("集群用户密码:",userName,passWord)
+
 	task_mconf := Sysconfig["task_mongodb"].(map[string]interface{})
 	task_mgo = &MongodbSim{
 		MongodbAddr: task_mconf["task_addrName"].(string),
 		DbName:      task_mconf["task_dbName"].(string),
 		Size:        util.IntAllDef(task_mconf["task_pool"], 10),
+		UserName:	 userName,
+		Password:	 passWord,
+
 	}
 	task_mgo.InitPool()
 	task_collName = task_mconf["task_collName"].(string)
 
-
 	nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
 	mconf = Sysconfig["mongodb"].(map[string]interface{})
 	mgo = &MongodbSim{
@@ -108,6 +116,7 @@ func init() {
 	timingSpanDay = util.Int64All(Sysconfig["timingSpanDay"])
 	timingPubScope = util.Int64All(Sysconfig["timingPubScope"])
 
+
 	//站点配置
 	site := mconf["site"].(map[string]interface{})
 	SiteMap = make(map[string]map[string]interface{}, 0)
@@ -130,8 +139,7 @@ func init() {
 }
 
 
-func main() {
-
+func mainT() {
 	go checkMapJob()
 	updport := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
@@ -165,7 +173,8 @@ func main() {
 }
 
 //测试组人员使用
-func mainT() {
+func main() {
+
 
 	if TimingTask {
 		go historyTaskDay()
@@ -185,7 +194,7 @@ func mainT() {
 
 		log.Println("测试:全量判重-准备开始")
 		task([]byte{}, mapinfo)
-
+		
 		time.Sleep(99999 * time.Hour)
 	}
 }
@@ -650,6 +659,7 @@ func historyTaskDay() {
 		end:=time.Now().Unix()
 
 		log.Println(gtid,lteid)
+
 		if end-start<60*5 {
 			log.Println("睡眠.............")
 			time.Sleep(5 * time.Minute)

+ 13 - 0
udpfilterdup/src/mgo.go

@@ -125,6 +125,8 @@ type MongodbSim struct {
 	Ctx      context.Context
 	ShortCtx context.Context
 	pool     chan bool
+	UserName string
+	Password string
 }
 
 func (m *MongodbSim) GetMgoConn() *MgoSess {
@@ -146,6 +148,17 @@ func (m *MongodbSim) InitPool() {
 	opts.ApplyURI("mongodb://" + m.MongodbAddr)
 	opts.SetMaxPoolSize(uint64(m.Size))
 	m.pool = make(chan bool, m.Size)
+
+	if m.UserName !="" && m.Password !="" {
+		cre := options.Credential{
+			Username:m.UserName,
+			Password:m.Password,
+		}
+		opts.SetAuth(cre)
+	}
+
+
+
 	opts.SetMaxConnIdleTime(2 * time.Hour)
 	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
 	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)

+ 1 - 0
udpfilterdup/src/udptaskmap.go

@@ -23,6 +23,7 @@ type udpNode struct {
 }
 
 func checkMapJob() {
+
 	//阿里云内网无法发送邮件
 	jkmail, _ := Sysconfig["jkmail"].(map[string]interface{})
 	if jkmail != nil {

+ 54 - 9
udpfusion/src/config.json

@@ -5,25 +5,70 @@
     "dbName": "zhengkun",
     "collName": "fusion_test",
     "pool": 10,
+    "mgo_pool": 3,
     "site": {
       "dbname": "zhengkun",
       "coll": "site"
     }
   },
-  "notFusionKey": {
-    "repeat_reason":0,
-    "repeat_id":0,
-    "repeat_ids":0,
-    "dataging": 0
+  "es": {
+    "addr": "http://127.0.0.1:12003",
+    "size":50,
+    "es_pool": 10,
+    "index": "zktest",
+    "type": "zktest"
   },
-  "fusion_coll_name":"fusiondata",
-  "record_coll_name":"recorddata",
-  "":"",
+  "fusion_coll_name":"zk_fusiondata",
+  "record_coll_name":"zk_recorddata",
   "jkmail": {
     "to": "zhengkun@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail"
   },
   "nextNode": [
 
-  ]
+  ],
+  "notFusionKey": {
+    "title":0,
+    "detail":0,
+    "href":0,
+    "contenthtml": 0,
+    "summary": 0,
+    "publishtime": 0,
+    "spidercode": 0,
+    "site": 0,
+    "channel": 0,
+    "comeintime": 0,
+    "area_city_district": 0,
+    "areaval": 0,
+    "infoformat": 0,
+    "publishdept": 0,
+    "jsondata": 0,
+    "exweigth": 0,
+    "jsoncontent": 0,
+    "sourcehref": 0,
+    "sourcewebsite": 0,
+    "projectname": 0,
+    "bidstatus": 0,
+    "projecthref": 0,
+    "buyer_info": 0,
+    "agency_info": 0,
+    "dataging": 0,
+    "extracttype": 0,
+    "description": 0,
+    "extract_state": 0,
+    "flag_buyer": 0,
+    "check_sensitive": 0,
+    "check_sensitive2": 0,
+    "keywords": 0,
+    "pre_extracttype": 0,
+    "s_sha": 0,
+    "type": 0,
+    "tagname": 0,
+    "kvtext": 0,
+    "repeat": 0,
+    "repeat_id": 0,
+    "repeat_ids": 0,
+    "repeat_reason": 0,
+    "isflow": 0
+  }
 }

+ 270 - 0
udpfusion/src/fusionAddData.go

@@ -0,0 +1,270 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	qu "qfw/util"
+	"qfw/util/elastic"
+	"strings"
+	"sync"
+	"time"
+)
+
+// startTaskAddData runs the incremental fusion pass over the documents of
+// coll_name whose _id lies in (mapInfo["gtid"], mapInfo["lteid"]].
+//
+// Stages, in order:
+//  1. Scan the id range and split the ids into duplicates (repeat == 1,
+//     each paired with its repeat_id) and non-duplicates.
+//  2. For every duplicate, append its id to the "allids" field of the ES
+//     document keyed by its source id, creating the document when missing.
+//  3. Build one fusion group per non-duplicate id, plus one per source id
+//     whose ES document already existed before this run.
+//  4. Fuse the groups with at most 3 concurrent workers, sleep 30 seconds,
+//     then notify the downstream nodes via taskSendFusionUdp.
+//
+// data is not used. mapInfo["gtid"] and mapInfo["lteid"] must be strings;
+// the type assertions panic otherwise (presumably recovered by the deferred
+// qu.Catch — confirm).
+func startTaskAddData(data []byte, mapInfo map[string]interface{}) {
+	log.Println("开始增量融合流程")
+	defer qu.Catch()
+
+	// Name used both as ES index and ES type for the "allids" documents.
+	const esName = "allzktest"
+
+	// Restrict the scan to ids in (gtid, lteid].
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
+			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
+		},
+	}
+	log.Println("查询条件:", q)
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
+
+	// Groups of ids that will be fused together.
+	fusionDataGroupArr := make([][]string, 0)
+	// updateFusionMap: source ids whose ES document existed before this run.
+	// curFusionKeyMap: source ids whose ES document was created by this run.
+	updateFusionMap, curFusionKeyMap := make(map[string]interface{}), make(map[string]interface{})
+	// repeatArr[i] is a duplicate id and sourceArr[i] the id it duplicates.
+	norepeatArr, repeatArr, sourceArr, index := make([]string, 0), make([]string, 0), make([]string, 0), 0
+
+	start := int(time.Now().Unix())
+	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
+		if index%10000 == 0 {
+			log.Println("current index", index, tmp["_id"])
+		}
+		tmpId := BsonTOStringId(tmp["_id"])
+		repeat := qu.IntAll(tmp["repeat"])
+		sourceid := qu.ObjToString(tmp["repeat_id"])
+		if repeat == 1 {
+			repeatArr = append(repeatArr, tmpId)
+			sourceArr = append(sourceArr, sourceid)
+		} else {
+			// Fixed: this used to append to repeatArr, which replaced the
+			// non-duplicate list with the duplicate ids on every iteration.
+			norepeatArr = append(norepeatArr, tmpId)
+		}
+		// Decode each document into a fresh map (presumably so keys of the
+		// previous document cannot linger — confirm against the driver).
+		tmp = make(map[string]interface{})
+	}
+
+	log.Println("task first:", index, len(norepeatArr), "+", len(repeatArr))
+	log.Println("遍历数据用时:", int(time.Now().Unix())-start, "秒")
+
+	// Stage 2: record every duplicate id under its source id in ES.
+	start = int(time.Now().Unix())
+	// NOTE(review): hard-coded ES address and index; the rest of the package
+	// is configured through initEs (esIndex/esType). This call presumably
+	// replaces the client set up there — confirm before using this task
+	// alongside startTaskFullData.
+	elastic.InitElasticSize("http://192.168.3.11:9800", 10)
+	for i := 0; i < len(repeatArr); i++ {
+		repeatid := repeatArr[i]
+		sourceid := sourceArr[i]
+		key := fmt.Sprintf("%s", sourceid)
+		dataArr := *elastic.GetById(esName, esName, sourceid)
+		if len(dataArr) > 0 {
+			// The ES document exists. Unless this run created it, the source
+			// belongs to an earlier id range and its group must be rebuilt.
+			if curFusionKeyMap[key] == nil {
+				updateFusionMap[key] = ""
+			}
+			allids := qu.ObjToString(dataArr[0]["allids"]) + "," + repeatid
+			updateStr := `ctx._source.allids="` + allids + `"`
+			if !elastic.Update(esName, esName, sourceid, updateStr) {
+				log.Println("es更新异常", repeatid, sourceid)
+			}
+		} else {
+			// No ES document yet: create one that starts with this duplicate.
+			savetmp := make(map[string]interface{}, 0)
+			savetmp["allids"] = repeatid
+			savetmp["_id"] = StringTOBsonId(sourceid)
+			if !elastic.Save(esName, esName, savetmp) {
+				log.Println("es保存异常", repeatid, sourceid)
+			}
+			curFusionKeyMap[key] = ""
+		}
+	}
+
+	log.Println("前置索引准备完毕... ...", "耗时:", int(time.Now().Unix())-start, "秒")
+
+	start = int(time.Now().Unix())
+	log.Println("开始数据分组... ... ... ...")
+	log.Println("开始数据分组... ... ... ...")
+	log.Println("开始数据分组... ... ... ...")
+
+	// Stage 3a: one group per non-duplicate id of the current range. The ES
+	// document only lists the duplicates, so the source id is appended.
+	for i := 0; i < len(norepeatArr); i++ {
+		sourceid := norepeatArr[i]
+		dataArr := *elastic.GetById(esName, esName, sourceid)
+		if len(dataArr) > 0 {
+			arr := strings.Split(qu.ObjToString(dataArr[0]["allids"]), ",")
+			arr = append(arr, sourceid)
+			fusionDataGroupArr = append(fusionDataGroupArr, arr)
+		} else {
+			fusionDataGroupArr = append(fusionDataGroupArr, []string{sourceid})
+		}
+	}
+	// Stage 3b: one group per source id that predates the current range.
+	for k := range updateFusionMap {
+		sourceid := qu.ObjToString(k)
+		dataArr := *elastic.GetById(esName, esName, sourceid)
+		if len(dataArr) > 0 {
+			arr := strings.Split(qu.ObjToString(dataArr[0]["allids"]), ",")
+			arr = append(arr, sourceid)
+			fusionDataGroupArr = append(fusionDataGroupArr, arr)
+		} else {
+			log.Println("融合表更新,查询Es异常:", sourceid)
+		}
+	}
+
+	log.Println("最终待融合分组数量:", len(fusionDataGroupArr))
+	log.Println("分组完毕数据用时:", int(time.Now().Unix())-start, "秒")
+	log.Println("********************分割线********************")
+	log.Println("********************分割线********************")
+	log.Println("********************分割线********************")
+
+	log.Println("开始处理分组融合... ... ... ...")
+	log.Println("开始处理分组融合... ... ... ...")
+	log.Println("开始处理分组融合... ... ... ...")
+
+	start = int(time.Now().Unix())
+	// Stage 4: fuse the groups; pool caps the number of running workers at 3.
+	pool := make(chan bool, 3)
+	wg := &sync.WaitGroup{}
+
+	for i := 0; i < len(fusionDataGroupArr); i++ {
+		fusionArr := fusionDataGroupArr[i]
+		pool <- true
+		wg.Add(1)
+		go func(fusionArr []string, i int) {
+			defer func() {
+				<-pool
+				wg.Done()
+			}()
+			if (i+1)%500 == 0 {
+				log.Println("构建第", i+1, "组数据...", "数量:", len(fusionArr), fusionArr)
+			}
+			weight := NewWeightData(fusionArr)
+			// Rank the group members and pick the template record.
+			weight.analyzeBuildStandardData()
+
+			if len(fusionArr) <= 1 {
+				saveFusionData, saveRecordData := weight.dealWithAddFusionStruct()
+				saveid := mgo.Save(fusion_coll_name, saveFusionData)
+				// The record row reuses the _id of the fusion row.
+				saveRecordData["_id"] = saveid
+				mgo.Save(record_coll_name, saveRecordData)
+			}
+			// NOTE(review): groups with more than one id are analysed but
+			// nothing is persisted for them; the former update/add branch was
+			// commented out and has been dropped here (see version history).
+		}(fusionArr, i)
+	}
+
+	wg.Wait()
+
+	log.Println("fusion is over :", len(fusionDataGroupArr), "用时:", int(time.Now().Unix())-start, "秒")
+	log.Println("睡眠30秒,然后在发广播")
+	time.Sleep(30 * time.Second)
+	// Task finished: broadcast to the downstream nodes.
+	taskSendFusionUdp(mapInfo)
+
+}

+ 261 - 0
udpfusion/src/fusionFullData.go

@@ -0,0 +1,261 @@
+package main
+
+import (
+	"log"
+	qu "qfw/util"
+	"qfw/util/elastic"
+	"strings"
+	"sync"
+	"time"
+)
+
+
+
+// mergeFusionIds returns the comma-separated union of the id lists existing
+// and added, keeping first-seen order and dropping empty and repeated ids.
+func mergeFusionIds(existing, added string) string {
+	seen := make(map[string]struct{})
+	merged := make([]string, 0)
+	for _, list := range []string{existing, added} {
+		for _, id := range strings.Split(list, ",") {
+			if id == "" {
+				continue
+			}
+			if _, dup := seen[id]; dup {
+				continue
+			}
+			seen[id] = struct{}{}
+			merged = append(merged, id)
+		}
+	}
+	return strings.Join(merged, ",")
+}
+
+// upsertFusionIdsToEs stores cur_ids under the ES document es_id: it merges
+// them into the existing "allids" field, or creates the document with empty
+// template_id and fusion_id when there is none yet.
+func upsertFusionIdsToEs(es_id string, cur_ids string) {
+	if es_id == "" || cur_ids == "" {
+		log.Println("异常", es_id, cur_ids)
+		return
+	}
+	dataArr := *elastic.GetById(esIndex, esType, es_id)
+	if len(dataArr) > 0 {
+		// cur_ids starts with es_id itself, so a plain concatenation would
+		// repeat the source id for every additional batch; merge instead.
+		allids := mergeFusionIds(qu.ObjToString(dataArr[0]["allids"]), cur_ids)
+		updateStr := `ctx._source.allids="` + allids + `"`
+		elastic.Update(esIndex, esType, es_id, updateStr)
+		return
+	}
+	savetmp := make(map[string]interface{}, 0)
+	savetmp["allids"] = cur_ids
+	savetmp["_id"] = StringTOBsonId(es_id)
+	savetmp["template_id"] = ""
+	savetmp["fusion_id"] = ""
+	elastic.Save(esIndex, esType, savetmp)
+}
+
+// flushFusionIdsBatch upserts every source-id -> id-list entry of batch into
+// ES, running at most cap(pool) upserts at a time, and returns once all of
+// them have finished.
+func flushFusionIdsBatch(batch map[string]string, pool chan bool, wg *sync.WaitGroup) {
+	for k, v := range batch {
+		pool <- true
+		wg.Add(1)
+		go func(es_id string, cur_ids string) {
+			defer func() {
+				<-pool
+				wg.Done()
+			}()
+			upsertFusionIdsToEs(es_id, cur_ids)
+		}(k, v)
+	}
+	wg.Wait()
+}
+
+// startTaskFullData runs the full fusion pass over the documents of coll_name
+// whose _id lies in (mapInfo["gtid"], mapInfo["lteid"]].
+//
+// Stages, in order:
+//  1. Scan the id range and split the ids into duplicates (repeat == 1,
+//     each paired with its repeat_id) and non-duplicates.
+//  2. In batches of 1000 duplicates, write "source id -> all ids" documents
+//     to ES (esIndex/esType) with up to es_pool concurrent requests.
+//  3. Look every non-duplicate id up in ES to obtain its fusion group.
+//  4. After a 30 second pause, fuse the groups with up to mgo_pool workers,
+//     saving the fusion and record rows and writing the fusion/template ids
+//     back to ES; then sleep 30 seconds and call taskSendFusionUdp.
+//
+// data is not used. mapInfo["gtid"] and mapInfo["lteid"] must be strings;
+// the type assertions panic otherwise (presumably recovered by the deferred
+// qu.Catch — confirm).
+func startTaskFullData(data []byte, mapInfo map[string]interface{}) {
+
+	log.Println("开始全量融合流程")
+	defer qu.Catch()
+	// Restrict the scan to ids in (gtid, lteid].
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
+			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
+		},
+	}
+	log.Println("查询条件:", q)
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
+
+	// Fusion groups keyed by the id of their non-duplicate (source) document.
+	fusionDataGroupMap := make(map[string][]string, 0)
+	// repeatArr[i] is a duplicate id and sourceArr[i] the id it duplicates.
+	norepeatArr, repeatArr, sourceArr, index := make([]string, 0), make([]string, 0), make([]string, 0), 0
+
+	start := int(time.Now().Unix())
+	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
+		if index%10000 == 0 {
+			log.Println("current index", index, tmp["_id"])
+		}
+		tmpId := BsonTOStringId(tmp["_id"])
+		repeat := qu.IntAll(tmp["repeat"])
+		sourceid := qu.ObjToString(tmp["repeat_id"])
+		if repeat == 1 {
+			repeatArr = append(repeatArr, tmpId)
+			sourceArr = append(sourceArr, sourceid)
+		} else {
+			norepeatArr = append(norepeatArr, tmpId)
+		}
+
+		tmp = make(map[string]interface{})
+	}
+
+	log.Println("task first:", index, len(norepeatArr), "+", len(repeatArr))
+	log.Println("遍历数据用时:", int(time.Now().Unix())-start, "秒")
+
+	// Stage 2: index the duplicates under their source id, batch by batch.
+	start = int(time.Now().Unix())
+
+	pool_es := make(chan bool, es_pool)
+	wg_es := &sync.WaitGroup{}
+	tmpEsMap := make(map[string]string, 0)
+	isGroupNum := 1000
+	for i := 0; i < len(repeatArr); i++ {
+		if i%10000 == 0 {
+			log.Println("curent index ", i)
+		}
+		if i%isGroupNum == 0 && i != 0 {
+			// A new batch starts: flush the previous one first. Keys are
+			// unique within a batch, so no two workers touch the same
+			// ES document concurrently.
+			flushFusionIdsBatch(tmpEsMap, pool_es, wg_es)
+			tmpEsMap = make(map[string]string, 0)
+		}
+		repeatid := repeatArr[i]
+		sourceid := sourceArr[i]
+		if ids := tmpEsMap[sourceid]; ids != "" {
+			tmpEsMap[sourceid] = ids + "," + repeatid
+		} else {
+			// The list of a group always starts with the source id itself.
+			tmpEsMap[sourceid] = sourceid + "," + repeatid
+		}
+	}
+
+	// Flush whatever is left from the last, possibly partial, batch.
+	flushFusionIdsBatch(tmpEsMap, pool_es, wg_es)
+
+	log.Println("前置索引准备完毕......耗时:", int(time.Now().Unix())-start, "秒")
+
+	start = int(time.Now().Unix())
+	log.Println("开始数据分组... ... ... ...")
+	log.Println("开始数据分组... ... ... ...")
+	log.Println("开始数据分组... ... ... ...")
+
+	// Stage 3: resolve the group of every non-duplicate id concurrently.
+	for i := 0; i < len(norepeatArr); i++ {
+		if i%10000 == 0 {
+			log.Println("cur index ", i, norepeatArr[i])
+		}
+		sourceid := norepeatArr[i]
+		pool_es <- true
+		wg_es.Add(1)
+		go func(sourceid string) {
+			defer func() {
+				<-pool_es
+				wg_es.Done()
+			}()
+			// Without an ES document the id forms a group on its own.
+			arr := []string{sourceid}
+			dataArr := *elastic.GetById(esIndex, esType, sourceid)
+			if len(dataArr) > 0 {
+				arr = strings.Split(qu.ObjToString(dataArr[0]["allids"]), ",")
+			}
+			// fusionDataGroupMap is shared by all workers.
+			updatelock.Lock()
+			fusionDataGroupMap[sourceid] = arr
+			updatelock.Unlock()
+		}(sourceid)
+	}
+	wg_es.Wait()
+
+	log.Println("最终待融合分组数量:", len(fusionDataGroupMap))
+	log.Println("分组完毕数据用时:", int(time.Now().Unix())-start, "秒")
+	log.Println("********************分割线********************")
+	log.Println("********************分割线********************")
+	log.Println("********************分割线********************")
+
+	log.Println("开始进行正式分组融合......先睡秒30秒")
+	time.Sleep(30 * time.Second)
+
+	start = int(time.Now().Unix())
+	// Stage 4: fuse the groups; pool_mgo caps the number of running workers.
+	pool_mgo := make(chan bool, mgo_pool)
+	wg_mgo := &sync.WaitGroup{}
+
+	fusionIndex := 0
+	for k, v := range fusionDataGroupMap {
+		fusionIndex++
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(sourceid string, fusionArr []string, fusionIndex int) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			if fusionIndex%10000 == 0 {
+				log.Println("数据融合数量:", fusionIndex, sourceid)
+			}
+			weight := NewWeightData(fusionArr)
+			weight.analyzeBuildStandardData()
+			if len(fusionArr) <= 1 {
+				// Single-id group: no ES document exists yet, so create it.
+				saveFusionData, saveRecordData := weight.dealWithAddFusionStruct()
+				saveid := mgo.Save(fusion_coll_name, saveFusionData)
+				// The record row reuses the _id of the fusion row.
+				saveRecordData["_id"] = saveid
+				mgo.Save(record_coll_name, saveRecordData)
+
+				savetmp := make(map[string]interface{}, 0)
+				fusionid := BsonTOStringId(saveid)
+				savetmp["_id"] = StringTOBsonId(sourceid)
+				savetmp["allids"] = sourceid
+				savetmp["template_id"] = sourceid
+				savetmp["fusion_id"] = fusionid
+				elastic.Save(esIndex, esType, savetmp)
+			} else {
+				saveFusionData, saveRecordData := weight.dealWithMultipleAddFusionStruct()
+				saveid := mgo.Save(fusion_coll_name, saveFusionData)
+				saveRecordData["_id"] = saveid
+				mgo.Save(record_coll_name, saveRecordData)
+
+				// Write the fusion id and template id back to the existing
+				// ES document; the script has the form
+				// ctx._source.template_id="...";ctx._source.fusion_id="...".
+				fusion_id, template_id := BsonTOStringId(saveid), qu.ObjToString(saveFusionData["fusion_templateid"])
+				updateStr1 := `ctx._source.template_id="` + template_id + `";`
+				updateStr2 := `ctx._source.fusion_id="` + fusion_id + `"`
+				elastic.Update(esIndex, esType, sourceid, updateStr1+updateStr2)
+			}
+		}(k, v, fusionIndex)
+	}
+
+	wg_mgo.Wait()
+
+	log.Println("fusion is over :", fusionIndex, len(fusionDataGroupMap), "用时:", int(time.Now().Unix())-start, "秒")
+	log.Println("睡眠30秒,然后在发广播")
+
+	time.Sleep(30 * time.Second)
+
+	// Task finished: broadcast to the downstream nodes.
+	taskSendFusionUdp(mapInfo)
+
+}

+ 43 - 272
udpfusion/src/main.go

@@ -2,70 +2,81 @@ package main
 
 import (
 	"encoding/json"
-	"go.mongodb.org/mongo-driver/bson/primitive"
 	"log"
 	mu "mfw/util"
 	"net"
 	"os"
-	"qfw/common/src/qfw/util"
 	qu "qfw/util"
 	"sync"
 	"time"
+	"qfw/util/elastic"
 )
 
 
 var (
-	sysconfig   		map[string]interface{} 		//配置文件
-	mgo         		*MongodbSim            		//mongodb操作对象
-	udpclient    		mu.UdpClient             	//udp对象
-	nextNode     		[]map[string]interface{} 	//下节点数组
-	coll_name 	 		string
-	fusion_coll_name	string
-	record_coll_name 	string   					//表名
-	NoNeedFusionKey 	map[string]interface{}   	//不需要融合的key
-	UpdateFusion		*updateFusionInfo
-	UpdateRecord		*updateRecordInfo			//更新池
-	siteJsonData		map[string]string			//站点池
+	sysconfig   						map[string]interface{} 		//配置文件
+	mgo         						*MongodbSim            		//mongodb操作对象
+	udpclient    						mu.UdpClient             	//udp对象
+	nextNode     						[]map[string]interface{} 	//下节点数组
+	coll_name 	 						string
+	fusion_coll_name,record_coll_name	string						//新增表名
+	NoNeedFusionKey 					map[string]interface{}   	//不需要融合的key
+	UpdateFusion						*updateFusionInfo
+	UpdateRecord						*updateRecordInfo			//更新池
+	siteJsonData						map[string]string			//站点池
+	esIndex,esType					    string						//索引-类型
+	mgo_pool,es_pool					int
+	updatelock 							sync.Mutex
 )
 
 
 
 func initMgoAndSite()  {
-	mconf := sysconfig["mongodb"].(map[string]interface{})
-	log.Println(mconf)
+	mgoconf := sysconfig["mongodb"].(map[string]interface{})
 	mgo = &MongodbSim{
-		MongodbAddr: mconf["addrName"].(string),
-		DbName:      mconf["dbName"].(string),
-		Size:        qu.IntAllDef(mconf["pool"], 10),
+		MongodbAddr: mgoconf["addrName"].(string),
+		DbName:      mgoconf["dbName"].(string),
+		Size:        qu.IntAllDef(mgoconf["pool"], 10),
 	}
 	mgo.InitPool()
 
 
-	coll_name = mconf["collName"].(string)
+	coll_name = mgoconf["collName"].(string)
+	mgo_pool = qu.IntAllDef(mgoconf["mgo_pool"], 3)
 	fusion_coll_name = sysconfig["fusion_coll_name"].(string)
 	record_coll_name = sysconfig["record_coll_name"].(string)
 	NoNeedFusionKey = sysconfig["notFusionKey"].(map[string]interface{})
 
 
-	site := mconf["site"].(map[string]interface{})
+	site := mgoconf["site"].(map[string]interface{})
 	siteJsonData = make(map[string]string, 0)
 	start := int(time.Now().Unix())
 	sess_site := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess_site)
 	res_site := sess_site.DB(site["dbname"].(string)).C(site["coll"].(string)).Find(map[string]interface{}{}).Sort("_id").Iter()
 	for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
-		siteJsonData[util.ObjToString(site_dict["site"])] = util.ObjToString(site_dict["sitetype"])
+		siteJsonData[qu.ObjToString(site_dict["site"])] = qu.ObjToString(site_dict["sitetype"])
 	}
 	log.Printf("new站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(siteJsonData))
 }
 
+// initEs reads the "es" section of sysconfig, initialises the elastic
+// package with the configured address and size, and fills the package-level
+// es_pool, esIndex and esType.
+//
+// The "es" section must be a map and its "addr", "index" and "type" entries
+// must be strings; the type assertions panic otherwise. "size" and "es_pool"
+// go through qu.IntAllDef with 50 and 10 (presumably the fallback values
+// used when the key is missing — confirm against qu).
+func initEs()  {
+	// Initialise the ES client.
+	esconf := sysconfig["es"].(map[string]interface{})
+	addr:=esconf["addr"].(string)
+	size:=qu.IntAllDef(esconf["size"], 50)
+	elastic.InitElasticSize(addr,size)
+	es_pool = qu.IntAllDef(esconf["es_pool"], 10)
+	esIndex = esconf["index"].(string)
+	esType = esconf["type"].(string)

-
+}
 
 func init() {
 	//加载配置文件
 	qu.ReadConfig(&sysconfig)
 	initMgoAndSite()
+	initEs()
 
 	//更新池
 	UpdateFusion = newUpdateFusionPool()
@@ -76,8 +87,6 @@ func init() {
 
 
 
-
-
 	log.Println("采用udp模式")
 }
 
@@ -93,10 +102,11 @@ func mainT() {
 
 //快速测试使用
 func main() {
-
-
-	sid := "100000000000000000000000"
-	eid := "900000000000000000000000"
+	//0101-0301
+	//sid := "5fedf5800000000000000000"
+	//eid := "603bbe000000000000000000"
+	sid := "1fedf5800000000000000000"
+	eid := "903bbe000000000000000000"
 	//log.Println(sid, "---", eid)
 	mapinfo := map[string]interface{}{}
 	if sid == "" || eid == "" {
@@ -105,249 +115,10 @@ func main() {
 	}
 	mapinfo["gtid"] = sid
 	mapinfo["lteid"] = eid
-	startTask([]byte{}, mapinfo)
+	startTaskFullData([]byte{}, mapinfo) //全量
 	time.Sleep(99999 * time.Hour)
 
 }
-
-
-
-
-
-
-//融合具体方法
-func startTask(data []byte, mapInfo map[string]interface{}) {
-	log.Println("开始融合流程")
-	defer qu.Catch()
-	//区间id
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
-			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
-		},
-	}
-	log.Println("查询条件:",q)
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
-	//编译不同的融合组,如何划分组
-	/***********************/
-	/***********************/
-	/***y
-	********************/
-	/***********************/
-	fusionDataGroupArr := make([][]string,0) 			//待融合组
-	addOrUpdateArr := make([]bool,0) 					//新增-bool-记录-组新增,组更新
-	infoFusionArr := make([]map[string]interface{},0) 	//记录取融合表的数据
-
-	repeatArr,sourceArr,index := make([]string,0),make([]string,0),0 //重复数据组
-	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
-		if index%1000 == 0 {
-			log.Println("current index",index,tmp["_id"])
-		}
-		tmpId:=BsonTOStringId(tmp["_id"])
-		repeat:=qu.IntAll(tmp["repeat"])
-		sourceid:=qu.ObjToString(tmp["repeat_id"])
-		if repeat==1 {
-			repeatArr = append(repeatArr,tmpId)
-			sourceArr = append(sourceArr,sourceid)
-		}else {
-			fusionDataGroupArr = append(fusionDataGroupArr,[]string{tmpId})
-			addOrUpdateArr = append(addOrUpdateArr,false)
-			infoFusionArr = append(infoFusionArr, map[string]interface{}{})
-		}
-		tmp = make(map[string]interface{})
-	}
-
-	log.Println("task first:",index,len(fusionDataGroupArr),"+",len(repeatArr))
-
-	//根据重复组,重新划分新的组别
-	for i:=0;i<len(repeatArr);i++ {
-		sourceid := sourceArr[i]
-		isAddExist,index := false,0
-		//根据原sourceid 直接遍历组
-	R:	for k,v:=range fusionDataGroupArr{
-			for _,v1:=range v{
-				if v1==sourceid {
-					index = k
-					isAddExist = true
-					break R
-				}
-			}
-		}
-
-		if isAddExist { //数组截取替换-找到指定
-			arr := make([]string,0)
-			arr = fusionDataGroupArr[index]
-			arr = append(arr,repeatArr[i])//组拼接当前id
-			fusionDataGroupArr[index] = arr
-		}else {//当前段落未找到-需要查询融合表,,遍历融合表
-			arr,fusionTmpData := make([]string,0),make(map[string]interface{},0)
-			arr,fusionTmpData = dealWithFindFusionDataArr(sourceid)
-
-			arr = append(arr,repeatArr[i])//组拼接当前id
-			if len(arr)<1 { //异常错误,新增
-				log.Println("... ... 数据异常异常,融合表,当前组均找不到数据",repeatArr[i])
-				arr_error := make([]string,0)
-				arr_error = append(arr_error,repeatArr[i])//组拼接当前id
-				fusionDataGroupArr = append(fusionDataGroupArr,arr_error)
-				addOrUpdateArr = append(addOrUpdateArr,false)
-				infoFusionArr = append(infoFusionArr, map[string]interface{}{})
-			}else { //正常更新
-				fusionDataGroupArr = append(fusionDataGroupArr,arr)
-				addOrUpdateArr = append(addOrUpdateArr,true)
-				infoFusionArr = append(infoFusionArr,fusionTmpData)
-			}
-
-		}
-		//不断改变中
-		//log.Println("当前分组数量:",len(fusionDataGroupArr))
-	}
-	log.Println("最终待融合分组数量:",len(fusionDataGroupArr))
-	log.Println("********************分割线********************")
-	log.Println("********************分割线********************")
-	log.Println("********************分割线********************")
-	log.Println("开始处理新增分组... ...")
-
-
-
-	start := int(time.Now().Unix())
-
-	//多线程 - 处理数据
-	pool := make(chan bool, 3)
-	wg := &sync.WaitGroup{}
-
-	for i:=0;i<len(fusionDataGroupArr);i++ {
-		fusionArr := fusionDataGroupArr[i]
-		pool <- true
-		wg.Add(1)
-		go func(fusionArr []string,i int) {
-			defer func() {
-				<-pool
-				wg.Done()
-			}()
-			//构建数据
-			log.Println("构建第",i+1,"组数据...","数量:",len(fusionArr),fusionArr)
-			weight :=NewWeightData(fusionArr)
-			////整理数据-筛选排名,模板
-			weight.analyzeBuildStandardData()
-
-			if len(fusionArr)<=1 {
-				//log.Println("单组生成... ...")
-				saveFusionData,saveRecordData := weight.dealWithAddFusionStruct()
-				saveid:=mgo.Save(fusion_coll_name,saveFusionData)
-				saveRecordData["_id"] = saveid
-				mgo.Save(record_coll_name,saveRecordData)
-			}else {
-				if addOrUpdateArr[i] {
-					//log.Println("多组更新... ...")
-					tmpdata:=infoFusionArr[i]
-					updateFusionData,updateRecordData := weight.dealWithMultipleUpdateFusionStruct(tmpdata)
-
-					UpdateFusion.updatePool <- []map[string]interface{}{
-						map[string]interface{}{
-							"_id": tmpdata["_id"],
-						},
-						updateFusionData,
-					}
-					UpdateRecord.updatePool <- []map[string]interface{}{
-						map[string]interface{}{
-							"_id": tmpdata["_id"],
-						},
-						updateRecordData,
-					}
-				}else {
-					//log.Println("多组生成... ...")
-					saveFusionData,saveRecordData := weight.dealWithMultipleAddFusionStruct()
-					saveid:=mgo.Save(fusion_coll_name,saveFusionData)
-					saveRecordData["_id"] = saveid
-					mgo.Save(record_coll_name,saveRecordData)
-				}
-			}
-
-
-		}(fusionArr,i)
-
-
-
-	}
-
-	wg.Wait()
-
-	log.Println("fusion is over :",len(fusionDataGroupArr),"用时:",int(time.Now().Unix())-start,"秒")
-	log.Println("睡眠30秒,然后在发广播")
-	time.Sleep(30 * time.Second)
-	//任务完成,开始发送广播通知下面节点
-	taskSendFusionUdp(mapInfo)
-
-}
-
-//查询融合表数据-找到对应组id
-func dealWithFindFusionDataArr(sourceid string) ([]string,map[string]interface{}) {
-	newArr ,arr := make([]string,0),make(primitive.A,0)
-	tmpData:=make(map[string]interface{},0)
-	q := map[string]interface{}{}
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	it := sess.DB(mgo.DbName).C(fusion_coll_name).Find(&q).Iter()
-
-	for tmp := make(map[string]interface{}); it.Next(&tmp); {
-		//log.Println(reflect.TypeOf(tmp["fusion_allids"]))
-		if fusion_allids,b := tmp["fusion_allids"].(primitive.A);b {
-			for _,v:=range fusion_allids {
-				if v==sourceid {
-					//找到目标组-
-					arr = fusion_allids
-					tmpData = tmp
-					tmp = make(map[string]interface{})
-					break
-				}
-			}
-		}
-
-		tmp = make(map[string]interface{})
-	}
-
-	for _,v:=range  arr{
-		newArr = append(newArr,qu.ObjToString(v))
-	}
-
-	return newArr,tmpData
-}
-
-//查询记录1表数据-找到对应的id , 更新用到
-func dealWithFindRecordData(sourceid string) string {
-	newArr ,arr := make([]string,0),make(primitive.A,0)
-	//tmpData:=make(map[string]interface{},0)
-	q := map[string]interface{}{}
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	it := sess.DB(mgo.DbName).C(fusion_coll_name).Find(&q).Iter()
-
-	for tmp := make(map[string]interface{}); it.Next(&tmp); {
-		//log.Println(reflect.TypeOf(tmp["fusion_allids"]))
-		if fusion_allids,b := tmp["fusion_allids"].(primitive.A);b {
-			for _,v:=range fusion_allids {
-				if v==sourceid {
-					//找到目标组-
-					arr = fusion_allids
-					//tmpData = tmp
-					tmp = make(map[string]interface{})
-					break
-				}
-			}
-		}
-
-		tmp = make(map[string]interface{})
-	}
-
-	for _,v:=range  arr{
-		newArr = append(newArr,qu.ObjToString(v))
-	}
-
-	return ""
-}
-
 //udp 监听
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 	switch act {
@@ -361,7 +132,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 		} else if mapInfo != nil {
 			taskType := qu.ObjToString(mapInfo["stype"])
 			if taskType == "fusion" {
-				go startTask(data, mapInfo)
+				go startTaskFullData(data, mapInfo)
 			} else {
 				log.Println("未知类型:融合异常... ...")
 			}
@@ -387,16 +158,16 @@ func taskSendFusionUdp(mapinfo map[string]interface{})  {
 	for _, to := range nextNode {
 		sid, _ := mapinfo["gtid"].(string)
 		eid, _ := mapinfo["lteid"].(string)
-		key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
+		key := sid + "-" + eid + "-" + qu.ObjToString(to["stype"])
 		by, _ := json.Marshal(map[string]interface{}{
 			"gtid":  sid,
 			"lteid": eid,
-			"stype": util.ObjToString(to["stype"]),
+			"stype": qu.ObjToString(to["stype"]),
 			"key":   key,
 		})
 		addr := &net.UDPAddr{
 			IP:   net.ParseIP(to["addr"].(string)),
-			Port: util.IntAll(to["port"]),
+			Port: qu.IntAll(to["port"]),
 		}
 		node := &udpNode{by, addr, time.Now().Unix(), 0}
 		udptaskmap.Store(key, node)

+ 13 - 0
udpfusion/src/mgo.go

@@ -125,6 +125,8 @@ type MongodbSim struct {
 	Ctx      context.Context
 	ShortCtx context.Context
 	pool     chan bool
+	UserName string
+	Password string
 }
 
 func (m *MongodbSim) GetMgoConn() *MgoSess {
@@ -146,6 +148,17 @@ func (m *MongodbSim) InitPool() {
 	opts.ApplyURI("mongodb://" + m.MongodbAddr)
 	opts.SetMaxPoolSize(uint64(m.Size))
 	m.pool = make(chan bool, m.Size)
+
+	if m.UserName !="" && m.Password !="" {
+		cre := options.Credential{
+			Username:m.UserName,
+			Password:m.Password,
+		}
+		opts.SetAuth(cre)
+	}
+
+
+
 	opts.SetMaxConnIdleTime(2 * time.Hour)
 	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
 	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)

+ 87 - 3
udpfusion/src/weightFusion.go

@@ -28,6 +28,12 @@ func (weight *weightDataMap) dealWithAddFusionStruct ()(map[string]interface{},m
 	dict["fusion_allids"] = weight.allids
 	//融合保存相关联ids
 	dict["fusion_saveids"] = weight.saveids
+	//标准id
+	dict["fusion_templateid"] = weight.templateid
+	//站点,质量分
+	dict["fusion_score"] = weight.dealWithLevelAndScoreRecord()
+
+
 
 	//采用新增id
 	delete(dict,"_id")
@@ -57,6 +63,12 @@ func (weight *weightDataMap) dealWithMultipleAddFusionStruct ()(map[string]inter
 	dict["fusion_allids"] = weight.allids
 	//融合保存相关联ids
 	dict["fusion_saveids"] = weight.saveids
+	//融合模板
+	dict["fusion_templateid"] = weight.templateid
+	//站点,质量分
+	dict["fusion_score"] = weight.dealWithLevelAndScoreRecord()
+
+
 
 	//日志记录
 	recordDict := make(map[string]interface{},0)
@@ -91,6 +103,37 @@ func (weight *weightDataMap) dealWithMultipleAddFusionStruct ()(map[string]inter
 	}
 	newRecordDict["number"] = qu.Int64All(1)
 
+
+	//分析recordDict记录在融合表里
+	/*
+		{
+            "item" : {
+                "id" : "603c7addf021652bdeb21fc6",
+                "value" : "建材"
+            }
+	 		"bidamount" : {
+                "id" : "603bc7d036baf5b8f2bb159a",
+                "value" : 8005829.82
+            }
+	}
+	*/
+
+	//
+	fieldCal := make(map[string]interface{},0)
+	for k,v:=range recordDict{
+		dict := *qu.ObjToMap(v)
+		tmp_id := qu.ObjToString(dict["id"])
+		if fieldCal[tmp_id]==nil {
+			fieldCal[tmp_id] = []interface{}{k}
+		}else {
+			arr := fieldCal[tmp_id].([]interface{})
+			arr = append(arr,k)
+			fieldCal[tmp_id] = arr
+		}
+	}
+
+	dict["fusion_fields"] = fieldCal
+
 	//返回,更新数据,日志记录数据
 	return dict,newRecordDict
 }
@@ -108,6 +151,12 @@ func (weight *weightDataMap) dealWithMultipleUpdateFusionStruct (tmpData map[str
 	dict["fusion_allids"] = weight.allids
 	//融合保存相关联ids
 	dict["fusion_saveids"] = weight.saveids
+	//融合模板
+	dict["fusion_templateid"] = weight.templateid
+	//站点,质量分
+	dict["fusion_score"] = weight.dealWithLevelAndScoreRecord()
+
+
 
 
 
@@ -147,6 +196,25 @@ func (weight *weightDataMap) dealWithMultipleUpdateFusionStruct (tmpData map[str
 	}
 	newRecordDict["number"] = number
 
+
+
+
+	fieldCal := make(map[string]interface{},0)
+	for k,v:=range recordDict{
+		tmp_id := qu.ObjToString(v.(map[string]interface{})["id"])
+		if fieldCal[tmp_id]==nil {
+			fieldCal[tmp_id] = []interface{}{k}
+		}else {
+			arr := fieldCal[tmp_id].([]interface{})
+			arr = append(arr,k)
+			fieldCal[tmp_id] = arr
+		}
+	}
+
+	dict["fusion_fields"] = fieldCal
+
+
+
 	//返回,更新数据,日志记录数据
 	return dict,newRecordDict
 }
@@ -154,6 +222,10 @@ func (weight *weightDataMap) dealWithMultipleUpdateFusionStruct (tmpData map[str
 
 
 
+
+
+
+
 //处理其他字段数据
 func (weight *weightDataMap)dealWithOtherFieldData(recordDict *map[string]interface{}) map[string]interface{} {
 
@@ -166,9 +238,10 @@ func (weight *weightDataMap)dealWithOtherFieldData(recordDict *map[string]interf
 	//找到非空数据
 	arr := make([]string,0)
 	for key,value:=range templateTmp {
-		//判断是否为有效值-
-		if !judgeIsEffectiveData(value,key) { //无效
-			arr = append(arr,key)
+		if judgeIsFusionKey(key) { //存在key且无效
+			if !judgeIsEffectiveData(value,key) {
+				arr = append(arr,key)
+			}
 		}
 	}
 	//第一步,替换模板,存在且空值
@@ -401,3 +474,14 @@ func (weight *weightDataMap)dealWithStructData(recordDict *map[string]interface{
 }
 
 
+func (weight *weightDataMap) dealWithLevelAndScoreRecord () map[string]interface{}{ // returns one {"score","level"} entry per id in weight.saveids
+	dict := make(map[string]interface{},0)
+	saveids:= weight.saveids
+	for _,v:=range saveids{
+		dict[v] = map[string]interface{}{ // keyed by the saved id itself
+			"score":weight.data[v].qualityScore, // NOTE(review): weight.data[v] is read without a presence check — confirm every id in saveids exists in weight.data
+			"level":weight.data[v].siteLevel,
+		}
+	}
+	return dict
+}

+ 29 - 6
udpfusion/src/weightFusionMethod.go

@@ -98,6 +98,7 @@ func (weight *weightDataMap)dealWithTimeData(key string) (int,int) {
 	//最小 最大排序方法
 	return sortTimeArrMethod(timeArr)
 }
+
 //时间排序方法 小 → 大
 func sortTimeArrMethod(arr []int) (int,int) {
 
@@ -112,12 +113,38 @@ func sortTimeArrMethod(arr []int) (int,int) {
 	return arr[indexEarly],arr[indexLately]
 }
 
+// judgeIsFusionKey returns true when key has no entry in NoNeedFusionKey, i.e. the field is one that should be fused.
+func judgeIsFusionKey(key string) bool {
+	b:=false
+	if NoNeedFusionKey[key]==nil {
+		b=true
+	}
+	return b
+}
+
 func judgeIsEffectiveData(value interface{},key string) bool  {
-	if value==nil || NoNeedFusionKey[key]!=nil{
+	if value==nil { //不存在的值
 		return false
 	}
 
+	//指定字段处理 area projectaddr buyeraddr agencyaddr winneraddr
+	if key=="area" {
+		if qu.ObjToString(value) == "全国" || qu.ObjToString(value) == ""  {
+			return false
+		}else {
+			return true
+		}
+	}
+	if key=="projectaddr"||key=="buyeraddr"||key=="agencyaddr"||key=="winneraddr" {
+		if qu.ObjToString(value) == "无" || qu.ObjToString(value) == ""  {
+			return false
+		}else {
+			return true
+		}
+	}
+
 	valueType := reflect.ValueOf(value).Kind()
+	//int float 类型
 	if valueType==reflect.Int || valueType==reflect.Int32 || valueType==reflect.Int64 ||
 		valueType==reflect.Int8 || valueType==reflect.Int16||valueType==reflect.Float32||
 		valueType==reflect.Float64{
@@ -128,22 +155,18 @@ func judgeIsEffectiveData(value interface{},key string) bool  {
 				return false
 			}
 		}
-		//bidopentime  bidamount budget publishtime
 		return true
 	}
-
+	//bool类型处理
 	if valueType==reflect.Bool {
 		return true
 	}
 
-
 	//其他类型采用
 	valueLen := reflect.ValueOf(value).Len()
 	if valueLen>0 {
-		//log.Println("其他组类型:",valueType,value,"")
 		return true
 	}else {
-		//log.Println("其他组类型:",valueType,value,"错误")
 		return false
 	}
 

+ 7 - 6
udpfusion/src/weightValueMethod.go

@@ -1,7 +1,7 @@
 package main
+
 import (
 	qu "qfw/util"
-
 )
 
 func dealWithGroupScores(indexArr []int, scoreArr []int,siteArr []int) []int {
@@ -90,15 +90,16 @@ func analyzeTheSite(tmp map[string]interface{}) int {
 	*/
 	level := 0
 	site := qu.ObjToString(tmp["site"])
-	if site == "政府采购" {
+	siteType :=siteJsonData[site]
+	if siteType == "政府采购" {
 		level = 5
-	}else if site == "公共资源"  {
+	}else if siteType == "公共资源"  {
 		level = 4
-	}else if site=="人民政府"||site=="政府机构"||site=="学校"||site=="医院"||site=="公司官方网站"  {
+	}else if siteType=="人民政府"||siteType=="政府机构"||siteType=="学校"||siteType=="医院"||siteType=="公司官方网站"  {
 		level = 3
-	}else if site == "社会公共招标平台"||site == "企业公共平台"  {
+	}else if siteType == "社会公共招标平台"||siteType == "企业公共平台"  {
 		level = 2
-	}else if site == "其他"  {
+	}else if siteType == "其他"  {
 		level = 1
 	}else {