Browse Source

项目融合 - 权重数据,融合

apple 4 years ago
parent
commit
332f440732

+ 9 - 0
udpfilterdup/src/datamap.go

@@ -586,6 +586,15 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
 }
 
 
+// currentTotalCount returns the number of entries currently held in the pool:
+// the sum of len(v) over every value stored in d.data.
+// NOTE(review): d.data is read here without taking any lock — confirm that
+// callers never mutate the map concurrently.
+func (d *datamap) currentTotalCount() int {
+	total := 0
+	for _, bucket := range d.data {
+		// len already yields an int, so no qutil.IntAll round trip is needed.
+		total += len(bucket)
+	}
+	return total
+}
+
+
 
 
 

+ 1 - 0
udpfilterdup/src/main.go

@@ -313,6 +313,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	wg.Wait()
 
 	log.Println("this task over.", n, "repeateN:", repeateN, mapInfo["stop"])
+	log.Println("当前数据池的数量:",DM.currentTotalCount())
 
 	time.Sleep(30 * time.Second)
 

+ 19 - 2
udpfusion/src/main.go

@@ -61,7 +61,7 @@ func main() {
 
 	sid := "1f0000000000000000000000"
 	eid := "9f0000000000000000000000"
-	log.Println(sid, "---", eid)
+	//log.Println(sid, "---", eid)
 	mapinfo := map[string]interface{}{}
 	if sid == "" || eid == "" {
 		log.Println("sid,eid参数不能为空")
@@ -113,6 +113,24 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 
 	//遍历数据
 	log.Println("开始融合流程")
+
+
+	//分组数据-分组融合
+
+	//构建数据
+	weight :=NewWeightData([]string{},"")
+	//整理数据-筛选排名,模板
+	weight.analyzeBuildStandardData()
+	log.Println("筛选出模拟数据:",weight.templateid)
+	weight.dealWithMultipleFusionStruct()
+	//进行融合
+
+
+
+
+	return
+
+
 	defer qu.Catch()
 	//区间id
 	q := map[string]interface{}{
@@ -132,7 +150,6 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 			log.Println("当前数量:", index, tmp["_id"])
 		}
 
-		//we:=weight.NewWeightData([]string{})
 		//log.Println(we)
 
 

+ 190 - 0
udpfusion/src/weightFusion.go

@@ -0,0 +1,190 @@
+package main
+
+import (
+	"log"
+	qu "qfw/util"
+	"time"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+)
+
+// dealWithAddFusionStruct builds the record for a newly added fusion entry
+// from the template record, weight.data[weight.templateid].
+//
+// The template's own map is modified in place and returned: "_id" is removed
+// (per the original note, the new record uses a newly added id), the early and
+// lately publish / come-in times are both initialised from the template's own
+// "publishtime" / "comeintime", the update time is stamped with the current
+// Unix time, and the related id lists are attached.
+//
+// It returns nil when the template id has no record (for example when no
+// entry was ranked first) instead of dereferencing a nil pointer.
+func (weight *weightDataMap) dealWithAddFusionStruct() map[string]interface{} {
+	log.Println(weight.saveids)
+	log.Println(weight.templateid)
+	log.Println(len(weight.data))
+
+	template := weight.data[weight.templateid]
+	if template == nil || template.data == nil {
+		return nil
+	}
+	dict := template.data
+
+	delete(dict, "_id")
+
+	// A single record is both the earliest and the latest one.
+	publishTime := qu.IntAll(dict["publishtime"])
+	comeinTime := qu.IntAll(dict["comeintime"])
+	dict["early_publishtime"] = publishTime
+	dict["lately_publishtime"] = publishTime
+	dict["early_comeintime"] = comeinTime
+	dict["lately_comeintime"] = comeinTime
+
+	// NOTE(review): the original wrote "current_updatetime" twice, the second
+	// time under a "fusion creation time" comment — presumably a separate
+	// creation-time key was intended; confirm the field name before adding it.
+	dict["current_updatetime"] = qu.IntAll(time.Now().Unix())
+
+	dict["fusion_allids"] = weight.allids
+	dict["fusion_saveids"] = weight.saveids
+
+	return dict
+}
+
+// dealWithMultipleFusionStruct merges several records into the template
+// record, weight.data[weight.templateid], and returns two maps: the fused
+// record and the fusion detail record. The detail record is not built yet, so
+// the same map is currently returned twice.
+//
+// The template's map is modified in place: the earliest / latest publish and
+// come-in times are taken across all records in weight.saveids, the update
+// time is stamped with the current Unix time and the related id lists are
+// attached.
+//
+// It returns (nil, nil) when the template id has no record instead of
+// dereferencing a nil pointer.
+func (weight *weightDataMap) dealWithMultipleFusionStruct() (map[string]interface{}, map[string]interface{}) {
+	template := weight.data[weight.templateid]
+	if template == nil || template.data == nil {
+		return nil, nil
+	}
+	dict := template.data
+
+	dict["early_publishtime"], dict["lately_publishtime"] = weight.dealWithTimeData("publishtime")
+	dict["early_comeintime"], dict["lately_comeintime"] = weight.dealWithTimeData("comeintime")
+
+	// The original immediately overwrote this timestamp with "" under a
+	// "fusion creation time, taken from the fusion table" comment, which wiped
+	// the update time.
+	// NOTE(review): the creation time presumably belongs under its own key,
+	// read from the existing fusion record — confirm the field name.
+	dict["current_updatetime"] = qu.IntAll(time.Now().Unix())
+
+	dict["fusion_allids"] = weight.allids
+	dict["fusion_saveids"] = weight.saveids
+
+	// TODO: merge logic for the remaining fields.
+
+	return dict, dict
+}
+
+//处理结构数据
+func (weight *weightDataMap)dealWithStructData()  {
+
+	//模板id 数据
+	templateid:=weight.templateid
+	templateTmp:=weight.data[templateid].data
+
+	//联系人 winnerorder
+	winnerCount:=qu.IntAll(0)
+	winnerArr,b:=make([]interface{},0),false
+	if winnerArr,b = templateTmp["winnerorder"].(primitive.A);b {
+		winnerCount = qu.IntAll(len(winnerArr))
+	}
+
+	//分包 package
+	packageCount:=qu.IntAll(0)
+	packageArr,b:=make([]interface{},0),false
+	if packageArr,b = templateTmp["package"].(primitive.A);b {
+		packageCount = qu.IntAll(len(packageArr))
+	}
+
+	//附件attach_text
+	/*
+		"attach_text" : {
+        "1" : {
+            "0" : {
+                "file_name" : "政采贷融资.doc",
+                "attach_url" : "d5ca0944-6af1-11eb-a8bb-0242ac120002"
+            }
+        },
+        "0" : {
+            "0" : {
+                "file_name" : "01永嘉县人民医院发光免疫试剂采购及设备租赁项目公开招标文件(电子招标).doc",
+                "attach_url" : "7827b2d4-6adb-11eb-bd40-0242ac120002"
+            }
+        }
+    },
+	*/
+	attach_text:=make(map[string]interface{},0)
+	if attach_text,b = templateTmp["attach_text"].(primitive.M);b {
+
+	}
+	log.Println(attach_text)
+
+
+	//遍历其他数据-
+	for _,value:=range weight.saveids {
+		if templateid == value {
+			continue
+		}
+		//winnerorder
+		tmp:=weight.data[value].data
+		if arr_1,b := tmp["winnerorder"].(primitive.A);b {
+			count:=qu.IntAll(len(arr_1))
+			if count > winnerCount {
+				winnerCount = count
+				winnerArr = arr_1
+			}
+		}
+
+		//package
+		if arr_2,b := tmp["package"].(primitive.A);b {
+			count:=qu.IntAll(len(arr_2))
+			if count > packageCount {
+				packageCount = count
+				packageArr = arr_2
+			}
+		}
+
+
+
+
+	}
+
+
+
+
+
+}
+
+
+
+
+
+
+// dealWithTimeData reads the value stored under key from every record listed
+// in weight.saveids, converts each one with qu.IntAll and hands the collected
+// values to sortTimeArrMethod, whose two results (smallest first, largest
+// second) are returned unchanged.
+func (weight *weightDataMap) dealWithTimeData(key string) (int, int) {
+	times := make([]int, 0)
+	for _, id := range weight.saveids {
+		record := weight.data[id]
+		times = append(times, qu.IntAll(record.data[key]))
+	}
+	return sortTimeArrMethod(times)
+}
+// sortTimeArrMethod sorts arr in place in ascending order, logs the sorted
+// slice and returns its smallest and largest element (earliest, latest).
+//
+// An empty slice yields (0, 0); the original indexed arr[len(arr)-1]
+// unconditionally and panicked when there was nothing to sort.
+func sortTimeArrMethod(arr []int) (int, int) {
+	if len(arr) == 0 {
+		return 0, 0
+	}
+
+	// Exchange sort, kept so the caller's slice still ends up sorted.
+	for i := 0; i < len(arr); i++ {
+		for j := i + 1; j < len(arr); j++ {
+			if arr[i] > arr[j] {
+				arr[i], arr[j] = arr[j], arr[i]
+			}
+		}
+	}
+	log.Println(arr)
+
+	return arr[0], arr[len(arr)-1]
+}

+ 307 - 16
udpfusion/src/weightValue.go

@@ -2,36 +2,327 @@ package main
 
 import (
 	"log"
-	qu "qfw/util"
+	"math/rand"
 	"sync"
+	"time"
 )
 
-type WeightInfo struct {
-	maxLevel	bool
-	minLevel	bool
-	siteLevel string
-	elementScore    int
-	ranking		int
+// weightInfo holds the ranking inputs, the resulting rank and the source
+// record for one document taking part in a fusion.
+type weightInfo struct {
+	// maxLevel marks a record that analyzeBuildStandardData ranks in the
+	// leading group (ranks start at 1).
+	maxLevel 		bool
+	// minLevel marks a record that is pushed to the trailing ranks unless
+	// maxLevel is also set.
+	minLevel		bool
+	// siteLevel is the value returned by analyzeTheSite; it orders records
+	// whose quality scores are equal.
+	siteLevel		int
+	// qualityScore is the value returned by analyzeTheElements.
+	qualityScore 	int
+	// ranking is -1 until analyzeBuildStandardData assigns a rank; the record
+	// ranked 1 becomes the template.
+	ranking			int
+	// data is the source record this entry was built from.
+	data      		map[string]interface{}
+}
 
 
 //一般数据判重
 type weightDataMap struct {
 	lock   sync.Mutex //锁
-	data   map[string][]*WeightInfo
+	data   map[string]*weightInfo // weight info per record, keyed by record id
+	allids    []string // every record id seen by analyzeBuildStandardData; stored as "fusion_allids"
+	saveids    []string // record ids kept for the fusion (currently the same set as allids); stored as "fusion_saveids"
+	templateid 	string 	// id of the template record (the one ranked 1)
 }
 
-func NewWeightData(arr []string) *weightDataMap {
-	log.Print(qu.ObjToString(""))
-	//测试
-	arr = []string{"5f210d1752c1d9fbf849a6a2","5f20eb1da120e23754bc8422"}
+// NewWeightData loads the records named by arr from coll_name and wraps each
+// one in a weightInfo via analyzeTheSoureData, keyed by its id.
+//
+// arr lists the record ids to load. When it is empty the built-in sample ids
+// are used, which keeps the current test call NewWeightData([]string{}, "")
+// working; the original overwrote arr unconditionally and ignored the caller.
+// templateid is stored as the initial template id (analyzeBuildStandardData
+// replaces it later).
+//
+// Ids whose lookup returns a record with at most two fields are skipped.
+// NOTE(review): the "more than two fields" threshold is taken over from the
+// original check — presumably it filters out stub records; confirm.
+func NewWeightData(arr []string, templateid string) *weightDataMap {
+	if len(arr) == 0 {
+		// Test fallback ids.
+		arr = []string{
+			"5638baccaf53745d9a000994", "5638baccaf53745d9a000995", "5638baccaf53745d9a000998",
+			"603717b8fc702705550b8df4", "603717b8fc702705550b8df5", "603717b8fc702705550b8df6",
+		}
+	}
+	weight := &weightDataMap{
+		data:       make(map[string]*weightInfo, len(arr)),
+		allids:     []string{},
+		saveids:    []string{},
+		templateid: templateid,
+	}
 
-	log.Println(len(arr))
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
+	for _, id := range arr {
+		dict := mgo.FindById(coll_name, id)
+		// len of a nil map is 0, so this also covers a failed lookup.
+		if len(dict) > 2 {
+			weight.data[id] = analyzeTheSoureData(dict)
+		}
+	}
 
 
 
-	return nil
+	return weight
 
 }
+
+// analyzeTheSoureData wraps one source record in a weightInfo.
+// The max / min level flags start out false and the ranking starts at -1
+// (no rank yet); the site level comes from analyzeTheSite and the quality
+// score from analyzeTheElements, called in that order.
+func analyzeTheSoureData(tmp map[string]interface{}) *weightInfo {
+	info := &weightInfo{ranking: -1, data: tmp}
+	info.siteLevel = analyzeTheSite(tmp)
+	info.qualityScore = analyzeTheElements(tmp)
+	return info
+}
+
+// analyzeBuildStandardData ranks every record in weight.data and stores the
+// outcome on the receiver.
+//
+// The records are first flattened into parallel slices (map iteration order is
+// random, so positions are only meaningful within one call). Ranking then runs
+// in three passes:
+//  1. records flagged maxLevel take the leading ranks, starting at 1;
+//  2. records flagged minLevel (and not maxLevel) take the trailing ranks,
+//     counting down from the number of records;
+//  3. records whose rank is still -1 continue the numbering after pass 1.
+// Within each pass the order comes from dealWithGroupScores (quality score,
+// then site level for equal scores).
+//
+// On return every weightInfo.ranking is updated, weight.templateid is the id
+// ranked 1 ("" when nothing was ranked 1) and weight.allids / weight.saveids
+// hold every id. The mutex is released with defer so that a panic in one of
+// the helpers cannot leave it locked.
+func (weight *weightDataMap) analyzeBuildStandardData() {
+	weight.lock.Lock()
+	defer weight.lock.Unlock()
+
+	data := weight.data
+	total := len(data)
+
+	arrAllIds := make([]string, 0, total)
+	arrSaveIds := make([]string, 0, total)
+	arrMaxLevel := make([]bool, 0, total)
+	arrMinLevel := make([]bool, 0, total)
+	arrSiteLevel := make([]int, 0, total)
+	arrQualityScore := make([]int, 0, total)
+	arrRanking := make([]int, 0, total) // the main ranking
+
+	for id, info := range data {
+		// TODO: preconditions deciding which records are left out of the
+		// fusion; for now every record is kept.
+		arrAllIds = append(arrAllIds, id)
+		arrSaveIds = append(arrSaveIds, id)
+
+		arrMaxLevel = append(arrMaxLevel, info.maxLevel)
+		arrMinLevel = append(arrMinLevel, info.minLevel)
+		arrSiteLevel = append(arrSiteLevel, info.siteLevel)
+		arrQualityScore = append(arrQualityScore, info.qualityScore)
+		arrRanking = append(arrRanking, info.ranking)
+	}
+
+	log.Println("初始排名:", arrRanking)
+	log.Println("初始质量:", arrQualityScore)
+	log.Println("初始站点:", arrSiteLevel)
+
+	// Pass 1: records with the maximum weight take the leading ranks.
+	isMaxIndexArr := make([]int, 0)
+	isMaxIndexValueArr := make([]int, 0)
+	for k, isMax := range arrMaxLevel {
+		if isMax {
+			arrRanking[k] = 1
+			isMaxIndexArr = append(isMaxIndexArr, k)
+			isMaxIndexValueArr = append(isMaxIndexValueArr, arrQualityScore[k])
+		}
+	}
+	nextRank := 1
+	if len(isMaxIndexArr) >= 1 {
+		log.Println("进行最大权重...")
+		for _, idx := range dealWithGroupScores(isMaxIndexArr, isMaxIndexValueArr, arrSiteLevel) {
+			arrRanking[idx] = nextRank
+			nextRank++
+		}
+	} else {
+		log.Println("无最大权重-质量-站点排序")
+	}
+	log.Println("第一步:经过最高权重比较得出--", arrRanking)
+
+	// Pass 2: records with the minimum weight take the trailing ranks.
+	isMinIndexArr := make([]int, 0)
+	isMinIndexValueArr := make([]int, 0)
+	for k, isMin := range arrMinLevel {
+		if isMin && !arrMaxLevel[k] {
+			isMinIndexArr = append(isMinIndexArr, k)
+			isMinIndexValueArr = append(isMinIndexValueArr, arrQualityScore[k])
+		}
+	}
+	if len(isMinIndexArr) >= 1 {
+		log.Println("进行最小权重...")
+		rankIndexArr := dealWithGroupScores(isMinIndexArr, isMinIndexValueArr, arrSiteLevel)
+		// Walk from the weakest record backwards, handing out the last ranks.
+		lastRank := len(arrSaveIds)
+		for i := len(rankIndexArr) - 1; i >= 0; i-- {
+			arrRanking[rankIndexArr[i]] = lastRank
+			lastRank--
+		}
+	} else {
+		log.Println("无最小权重-质量-站点排序")
+	}
+	log.Println("第二步:经过最小权重比较得出--", arrRanking)
+
+	// Pass 3: everything still unranked continues after the pass-1 ranks.
+	isQuaIndexArr := make([]int, 0)
+	isQuaIndexValueArr := make([]int, 0)
+	for k, rank := range arrRanking {
+		if rank == -1 {
+			isQuaIndexArr = append(isQuaIndexArr, k)
+			isQuaIndexValueArr = append(isQuaIndexValueArr, arrQualityScore[k])
+		}
+	}
+	if len(isQuaIndexArr) >= 1 {
+		log.Println("进行质量-站点组合...")
+		for _, idx := range dealWithGroupScores(isQuaIndexArr, isQuaIndexValueArr, arrSiteLevel) {
+			arrRanking[idx] = nextRank
+			nextRank++
+		}
+	} else {
+		log.Println("不需要进行质量-站点组合...")
+	}
+	log.Println("第三步:经过质量-站点权重比较得出--", arrRanking)
+
+	// Write the ranks back; the record ranked first becomes the template.
+	templateID := ""
+	for k, rank := range arrRanking {
+		id := arrSaveIds[k]
+		data[id].ranking = rank
+		if rank == 1 {
+			templateID = id
+		}
+	}
+
+	weight.data = data
+	weight.templateid = templateID
+	weight.allids = arrAllIds
+	weight.saveids = arrSaveIds
+}
+
+// dealWithGroupScores orders the positions in indexArr by their score and
+// returns them best first.
+//
+// scoreArr[i] is the score belonging to indexArr[i]; siteArr is looked up by
+// the position values themselves. Positions are ordered by descending score,
+// and positions sharing a score are ordered among themselves by descending
+// siteArr value. Both indexArr and scoreArr are sorted in place by the
+// sortGroupInt call.
+func dealWithGroupScores(indexArr []int, scoreArr []int, siteArr []int) []int {
+	sortedScores, sortedIdx := sortGroupInt(scoreArr, indexArr)
+
+	ordered := make([]int, 0)
+	for start := 0; start < len(sortedScores); {
+		// Equal scores sit next to each other after the sort; find the run.
+		end := start + 1
+		for end < len(sortedScores) && sortedScores[end] == sortedScores[start] {
+			end++
+		}
+
+		// Copy the run so the tie-break sort does not touch the caller's slice.
+		group := append([]int(nil), sortedIdx[start:end]...)
+		if len(group) > 1 {
+			sites := make([]int, 0, len(group))
+			for _, idx := range group {
+				sites = append(sites, siteArr[idx])
+			}
+			_, group = sortGroupInt(sites, group)
+		}
+		ordered = append(ordered, group...)
+
+		start = end
+	}
+	return ordered
+}
+
+// sortNormalInt sorts arrValue in place from largest to smallest and returns
+// the same slice.
+func sortNormalInt(arrValue []int) []int {
+	n := len(arrValue)
+	for left := 0; left < n; left++ {
+		for right := left + 1; right < n; right++ {
+			if arrValue[right] > arrValue[left] {
+				arrValue[left], arrValue[right] = arrValue[right], arrValue[left]
+			}
+		}
+	}
+	return arrValue
+}
+
+
+// sortGroupInt sorts arrValue in place from largest to smallest and applies
+// every swap to arrIndex as well, so arrIndex[i] keeps travelling with
+// arrValue[i]. Both slices are returned.
+func sortGroupInt(arrValue []int, arrIndex []int) ([]int, []int) {
+	n := len(arrValue)
+	for left := 0; left < n; left++ {
+		for right := left + 1; right < n; right++ {
+			if arrValue[right] > arrValue[left] {
+				arrValue[left], arrValue[right] = arrValue[right], arrValue[left]
+				arrIndex[left], arrIndex[right] = arrIndex[right], arrIndex[left]
+			}
+		}
+	}
+	return arrValue, arrIndex
+}
+
+
+
+
+
+
+
+// analyzeTheSite returns the site score of a record.
+// Test stub: tmp is ignored; the shared math/rand generator is reseeded from
+// the current time and a pseudo-random value in [0, 10) is returned.
+// NOTE(review): the original note describes a 1-5 level scale, which this
+// placeholder does not follow — confirm the intended range when the real
+// scoring is implemented.
+func analyzeTheSite(tmp map[string]interface{}) int {
+	seed := time.Now().UnixNano()
+	rand.Seed(seed)
+	score := rand.Intn(10)
+	return score
+}
+
+// analyzeTheElements returns the total quality score of a record.
+// Test stub: tmp is ignored; the shared math/rand generator is reseeded from
+// the current time and a pseudo-random value in [0, 100) is returned.
+func analyzeTheElements(tmp map[string]interface{}) int {
+	seed := time.Now().UnixNano()
+	rand.Seed(seed)
+	score := rand.Intn(100)
+	return score
+}