Bladeren bron

Merge branch 'dev3.4.1' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4.1

maxiaoshan 4 jaren geleden
bovenliggende
commit
17a0a79cbb

+ 6 - 6
src/jy/extract/extract.go

@@ -647,8 +647,8 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		for key, val := range j.Result {
 			for i, v := range val {
 				if v.Field == "project_duration" {
-					arr:=clear.ObjToMoney([]interface{}{v.Value, j.Content},j.SpiderCode, j.IsClearnMoney)
-					if len(arr)>0 {
+					arr := clear.ObjToMoney([]interface{}{v.Value, j.Content}, j.SpiderCode, j.IsClearnMoney)
+					if len(arr) > 0 {
 						v.Value = arr[0]
 					}
 				}
@@ -988,7 +988,7 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
 				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 			}
 		} else if in.Field == "qualifies" {
-			extinfo := extRegCoreToResult(extfrom,pretreated.HtmlToText(qu.ObjToString(doc[extfrom]) ), &map[string]string{}, j, in, isSite)
+			extinfo := extRegCoreToResult(extfrom, pretreated.HtmlToText(qu.ObjToString(doc[extfrom])), &map[string]string{}, j, in, isSite)
 			if len(extinfo) > 0 {
 				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 			}
@@ -1807,7 +1807,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					delete(v, "bidamount")
 				}
 				j.Winnerorder = nil
-				if jf!= nil && jf.Winnerorder!= nil{
+				if jf != nil && jf.Winnerorder != nil {
 					jf.Winnerorder = nil
 				}
 			}
@@ -1968,7 +1968,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					if v.Score > -1 {
 						ffield[v.Field] = v.Value
 						if tmp[v.Field] == nil {
-							if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
+							if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Value.(float64) > 100 && v.Value.(float64) < 50000000000 {
 								tmp[v.Field] = v.Value
 								break
 							}
@@ -2202,7 +2202,7 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	}
 
 	//工期单位-清理
-	if tmp["project_timeunit"]=="年" && tmp["project_duration"] == nil {
+	if tmp["project_timeunit"] == "年" && tmp["project_duration"] == nil {
 		delete(tmp, "project_timeunit")
 	}
 	tmp["repeat"] = 0

+ 6 - 0
src/jy/extract/score_jsondata.go

@@ -194,6 +194,8 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 			oneScore := j.Result[v][0].Score
 			if oneScore < 0 {
 				oneScore = 0
+			}else {
+				oneScore -= 0.5
 			}
 			if v == "budget" || v == "bidamount" {
 				lockclear.Lock()
@@ -209,11 +211,15 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 					if jdextweight > 1 {
 						if oneScore < 0 {
 							oneScore = 0.1
+						}else {
+							oneScore -= 0.5
 						}
 						extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: oneScore + 1, IsTrue: newNum[len(newNum)-1].(bool)})
 					} else {
 						if oneScore < 0 {
 							oneScore = 0.1
+						}else {
+							oneScore -= 0.5
 						}
 						extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: oneScore, IsTrue: newNum[len(newNum)-1].(bool)})
 					}

+ 7 - 1
udpfusion/src/config.json

@@ -3,13 +3,19 @@
   "mongodb": {
     "addrName": "192.168.3.207:27092",
     "dbName": "zhengkun",
-    "collName": "test",
+    "collName": "all_test",
     "pool": 10,
     "site": {
       "site_dbname": "qfw",
       "site_coll": "site"
     }
   },
+  "notFusionKey": {
+    "repeat_reason":0,
+    "repeat_id":0,
+    "repeat_ids":0,
+    "dataging": 0
+  },
   "fusion_coll_name":"fusiondata",
   "record_coll_name":"recorddata",
   "":"",

+ 189 - 62
udpfusion/src/main.go

@@ -8,16 +8,22 @@ import (
 	"os"
 	"qfw/common/src/qfw/util"
 	qu "qfw/util"
+	"strconv"
 	"time"
 )
 
 
 var (
-	sysconfig    map[string]interface{} //配置文件
-	mgo          *MongodbSim            //mongodb操作对象
-	udpclient    mu.UdpClient             //udp对象
-	nextNode     []map[string]interface{} //下节点数组
-	coll_name,fusion_coll_name,record_coll_name 	 string
+	sysconfig   		map[string]interface{} 		//配置文件
+	mgo         		*MongodbSim            		//mongodb操作对象
+	udpclient    		mu.UdpClient             	//udp对象
+	nextNode     		[]map[string]interface{} 	//下节点数组
+	coll_name 	 		string
+	fusion_coll_name	string
+	record_coll_name 	string   					//表名
+	NoNeedFusionKey 	map[string]interface{}   	//不需要融合的key
+	UpdateFusion		*updateFusionInfo
+	UpdateRecord		*updateRecordInfo			//更新池
 )
 
 
@@ -36,6 +42,9 @@ func initMgo()  {
 	coll_name = mconf["collName"].(string)
 	fusion_coll_name = sysconfig["fusion_coll_name"].(string)
 	record_coll_name = sysconfig["record_coll_name"].(string)
+	NoNeedFusionKey = sysconfig["notFusionKey"].(map[string]interface{})
+
+
 }
 
 
@@ -43,6 +52,16 @@ func init() {
 	//加载配置文件
 	qu.ReadConfig(&sysconfig)
 	initMgo()
+
+	//更新池
+	UpdateFusion = newUpdateFusionPool()
+	go UpdateFusion.updateFusionData()
+
+	UpdateRecord = newUpdateRecordPool()
+	go UpdateRecord.updateRecordData()
+
+
+
 	log.Println("采用udp模式")
 }
 
@@ -75,36 +94,7 @@ func main() {
 }
 
 
-func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
-	switch act {
-	case mu.OP_TYPE_DATA: //上个节点的数据
-		//从表中开始处理
-		var mapInfo map[string]interface{}
-		err := json.Unmarshal(data, &mapInfo)
-		log.Println("err:", err, "mapInfo:", mapInfo)
-		if err != nil {
-			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
-		} else if mapInfo != nil {
-			taskType := qu.ObjToString(mapInfo["stype"])
-			if taskType == "fusion" {
-				go startTask(data, mapInfo)
-			} else {
-				log.Println("未知类型:融合异常... ...")
-			}
-			key, _ := mapInfo["key"].(string)
-			if key == "" {
-				key = "udpok"
-			}
-			udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
-		}
-	case mu.OP_NOOP: //下个节点回应
-		ok := string(data)
-		if ok != "" {
-			log.Println("ok:", ok)
-			udptaskmap.Delete(ok)
-		}
-	}
-}
+
 
 
 
@@ -115,21 +105,6 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 	log.Println("开始融合流程")
 
 
-	//分组数据-分组融合
-
-	//构建数据
-	weight :=NewWeightData([]string{},"")
-	//整理数据-筛选排名,模板
-	weight.analyzeBuildStandardData()
-	log.Println("筛选出模拟数据:",weight.templateid)
-	weight.dealWithMultipleFusionStruct()
-	//进行融合
-
-
-
-
-	return
-
 
 	defer qu.Catch()
 	//区间id
@@ -143,40 +118,172 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 	sess := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess)
 	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
-	updateExtract := [][]map[string]interface{}{}
-	index:=0
+	//编译不同的融合组,如何划分组
+	fusionDataGroupArr := make([][]string,0) //待融合组
+	addOrUpdateArr := make([]bool,0) //新增-bool-记录
+
+	repeatArr,sourceArr,index := make([]string,0),make([]string,0),0 //重复数据组
 	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
-		if index%10000 == 0 {
-			log.Println("当前数量:", index, tmp["_id"])
+		if index%1000 == 0 {
+			log.Println("current index",index,tmp["_id"])
 		}
+		tmpId:=BsonTOStringId(tmp["_id"])
+		repeat:=qu.IntAll(tmp["repeat"])
+		sourceid:=qu.ObjToString(tmp["repeat_id"])
+		if repeat==1 {
+			repeatArr = append(repeatArr,tmpId)
+			sourceArr = append(sourceArr,sourceid)
+		}else {
+			fusionDataGroupArr = append(fusionDataGroupArr,[]string{tmpId})
+			addOrUpdateArr = append(addOrUpdateArr,false)
+		}
+		tmp = make(map[string]interface{})
+	}
 
-		//log.Println(we)
-
+	log.Println("task first:",index,len(fusionDataGroupArr),"+",len(repeatArr))
+	log.Println("状态记录:",len(addOrUpdateArr))
+	//根据重复组,重新划分新的组别
+	num1,num2:=0,0
+	for i:=0;i<len(repeatArr);i++ {
+		sourceid := sourceArr[i]
+		isAddExist,index := false,0
+		//根据原sourceid 直接遍历组
+	R:	for k,v:=range fusionDataGroupArr{
+			for _,v1:=range v{
+				if v1==sourceid {
+					index = k
+					isAddExist = true
+					break R
+				}
+			}
+		}
 
+		if isAddExist { //数组截取替换-找到指定
+			arr := make([]string,0)
+			arr = fusionDataGroupArr[index]
+			arr = append(arr,repeatArr[i])//组拼接当前id
+			fusionDataGroupArr[index] = arr
+			num1++
+		}else {//当前段落未找到-需要查询融合表,,遍历融合表
+			arr := make([]string,0)
+			arr = dealWithFindFusionDataArr(sourceid)
+			arr = append(arr,repeatArr[i])//组拼接当前id
+			if len(arr)<1 {
+				log.Println("数据异常,融合表找不到数据",repeatArr[i])
+			}else { //新增
+				log.Println("数据融合新增")
+				fusionDataGroupArr = append(fusionDataGroupArr,arr)
+				addOrUpdateArr = append(addOrUpdateArr,true)
+			}
+			num2++
 
-		tmp = make(map[string]interface{})
+		}
+		//不断改变中
+		log.Println("当前分组数量:",len(fusionDataGroupArr))
 	}
 
+	log.Println("分组完毕:","重复新增数量:",num1,"重复更新数量:",num2,len(repeatArr))
+	log.Println("最终带融合分组:",len(fusionDataNewGroupArr))
+	//分组细节需要修改 - 带测试
+	return
 
-	if len(updateExtract) >0 {
-		mgo.UpSertBulk(coll_name, updateExtract...)
 
+	log.Println("开始处理新增分组... ...")
+	start := int(time.Now().Unix())
+	//进行分组融合
+	for i:=0;i<len(fusionDataNewGroupArr);i++ {
+		fusionArr := fusionDataNewGroupArr[i]
+		//构建数据
+		log.Println("构建第一组数据...",fusionArr)
+		weight :=NewWeightData(fusionArr)
+		//整理数据-筛选排名,模板
+		weight.analyzeBuildStandardData()
+		if len(fusionArr)<=1 {
+			//更新数据(融合表)   日志数据(日志记录表)
+		 	//updateData,_ := weight.dealWithAddFusionStruct()
+			//log.Println("新增:更新数据",len(updateData))
+		 	//mgo.Save(fusion_coll_name,updateData) //新增
+		}else {
+			//updateData,_ := weight.dealWithMultipleFusionStruct()
+			//log.Println("多组新增:更新数据",len(updateData))
+			//mgo.Save(fusion_coll_name,updateData)
+		}
 	}
+	log.Println("新增融合over :",len(fusionDataNewGroupArr),"用时:",int(time.Now().Unix())-start)
 
 
-	log.Println("task fusion over - 总计数量",index)
+	//多组-融合表更新
+	//UpdateFusion.updatePool <- []map[string]interface{}{//原始数据打标签
+	//	map[string]interface{}{},
+	//	updateData,
+	//}
+
 
 	time.Sleep(30 * time.Second)
 
 	//任务完成,开始发送广播通知下面节点
+	taskSendFusionUdp(mapInfo)
 
-	sendUdp(mapInfo)
+}
+
+//查询融合表数据-找到对应组id
+func dealWithFindFusionDataArr(sourceid string) []string {
+	arr := make([]string,0)
+	q := map[string]interface{}{}
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); {
+		fusion_allids := tmp["fusion_allids"].([]string)
+		for _,v:=range fusion_allids {
+			if v==sourceid {
+				//找到目标组-
+				arr = fusion_allids
+				tmp = make(map[string]interface{})
+				break
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+	return arr
+}
 
 
 
+//udp 监听
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA: //上个节点的数据
+		//从表中开始处理
+		var mapInfo map[string]interface{}
+		err := json.Unmarshal(data, &mapInfo)
+		log.Println("err:", err, "mapInfo:", mapInfo)
+		if err != nil {
+			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+		} else if mapInfo != nil {
+			taskType := qu.ObjToString(mapInfo["stype"])
+			if taskType == "fusion" {
+				go startTask(data, mapInfo)
+			} else {
+				log.Println("未知类型:融合异常... ...")
+			}
+			key, _ := mapInfo["key"].(string)
+			if key == "" {
+				key = "udpok"
+			}
+			udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+		}
+	case mu.OP_NOOP: //下个节点回应
+		ok := string(data)
+		if ok != "" {
+			log.Println("ok:", ok)
+			udptaskmap.Delete(ok)
+		}
+	}
 }
 
-func sendUdp(mapinfo map[string]interface{})  {
+
+func taskSendFusionUdp(mapinfo map[string]interface{})  {
 
 	//log.Println("信息融合结束-发送udp")
 	for _, to := range nextNode {
@@ -197,4 +304,24 @@ func sendUdp(mapinfo map[string]interface{})  {
 		udptaskmap.Store(key, node)
 		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
 	}
+}
+
+
+
+
+
+
+
+
+
// judgeIsCurIds reports whether curid lies in the id range (gtid, lteid]:
// strictly after gtid and not after lteid.
//
// Only the first 8 characters of each id are compared, parsed as a
// hexadecimal integer. NOTE(review): for MongoDB ObjectId hex strings this
// prefix is the creation timestamp in seconds — confirm every caller passes
// such ids.
//
// An id that is shorter than 8 characters or whose prefix is not valid hex
// cannot be placed in the range, so false is returned (previously a short id
// panicked and an unparsable one was silently treated as 0).
func judgeIsCurIds(gtid string, lteid string, curid string) bool {
	parsePrefix := func(id string) (int64, bool) {
		if len(id) < 8 {
			return 0, false
		}
		v, err := strconv.ParseInt(id[:8], 16, 64)
		if err != nil {
			return 0, false
		}
		return v, true
	}

	gtTime, ok := parsePrefix(gtid)
	if !ok {
		return false
	}
	lteTime, ok := parsePrefix(lteid)
	if !ok {
		return false
	}
	curTime, ok := parsePrefix(curid)
	if !ok {
		return false
	}
	return curTime > gtTime && curTime <= lteTime
}

+ 1 - 1
udpfusion/src/updateFusion.go

@@ -20,7 +20,7 @@ type updateFusionInfo struct {
 var sp_f = make(chan bool, 5)
 
 func newUpdateFusionPool() *updateFusionInfo {
-	update:=&updateFusionInfo{make(chan []map[string]interface{}, 50000),500}
+	update:=&updateFusionInfo{make(chan []map[string]interface{}, 5000),100}
 	return update
 }
 

+ 219 - 69
udpfusion/src/weightFusion.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"fmt"
 	"log"
 	qu "qfw/util"
 	"time"
@@ -8,12 +9,8 @@ import (
 )
 
 //处理融合数据-返回,融合新数据数据-新增
-func (weight *weightDataMap) dealWithAddFusionStruct ()(map[string]interface{}){
-	log.Println(weight.saveids)
-	log.Println(weight.templateid)
-	log.Println(len(weight.data))
+func (weight *weightDataMap) dealWithAddFusionStruct ()(map[string]interface{},map[string]interface{}){
 
-	//
 	//指定模板数据dict-单条数据
 	dict :=weight.data[weight.templateid].data
 
@@ -38,15 +35,12 @@ func (weight *weightDataMap) dealWithAddFusionStruct ()(map[string]interface{}){
 	dict["fusion_saveids"] = weight.saveids
 
 
-	return dict
+
+	return dict,dict
 }
 
 //处理多条融合数据-返回融合新数据,融合细节数据
 func (weight *weightDataMap) dealWithMultipleFusionStruct ()(map[string]interface{},map[string]interface{}){
-	//log.Println(weight.saveids)
-	//log.Println(weight.templateid)
-	//log.Println(len(weight.data))
-
 
 	//指定模板数据dict
 	dict :=weight.data[weight.templateid].data
@@ -66,37 +60,139 @@ func (weight *weightDataMap) dealWithMultipleFusionStruct ()(map[string]interfac
 	dict["fusion_saveids"] = weight.saveids
 
 
+	//日志记录-还有快照页面 等等
+	recordDict := make(map[string]interface{},0)
 
 
-	//其他字段逻辑处理
+	//结构体字段逻辑处理
+	structData := weight.dealWithStructData(&recordDict)
+	for k,v:=range structData {
+		log.Println("key:",k,"value",v)
+		dict["k"] = v
+	}
 
+	//非空新增字段
+	otherFieldData := weight.dealWithOtherFieldData(&recordDict)
+	for k,v:=range otherFieldData {
+		//log.Println("key:",k,"value",v)
+		dict[k] = v
+	}
 
+	dict["repeat"] = 0
 
 
-	return dict,dict
+	//log.Println("待更新数据:",dict)
+	//log.Println("待更新日志:",recordDict)
+
+
+
+
+	//返回,更新数据,日志记录数据
+	return dict,recordDict
 }
 
-//处理结构数据
-func (weight *weightDataMap)dealWithStructData()  {
+
+//处理其他字段数据
+func (weight *weightDataMap)dealWithOtherFieldData(recordDict *map[string]interface{}) map[string]interface{} {
 
 	//模板id 数据
-	templateid:=weight.templateid
-	templateTmp:=weight.data[templateid].data
+	templateid := weight.templateid
+	templateTmp := weight.data[templateid].data
 
-	//联系人 winnerorder
-	winnerCount:=qu.IntAll(0)
-	winnerArr,b:=make([]interface{},0),false
-	if winnerArr,b = templateTmp["winnerorder"].(primitive.A);b {
-		winnerCount = qu.IntAll(len(winnerArr))
+	modifyData := make(map[string]interface{}, 0) //返回修改的数据
+
+	//找到非空数据
+	arr := make([]string,0)
+	for key,value:=range templateTmp {
+		//判断是否为有效值-
+		if !judgeIsEffectiveData(value,key) { //无效
+			arr = append(arr,key)
+		}
+	}
+	//第一步,替换模板,存在且空值
+	if arr!=nil && len(arr)>0 {
+		for _,key:=range arr  {
+			isRank := 2
+		L:	for {
+				for _,v:=range weight.saveids {
+					if v == templateid {
+						continue
+					}
+					dataInfo:=weight.data[v]
+					if dataInfo.ranking==isRank { //找到指定排名-字段数据
+						value:=dataInfo.data[key]
+						if value !=nil && judgeIsEffectiveData(value,key)  {
+							modifyData[key] = value
+							templateTmp[key] = value
+							(*recordDict)[key] = map[string]interface{}{
+								"id":v,
+								"value":value,
+							}
+							break L
+						}
+						break
+					}
+
+				}
+				isRank++
+				if isRank > len(weight.saveids) {
+					break L
+				}
+			}
+		}
 	}
 
-	//分包 package
-	packageCount:=qu.IntAll(0)
-	packageArr,b:=make([]interface{},0),false
-	if packageArr,b = templateTmp["package"].(primitive.A);b {
-		packageCount = qu.IntAll(len(packageArr))
+
+	log.Println("待替换key:",arr,"修改后:",modifyData)
+
+	//第二步-集合最大化
+	isRank := 2
+	for { //不断遍历,找到其他排名数据
+		for _,v:=range weight.saveids {
+			if v == templateid {
+				continue
+			}
+
+
+			dataInfo:=weight.data[v]
+			if dataInfo.ranking==isRank { //找到指定排名数据
+				for key,newValue:=range dataInfo.data{
+					if key=="_id" || templateTmp[key]!=nil || NoNeedFusionKey[key]!=nil{
+						continue
+					}
+					if judgeIsEffectiveData(newValue,key) {
+						log.Println("最大化有效-",key)
+						templateTmp[key] = newValue
+						modifyData[key] = newValue
+						(*recordDict)[key] = map[string]interface{}{
+							"id":v,
+							"value":newValue,
+						}
+					}
+				}
+				break
+			}
+		}
+		isRank++
+		if isRank > len(weight.saveids) {
+			break
+		}
 	}
 
+	log.Println("isRank:",isRank,len(modifyData))
+
+	return modifyData
+}
+
+//处理结构数据
+func (weight *weightDataMap)dealWithStructData(recordDict *map[string]interface{}) map[string]interface{} {
+
+	//模板id 数据
+	templateid:=weight.templateid
+	templateTmp:=weight.data[templateid].data
+
+	modifyData :=make(map[string]interface{},0)
+
 	//附件attach_text
 	/*
 		"attach_text" : {
@@ -114,13 +210,82 @@ func (weight *weightDataMap)dealWithStructData()  {
         }
     },
 	*/
-	attach_text:=make(map[string]interface{},0)
-	if attach_text,b = templateTmp["attach_text"].(primitive.M);b {
 
+	attach_text,isAttach:=make(map[string]interface{},0),false
+	if tmp_arr,b := templateTmp["attach_text"].(map[string]interface{});b {
+		//有值符合-
+		attach_text = tmp_arr
+		log.Println("默认初始:",attach_text)
+	}
+	//附件判重-并合并新增
+	keyIndex := -1
+	//找到当前最大keyIndex
+	for k,_:=range attach_text {
+		key:=qu.IntAll(k)
+		if key>keyIndex {
+			keyIndex = key
+		}
+	}
+	log.Println("当前keyIndex",keyIndex)
+	for _,value_id :=range weight.saveids {
+		if templateid == value_id {
+			continue
+		}
+		rankData := weight.data[value_id].data //具体其他排名数据
+		if attachData,b := rankData["attach_text"].(map[string]interface{});b {
+			if len(attachData)>0  { //有值
+				for _,v:=range attachData { //子元素
+					if attach,isOK := v.(map[string]interface{});isOK {
+						log.Println(attach)
+						if !dealWithRepeatAttachData(attach_text,attach) {
+							//符合条件-不重复直接添加
+							keyIndex++
+							saveKey := fmt.Sprintf("%v",keyIndex)
+							attach_text[saveKey] = attach //key累加
+							log.Println(attach_text)
+							isAttach = true
+
+							//多条情况-融合
+							if (*recordDict)["attach_text"]==nil {
+								(*recordDict)["attach_text"] = []map[string]interface{}{
+									map[string]interface{}{
+										"id":value_id,
+										"value":attach,
+									},
+								}
+							}else {
+								arr := (*recordDict)["attach_text"].([]map[string]interface{})
+								arr = append(arr,map[string]interface{}{
+									"id":value_id,
+									"value":attach,
+								})
+								(*recordDict)["attach_text"] = arr
+							}
+
+						}
+					}
+				}
+			}
+		}
+	}
+
+
+
+	//联系人 winnerorder
+	winnerCount:=qu.IntAll(0)
+	winnerArr,b,isWinner,winnerid:=make(primitive.A,0),false,false,templateid
+	if winnerArr,b = templateTmp["winnerorder"].([]interface{});b {
+		winnerCount = qu.IntAll(len(winnerArr))
 	}
-	log.Println(attach_text)
 
 
+	//分包 package
+	packageCount:=qu.IntAll(0)
+	packageArr,b,isPackage,packageid:=make(map[string]interface{},0),false,false,templateid
+	if packageArr,b = templateTmp["package"].(map[string]interface{});b {
+		packageCount = qu.IntAll(len(packageArr))
+	}
+
 	//遍历其他数据-
 	for _,value:=range weight.saveids {
 		if templateid == value {
@@ -128,63 +293,48 @@ func (weight *weightDataMap)dealWithStructData()  {
 		}
 		//winnerorder
 		tmp:=weight.data[value].data
-		if arr_1,b := tmp["winnerorder"].(primitive.A);b {
+
+		if arr_1,winner_b := tmp["winnerorder"].(primitive.A);winner_b {
 			count:=qu.IntAll(len(arr_1))
 			if count > winnerCount {
 				winnerCount = count
 				winnerArr = arr_1
+				isWinner = true
+				winnerid = value
 			}
 		}
-
 		//package
-		if arr_2,b := tmp["package"].(primitive.A);b {
+		if arr_2,package_b := (tmp["package"]).(map[string]interface{});package_b {
 			count:=qu.IntAll(len(arr_2))
 			if count > packageCount {
 				packageCount = count
 				packageArr = arr_2
+				isPackage = true
+				packageid = value
 			}
 		}
-
-
-
-
 	}
 
 
-
-
-
-}
-
-
-
-
-
-
-//处理时间方法
-func (weight *weightDataMap)dealWithTimeData(key string) (int,int) {
-
-	saveids:=weight.saveids
-	data:=weight.data
-	timeArr := make([]int,0)
-	for _,v:=range saveids{
-		timeArr = append(timeArr,qu.IntAll(data[v].data[key]))
+	//改变的值
+	if len(winnerArr)>0 && winnerArr!=nil && isWinner {
+		modifyData["winnerorder"] = winnerArr
+		(*recordDict)["winnerorder"] = map[string]interface{}{
+			"id":winnerid,
+			"value":winnerArr,
+		}
 	}
-
-	//最小 最大排序方法
-	return sortTimeArrMethod(timeArr)
-}
-//时间排序方法 小 → 大
-func sortTimeArrMethod(arr []int) (int,int) {
-
-	for i := 0; i < len(arr); i++ {
-		for j := i + 1; j < len(arr); j++ {
-			if arr[i] > arr[j] {
-				arr[i], arr[j] = arr[j], arr[i]
-			}
+	if len(packageArr)>0 && packageArr!=nil && isPackage {
+		modifyData["package"] = packageArr
+		(*recordDict)["package"] = map[string]interface{}{
+			"id":packageid,
+			"value":packageArr,
 		}
 	}
-	log.Println(arr)
-	indexEarly,indexLately := 0,len(arr)-1
-	return arr[indexEarly],arr[indexLately]
+	if len(attach_text)>0 && attach_text!=nil && isAttach {
+		modifyData["attach_text"] = attach_text
+	}
+
+	return modifyData
 }
+

+ 145 - 0
udpfusion/src/weightFusionMethod.go

@@ -0,0 +1,145 @@
+package main
+
+import (
+	qu "qfw/util"
+	"reflect"
+)
+
+//处理-重复附件数据
+func dealWithRepeatAttachData(attach map[string]interface{},data map[string]interface{}) bool {
+	/*
+			"attach_text" : {
+			"1" : {
+				"0" : {
+					"file_name" : "政采贷融资.doc",
+					"attach_url" : "d5ca0944-6af1-11eb-a8bb-0242ac120002"
+				}
+			},
+			"0" : {
+				"0" : {
+					"file_name" : "01永嘉县人民医院发光免疫试剂采购及设备租赁项目公开招标文件(电子招标).doc",
+					"attach_url" : "7827b2d4-6adb-11eb-bd40-0242ac120002"
+				}
+			},
+			"3" : { //data
+				"0" : {
+					"file_name" : "政采贷融资.doc",
+					"attach_url" : "d5ca0944-6af1-11eb-a8bb-0242ac120002"
+				}
+			},
+		},
+		*/
+	//重复返回 true
+	for _,sourceValue := range attach{
+		//处理此结构数据
+		new_sourceArr := dealWithSpecialStructAttachData(*qu.ObjToMap(sourceValue))
+		new_dataArr := dealWithSpecialStructAttachData(data)
+		if judgeArrIsTheSame(new_sourceArr,new_dataArr) { //相同
+			return true
+		}
+	}
+	return false
+}
+//处理-特殊结构体-附件数据
+func dealWithSpecialStructAttachData(data map[string]interface{}) []map[string]string {
+
+	arr:=make([]map[string]string,0)
+	//sourceValue - data  平级
+
+	/*
+	"1" : {
+				"0" : {
+					"file_name" : "政采贷融资.doc",
+					"attach_url" : "d5ca0944-6af1-11eb-a8bb-0242ac120002"
+				}
+			},
+	*/
+	for _,v:=range data {
+		if dict,isOK := v.(map[string]interface{});isOK {
+			file_name:=qu.ObjToString(dict["file_name"])
+			attach_url:=qu.ObjToString(dict["attach_url"])
+			arr = append(arr, map[string]string{"file_name":file_name,"attach_url":attach_url})
+		}
+
+	}
+	return arr
+}
// judgeArrIsTheSame reports whether arr1 and arr2 hold the same attachments,
// ignoring order. Two attachments are equal when both their "file_name" and
// "attach_url" values match.
//
// The comparison is a multiset comparison: every entry of arr2 can be matched
// by at most one entry of arr1. The earlier one-directional check accepted
// inputs such as [a, a] vs [a, b].
func judgeArrIsTheSame(arr1 []map[string]string, arr2 []map[string]string) bool {
	if len(arr1) != len(arr2) {
		return false
	}

	type attachKey struct {
		fileName  string
		attachURL string
	}

	// Count how many times each attachment occurs in arr2 ...
	remaining := make(map[attachKey]int, len(arr2))
	for _, item := range arr2 {
		remaining[attachKey{item["file_name"], item["attach_url"]}]++
	}
	// ... and consume one occurrence for every entry of arr1.
	for _, item := range arr1 {
		k := attachKey{item["file_name"], item["attach_url"]}
		if remaining[k] == 0 {
			return false // not fully identical
		}
		remaining[k]--
	}
	return true
}
+//处理时间方法
+func (weight *weightDataMap)dealWithTimeData(key string) (int,int) {
+
+	saveids:=weight.saveids
+	data:=weight.data
+	timeArr := make([]int,0)
+	for _,v:=range saveids{
+		timeArr = append(timeArr,qu.IntAll(data[v].data[key]))
+	}
+
+	//最小 最大排序方法
+	return sortTimeArrMethod(timeArr)
+}
// sortTimeArrMethod sorts arr in place in ascending order and returns its
// smallest and largest element.
//
// An empty (or nil) slice yields (0, 0); previously it panicked on the
// arr[len(arr)-1] access.
func sortTimeArrMethod(arr []int) (int, int) {
	if len(arr) == 0 {
		return 0, 0
	}

	for i := 0; i < len(arr); i++ {
		for j := i + 1; j < len(arr); j++ {
			if arr[i] > arr[j] {
				arr[i], arr[j] = arr[j], arr[i]
			}
		}
	}
	return arr[0], arr[len(arr)-1]
}
+
+func judgeIsEffectiveData(value interface{},key string) bool  {
+	if value==nil || NoNeedFusionKey[key]!=nil{
+		return false
+	}
+
+	valueType := reflect.ValueOf(value).Kind()
+	if valueType==reflect.Int || valueType==reflect.Int32 || valueType==reflect.Int64 ||
+		valueType==reflect.Int8 || valueType==reflect.Int16||valueType==reflect.Float32||
+		valueType==reflect.Float64{
+		//如果类型为金额类
+		if key=="bidopentime" || key=="bidamount" || key=="budget" ||
+			key=="publishtime" || key=="comeintime" {
+			if qu.Float64All(value) == 0.0 {
+				return false
+			}
+		}
+		//bidopentime  bidamount budget publishtime
+		return true
+	}
+
+	//其他类型采用
+	valueLen := reflect.ValueOf(value).Len()
+	if valueLen>0 {
+		//log.Println("其他组类型:",valueType,value,"")
+		return true
+	}else {
+		//log.Println("其他组类型:",valueType,value,"错误")
+		return false
+	}
+
+
+}

+ 14 - 144
udpfusion/src/weightValue.go

@@ -2,9 +2,7 @@ package main
 
 import (
 	"log"
-	"math/rand"
 	"sync"
-	"time"
 )
 
 type weightInfo struct {
@@ -17,7 +15,6 @@ type weightInfo struct {
 }
 
 
-//一般数据判重
 type weightDataMap struct {
 	lock   sync.Mutex //锁
 	data   map[string]*weightInfo
@@ -26,11 +23,10 @@ type weightDataMap struct {
 	templateid 	string 	//模板id
 }
 
-func NewWeightData(arr []string,templateid string) *weightDataMap {
+func NewWeightData(arr []string) *weightDataMap {
 	//测试-默认第一个
-	arr = []string{"5638baccaf53745d9a000994","5638baccaf53745d9a000995","5638baccaf53745d9a000998",
-		"603717b8fc702705550b8df4","603717b8fc702705550b8df5","603717b8fc702705550b8df6"}
-	weight := &weightDataMap{sync.Mutex{},map[string]*weightInfo{},[]string{},[]string{},templateid}
+
+	weight := &weightDataMap{sync.Mutex{},map[string]*weightInfo{},[]string{},[]string{},""}
 
 	data := make(map[string]*weightInfo,0)
 	for _,v:=range arr {
@@ -77,6 +73,14 @@ func analyzeTheSoureData(tmp map[string]interface{}) *weightInfo {
 	//质量评分
 	qualityScore := analyzeTheElements(tmp)
 
+
+	//测试 指定模板-数据-最高权重
+	if BsonTOStringId(tmp["_id"])=="5638baccaf53745d9a000998" {
+		maxLevel = true
+	}
+
+	delete(tmp,"_id")
+
 	return &weightInfo{
 		maxLevel,
 		minLevel,
@@ -90,15 +94,9 @@ func analyzeTheSoureData(tmp map[string]interface{}) *weightInfo {
 //分析模板数据-打标记构建数据结构
 func (weight *weightDataMap) analyzeBuildStandardData() {
 
-	//log.Print("分析前",weight.allids,weight.saveids,weight.templateid,len(weight.data))
-
 	weight.lock.Lock()
-	
 	//分析里面的打分,以及是否参与融合来决定
 	data:=weight.data
-
-	//分析不同维度的数据-排列ranking,并调换顺序
-
 	//先构建
 	arrAllIds := make([]string,0)
 	arrSaveIds := make([]string,0)
@@ -107,16 +105,11 @@ func (weight *weightDataMap) analyzeBuildStandardData() {
 	arrSiteLevel := make([]int,0)
 	arrQualityScore := make([]int,0)
 	arrRanking		:= make([]int,0) //主要排名
-
 	//无序
 	for k,v:=range data{
-		//log.Println(v)
 		//涉及前置条件,哪些数据不需要融合
 		arrAllIds =  append(arrAllIds,k)
 		arrSaveIds = append(arrSaveIds,k)
-		//
-
-
 
 		arrMaxLevel = append(arrMaxLevel,v.maxLevel)
 		arrMinLevel = append(arrMinLevel,v.minLevel)
@@ -124,15 +117,10 @@ func (weight *weightDataMap) analyzeBuildStandardData() {
 		arrSiteLevel = append(arrSiteLevel,v.siteLevel)
 		arrQualityScore = append(arrQualityScore,v.qualityScore)
 		arrRanking = append(arrRanking,v.ranking)
-
-
 	}
 
-	log.Println("初始排名:",arrRanking)
-	log.Println("初始质量:",arrQualityScore)
-	log.Println("初始站点:",arrSiteLevel)
 
-	//第一步,最大权重,重置排名
+	//第一步
 	isMaxIndexArr := make([]int,0) //记录索引
 	isMaxIndexValueArr := make([]int,0)
 	for k,v :=range  arrMaxLevel {
@@ -147,20 +135,12 @@ func (weight *weightDataMap) analyzeBuildStandardData() {
 	if len(isMaxIndexArr)>=1 {
 		log.Println("进行最大权重...")
 		rankIndexArr := dealWithGroupScores(isMaxIndexArr,isMaxIndexValueArr,arrSiteLevel)
-		//log.Println(rankIndexArr)
-		//重新排名
 		for _,v:=range rankIndexArr {
 			arrRanking[v] = rank_s
 			rank_s++
 		}
-	}else {
-		log.Println("无最大权重-质量-站点排序")
 	}
-
-	log.Println("第一步:经过最高权重比较得出--",arrRanking)
-
-
-	//第二步,最小权重,重置排名
+	//第二步
 	isMinIndexArr := make([]int,0)
 	isMinIndexValueArr := make([]int,0)
 	for k,v :=range  arrMinLevel {
@@ -171,23 +151,16 @@ func (weight *weightDataMap) analyzeBuildStandardData() {
 	}
 
 	if len(isMinIndexArr)>=1 {
-		log.Println("进行最小权重...")
 		rankIndexArr := dealWithGroupScores(isMinIndexArr,isMinIndexValueArr,arrSiteLevel)
-		//重新排名
 		lastRank := len(arrSaveIds)
-		//log.Println("最小排名分",lastRank,rankIndexArr)
 		for i:=len(rankIndexArr)-1;i>=0;i--  {
 			index:=rankIndexArr[i]
 			arrRanking[index] = lastRank
 			lastRank--
 		}
-	}else {
-		log.Println("无最小权重-质量-站点排序")
 	}
 
-	log.Println("第二步:经过最小权重比较得出--",arrRanking)
-
-	//第三步,分析第一步没排名的数据
+	//第三步
 	isQuaIndexArr := make([]int,0)
 	isQuaIndexValueArr := make([]int,0)
 	for k,v:=range arrRanking{
@@ -197,21 +170,13 @@ func (weight *weightDataMap) analyzeBuildStandardData() {
 		}
 	}
 	if len(isQuaIndexArr)>=1 {
-		log.Println("进行质量-站点组合...")
 		rankIndexArr := dealWithGroupScores(isQuaIndexArr,isQuaIndexValueArr,arrSiteLevel)
-		//log.Println(rankIndexArr)
-		//重新排名
 		for _,v:=range rankIndexArr {
 			arrRanking[v] = rank_s
 			rank_s++
 		}
-	}else {
-		log.Println("不需要进行质量-站点组合...")
 	}
 
-	log.Println("第三步:经过质量-站点权重比较得出--",arrRanking)
-
-
 	template_id:=""
 	//根据-排名-修改
 	for k,v:=range arrRanking {
@@ -231,98 +196,3 @@ func (weight *weightDataMap) analyzeBuildStandardData() {
 	weight.lock.Unlock()
 }
 
-func dealWithGroupScores(indexArr []int, scoreArr []int,siteArr []int) []int {
-
-	//log.Println("下标组",indexArr,"质量分组",scoreArr,"整体站点组",siteArr)
-	//处理分组
-	sort_scoreArr,sort_indexArr := sortGroupInt(scoreArr,indexArr)
-	//log.Println("排序质量分:",sort_scoreArr,sort_indexArr)
-
-	totalIndexArr:=make([][]int,0)
-	lastTmp := -1
-	for k,v :=range sort_scoreArr {
-		if v<lastTmp || k==0 {
-			arr_s := make([]int,0)
-			arr_i := make([]int,0)
-			for index,value :=range scoreArr {
-				if v==value {
-					arr_s = append(arr_s,value)
-					arr_i = append(arr_i,sort_indexArr[index])
-				}
-			}
-			totalIndexArr = append(totalIndexArr,arr_i)
-			lastTmp = v
-		}
-	}
-
-	finallyIndexArr := make([]int,0)
-	for _,v:=range totalIndexArr{
-		if len(v)>1 {
-			//[6 3 4]
-			arr_s :=make([]int,0)
-			for _,v1:=range v{
-				arr_s = append(arr_s,siteArr[v1])
-			}
-			_,b:=sortGroupInt(arr_s,v)
-			for _,v2:=range b {
-				finallyIndexArr = append(finallyIndexArr,v2)
-			}
-		}else {
-			finallyIndexArr = append(finallyIndexArr,v[0])
-		}
-	}
-	return finallyIndexArr
-}
-
-//排序 正常排序 ,站点
-func sortNormalInt(arrValue []int) ([]int){
-	for i := 0; i < len(arrValue); i++ {
-		for j := i + 1; j < len(arrValue); j++ {
-			if arrValue[i] < arrValue[j] {
-				arrValue[i], arrValue[j] = arrValue[j], arrValue[i]
-			}
-		}
-	}
-	return arrValue
-}
-
-
-//排序 质量,分组
-func sortGroupInt(arrValue []int,arrIndex []int) ([]int ,[]int){
-
-	for i := 0; i < len(arrValue); i++ {
-		for j := i + 1; j < len(arrValue); j++ {
-			if arrValue[i] < arrValue[j] {
-				arrValue[i], arrValue[j] = arrValue[j], arrValue[i]
-				arrIndex[i], arrIndex[j] = arrIndex[j], arrIndex[i]
-			}
-		}
-	}
-	return arrValue,arrIndex
-}
-
-
-
-
-
-
-
-//分析站点评分
-func analyzeTheSite(tmp map[string]interface{}) int {
-	/*
-		站点评分1-5级
-	*/
-	//测试随机分
-	rand.Seed(time.Now().UnixNano()) //以当前系统时间作为种子参数
-	return rand.Intn(10)
-}
-
-//分析要素评分
-func analyzeTheElements(tmp map[string]interface{}) int {
-	/*
-		质量评分总分
-	*/
-	//测试随机分
-	rand.Seed(time.Now().UnixNano()) //以当前系统时间作为种子参数
-	return rand.Intn(100)
-}

+ 98 - 0
udpfusion/src/weightValueMethod.go

@@ -0,0 +1,98 @@
+package main
+
+import (
+	"math/rand"
+	"time"
+)
+
+func dealWithGroupScores(indexArr []int, scoreArr []int,siteArr []int) []int {
+
+	//log.Println("下标组",indexArr,"质量分组",scoreArr,"整体站点组",siteArr)
+	//处理分组
+	sort_scoreArr,sort_indexArr := sortGroupInt(scoreArr,indexArr)
+	//log.Println("排序质量分:",sort_scoreArr,sort_indexArr)
+
+	totalIndexArr:=make([][]int,0)
+	lastTmp := -1
+	for k,v :=range sort_scoreArr {
+		if v<lastTmp || k==0 {
+			arr_s := make([]int,0)
+			arr_i := make([]int,0)
+			for index,value :=range scoreArr {
+				if v==value {
+					arr_s = append(arr_s,value)
+					arr_i = append(arr_i,sort_indexArr[index])
+				}
+			}
+			totalIndexArr = append(totalIndexArr,arr_i)
+			lastTmp = v
+		}
+	}
+
+	finallyIndexArr := make([]int,0)
+	for _,v:=range totalIndexArr{
+		if len(v)>1 {
+			//[6 3 4]
+			arr_s :=make([]int,0)
+			for _,v1:=range v{
+				arr_s = append(arr_s,siteArr[v1])
+			}
+			_,b:=sortGroupInt(arr_s,v)
+			for _,v2:=range b {
+				finallyIndexArr = append(finallyIndexArr,v2)
+			}
+		}else {
+			finallyIndexArr = append(finallyIndexArr,v[0])
+		}
+	}
+	return finallyIndexArr
+}
+
// sortNormalInt sorts arrValue in place from largest to smallest (used for
// site levels) and returns the same slice.
func sortNormalInt(arrValue []int) []int {
	size := len(arrValue)
	for pos := 0; pos < size; pos++ {
		// Locate the largest remaining value and move it to pos.
		best := pos
		for probe := pos + 1; probe < size; probe++ {
			if arrValue[probe] > arrValue[best] {
				best = probe
			}
		}
		if best != pos {
			arrValue[pos], arrValue[best] = arrValue[best], arrValue[pos]
		}
	}
	return arrValue
}
+
+
// sortGroupInt sorts arrValue in place from largest to smallest and applies
// every swap to arrIndex as well, so arrIndex[i] keeps describing
// arrValue[i]. Both (now reordered) slices are returned.
//
// arrIndex must be at least as long as arrValue.
func sortGroupInt(arrValue []int, arrIndex []int) ([]int, []int) {
	total := len(arrValue)
	for left := range arrValue {
		for right := left + 1; right < total; right++ {
			if arrValue[left] >= arrValue[right] {
				continue
			}
			arrValue[left], arrValue[right] = arrValue[right], arrValue[left]
			arrIndex[left], arrIndex[right] = arrIndex[right], arrIndex[left]
		}
	}
	return arrValue, arrIndex
}
+
+
+
// analyzeTheSite returns the site score of a record.
//
// The intended scale is site levels 1-5; for now this is a test placeholder
// that ignores tmp and returns a random value in [0, 10).
func analyzeTheSite(tmp map[string]interface{}) int {
	// Seed with the current system time before drawing the test score.
	seed := time.Now().UnixNano()
	rand.Seed(seed)
	score := rand.Intn(10)
	return score
}
+
// analyzeTheElements returns the total quality (element) score of a record.
//
// For now this is a test placeholder that ignores tmp and returns a random
// value in [0, 100).
func analyzeTheElements(tmp map[string]interface{}) int {
	// Seed with the current system time before drawing the test score.
	seed := time.Now().UnixNano()
	rand.Seed(seed)
	score := rand.Intn(100)
	return score
}