123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- package main
- import (
- "fmt"
- "go.mongodb.org/mongo-driver/bson/primitive"
- qu "qfw/util"
- "time"
- )
- //处理融合数据-返回,融合新数据数据-新增
- func (weight *weightDataMap) dealWithAddFusionStruct ()(map[string]interface{},map[string]interface{}){
- //指定模板数据dict-单条数据
- dict :=weight.data[weight.templateid].data
- //最早发布时间 (小)
- dict["early_publishtime"] = qu.IntAll(dict["publishtime"])
- //最近发布时间 (大)
- dict["lately_publishtime"] = qu.IntAll(dict["publishtime"])
- //最早入库时间 (小)
- dict["early_comeintime"] = qu.IntAll(dict["comeintime"])
- //最近入库时间 (大)
- dict["lately_comeintime"] = qu.IntAll(dict["comeintime"])
- //当前更新时间
- dict["current_updatetime"] = qu.IntAll(time.Now().Unix())
- //融合生成时间
- dict["current_updatetime"] = qu.IntAll(time.Now().Unix())
- //所有相关联ids
- dict["fusion_allids"] = weight.allids
- //融合保存相关联ids
- dict["fusion_saveids"] = weight.saveids
- //标准id
- dict["fusion_templateid"] = weight.templateid
- //站点,质量分
- dict["fusion_score"] = weight.saveLevelAndScoreRecord()
- //记录所有href
- dict["fusion_all_hrefs"] = weight.allhrefs
- //当前融合源id
- dict["fusion_sourceid"] = weight.sourceid
- //采用新增id
- delete(dict,"_id")
- //日志相关处理
- recordDict := map[string]interface{}{
- "1":map[string]interface{}{
- "data":map[string]interface{}{},
- "sourceid":weight.sourceid,
- "templateid":weight.templateid,
- "allids":weight.allids,
- "snapshot":map[string]interface{}{},
- },
- "number":qu.Int64All(1),
- }
- return dict,recordDict
- }
- //处理多条融合数据 - 多组新增类
- func (weight *weightDataMap) dealWithMultipleAddFusionStruct ()(map[string]interface{},map[string]interface{}){
- //指定模板数据dict
- dict :=weight.data[weight.templateid].data
- //最早|近发布时间
- dict["early_publishtime"],dict["lately_publishtime"] = weight.dealWithTimeData("publishtime")
- //最早|近入库时间
- dict["early_comeintime"],dict["lately_comeintime"] = weight.dealWithTimeData("comeintime")
- //所有相关联ids
- dict["fusion_allids"] = weight.allids
- //融合保存相关联ids
- dict["fusion_saveids"] = weight.saveids
- //融合模板
- dict["fusion_templateid"] = weight.templateid
- //站点,质量分
- dict["fusion_score"] = weight.saveLevelAndScoreRecord()
- //当前更新时间
- dict["fusion_time"] = qu.IntAll(time.Now().Unix())
- //融合生成时间
- dict["fusion_updatetime"] = qu.IntAll(time.Now().Unix())
- //记录所有href
- dict["fusion_all_hrefs"] = weight.allhrefs
- //当前融合源id
- dict["fusion_sourceid"] = weight.sourceid
- //采用新增id
- delete(dict,"_id")
- //发生融合的数据
- new_data := weight.dealWithAllFusionFieldData()
- for k,v:=range new_data {
- value:= *qu.ObjToMap(v)
- dict[k] = value["value"]
- }
- //日志记录
- recordDict := map[string]interface{}{
- "1":map[string]interface{}{
- "data":new_data,
- "sourceid":weight.sourceid,
- "templateid":weight.templateid,
- "allids":weight.allids,
- "snapshot":map[string]interface{}{},
- },
- "number":qu.Int64All(1),
- }
- //融合表记录的字段
- dict["fusion_fields"] = weight.saveChangeFields(new_data)
- //返回,更新数据,日志记录数据
- return dict,recordDict
- }
- //处理多条融合数据-返回融合新数据,融合更新细节数据
- func (weight *weightDataMap) dealWithMultipleUpdateFusionStruct (fusion string)(map[string]interface{},map[string]interface{}){
- //指定模板数据dict
- dict :=weight.data[weight.templateid].data
- //最早|近发布时间
- dict["early_publishtime"],dict["lately_publishtime"] = weight.dealWithTimeData("publishtime")
- //最早|近入库时间
- dict["early_comeintime"],dict["lately_comeintime"] = weight.dealWithTimeData("comeintime")
- //所有相关联ids
- dict["fusion_allids"] = weight.allids
- //融合保存相关联ids
- dict["fusion_saveids"] = weight.saveids
- //融合模板
- dict["fusion_templateid"] = weight.templateid
- //记录所有href
- dict["fusion_all_hrefs"] = weight.allhrefs
- //站点,质量分
- dict["fusion_score"] = weight.saveLevelAndScoreRecord()
- //融合生成时间-取融合表第一次融合时间
- dict["fusion_time"] = tmpData["fusion_time"]
- //当前更新时间
- dict["fusion_updatetime"] = qu.IntAll(time.Now().Unix())
- //当前融合源id
- dict["fusion_sourceid"] = weight.sourceid
- //删除_id
- delete(dict,"_id")
- //发生融合的数据
- new_data := weight.dealWithAllFusionFieldData()
- for k,v:=range new_data {
- value:= *qu.ObjToMap(v)
- dict[k] = value["value"]
- }
- //日志记录-更新
- recordData := mgo.FindById(record_coll_name,BsonTOStringId(tmpData["_id"]))
- number:=qu.Int64All(recordData["number"])
- number++
- key:=fmt.Sprintf("%d",number)
- recordDict := map[string]interface{}{
- key:map[string]interface{}{
- "data":new_data,
- "snapshot":tmpData, //快照页面... ...
- "sourceid":weight.sourceid,
- "templateid":weight.templateid,
- "allids":weight.allids,
- },
- "number":qu.Int64All(1),
- }
- //融合表记录的字段
- dict["fusion_fields"] = weight.saveChangeFields(new_data)
- //返回,更新数据,日志记录数据
- return dict,recordDict
- }
- /*字段处理方法*/
- func (weight *weightDataMap)dealWithAllFusionFieldData()map[string]interface{} {
- //模板id 数据
- templateid := weight.templateid
- templateTmp := weight.data[templateid].data
- modifyData := make(map[string]interface{}, 0) //返回修改的数据
- //前置处理 - 结构化数据 - Arr
- structArrData := *qu.ObjToMap(fusionAllKey["Arr"])
- for key,_ := range structArrData {
- count:=qu.IntAll(0)
- arr,b,isOK,arr_id:=make(primitive.A,0),false,false,templateid
- if arr,b = templateTmp[key].([]interface{});b {
- count = qu.IntAll(len(arr))
- }
- for _,cur_id:=range weight.saveids {
- if templateid == cur_id {
- continue
- }
- tmp:=weight.data[cur_id].data
- if arr_1,isTrue := tmp[key].(primitive.A);isTrue {
- count_1:=qu.IntAll(len(arr_1))
- if count_1 > count {
- count = count_1
- arr = arr_1
- isOK = true
- arr_id = cur_id
- }
- }
- }
- if len(arr)>0 && arr!=nil && isOK { //有改变的值
- modifyData[key] = map[string]interface{}{
- "id":arr_id,
- "value":arr,
- }
- }
- }
- //第一步融合模板无效值
- invalidKeyArr := make([]string,0) //无效
- for key,value:=range templateTmp {
- if judgeIsFusionKey(key) {
- if !judgeIsEffectiveData(value,key) { //存在key且无效
- invalidKeyArr = append(invalidKeyArr,key)
- }
- }
- }
- if invalidKeyArr!=nil && len(invalidKeyArr)>0 {
- for _,key:=range invalidKeyArr {
- isRank := 2
- L: for {
- for _,v:=range weight.saveids {
- if v == templateid {
- continue
- }
- dataInfo:=weight.data[v]
- if dataInfo.ranking==isRank { //找到指定排名-字段数据
- value:=dataInfo.data[key]
- if value !=nil && judgeIsEffectiveData(value,key) {
- templateTmp[key] = value
- modifyData[key] = map[string]interface{}{
- "id":v,
- "value":value,
- }
- break L
- }
- break
- }
- }
- isRank++
- if isRank > len(weight.saveids) {
- break L
- }
- }
- }
- }
- //第二步-集合最大化
- isRank := 2
- for { //不断遍历,找到其他排名数据
- for _,v:=range weight.saveids {
- if v == templateid {
- continue
- }
- dataInfo:=weight.data[v]
- if dataInfo.ranking==isRank { //找到指定排名数据
- for key,newValue:=range dataInfo.data{
- if key!="_id" && judgeIsFusionKey(key) && templateTmp[key]==nil {
- if judgeIsEffectiveData(newValue,key) {
- templateTmp[key] = newValue
- modifyData[key] = map[string]interface{}{
- "id":v,
- "value":newValue,
- }
- }
- }
- }
- break
- }
- }
- isRank++
- if isRank > len(weight.saveids) {
- break
- }
- }
- return modifyData
- }
- /*
- 以下方法-记录统计
- ******************************
- ******************************
- */
- func (weight *weightDataMap) saveLevelAndScoreRecord () map[string]interface{}{
- dict := make(map[string]interface{},0)
- saveids:= weight.saveids
- for _,v:=range saveids{
- dict[v] = map[string]interface{}{
- "score":weight.data[v].qualityScore,
- "level":weight.data[v].siteLevel,
- }
- }
- return dict
- }
- //处理-融合的哪些字段记录
- func (weight *weightDataMap)saveChangeFields(data map[string]interface{}) map[string]interface{} {
- fieldCal := make(map[string]interface{},0)
- for k,v:=range data{
- if k=="attach_text" {
- if attachArr,b := v.([]map[string]interface{});b {
- for _,v1:=range attachArr {
- dict := *qu.ObjToMap(v1)
- tmp_id := qu.ObjToString(dict["id"])
- if fieldCal[tmp_id]==nil {
- href:=qu.ObjToString(weight.data[tmp_id].data["href"])
- fieldCal[tmp_id] = []interface{}{href,k}
- }else {
- arr := fieldCal[tmp_id].([]interface{})
- arr = append(arr,k)
- fieldCal[tmp_id] = arr
- }
- }
- }
- }else {
- dict := *qu.ObjToMap(v)
- tmp_id := qu.ObjToString(dict["id"])
- if fieldCal[tmp_id]==nil {
- href:=qu.ObjToString(weight.data[tmp_id].data["href"])
- fieldCal[tmp_id] = []interface{}{href,k}
- }else {
- arr := fieldCal[tmp_id].([]interface{})
- arr = append(arr,k)
- fieldCal[tmp_id] = arr
- }
- }
- }
- return fieldCal
- }
|