apple d40d0f3d74 判重 合并 5 years ago
..
README.md 4627cc9c24 备份 5 years ago
config.json 4627cc9c24 备份 5 years ago
dataMethod.go 4627cc9c24 备份 5 years ago
dataMethodHeavy.go 4627cc9c24 备份 5 years ago
dataMethodMerge.go 4627cc9c24 备份 5 years ago
datamap.go 4627cc9c24 备份 5 years ago
main.go d40d0f3d74 判重 合并 5 years ago
mgo.go 4627cc9c24 备份 5 years ago
udptaskmap.go 4627cc9c24 备份 5 years ago

README.md

基于内存的信息重复过滤

"extract": "result_file_20200410", "extract_back": "result_file_20200409",

{

"udpport": ":11485",
"dupdays": 7,
"mongodb": {
    "addr": "172.17.4.187:27083",
    "pool": 10,
    "db": "qfw",
    "extract": "result_file_20200410",
    "extract_back": "result_file_20200409",
    "site": {
        "dbname": "qfw",
        "coll": "site"
    }
},
"jkmail": {
    "to": "zhangjinkun@topnet.net.cn",
    "api": "http://10.171.112.160:19281/_send/_mail"
},
"nextNode": [
    {
        "addr": "172.17.145.179",
        "port": 1782,
        "stype": "project",
        "memo": "合并项目"
    },
    {
        "addr": "127.0.0.1",
        "port": 1783,
        "stype": "bidding",
        "memo": "创建招标数据索引new"
    }
],

"threads": 1,
"isMerger": false,
"isSort":false,
"lowHeavy":false,
"timingTask":true,
"timingSpanDay": 3,
"timingPubScope": 720,
"specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
"specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
"specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)",
"specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]",
"beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"

}

{

"udpport": ":1785",
"dupdays": 5,
"mongodb": {
    "addr": "172.17.4.187:27083",
    "pool": 5,
    "db": "qfw",
    "extract": "result_file_20200410",
    "extract_back": "result_file_20200409",
    "site": {
        "dbname": "qfw",
        "coll": "site"
    }
},
"jkmail": {
    "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn",
    "api": "http://10.171.112.160:19281/_send/_mail"
},
"nextNode": [
    {
        "addr": "172.17.145.179",
        "port": 1782,
        "stype": "project",
        "memo": "合并项目"
    },
    {
        "addr": "127.0.0.1",
        "port": 1783,
        "stype": "bidding",
        "memo": "创建招标数据索引new"
    }
],
"threads": 1,
"isMerger": false,
"isSort":true,
"lowHeavy":false,
"timingTask":false,
"timingSpanDay": 3,
"timingPubScope": 720,
"specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
"specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
"specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)",
"specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]",
"beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"

}

//basic_bool := basicDataScore(source, info)

                //if basic_bool {
                //  //以原始数据为标准 - 对比数据打判重标签
                //  newData, mergeArr, is_replace = mergeDataFields(source, info)
                //  //对比数据打重复标签的id,原始数据id的记录
                //  repeat_idMap["_id"] = StringTOBsonId(info.id)
                //  merge_idMap["_id"] = StringTOBsonId(source.id)
                //
                //  if IdType {
                //      repeat_idMap["_id"] = info.id
                //      merge_idMap["_id"] = source.id
                //  }
                //  repeat_id = source.id
                //} else {
                //  //以对比数据为标准,数据池的数据打判重标签
                //  newData, mergeArr, is_replace = mergeDataFields(info, source)
                //  DM.replaceSourceData(newData, source) //替换
                //  //原始数据打重复标签的id,   对比数据id的记录
                //  repeat_idMap["_id"] = StringTOBsonId(source.id)
                //  merge_idMap["_id"] = StringTOBsonId(info.id)
                //  if IdType {
                //      repeat_idMap["_id"] = source.id
                //      merge_idMap["_id"] = info.id
                //  }
                //  repeat_id = info.id
                //}

//basic_bool := basicDataScore(source, info)

                //if !basic_bool {
                //  DM.replaceSourceData(info, source) //替换
                //  repeat_idMap["_id"] = StringTOBsonId(source.id)
                //  if IdType {
                //      repeat_idMap["_id"] = source.id
                //  }
                //  repeat_id = info.id
                //  if len(ids)>=9 {
                //      ids=append(ids,source.id)
                //
                //
                //      for _, to := range nextNode {
                //
                //          key := source.id + "-" + source.id + "-" + util.ObjToString(to["stype"])
                //          by, _ := json.Marshal(map[string]interface{}{
                //              "gtid":  source.id,
                //              "lteid": source.id,
                //              "stype": util.ObjToString(to["stype"]),
                //              "key":   key,
                //              "ids":   strings.Join(ids, ","),
                //          })
                //          addr := &net.UDPAddr{
                //              IP:   net.ParseIP(to["addr"].(string)),
                //              Port: util.IntAll(to["port"]),
                //          }
                //          node := &udpNode{by, addr, time.Now().Unix(), 0}
                //          udptaskmap.Store(key, node)
                //          udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
                //      }
                //
                //      //
                //      ids = []string{}
                //  }else {
                //      ids=append(ids,source.id)
                //  }
                //
                //}

if isMerger { // Merge handling: skipped entirely when merging is disabled.

                // mergeDataFields yields the merged record, a list of field codes,
                // and a flag saying whether an update should be written at all.
                // NOTE(review): the codes presumably identify which fields were
                // taken from the other record — confirm against mergeDataFields.
                merged, fieldCodes, shouldUpdate := mergeDataFields(source, info)
                if shouldUpdate { // Merge is applicable: build the update document.
                    // Body of the "$set" update; the merge map is always written.
                    setDoc := map[string]interface{}{
                        "merge": merged.mergemap,
                    }
                    // Copy each field selected by a code from the merged record.
                    // Codes outside 0-11 are ignored.
                    for _, code := range fieldCodes {
                        switch code {
                        case 0, 1: // Both codes refresh the area/city pair.
                            setDoc["area"] = merged.area
                            setDoc["city"] = merged.city
                        case 2:
                            setDoc["projectname"] = merged.projectname
                        case 3:
                            setDoc["projectcode"] = merged.projectcode
                        case 4:
                            setDoc["buyer"] = merged.buyer
                        case 5:
                            setDoc["budget"] = merged.budget
                        case 6:
                            setDoc["winner"] = merged.winner
                        case 7:
                            setDoc["bidamount"] = merged.bidamount
                        case 8:
                            setDoc["bidopentime"] = merged.bidopentime
                        case 9:
                            setDoc["contractnumber"] = merged.contractnumber
                        case 10:
                            setDoc["publishtime"] = merged.publishtime
                        case 11:
                            setDoc["agency"] = merged.agency
                        }
                    }
                    // Queue the update as a [selector, update] pair.
                    updateExtract = append(updateExtract, []map[string]interface{}{
                        merge_idMap,
                        {"$set": setDoc},
                    })
                }
            }