基于内存的信息重复过滤 "extract": "result_file_20200410", "extract_back": "result_file_20200409", { "udpport": ":11485", "dupdays": 7, "mongodb": { "addr": "172.17.4.187:27083", "pool": 10, "db": "qfw", "extract": "result_file_20200410", "extract_back": "result_file_20200409", "site": { "dbname": "qfw", "coll": "site" } }, "jkmail": { "to": "zhangjinkun@topnet.net.cn", "api": "http://10.171.112.160:19281/_send/_mail" }, "nextNode": [ { "addr": "172.17.145.179", "port": 1782, "stype": "project", "memo": "合并项目" }, { "addr": "127.0.0.1", "port": 1783, "stype": "bidding", "memo": "创建招标数据索引new" } ], "threads": 1, "isMerger": false, "isSort":false, "lowHeavy":false, "timingTask":true, "timingSpanDay": 3, "timingPubScope": 720, "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)", "specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?", "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)", "specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]", "beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研" } { "udpport": ":1785", "dupdays": 5, "mongodb": { "addr": "172.17.4.187:27083", "pool": 5, "db": "qfw", "extract": "result_file_20200410", "extract_back": "result_file_20200409", "site": { "dbname": "qfw", "coll": "site" } }, "jkmail": { "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn", "api": "http://10.171.112.160:19281/_send/_mail" }, "nextNode": [ { "addr": "172.17.145.179", "port": 1782, "stype": "project", "memo": "合并项目" }, { "addr": "127.0.0.1", "port": 1783, "stype": "bidding", "memo": "创建招标数据索引new" } ], "threads": 1, "isMerger": false, "isSort":true, "lowHeavy":false, "timingTask":false, "timingSpanDay": 3, "timingPubScope": 720, "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)", "specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?", "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)", "specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]", "beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研" } //basic_bool := basicDataScore(source, info) //if basic_bool { // //已原始数据为标准 - 对比数据打判重标签- // newData, mergeArr, is_replace = mergeDataFields(source, info) // //对比数据打重复标签的id,原始数据id的记录 // repeat_idMap["_id"] = StringTOBsonId(info.id) // merge_idMap["_id"] = StringTOBsonId(source.id) // // if IdType { // repeat_idMap["_id"] = info.id // merge_idMap["_id"] = source.id // } // repeat_id = source.id //} else { // //已对比数据为标准 ,数据池的数据打判重标签 // newData, mergeArr, is_replace = mergeDataFields(info, source) // DM.replaceSourceData(newData, source) //替换 // //原始数据打重复标签的id, 对比数据id的记录 // repeat_idMap["_id"] = StringTOBsonId(source.id) // merge_idMap["_id"] = StringTOBsonId(info.id) // if IdType { // repeat_idMap["_id"] = source.id // merge_idMap["_id"] = info.id // } // repeat_id = info.id //} //basic_bool := basicDataScore(source, info) //if !basic_bool { // DM.replaceSourceData(info, source) //替换 // repeat_idMap["_id"] = StringTOBsonId(source.id) // if IdType { // repeat_idMap["_id"] = source.id // } // repeat_id = info.id // if len(ids)>=9 { // ids=append(ids,source.id) // // // for _, to := range nextNode { // // key := source.id + "-" + source.id + "-" + util.ObjToString(to["stype"]) // by, _ := json.Marshal(map[string]interface{}{ // "gtid": source.id, // "lteid": source.id, // "stype": util.ObjToString(to["stype"]), // "key": key, // "ids": strings.Join(ids, ","), // }) // addr := &net.UDPAddr{ // IP: net.ParseIP(to["addr"].(string)), // Port: util.IntAll(to["port"]), // } // node := &udpNode{by, addr, time.Now().Unix(), 0} // udptaskmap.Store(key, node) // udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr) // } // // // // ids = []string{} // }else { // ids=append(ids,source.id) // } // //} if isMerger { //合并相关 newData, mergeArr, is_replace := mergeDataFields(source, info) merge_map := make(map[string]interface{}, 0) if is_replace { //支持合并-更新数据 merge_map = map[string]interface{}{ "$set": map[string]interface{}{ "merge": newData.mergemap, }, } //更新合并后的数据 for _, value := range mergeArr { if value == 0 { merge_map["$set"].(map[string]interface{})["area"] = newData.area merge_map["$set"].(map[string]interface{})["city"] = newData.city } else if value == 1 { merge_map["$set"].(map[string]interface{})["area"] = newData.area merge_map["$set"].(map[string]interface{})["city"] = newData.city } else if value == 2 { merge_map["$set"].(map[string]interface{})["projectname"] = newData.projectname } else if value == 3 { merge_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode } else if value == 4 { merge_map["$set"].(map[string]interface{})["buyer"] = newData.buyer } else if value == 5 { merge_map["$set"].(map[string]interface{})["budget"] = newData.budget } else if value == 6 { merge_map["$set"].(map[string]interface{})["winner"] = newData.winner } else if value == 7 { merge_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount } else if value == 8 { merge_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime } else if value == 9 { merge_map["$set"].(map[string]interface{})["contractnumber"] = newData.contractnumber } else if value == 10 { merge_map["$set"].(map[string]interface{})["publishtime"] = newData.publishtime } else if value == 11 { merge_map["$set"].(map[string]interface{})["agency"] = newData.agency } else { } } //模板数据更新 updateExtract = append(updateExtract, []map[string]interface{}{ merge_idMap, merge_map, }) } }