|
5 years ago | |
---|---|---|
.. | ||
README.md | 5 years ago | |
config.json | 5 years ago | |
dataMethod.go | 5 years ago | |
dataMethodHeavy.go | 5 years ago | |
dataMethodMerge.go | 5 years ago | |
datamap.go | 5 years ago | |
main.go | 5 years ago | |
mgo.go | 5 years ago | |
udptaskmap.go | 5 years ago |
基于内存的信息重复过滤 "extract": "result_file_20200410", "extract_back": "result_file_20200409",
{
"udpport": ":11485",
"dupdays": 7,
"mongodb": {
"addr": "172.17.4.187:27083",
"pool": 10,
"db": "qfw",
"extract": "result_file_20200410",
"extract_back": "result_file_20200409",
"site": {
"dbname": "qfw",
"coll": "site"
}
},
"jkmail": {
"to": "zhangjinkun@topnet.net.cn",
"api": "http://10.171.112.160:19281/_send/_mail"
},
"nextNode": [
{
"addr": "172.17.145.179",
"port": 1782,
"stype": "project",
"memo": "合并项目"
},
{
"addr": "127.0.0.1",
"port": 1783,
"stype": "bidding",
"memo": "创建招标数据索引new"
}
],
"threads": 1,
"isMerger": false,
"isSort":false,
"lowHeavy":false,
"timingTask":true,
"timingSpanDay": 3,
"timingPubScope": 720,
"specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
"specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
"specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)",
"specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]",
"beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"
}
{
"udpport": ":1785",
"dupdays": 5,
"mongodb": {
"addr": "172.17.4.187:27083",
"pool": 5,
"db": "qfw",
"extract": "result_file_20200410",
"extract_back": "result_file_20200409",
"site": {
"dbname": "qfw",
"coll": "site"
}
},
"jkmail": {
"to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn",
"api": "http://10.171.112.160:19281/_send/_mail"
},
"nextNode": [
{
"addr": "172.17.145.179",
"port": 1782,
"stype": "project",
"memo": "合并项目"
},
{
"addr": "127.0.0.1",
"port": 1783,
"stype": "bidding",
"memo": "创建招标数据索引new"
}
],
"threads": 1,
"isMerger": false,
"isSort":true,
"lowHeavy":false,
"timingTask":false,
"timingSpanDay": 3,
"timingPubScope": 720,
"specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
"specialtitle_0": "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789][))]?",
"specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包|批)",
"specialtitle_2": "项目[((][0-9a-zA-Z一二三四五六七八九十零123456789][))]",
"beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"
}
//basic_bool := basicDataScore(source, info)
//if basic_bool {
// //Treat the original (source) data as the standard; tag the compared data as the duplicate
// newData, mergeArr, is_replace = mergeDataFields(source, info)
// //对比数据打重复标签的id,原始数据id的记录
// repeat_idMap["_id"] = StringTOBsonId(info.id)
// merge_idMap["_id"] = StringTOBsonId(source.id)
//
// if IdType {
// repeat_idMap["_id"] = info.id
// merge_idMap["_id"] = source.id
// }
// repeat_id = source.id
//} else {
// //Treat the compared data as the standard; tag the record in the data pool as the duplicate
// newData, mergeArr, is_replace = mergeDataFields(info, source)
// DM.replaceSourceData(newData, source) //替换
// //原始数据打重复标签的id, 对比数据id的记录
// repeat_idMap["_id"] = StringTOBsonId(source.id)
// merge_idMap["_id"] = StringTOBsonId(info.id)
// if IdType {
// repeat_idMap["_id"] = source.id
// merge_idMap["_id"] = info.id
// }
// repeat_id = info.id
//}
//basic_bool := basicDataScore(source, info)
//if !basic_bool {
// DM.replaceSourceData(info, source) //替换
// repeat_idMap["_id"] = StringTOBsonId(source.id)
// if IdType {
// repeat_idMap["_id"] = source.id
// }
// repeat_id = info.id
// if len(ids)>=9 {
// ids=append(ids,source.id)
//
//
// for _, to := range nextNode {
//
// key := source.id + "-" + source.id + "-" + util.ObjToString(to["stype"])
// by, _ := json.Marshal(map[string]interface{}{
// "gtid": source.id,
// "lteid": source.id,
// "stype": util.ObjToString(to["stype"]),
// "key": key,
// "ids": strings.Join(ids, ","),
// })
// addr := &net.UDPAddr{
// IP: net.ParseIP(to["addr"].(string)),
// Port: util.IntAll(to["port"]),
// }
// node := &udpNode{by, addr, time.Now().Unix(), 0}
// udptaskmap.Store(key, node)
// udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
// }
//
// //
// ids = []string{}
// }else {
// ids=append(ids,source.id)
// }
//
//}
// Merge handling: when merging is enabled, combine source and info via
// mergeDataFields and, if it reports a replacement, queue an update document
// for the merged record.
if isMerger {
	merged, fieldCodes, replaced := mergeDataFields(source, info)
	if replaced {
		// Everything placed in setDoc ends up under the "$set" key of the
		// update document; the merge map is always included.
		setDoc := map[string]interface{}{
			"merge": merged.mergemap,
		}
		// Each code returned by mergeDataFields selects which field(s) of
		// the merged record are copied into the update.
		for _, code := range fieldCodes {
			switch code {
			case 0, 1:
				// Both codes refresh the area/city pair together.
				setDoc["area"] = merged.area
				setDoc["city"] = merged.city
			case 2:
				setDoc["projectname"] = merged.projectname
			case 3:
				setDoc["projectcode"] = merged.projectcode
			case 4:
				setDoc["buyer"] = merged.buyer
			case 5:
				setDoc["budget"] = merged.budget
			case 6:
				setDoc["winner"] = merged.winner
			case 7:
				setDoc["bidamount"] = merged.bidamount
			case 8:
				setDoc["bidopentime"] = merged.bidopentime
			case 9:
				setDoc["contractnumber"] = merged.contractnumber
			case 10:
				setDoc["publishtime"] = merged.publishtime
			case 11:
				setDoc["agency"] = merged.agency
			}
			// Any other code is ignored.
		}
		updateDoc := map[string]interface{}{
			"$set": setDoc,
		}
		// Queue the pair [merge_idMap, update document] on updateExtract.
		updateExtract = append(updateExtract, []map[string]interface{}{
			merge_idMap,
			updateDoc,
		})
	}
}