|
@@ -36,9 +36,10 @@ var (
|
|
|
dupdays = 5 //初始化判重范围
|
|
|
DM *datamap //
|
|
|
HM *historymap //判重数据
|
|
|
- lastid = "5da3f2c5a5cb26b9b79847fe"
|
|
|
+ lastid = "5d767728a5cb26b9b7748868"
|
|
|
//5da3f2c5a5cb26b9b79847fc
|
|
|
-
|
|
|
+ //ObjectId("5d767728a5cb26b9b7748868")
|
|
|
+ //5da3f2c5a5cb26b9b79847fe
|
|
|
//正则筛选相关
|
|
|
FilterRegTitle = regexp.MustCompile("^_$")
|
|
|
FilterRegTitle_1 = regexp.MustCompile("^_$")
|
|
@@ -247,7 +248,7 @@ func mainTest() {
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
- return
|
|
|
+
|
|
|
go checkMapJob()
|
|
|
|
|
|
updport := Sysconfig["udpport"].(string)
|
|
@@ -349,15 +350,15 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
}else {
|
|
|
//判重原因 reason
|
|
|
// tmp["_id"] 对比id id原始id
|
|
|
+ mapLock.Lock()
|
|
|
b, source,reason := DM.check(info)
|
|
|
if b { //有重复,生成更新语句,更新抽取和更新招标
|
|
|
repeateN++
|
|
|
- mapLock.Lock()
|
|
|
-
|
|
|
var mergeArr []int64 //更改合并数组记录
|
|
|
var newData *Info //更换新的数据池数据
|
|
|
|
|
|
var id_map = map[string]interface{}{}
|
|
|
+ repeat_id := ""
|
|
|
//合并操作--评功权重打分-合并完替换原始数据池
|
|
|
basic_bool := basicDataScore(source,info)
|
|
|
if basic_bool {
|
|
@@ -365,48 +366,55 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
newData,mergeArr= mergeDataFields(source,info)
|
|
|
DM.replaceSourceData(newData,source.id) //替换
|
|
|
id_map["_id"]= util.StringTOBsonId(source.id)
|
|
|
-
|
|
|
+ repeat_id = source.id
|
|
|
//对比的数据打判重标签
|
|
|
- updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
- map[string]interface{}{
|
|
|
- "_id": tmp["_id"],
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat": 1,
|
|
|
- "repeatid": source.id,
|
|
|
- },
|
|
|
- },
|
|
|
- })
|
|
|
-
|
|
|
-
|
|
|
+ //updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
+ // map[string]interface{}{
|
|
|
+ // "_id": tmp["_id"],
|
|
|
+ // },
|
|
|
+ // map[string]interface{}{
|
|
|
+ // "$set": map[string]interface{}{
|
|
|
+ // "repeat": 1,
|
|
|
+ // "repeatid": source.id,
|
|
|
+ // },
|
|
|
+ // },
|
|
|
+ //})
|
|
|
+ //if len(updateExtract) > 500 {
|
|
|
+ // mgo.UpdateBulk(extract, updateExtract...)
|
|
|
+ // updateExtract = [][]map[string]interface{}{}
|
|
|
+ //}
|
|
|
|
|
|
}else {
|
|
|
//已对比数据为标准 ,数据池的数据打判重标签
|
|
|
newData,mergeArr= mergeDataFields(info,source)
|
|
|
DM.replaceSourceData(newData,source.id)//替换
|
|
|
id_map["_id"]= util.StringTOBsonId(info.id)
|
|
|
-
|
|
|
+ repeat_id = info.id
|
|
|
//数据池的数据打判重标签
|
|
|
- updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
- map[string]interface{}{
|
|
|
- "_id": util.StringTOBsonId(source.id),
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat": 1,
|
|
|
- "repeatid": info.id,
|
|
|
- },
|
|
|
- },
|
|
|
- })
|
|
|
+ //updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
+ // map[string]interface{}{
|
|
|
+ // "_id": util.StringTOBsonId(source.id),
|
|
|
+ // },
|
|
|
+ // map[string]interface{}{
|
|
|
+ // "$set": map[string]interface{}{
|
|
|
+ // "repeat": 1,
|
|
|
+ // "repeatid": info.id,
|
|
|
+ // },
|
|
|
+ // },
|
|
|
+ //})
|
|
|
+ //
|
|
|
+ //if len(updateExtract) > 500 {
|
|
|
+ // mgo.UpdateBulk(extract, updateExtract...)
|
|
|
+ // updateExtract = [][]map[string]interface{}{}
|
|
|
+ //}
|
|
|
|
|
|
}
|
|
|
-
|
|
|
-
|
|
|
//
|
|
|
var update_map = map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
"reason":reason,
|
|
|
+ "repeat":"1",
|
|
|
+ "repeatid":repeat_id,
|
|
|
"merge":newData.mergemap,
|
|
|
},
|
|
|
}
|
|
@@ -471,6 +479,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
|
|
|
} else {
|
|
|
//IS.Add("new")
|
|
|
+ mapLock.Unlock()
|
|
|
}
|
|
|
}
|
|
|
}(tmp)
|