|
@@ -8,6 +8,7 @@ import (
|
|
"encoding/json"
|
|
"encoding/json"
|
|
"flag"
|
|
"flag"
|
|
"fmt"
|
|
"fmt"
|
|
|
|
+ "github.com/cron"
|
|
"log"
|
|
"log"
|
|
mu "mfw/util"
|
|
mu "mfw/util"
|
|
"net"
|
|
"net"
|
|
@@ -17,7 +18,6 @@ import (
|
|
"sync"
|
|
"sync"
|
|
"time"
|
|
"time"
|
|
|
|
|
|
- "github.com/cron"
|
|
|
|
"gopkg.in/mgo.v2/bson"
|
|
"gopkg.in/mgo.v2/bson"
|
|
)
|
|
)
|
|
|
|
|
|
@@ -587,8 +587,10 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
})
|
|
})
|
|
}
|
|
}
|
|
} else { //高质量数据
|
|
} else { //高质量数据
|
|
|
|
+
|
|
basic_bool := basicDataScore(source, info)
|
|
basic_bool := basicDataScore(source, info)
|
|
if !basic_bool {
|
|
if !basic_bool {
|
|
|
|
+
|
|
HM.replaceSourceData(info, source.id) //替换
|
|
HM.replaceSourceData(info, source.id) //替换
|
|
repeat_idMap["_id"] = StringTOBsonId(source.id)
|
|
repeat_idMap["_id"] = StringTOBsonId(source.id)
|
|
repeat_id = info.id
|
|
repeat_id = info.id
|
|
@@ -792,8 +794,8 @@ func timedTaskOnce() {
|
|
}
|
|
}
|
|
|
|
|
|
b, source, reason := DM.check(info)
|
|
b, source, reason := DM.check(info)
|
|
- //log.Println("判重结果", b, reason)
|
|
|
|
if b { //有重复,生成更新语句,更新抽取和更新招标
|
|
if b { //有重复,生成更新语句,更新抽取和更新招标
|
|
|
|
+ log.Println("判重结果", b, reason)
|
|
repeateN++
|
|
repeateN++
|
|
var is_replace = false
|
|
var is_replace = false
|
|
var mergeArr = []int64{} //更改合并数组记录
|
|
var mergeArr = []int64{} //更改合并数组记录
|
|
@@ -872,6 +874,7 @@ func timedTaskOnce() {
|
|
} else { //高质量数据
|
|
} else { //高质量数据
|
|
basic_bool := basicDataScore(source, info)
|
|
basic_bool := basicDataScore(source, info)
|
|
if !basic_bool {
|
|
if !basic_bool {
|
|
|
|
+ log.Println("高质量数据替换:",source.id,info.id)
|
|
DM.replaceSourceData(info, source.id) //替换
|
|
DM.replaceSourceData(info, source.id) //替换
|
|
repeat_idMap["_id"] = StringTOBsonId(source.id)
|
|
repeat_idMap["_id"] = StringTOBsonId(source.id)
|
|
repeat_id = info.id
|
|
repeat_id = info.id
|