|
@@ -6,7 +6,6 @@ package main
|
|
|
|
|
|
import (
|
|
|
"encoding/json"
|
|
|
- "flag"
|
|
|
"fmt"
|
|
|
"gopkg.in/mgo.v2/bson"
|
|
|
"log"
|
|
@@ -33,8 +32,9 @@ var (
|
|
|
nextNode []map[string]interface{} //下节点数组
|
|
|
dupdays = 5 //初始化判重范围
|
|
|
DM *datamap //判重数据
|
|
|
- lastid = ""
|
|
|
+ lastid = "5da3f2c5a5cb26b9b79847fe"
|
|
|
//5da3f2c5a5cb26b9b79847fc
|
|
|
+
|
|
|
//正则筛选相关
|
|
|
FilterRegTitle = regexp.MustCompile("^_$")
|
|
|
FilterRegTitle_1 = regexp.MustCompile("^_$")
|
|
@@ -46,8 +46,8 @@ var (
|
|
|
)
|
|
|
|
|
|
func init() {
|
|
|
- flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
|
|
|
- flag.Parse()
|
|
|
+ //flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
|
|
|
+ //flag.Parse()
|
|
|
//172.17.145.163:27080
|
|
|
util.ReadConfig(&Sysconfig)
|
|
|
nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
|
|
@@ -66,12 +66,14 @@ func init() {
|
|
|
|
|
|
|
|
|
//测试临时注释
|
|
|
- //dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
|
|
|
- ////加载数据
|
|
|
- //DM = NewDatamap(dupdays, lastid)
|
|
|
- //FilterRegTitle = regexp.MustCompile(util.ObjToString(Sysconfig["specialwords"]))
|
|
|
- //FilterRegTitle_1 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_1"]))
|
|
|
- //FilterRegTitle_2 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_2"]))
|
|
|
+ dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
|
|
|
+ //加载数据
|
|
|
+ DM = NewDatamap(dupdays, lastid)
|
|
|
+ fmt.Println(DM.keys)
|
|
|
+ fmt.Println(DM.data)
|
|
|
+ FilterRegTitle = regexp.MustCompile(util.ObjToString(Sysconfig["specialwords"]))
|
|
|
+ FilterRegTitle_1 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_1"]))
|
|
|
+ FilterRegTitle_2 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_2"]))
|
|
|
|
|
|
|
|
|
|
|
@@ -90,7 +92,7 @@ func init() {
|
|
|
}
|
|
|
|
|
|
//新增一个方法 判断
|
|
|
-func main() {
|
|
|
+func mainTest() {
|
|
|
|
|
|
//log.Println("1")
|
|
|
//代码copy数据
|
|
@@ -221,7 +223,7 @@ func main() {
|
|
|
|
|
|
|
|
|
|
|
|
-func mainTest() {
|
|
|
+func main() {
|
|
|
go checkMapJob()
|
|
|
|
|
|
updport := Sysconfig["udpport"].(string)
|
|
@@ -243,6 +245,14 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
|
|
|
if err != nil {
|
|
|
udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
|
|
|
} else if mapInfo != nil {
|
|
|
+
|
|
|
+ //更新流程
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ //判重流程
|
|
|
go task(data, mapInfo)
|
|
|
key, _ := mapInfo["key"].(string)
|
|
|
if key == "" {
|
|
@@ -321,34 +331,68 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
// tmp["_id"] 对比id id原始id
|
|
|
b, source,reason := DM.check(info)
|
|
|
if b { //有重复,生成更新语句,更新抽取和更新招标
|
|
|
- log.Println("判重具体原因:",reason)
|
|
|
repeateN++
|
|
|
mapLock.Lock()
|
|
|
- updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
- map[string]interface{}{
|
|
|
- "_id": tmp["_id"],
|
|
|
- },
|
|
|
- map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "repeat": 1,
|
|
|
- "repeatid": source.id,
|
|
|
- "reason":reason,
|
|
|
-
|
|
|
- //......
|
|
|
- },
|
|
|
- },
|
|
|
- })
|
|
|
- //合并操作
|
|
|
- //评功权重打分
|
|
|
+
|
|
|
+ var mergeArr []int64 //更改合并数组记录
|
|
|
+ var newData *Info //更换新的数据池数据
|
|
|
+
|
|
|
+ //数据库更新1
|
|
|
+ var id_map = map[string]interface{}{
|
|
|
+
|
|
|
+ }
|
|
|
+ //合并操作--评功权重打分-合并完替换原始数据池
|
|
|
if basicDataScore(source,info) {
|
|
|
- //已原始数据为标准
|
|
|
+ //已原始数据为标准-
|
|
|
+ newData,mergeArr= mergeDataFields(source,info)
|
|
|
+ DM.replaceSourceData(newData,source.id)
|
|
|
+ id_map["_id"]= util.StringTOBsonId(source.id)
|
|
|
}else {
|
|
|
//对比数据为标准
|
|
|
+ newData,mergeArr= mergeDataFields(info,source)
|
|
|
+ DM.replaceSourceData(newData,source.id)
|
|
|
+ id_map["_id"]= util.StringTOBsonId(info.id)
|
|
|
}
|
|
|
|
|
|
+ //数据库更新2
|
|
|
+ var update_map = map[string]interface{}{
|
|
|
+ "$set": map[string]interface{}{
|
|
|
+ "repeat": 1,
|
|
|
+ "repeatid": newData.id,
|
|
|
+ "reason":reason,
|
|
|
+ "merge":newData.mergemap,
|
|
|
+ },
|
|
|
+ }
|
|
|
|
|
|
+ //更新合并后的数据
|
|
|
+ for _,value :=range mergeArr {
|
|
|
+ if value==1 {
|
|
|
+ update_map["$set"].(map[string]interface{})["area"] = newData.area
|
|
|
+ update_map["$set"].(map[string]interface{})["city"] = newData.city
|
|
|
+ }else if value==2 {
|
|
|
+ update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
|
|
|
+ }else if value==3 {
|
|
|
+ update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
|
|
|
+ }else if value==4 {
|
|
|
+ update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
|
|
|
+ }else if value==5 {
|
|
|
+ update_map["$set"].(map[string]interface{})["budget"] = newData.budget
|
|
|
+ }else if value==6 {
|
|
|
+ update_map["$set"].(map[string]interface{})["winner"] = newData.winner
|
|
|
+ }else if value==7 {
|
|
|
+ update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
|
|
|
+ }else if value==8 {
|
|
|
+ update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
|
|
|
+ }else {
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
+ //构建数据库更新用到的
|
|
|
+ updateExtract = append(updateExtract, []map[string]interface{}{
|
|
|
+ id_map,
|
|
|
+ update_map,
|
|
|
+ })
|
|
|
if len(updateExtract) > 500 {
|
|
|
mgo.UpdateBulk(extract, updateExtract...)
|
|
|
updateExtract = [][]map[string]interface{}{}
|
|
@@ -391,6 +435,147 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+//合并字段
|
|
|
+func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
+
|
|
|
+ var mergeArr []int64
|
|
|
+ mergeArr = make([]int64,0)
|
|
|
+ //1、城市
|
|
|
+ if (source.area==""||source.area=="全国")&&info.area!="全国"&&info.area!=""{
|
|
|
+ var arrA []string
|
|
|
+ if source.mergemap["area"]==nil {
|
|
|
+ arrA = make([]string, 0)
|
|
|
+ }else {
|
|
|
+ arrA = source.mergemap["area"].([]string)
|
|
|
+ }
|
|
|
+ arrA = append(arrA,source.area)
|
|
|
+ source.mergemap["area"] = arrA
|
|
|
+
|
|
|
+
|
|
|
+ var arrC []string
|
|
|
+ if source.mergemap["city"]==nil {
|
|
|
+ arrC = make([]string, 0)
|
|
|
+ }else {
|
|
|
+ arrC = source.mergemap["city"].([]string)
|
|
|
+ }
|
|
|
+ arrC = append(arrC,source.city)
|
|
|
+ source.mergemap["city"] = arrC
|
|
|
+
|
|
|
+ source.area = info.area
|
|
|
+ source.city = info.city
|
|
|
+ mergeArr = append(mergeArr,1)
|
|
|
+ }
|
|
|
+ //2、项目名称
|
|
|
+ if source.projectname==""&&info.projectname!=""{
|
|
|
+ var arr []string
|
|
|
+ if source.mergemap["projectname"]==nil {
|
|
|
+ arr = make([]string, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["projectname"].([]string)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.projectname)
|
|
|
+ source.mergemap["projectname"] = arr
|
|
|
+
|
|
|
+ source.projectname = info.projectname
|
|
|
+ mergeArr = append(mergeArr,2)
|
|
|
+ }
|
|
|
+ //3、项目编号
|
|
|
+ if source.projectcode==""&&info.projectcode!=""{
|
|
|
+ var arr []string
|
|
|
+ if source.mergemap["projectcode"]==nil {
|
|
|
+ arr = make([]string, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["projectcode"].([]string)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.projectcode)
|
|
|
+ source.mergemap["projectcode"] = arr
|
|
|
+
|
|
|
+ source.projectcode = info.projectcode
|
|
|
+ mergeArr = append(mergeArr,3)
|
|
|
+ }
|
|
|
+ //4、采购单位
|
|
|
+ if source.buyer==""&&info.buyer!=""{
|
|
|
+ var arr []string
|
|
|
+ if source.mergemap["buyer"]==nil {
|
|
|
+ arr = make([]string, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["buyer"].([]string)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.buyer)
|
|
|
+ source.mergemap["buyer"] = arr
|
|
|
+
|
|
|
+ source.buyer = info.buyer
|
|
|
+ mergeArr = append(mergeArr,4)
|
|
|
+ }
|
|
|
+ //5、预算
|
|
|
+ if source.budget==0&&info.budget!=0{
|
|
|
+ var arr []float64
|
|
|
+ if source.mergemap["budget"]==nil {
|
|
|
+ arr = make([]float64, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["budget"].([]float64)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.budget)
|
|
|
+ source.mergemap["budget"] = arr
|
|
|
+
|
|
|
+ source.budget = info.budget
|
|
|
+ mergeArr = append(mergeArr,5)
|
|
|
+ }
|
|
|
+ //6、中标单位
|
|
|
+ if source.winner==""&&info.winner!=""{
|
|
|
+ var arr []string
|
|
|
+ if source.mergemap["winner"]==nil {
|
|
|
+ arr = make([]string, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["winner"].([]string)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.winner)
|
|
|
+ source.mergemap["winner"] = arr
|
|
|
+
|
|
|
+ source.winner = info.winner
|
|
|
+ mergeArr = append(mergeArr,6)
|
|
|
+ }
|
|
|
+ //7、中标金额
|
|
|
+ if source.bidamount==0&&info.bidamount!=0{
|
|
|
+ var arr []float64
|
|
|
+ if source.mergemap["bidamount"]==nil {
|
|
|
+ arr = make([]float64, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["bidamount"].([]float64)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.bidamount)
|
|
|
+ source.mergemap["bidamount"] = arr
|
|
|
+
|
|
|
+ source.bidamount = info.bidamount
|
|
|
+ mergeArr = append(mergeArr,7)
|
|
|
+ }
|
|
|
+ //8、开天时间-地点
|
|
|
+ if source.bidopentime==0&&info.bidopentime!=0{
|
|
|
+ var arr []int64
|
|
|
+ if source.mergemap["bidopentime"]==nil {
|
|
|
+ arr = make([]int64, 0)
|
|
|
+ }else {
|
|
|
+ arr = source.mergemap["bidopentime"].([]int64)
|
|
|
+ }
|
|
|
+ arr = append(arr,source.bidopentime)
|
|
|
+ source.mergemap["bidopentime"] = arr
|
|
|
+
|
|
|
+ source.bidopentime = info.bidopentime
|
|
|
+ mergeArr = append(mergeArr,8)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ //以上合并过于简单,待进一步优化
|
|
|
+
|
|
|
+ return source,mergeArr
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+//权重评估
|
|
|
func basicDataScore(v *Info, info *Info) bool {
|
|
|
m,n:=0,0
|
|
|
if v.projectname!="" {m++}
|
|
@@ -415,10 +600,17 @@ func basicDataScore(v *Info, info *Info) bool {
|
|
|
if info.agency!="" {n=m+2}
|
|
|
if info.city!="" {n=m+2}
|
|
|
|
|
|
- if m>=n {
|
|
|
+ if m>n {
|
|
|
return true
|
|
|
+ }else if m==n {
|
|
|
+ if v.comeintime>=info.comeintime {
|
|
|
+ return true
|
|
|
+ }else {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+ }else {
|
|
|
+ return false
|
|
|
}
|
|
|
- return false
|
|
|
}
|
|
|
|
|
|
|