|
@@ -6,6 +6,7 @@ package main
|
|
|
|
|
|
import (
|
|
|
"encoding/json"
|
|
|
+ "flag"
|
|
|
"fmt"
|
|
|
"gopkg.in/mgo.v2/bson"
|
|
|
"log"
|
|
@@ -36,8 +37,8 @@ var (
|
|
|
dupdays = 5 //初始化判重范围
|
|
|
DM *datamap //
|
|
|
HM *historymap //判重数据
|
|
|
- lastid = "5d767728a5cb26b9b7748868"
|
|
|
- //ObjectId("5d767728a5cb26b9b7748868")
|
|
|
+ lastid = "5da3f2c5a5cb26b9b79847fc"
|
|
|
+ //5da3f2c5a5cb26b9b79847fc
|
|
|
//正则筛选相关
|
|
|
FilterRegTitle = regexp.MustCompile("^_$")
|
|
|
FilterRegTitle_1 = regexp.MustCompile("^_$")
|
|
@@ -46,12 +47,12 @@ var (
|
|
|
|
|
|
|
|
|
|
|
|
- SiteMap map[string]interface{} //站点map
|
|
|
+ SiteMap map[string]map[string]interface{} //站点map
|
|
|
)
|
|
|
|
|
|
func init() {
|
|
|
- //flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
|
|
|
- //flag.Parse()
|
|
|
+ flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
|
|
|
+ flag.Parse()
|
|
|
//172.17.145.163:27080
|
|
|
util.ReadConfig(&Sysconfig)
|
|
|
nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
|
|
@@ -87,7 +88,7 @@ func init() {
|
|
|
siteMgo.InitPool()
|
|
|
|
|
|
|
|
|
- SiteMap = make(map[string]interface{},0)
|
|
|
+ SiteMap = make(map[string]map[string]interface{},0)
|
|
|
|
|
|
start := int(time.Now().Unix())
|
|
|
//站点配置
|
|
@@ -95,12 +96,14 @@ func init() {
|
|
|
defer sess_site.Close()
|
|
|
res_site := sess_site.DB("zhaolongyue").C("site").Find(nil).Sort("_id").Iter()
|
|
|
for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
|
|
|
- data_map := map[string]string{
|
|
|
+ data_map := map[string]interface{}{
|
|
|
"area":util.ObjToString(site_dict["area"]),
|
|
|
"city":util.ObjToString(site_dict["city"]),
|
|
|
"district":util.ObjToString(site_dict["district"]),
|
|
|
+ "subdepttype":util.ObjToString(site_dict["subdepttype"]),
|
|
|
+ "level":util.ObjToString(site_dict["level"]),
|
|
|
}
|
|
|
- SiteMap[site_dict["site"].(string)]= data_map
|
|
|
+ SiteMap[util.ObjToString(site_dict["site"])]= data_map
|
|
|
}
|
|
|
|
|
|
fmt.Printf("用时:%d秒,%d个",int(time.Now().Unix())-start,len(SiteMap))
|
|
@@ -142,8 +145,7 @@ func mainTest() {
|
|
|
// // arr = append(arr,dict)
|
|
|
// //}
|
|
|
//}
|
|
|
-
|
|
|
-
|
|
|
+ //
|
|
|
|
|
|
sess := mgo.GetMgoConn()
|
|
|
defer mgo.DestoryMongoConn(sess)
|
|
@@ -207,11 +209,11 @@ func mainTest() {
|
|
|
}
|
|
|
|
|
|
}
|
|
|
- //打印 1:0情况 66989;
|
|
|
+ //打印 1:0情况 ;
|
|
|
mm:=0
|
|
|
for _,v:=range arr1 {
|
|
|
mm++
|
|
|
- if mm%222==0 {
|
|
|
+ if mm%200==0 {
|
|
|
log.Println(v)
|
|
|
}
|
|
|
}
|
|
@@ -220,11 +222,11 @@ func mainTest() {
|
|
|
log.Println("分割线---------------")
|
|
|
|
|
|
|
|
|
- //打印 0:1情况 8729
|
|
|
+ //打印 0:1情况
|
|
|
nn:=0
|
|
|
for _,v:=range arr2 {
|
|
|
nn++
|
|
|
- if nn%30==0 {
|
|
|
+ if nn%200==0 {
|
|
|
log.Println(v)
|
|
|
}
|
|
|
}
|
|
@@ -265,11 +267,11 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
|
|
|
} else if mapInfo != nil {
|
|
|
|
|
|
//更新流程
|
|
|
- go historyTask(data,mapInfo)
|
|
|
+ //go historyTask(data,mapInfo)
|
|
|
|
|
|
|
|
|
//判重流程
|
|
|
- //go task(data, mapInfo)
|
|
|
+ go task(data, mapInfo)
|
|
|
|
|
|
key, _ := mapInfo["key"].(string)
|
|
|
if key == "" {
|
|
@@ -330,7 +332,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
},
|
|
|
map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "repeat": -1,
|
|
|
+ "repeat":-1,
|
|
|
},
|
|
|
},
|
|
|
})
|
|
@@ -346,63 +348,64 @@ func task(data []byte, mapInfo map[string]interface{}) {
|
|
|
b, source,reason := DM.check(info)
|
|
|
if b { //有重复,生成更新语句,更新抽取和更新招标
|
|
|
repeateN++
|
|
|
- var mergeArr = []int64{} //更改合并数组记录
|
|
|
- var newData = &Info{} //更换新的数据池数据
|
|
|
+ //var mergeArr = []int64{} //更改合并数组记录
|
|
|
+ //var newData = &Info{} //更换新的数据池数据
|
|
|
var id_map = map[string]interface{}{}
|
|
|
- repeat_id := ""
|
|
|
-
|
|
|
+ repeat_id := source.id
|
|
|
+ id_map["_id"]= util.StringTOBsonId(info.id)
|
|
|
//合并操作--评估权重打分-合并完替换原始数据池
|
|
|
- basic_bool := basicDataScore(source,info)
|
|
|
- if basic_bool {
|
|
|
- //已原始数据为标准-对比数据打判重标签
|
|
|
- newData,mergeArr= mergeDataFields(source,info)
|
|
|
- DM.replaceSourceData(newData,source.id) //替换
|
|
|
- id_map["_id"]= util.StringTOBsonId(source.id)
|
|
|
- repeat_id = source.id
|
|
|
- }else {
|
|
|
- //已对比数据为标准 ,数据池的数据打判重标签
|
|
|
- newData,mergeArr= mergeDataFields(info,source)
|
|
|
- DM.replaceSourceData(newData,source.id)//替换
|
|
|
- id_map["_id"]= util.StringTOBsonId(info.id)
|
|
|
- repeat_id = info.id
|
|
|
- }
|
|
|
+ //basic_bool := basicDataScore(source,info)
|
|
|
+ //if basic_bool {
|
|
|
+ // //已原始数据为标准-对比数据打判重标签
|
|
|
+ // newData,mergeArr= mergeDataFields(source,info)
|
|
|
+ // DM.replaceSourceData(newData,source.id) //替换
|
|
|
+ // id_map["_id"]= util.StringTOBsonId(source.id)
|
|
|
+ // repeat_id = source.id
|
|
|
+ //}else {
|
|
|
+ // //已对比数据为标准 ,数据池的数据打判重标签
|
|
|
+ // newData,mergeArr= mergeDataFields(info,source)
|
|
|
+ // DM.replaceSourceData(newData,source.id)//替换
|
|
|
+ // id_map["_id"]= util.StringTOBsonId(info.id)
|
|
|
+ // repeat_id = info.id
|
|
|
+ //}
|
|
|
|
|
|
var update_map = map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "reason":reason,
|
|
|
- "repeat":"1",
|
|
|
+ "repeat_reason":reason,
|
|
|
+ "repeat":1,
|
|
|
"repeatid":repeat_id,
|
|
|
},
|
|
|
}
|
|
|
|
|
|
//合并记录
|
|
|
- if len(newData.mergemap)>0 {
|
|
|
- update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
|
|
|
- }
|
|
|
-
|
|
|
- //更新合并后的数据
|
|
|
- for _,value :=range mergeArr {
|
|
|
- if value==1 {
|
|
|
- update_map["$set"].(map[string]interface{})["area"] = newData.area
|
|
|
- update_map["$set"].(map[string]interface{})["city"] = newData.city
|
|
|
- }else if value==2 {
|
|
|
- update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
|
|
|
- }else if value==3 {
|
|
|
- update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
|
|
|
- }else if value==4 {
|
|
|
- update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
|
|
|
- }else if value==5 {
|
|
|
- update_map["$set"].(map[string]interface{})["budget"] = newData.budget
|
|
|
- }else if value==6 {
|
|
|
- update_map["$set"].(map[string]interface{})["winner"] = newData.winner
|
|
|
- }else if value==7 {
|
|
|
- update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
|
|
|
- }else if value==8 {
|
|
|
- update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
|
|
|
- }else {
|
|
|
-
|
|
|
- }
|
|
|
- }
|
|
|
+ //if len(newData.mergemap)>0 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
|
|
|
+ // //fmt.Println("合并长度:",len(newData.mergemap))
|
|
|
+ //}
|
|
|
+ //
|
|
|
+ ////更新合并后的数据
|
|
|
+ //for _,value :=range mergeArr {
|
|
|
+ // if value==1 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["area"] = newData.area
|
|
|
+ // update_map["$set"].(map[string]interface{})["city"] = newData.city
|
|
|
+ // }else if value==2 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
|
|
|
+ // }else if value==3 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
|
|
|
+ // }else if value==4 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
|
|
|
+ // }else if value==5 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["budget"] = newData.budget
|
|
|
+ // }else if value==6 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["winner"] = newData.winner
|
|
|
+ // }else if value==7 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
|
|
|
+ // }else if value==8 {
|
|
|
+ // update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
|
|
|
+ // }else {
|
|
|
+ //
|
|
|
+ // }
|
|
|
+ //}
|
|
|
|
|
|
//构建数据库更新用到的
|
|
|
updateExtract = append(updateExtract, []map[string]interface{}{
|
|
@@ -552,7 +555,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
"repeat": 0,
|
|
|
- "repeatid": "-1",
|
|
|
+ "repeatid": -2,
|
|
|
},
|
|
|
},
|
|
|
})
|
|
@@ -586,8 +589,8 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
|
|
|
var update_map = map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
- "reason":reason,
|
|
|
- "repeat":"1",
|
|
|
+ "repeat_reason":reason,
|
|
|
+ "repeat":1,
|
|
|
"repeatid":repeat_id,
|
|
|
},
|
|
|
}
|
|
@@ -595,6 +598,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
|
|
|
//合并记录
|
|
|
if len(newData.mergemap)>0 {
|
|
|
update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
|
|
|
+ //fmt.Println("合并长度:",len(newData.mergemap))
|
|
|
}
|
|
|
|
|
|
//更新合并后的数据
|
|
@@ -706,6 +710,8 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
source.area = info.area
|
|
|
source.city = info.city
|
|
|
mergeArr = append(mergeArr,1)
|
|
|
+
|
|
|
+ //fmt.Println("合并-城市")
|
|
|
}
|
|
|
//2、项目名称
|
|
|
if source.projectname==""&&info.projectname!=""{
|
|
@@ -720,6 +726,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.projectname = info.projectname
|
|
|
mergeArr = append(mergeArr,2)
|
|
|
+ //fmt.Println("合并-项目名称")
|
|
|
}
|
|
|
//3、项目编号
|
|
|
if source.projectcode==""&&info.projectcode!=""{
|
|
@@ -734,6 +741,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.projectcode = info.projectcode
|
|
|
mergeArr = append(mergeArr,3)
|
|
|
+ //fmt.Println("合并-项目标号")
|
|
|
}
|
|
|
//4、采购单位
|
|
|
if source.buyer==""&&info.buyer!=""{
|
|
@@ -748,6 +756,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.buyer = info.buyer
|
|
|
mergeArr = append(mergeArr,4)
|
|
|
+ //fmt.Println("合并-采购单位")
|
|
|
}
|
|
|
//5、预算
|
|
|
if source.budget==0&&info.budget!=0{
|
|
@@ -762,6 +771,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.budget = info.budget
|
|
|
mergeArr = append(mergeArr,5)
|
|
|
+ //fmt.Println("合并-预算")
|
|
|
}
|
|
|
//6、中标单位
|
|
|
if source.winner==""&&info.winner!=""{
|
|
@@ -776,6 +786,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.winner = info.winner
|
|
|
mergeArr = append(mergeArr,6)
|
|
|
+ //fmt.Println("合并-中标单位")
|
|
|
}
|
|
|
//7、中标金额
|
|
|
if source.bidamount==0&&info.bidamount!=0{
|
|
@@ -790,6 +801,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.bidamount = info.bidamount
|
|
|
mergeArr = append(mergeArr,7)
|
|
|
+ //fmt.Println("合并-中标金额")
|
|
|
}
|
|
|
//8、开标时间-地点
|
|
|
if source.bidopentime==0&&info.bidopentime!=0{
|
|
@@ -804,6 +816,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
source.bidopentime = info.bidopentime
|
|
|
mergeArr = append(mergeArr,8)
|
|
|
+ //fmt.Println("合并-开标时间")
|
|
|
}
|
|
|
|
|
|
//以上合并过于简单,待进一步优化
|
|
@@ -813,6 +826,10 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
|
|
|
|
|
|
//权重评估
|
|
|
func basicDataScore(v *Info, info *Info) bool {
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ //网站评估
|
|
|
m,n:=0,0
|
|
|
if v.projectname!="" {m++}
|
|
|
if v.buyer!="" {m++}
|