Quellcode durchsuchen

Merge branch 'dev3.3' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.3

fengweiqiang vor 5 Jahren
Ursprung
Commit
7aed0b28ee
2 geänderte Dateien mit 127 neuen und 85 gelöschten Zeilen
  1. 19 12
      udpfilterdup/src/datamap.go
  2. 108 73
      udpfilterdup/src/main.go

+ 19 - 12
udpfilterdup/src/datamap.go

@@ -62,7 +62,7 @@ func NewDatamap(days int, lastid string) *datamap {
 	n, continuSum := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
 		//|| qutil.ObjToString(tmp["subtype"]) == "变更"
-		if qutil.IntAll(tmp["repeat"]) == 1  {
+		if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1{
 			continuSum++
 		} else {
 			cm := tmp["comeintime"] //时间单位
@@ -172,8 +172,10 @@ L:
 						//同城判定有效
 						first_judge:= false
 						if (v.projectcode != ""&&v.projectcode==info.projectcode&&v.projectname != ""&&v.projectname==info.projectname)||
-							(v.title != ""&&v.title==info.title&&v.bidopentime != 0&&v.bidopentime==info.bidopentime&&v.detail != ""&&v.detail==info.detail) {
+							(v.projectcode != ""&&v.projectcode==info.projectcode&&v.bidopentime != 0&&v.bidopentime==info.bidopentime)||
+							(v.title != ""&&v.title==info.title&&v.agencyaddr != ""&&v.agencyaddr==info.agencyaddr&&v.bidopentime != 0&&v.bidopentime==info.bidopentime) {
 							first_judge = true
+
 						}
 						//3/6等判断
 						n := 0
@@ -197,11 +199,9 @@ L:
 						}
 						t:= judgeCityType(v.area,info.area,v.city,info.city)
 						if n>=3||first_judge==true {
-							if t==2 {
-								//同城
+							if t==2 {//同城
 								b = true
 								id = v.id
-								//log.Print("同城满足的",info.id)
 								break L
 							}
 						}
@@ -230,7 +230,8 @@ L:
 						t:= judgeCityType(v.area,info.area,v.city,info.city)
 						c_1 :=conditionTitle(v.title,info.title) //标题满足
 						c_2 :=conditionNum(v.projectcode,info.projectcode) //编号满足
-						c_3 :=conditionTAB(v.title,info.title,v.buyer,info.buyer) //标题+采购单位
+						c_3 :=conditionTitleBuyer(v.title,info.title,v.buyer,info.buyer) //标题+采购单位
+						c_4 :=conditionCodeTime(v.bidopentime,info.bidopentime,v.projectcode,info.projectcode) //编号+开标时间
 
 
 						site_b := false
@@ -256,8 +257,8 @@ L:
 							if v.projectcode != info.projectcode&&len([]rune(info.projectcode)) >=10&&v.projectcode!=""{
 								continue
 							}
-							//先决条件满足三要素
-							if n==3{
+							//先决条件满足三要素,条件4
+							if n==3||c_4{
 								b = true
 								id = v.id
 								break L
@@ -335,9 +336,6 @@ L:
 				//		break L
 				//	}
 				//}
-
-
-
 			}
 		}
 	}
@@ -404,13 +402,22 @@ func conditionNum(c1 string ,c2 string) bool {
 	return false
 }
 // Condition three (buyer + title): returns true when t1 equals t2 and b1 equals b2. Empty strings are not excluded, so two empty titles with two empty buyers also match.
-func conditionTAB(t1 string ,t2 string,b1 string,b2 string) bool {
+func conditionTitleBuyer(t1 string ,t2 string,b1 string,b2 string) bool {
 
 	if t1==t2&&b1==b2 {
 		return true
 	}
 	return false
 }
+// Condition four (project code + bid-opening time): returns true only when c1 is non-empty and equal to c2, t1 is non-zero and equal to t2, and c1 is at least 10 runes long.
+func conditionCodeTime(t1 int64 ,t2 int64,c1 string,c2 string) bool {
+
+	if c1 != ""&&c1==c2&&t1 != 0&&t1==t2&&len([]rune(c1)) >=10 { // length is counted in runes, not bytes
+		return true
+	}
+	return false
+}
+
 
 
 func (d *datamap) update(t int64) {

+ 108 - 73
udpfilterdup/src/main.go

@@ -7,6 +7,7 @@ package main
 import (
 	"encoding/json"
 	"fmt"
+	"gopkg.in/mgo.v2/bson"
 	"log"
 	mu "mfw/util"
 	"net"
@@ -22,7 +23,7 @@ var (
 	mconf        map[string]interface{} //mongodb配置信息
 	mgo          *mongodb.MongodbSim    //mongodb操作对象
 
-	mgoTest          *mongodb.MongodbSim    //mongodb操作对象
+	//mgoTest          *mongodb.MongodbSim    //mongodb操作对象
 
 	extract      string
 	extract_copy string
@@ -36,7 +37,7 @@ var (
 
 	//5d767728a5cb26b9b7748868  //9万
 	//5da3f2c5a5cb26b9b79847fc  //76万
-	FilterRegTitle = regexp.MustCompile("[0-9一二三四五六七八九十零123456789](次|包|标段)")
+	FilterRegTitle = regexp.MustCompile("[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包)")
 	inV_n int   //无效数据数量
 	siteArr     []map[string]interface{} //站点
 	//56404035af5374672e00059c
@@ -65,32 +66,32 @@ func init() {
 
 
 	//测试临时注释
-	dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
-	//加载数据
-	DM = NewDatamap(dupdays, lastid)
-	sw := util.ObjToString(Sysconfig["specialwords"])
-	if sw != "" {
-		FilterRegexp = regexp.MustCompile(sw)
-	}
+	//dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
+	////加载数据
+	//DM = NewDatamap(dupdays, lastid)
+	//sw := util.ObjToString(Sysconfig["specialwords"])
+	//if sw != "" {
+	//	FilterRegexp = regexp.MustCompile(sw)
+	//}
 
 
 	//数据库
 	//mongodb.InitMongodbPool(5, "192.168.3.207:27081", "")
 
-	mgoTest = &mongodb.MongodbSim{
-		MongodbAddr: "192.168.3.207:27081",
-		Size:        5,
-		DbName:      "qfw",
-	}
-	mgoTest.InitPool()
+	//mgoTest = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27081",
+	//	Size:        5,
+	//	DbName:      "qfw",
+	//}
+	//mgoTest.InitPool()
 
 
 }
 
 //新增一个方法 判断
-func main22()  {
+func main()  {
 	//log.Println("1")
-
+	//代码copy数据
 	//sessTest :=mgoTest.GetMgoConn()
 	//defer sessTest.Close()
 	//
@@ -123,68 +124,102 @@ func main22()  {
 
 
 
-	//sess := mgo.GetMgoConn()
-	//defer mgo.DestoryMongoConn(sess)
-	//res, _ := mgo.Find(extract_copy, nil, nil, nil, false, -1, -1)
-	//res_copy, _ := mgo.Find(extract, nil, nil, nil, false, -1, -1)
-
-	//m1 :=map[string]int{} //老版本
-	//m2 :=map[string]int{} //新版本
-	//for _,v:=range *res{
-	//	m1[(v["_id"].(bson.ObjectId).Hex())]= util.IntAll(v["repeat"])
-	//}
-	//for _,v:=range *res_copy{
-	//	m2[(v["_id"].(bson.ObjectId).Hex())]= util.IntAll(v["repeat"])
-	//}
-	//fmt.Println(len(m1),len(m2))
-	//n1:=0
-	//n2:=0
-	//var arr1 []string
-	//var arr2 []string
-	//for k,v:=range m1{
-	//	if m2[k]!=v{
-	//		if m2[k]==0{ //1:0
-	//			n1++
-	//			arr1 = append(arr1,fmt.Sprintf("目标_id:%s",k))
-	//		}
-	//		if m2[k]==1{//0:1
-	//			n2++
-	//		}
-	//	}
-	//}
-	//for _,v:=range arr1 {
-	//	log.Println(v)
-	//}
-	//log.Println("V1版本:1:0 对应数量",n1)
-	//log.Println("V1版本:0:1 对应数量",n2)
-	//
-	//
-	//n1=0
-	//n2=0
-	//for k,v:=range m2{
-	//	if m1[k]!=v{
-	//		if m1[k]==0{
-	//			n1++
-	//		}
-	//		if m1[k]==1{
-	//			n2++
-	//			arr2 = append(arr2,fmt.Sprintf("目标_id:%s",k))
-	//		}
-	//	}
-	//}
-	//for _,v:=range arr2 {
-	//	log.Println(v)
-	//}
-	//log.Println("V2版本:0:1 对应数量",n1)
-	//log.Println("V2版本:1:0 对应数量",n2)
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	res_copy := sess.DB("extract_kf").C(extract_copy).Find(nil).Iter()
+
+	m1 :=map[string]int{} //老版本
+	m2 :=map[string]int{} //新版本
+
+	i:=0
+	j:=0
+	for v1 := make(map[string]interface{}); res_copy.Next(&v1); i++{
+		if i%2000==0 {
+			log.Println("当前i:",i)
+		}
+		//if i>10000 {
+		//	break
+		//}
+		m1[(v1["_id"].(bson.ObjectId).Hex())]= util.IntAll(v1["repeat"])
+	}
+
+	sesss := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sesss)
+	res := sesss.DB("extract_kf").C(extract).Find(nil).Iter()
+
+
+	for v2 := make(map[string]interface{}); res.Next(&v2); j++{
+		if j%2000==0 {
+			log.Println("当前j:",j)
+		}
+		//if j>10000 {
+		//	break
+		//}
+		m2[(v2["_id"].(bson.ObjectId).Hex())]= util.IntAll(v2["repeat"])
+	}
+
+	fmt.Println(len(m1),len(m2))
+	n1:=0
+	n2:=0
+	n3:=0
+	n4:=0
+
+	var arr1 []string
+	var arr2 []string
+	for k,v:=range m1{
+		if m2[k]==1&&v==0{//0:1
+			n1++
+			arr2 = append(arr2,fmt.Sprintf("目标_id:%s",k))
+		}
+		if m2[k]==0&&v==1{ //1:0
+			n2++
+			arr1 = append(arr1,fmt.Sprintf("目标_id:%s",k))
+		}
+		if m2[k]==0&&v==0{ //0:0
+			n3++
+		}
+		if m2[k]==1&&v==1{//1:1
+			n4++
+		}
+	}
+	//打印 1:0情况 少打印 300条    38841
+
+	mm:=0
+	for _,v:=range arr1 {
+		mm++
+		if mm%130==0 {
+			log.Println(v)
+		}
+	}
+
+	log.Println("分割线---------------")
+	log.Println("分割线---------------")
+	log.Println("分割线---------------")
+	log.Println("分割线---------------")
+	log.Println("分割线---------------")
+
+
+	//打印 0:1情况 少打印  1000条   15518
+	nn:=0
+	for _,v:=range arr2 {
+		nn++
+		if nn%30==0 {
+			log.Println(v)
+		}
+	}
+
+	log.Println("V1 0:1---",n1)
+	log.Println("V1 1:0---",n2)
+	log.Println("V1 0:0---",n3)
+	log.Println("V1 1:1---",n4)
+
 
-	// 10-14   10-25   1571025600  1571976000
 
 }
 
 
 
-func main() {
+func main22() {
 	go checkMapJob()
 
 	updport := Sysconfig["udpport"].(string)