浏览代码

质量细节修改

apple 5 年之前
父节点
当前提交
1a1c1b70c9

+ 35 - 4
data_quality/src/config.json

@@ -10,14 +10,13 @@
       "site_coll": "site"
     }
   },
-  "core_element":["buyer","projectname","title","winner","agency","projectcode"],
-  "other_element":["area","city"],
   "score_standard": {
     "total_score": 100,
     "core_max": 70,
     "core_each": 10,
     "other_max": 30,
-    "other_each": 5
+    "other_each": 5,
+    "deduct_each": 5
   },
   "qy_mongodb": {
     "qy_addrName": "192.168.3.207:27092",
@@ -25,7 +24,39 @@
     "qy_collName": "zk",
     "pool": 10
   },
-
+  "core_element":[
+    {"bidamount": {"type": "float", "large": 0}},
+    {"budget": {"type": "float", "large": 0}},
+    {"projectcode": {"type": "string", "length": 5}},
+    {"contractnumber": {"type": "string", "length": 5}},
+    {"title": {"type": "string", "length": 10}},
+    {"projectname": {"type": "string", "length": 10}},
+    {"buyer": {"type": "string", "length": 10}},
+    {"winner": {"type": "string", "length": 10}}
+  ],
+  "other_element": [
+    {"bidopenaddress": {"type": "string","length": 0}},
+    {"winnertel": {"type": "string","length": 0}},
+    {"winnerperson": {"type": "string","length": 0}},
+    {"winneraddr": {"type": "string","length": 0}},
+    {"agencyaddr": {"type": "string","length": 0}},
+    {"buyeraddr": {"type": "string","length": 0}},
+    {"projectaddr": {"type": "string","length": 0}},
+    {"agencytel": {"type": "string","length": 0}},
+    {"agencyperson": {"type": "string","length": 0}},
+    {"buyerperson": {"type": "string","length": 0}},
+    {"agency": {"type": "string","length": 0}},
+    {"buyertel": {"type": "string","length": 0}},
+    {"bidopentime": {"type": "int","large": 0}},
+    {"signaturedate": {"type": "int","large": 0}}
+  ],
+  "deduct_element": [
+    "area",
+    "city",
+    "site",
+    "toptype",
+    "subtype"
+  ],
   "jkmail": {
     "to": "zhengkun@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail"

+ 34 - 17
data_quality/src/main.go

@@ -13,20 +13,21 @@ import (
 
 
 var (
-	Sysconfig    map[string]interface{} // configuration file
+	sysconfig    map[string]interface{} // configuration file
 	mgo          *MongodbSim            // MongoDB operation object
 	qy_mgo		 *MongodbSim // second MongodbSim handle, built from the "qy_mongodb" config section
 	udpclient    mu.UdpClient             // UDP object
 	nextNode     []map[string]interface{} // array of next (downstream) nodes
 	siteMap	 	 map[string]map[string]interface{} // site map
 	coll_name,qy_coll_name	string // collection names read from "collName" and "qy_collName"
-	core_element,other_element	[]string	// elements
-	total_score,core_max,core_each,other_max,other_each int
+	core_element,other_element	[]map[string]interface{}	// elements: one single-key map per field (field name -> rule)
+	deduct_element 	[]string // field names loaded from the "deduct_element" config list
+	total_score,core_max,core_each,other_max,other_each ,deduct_each int // values loaded from the "score_standard" config section
 )
 
 func initSite()  {
 	//站点配置
-	mconf := Sysconfig["mongodb"].(map[string]interface{})
+	mconf := sysconfig["mongodb"].(map[string]interface{})
 	site := mconf["site"].(map[string]interface{})
 	siteMap = make(map[string]map[string]interface{}, 0)
 	start := int(time.Now().Unix())
@@ -44,7 +45,8 @@ func initSite()  {
 }
 
 func initMgo()  {
-	mconf := Sysconfig["mongodb"].(map[string]interface{})
+	mconf := sysconfig["mongodb"].(map[string]interface{})
+	log.Println(mconf)
 	mgo = &MongodbSim{
 		MongodbAddr: mconf["addrName"].(string),
 		DbName:      mconf["dbName"].(string),
@@ -53,7 +55,7 @@ func initMgo()  {
 	mgo.InitPool()
 
 
-	qy_mconf := Sysconfig["qy_mongodb"].(map[string]interface{})
+	qy_mconf := sysconfig["qy_mongodb"].(map[string]interface{})
 	qy_mgo = &MongodbSim{
 		MongodbAddr: qy_mconf["qy_addrName"].(string),
 		DbName:      qy_mconf["qy_dbName"].(string),
@@ -64,20 +66,23 @@ func initMgo()  {
 	coll_name = mconf["collName"].(string)
 	qy_coll_name = qy_mconf["qy_collName"].(string)
 
-	core_element = qu.ObjArrToStringArr(Sysconfig["core_element"].([]interface{}))
-	other_element = qu.ObjArrToStringArr(Sysconfig["other_element"].([]interface{}))
-	score_standard := Sysconfig["score_standard"].(map[string]interface{})
+	core_element = qu.ObjArrToMapArr(sysconfig["core_element"].([]interface{}))
+	other_element = qu.ObjArrToMapArr(sysconfig["other_element"].([]interface{}))
+	deduct_element  =qu.ObjArrToStringArr(sysconfig["deduct_element"].([]interface{}))
+
+	score_standard := sysconfig["score_standard"].(map[string]interface{})
 	total_score = qu.IntAll(score_standard["total_score"])
 	core_max = qu.IntAll(score_standard["core_max"])
 	core_each = qu.IntAll(score_standard["core_each"])
 	other_max = qu.IntAll(score_standard["other_max"])
 	other_each = qu.IntAll(score_standard["other_each"])
+	deduct_each = qu.IntAll(score_standard["deduct_each"])
 }
 
 
 func init() {
 	//加载配置文件
-	qu.ReadConfig(&Sysconfig)
+	qu.ReadConfig(&sysconfig)
 	initMgo()
 	initSite()//加载站点
 	log.Println("采用udp模式")
@@ -86,7 +91,7 @@ func init() {
 
 func mainT() {
 	go checkMapJob()
-	updport := Sysconfig["udpport"].(string)
+	updport := sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
 	udpclient.Listen(processUdpMsg)
 	log.Println("Udp服务监听", updport)
@@ -96,8 +101,8 @@ func mainT() {
 //快速测试使用
 func main() {
 
-	sid := "4f16936d52c1d9fbf843c60e"
-	eid := "6f16936d52c1d9fbf843c60e"
+	sid := "1f0000000000000000000000"
+	eid := "9f0000000000000000000000"
 	log.Println(sid, "---", eid)
 	mapinfo := map[string]interface{}{}
 	if sid == "" || eid == "" {
@@ -157,6 +162,7 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
 		},
 	}
+	log.Println("查询条件:",q)
 	sess := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess)
 	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
@@ -164,12 +170,12 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 	index:=0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
 		if index%10000 == 0 {
-			log.Println("current:", index, tmp["_id"])
+			log.Println("当前数量:", index, tmp["_id"])
 		}
 
-		element_score:=dealWithElementRate(tmp)
-		error_score,abnormal_score:=dealWithErrorRate(tmp)
-		log.Println("元素分:",element_score,"错误分:",error_score,"异常分:",abnormal_score)
+		element_score,element_reason:=dealWithElementRate(tmp)
+		error_score,abnormal_score,error_reason,abnormal_reason:=dealWithErrorRate(tmp)
+		//log.Println("元素分:",element_score,"错误分:",error_score,"异常分:",abnormal_score)
 
 		updateExtract = append(updateExtract, []map[string]interface{}{
 			map[string]interface{}{
@@ -180,6 +186,11 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 					"element_score": element_score,
 					"error_score":error_score,
 					"abnormal_score": abnormal_score,
+					"quality_reason":map[string]interface{}{
+						"element_reason":element_reason,
+						"error_reason":error_reason,
+						"abnormal_reason":abnormal_reason,
+					},
 				},
 			},
 		})
@@ -196,6 +207,9 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 		mgo.UpSertBulk(coll_name, updateExtract...)
 	}
 
+
+	log.Println("task quality over - 总计数量",index)
+
 	time.Sleep(60 * time.Second)
 
 	//任务完成,开始发送广播通知下面节点
@@ -221,4 +235,7 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 		}
 	}
 
+
+
+
 }

+ 47 - 13
data_quality/src/mark

@@ -2,25 +2,61 @@
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "192.168.3.207:27092",
-    "dbName": "extract_kf",
-    "collName": "zk_move",
-    "pool": 5
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "result_20200715",
+    "pool": 10,
+    "site": {
+      "site_dbname": "qfw",
+      "site_coll": "site"
+    }
   },
-  "core_element":["buyer","projectname","title","winner","agency","projectcode"],
-  "other_element":["area","city"],
+  "core_element":[
+    {"bidamount": {"type": "float", "large": 0}},
+    {"budget": {"type": "float", "large": 0}},
+    {"projectcode": {"type": "string", "length": 5}},
+    {"contractnumber": {"type": "string", "length": 5}},
+    {"title": {"type": "string", "length": 10}},
+    {"projectname": {"type": "string", "length": 10}},
+    {"buyer": {"type": "string", "length": 10}},
+    {"winner": {"type": "string", "length": 10}}
+  ],
+  "other_element": [
+    {"bidopenaddress": {"type": "string","length": 0}},
+    {"winnertel": {"type": "string","length": 0}},
+    {"winnerperson": {"type": "string","length": 0}},
+    {"winneraddr": {"type": "string","length": 0}},
+    {"agencyaddr": {"type": "string","length": 0}},
+    {"buyeraddr": {"type": "string","length": 0}},
+    {"projectaddr": {"type": "string","length": 0}},
+    {"agencytel": {"type": "string","length": 0}},
+    {"agencyperson": {"type": "string","length": 0}},
+    {"buyerperson": {"type": "string","length": 0}},
+    {"agency": {"type": "string","length": 0}},
+    {"buyertel": {"type": "string","length": 0}},
+    {"bidopentime": {"type": "int","large": 0}},
+    {"signaturedate": {"type": "int","large": 0}}
+  ],
+  "deduct_element": [
+    "area",
+    "city",
+    "site",
+    "toptype",
+    "subtype"
+  ],
   "score_standard": {
     "total_score": 100,
     "core_max": 70,
     "core_each": 10,
     "other_max": 30,
-    "other_each": 5
+    "other_each": 5,
+    "deduct_each": 5
   },
   "qy_mongodb": {
-    "qy_addrName": "192.168.3.207:27092",
-    "qy_dbName": "extract_kf",
-    "qy_collName": "zk",
-    "pool": 5
+    "qy_addrName": "172.17.4.187:27081",
+    "qy_dbName": "mixdata",
+    "qy_collName": "qyxy_std",
+    "pool": 10
   },
 
   "jkmail": {
@@ -41,5 +77,3 @@
 
 
 
-
-

+ 54 - 16
data_quality/src/scoreExpError.go

@@ -8,17 +8,21 @@ import (
 )
 
 var sitelock sync.Mutex         //锁
+var error_reason map[string]interface{}
+var abnormal_reason map[string]interface{}
 
-func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
+func dealWithErrorRate(tmp map[string]interface{}) (int , int, map[string]interface{}, map[string]interface{}) {
+	error_reason = map[string]interface{}{}
+	abnormal_reason = map[string]interface{}{}
 	//错误 , 异常error_score  abnormal_score,
 	err_num,ab_num:=0,0
-
 	//金额类
 	budget:=qu.Float64All(tmp["budget"])
 	bidamount:=qu.Float64All(tmp["bidamount"])
 	if tmp["budget"]==nil&&tmp["bidamount"]==nil {
 		//均不存在
 		ab_num++
+		abnormal_reason["money"] = "budget-bidamount均不存在"
 	}else if tmp["budget"]!=nil&&tmp["bidamount"]!=nil&&budget!=0&&bidamount!=0 {
 		//均存在
 		err,ab:=amountAnalysis(budget,bidamount)
@@ -28,9 +32,11 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 		//二者存在一个
 		if budget==0 && qu.ObjToString(tmp["toptype"])=="招标"{
 			ab_num++
+			abnormal_reason["money"] = "招标:budget空"
 		}
 		if bidamount==0 && qu.ObjToString(tmp["toptype"])=="结果"{
 			ab_num++
+			abnormal_reason["money"] = "结果:bidamount空"
 		}
 	}
 
@@ -38,12 +44,15 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 	buyer:=qu.ObjToString(tmp["buyer"])
 	if buyer=="" {
 		ab_num++
+		abnormal_reason["buyer"] = "buyer空"
 	}else {
 		if len(buyer)<10 {
 			err_num++
+			error_reason["buyer"] = "buyer长度"
 		}else {
 			if !buyerAnalysis(buyer) {
 				ab_num++
+				abnormal_reason["buyer"] = "buyer-企业无"
 			}
 		}
 	}
@@ -53,12 +62,15 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 		winner:=qu.ObjToString(tmp["winner"])
 		if winner=="" {
 			ab_num++
+			abnormal_reason["winner"] = "winner空"
 		}else {
 			if len(winner)<10 {
 				err_num++
+				error_reason["winner"] = "winner长度"
 			}else {
 				if !winnerAnalysis(winner) {
 					ab_num++
+					abnormal_reason["winner"] = "winner-企业无"
 				}
 			}
 		}
@@ -69,11 +81,11 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 	title:=qu.ObjToString(tmp["title"])
 	if title=="" {
 		err_num++
+		error_reason["title"] = "空"
 	}else {
 		if len(title)<10 {
 			err_num++
-		}else if len(title)>60{
-			ab_num++
+			abnormal_reason["title"] = "title长度"
 		}else {
 
 		}
@@ -81,11 +93,11 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 	projectname:=qu.ObjToString(tmp["projectname"])
 	if projectname=="" {
 		err_num++
+		error_reason["projectname"] = "空"
 	}else {
 		if len(projectname)<10 {
 			err_num++
-		}else if len(projectname)>60{
-			ab_num++
+			abnormal_reason["projectname"] = "projectname长度"
 		}else {
 
 		}
@@ -97,9 +109,11 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 	contractnumber:=qu.ObjToString(tmp["contractnumber"])
 	if projectcode==""&&contractnumber=="" {
 		ab_num++
+		abnormal_reason["code"] = "code-空"
 	}else {
 		if !codesAnalysis(projectcode,contractnumber) {
 			ab_num++
+			abnormal_reason["code"] = "code-不符"
 		}
 	}
 
@@ -108,6 +122,7 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 	now:=time.Now().Unix()
 	if publishtime<=0||publishtime-now>0 {
 		err_num++
+		error_reason["publishtime"] = "publishtime-超前(0)"
 	}
 
 	//省份,城市
@@ -119,24 +134,31 @@ func dealWithErrorRate(tmp map[string]interface{}) (int , int) {
 	site := qu.ObjToString(tmp["site"])
 	if !citysAnalysis(area,city,site) {
 		ab_num++
+		abnormal_reason["city"] = "area-站点不一致"
 	}
 
-
-
-
-
 	//招标时间-地点
 	if qu.Int64All(tmp["bidopentime"])==0 && qu.ObjToString(tmp["bidopenaddress"])=="" &&
 		qu.ObjToString(tmp["toptype"])=="招标" {
 		ab_num++
+		abnormal_reason["bidopen"] = "bidopen-时间-地点-空"
 	}
 
 
 
+	//类别问题
+	if qu.ObjToString(tmp["toptype"]) == ""{
+		err_num++
+		error_reason["toptype"] = "toptype:空"
+	}else {
+		if !categoryAnalysis(tmp) {
+			ab_num++
+			abnormal_reason["toptype"] = "toptype:内容>>"
+		}
+	}
 
 
-
-	return err_num,ab_num
+	return err_num,ab_num,error_reason,abnormal_reason
 }
 
 
@@ -151,8 +173,10 @@ func amountAnalysis(budget float64,bidamount float64) (int ,int) {
 
 	}else if (proportion>=0.01&&proportion<0.1)||(proportion>10&&proportion<=100){
 		ab_num++
+		abnormal_reason["money"] = "bidamount/budget间隔异常"
 	}else {
 		err_num++
+		error_reason["money"] = "bidamount/budget-比例错误"
 	}
 
 	return err_num,ab_num
@@ -162,9 +186,9 @@ func amountAnalysis(budget float64,bidamount float64) (int ,int) {
 func buyerAnalysis(buyer string) bool{
 
 	q := map[string]interface{}{
-		"name": buyer,
+		"company_name": buyer,
 	}
-	data,_:=mgo.Find(qy_coll_name,q,nil,map[string]interface{}{"name":1})
+	data,_:=mgo.Find(qy_coll_name,q,nil,map[string]interface{}{"company_name":1})
 	if data==nil {
 		return false
 	}
@@ -176,9 +200,9 @@ func buyerAnalysis(buyer string) bool{
 func winnerAnalysis(winner string) bool {
 
 	q := map[string]interface{}{
-		"name": winner,
+		"company_name": winner,
 	}
-	data,_:=mgo.Find(qy_coll_name,q,nil,map[string]interface{}{"name":1})
+	data,_:=mgo.Find(qy_coll_name,q,nil,map[string]interface{}{"company_name":1})
 	if data==nil {
 		return false
 	}
@@ -230,6 +254,20 @@ func citysAnalysis(area string,city string,site string) bool {
 
 
 
+// categoryAnalysis (category analysis) checks a record's toptype against its winner/bidamount fields: it returns false when toptype is "招标" while winner is non-empty or bidamount is non-zero, and true in every other case.
+func categoryAnalysis(tmp map[string]interface{}) bool {
+	toptype:=qu.ObjToString(tmp["toptype"])
+	if toptype=="招标" { // only records whose toptype equals "招标" are inspected
+		winner:=qu.ObjToString(tmp["winner"])
+		bidamount:=qu.Float64All(tmp["bidamount"])
+		if winner!=""||bidamount!=0 {
+			// result ahead of time: the record already carries a winner or a bid amount
+			return false
+		}
+	}
+	return true
+}
+
 
 
 

+ 95 - 17
data_quality/src/scoreLogic.go

@@ -1,31 +1,108 @@
 package main
+import (
+	qu "qfw/util"
+)
+var element_reason map[string]interface{}
 
-func dealWithElementRate(tmp map[string]interface{}) int {
+func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}) { // returns the total from calculateScore plus a per-category breakdown map
 
 	// score_standard: scoring standard. Counts the configured core/other fields of tmp that pass their rule and the deduct fields that are empty, then hands the three counts to calculateScore.
-	m,n:=0,0
+	element_reason = map[string]interface{}{} // NOTE(review): the package-level element_reason is reset here but not read again in this function; the returned map is built separately below
+	m,n,z :=0,0,0 // m: core hits, n: other hits, z: deduction count (only ever decremented, so z <= 0)
+	core_value,other_value,deduct_value :="","","" // "-"-terminated names of the fields counted in each category
+
+
+	// element scoring - handle with care: core_element other_element deduct_element
+
 	for _,v:=range core_element{ // each entry is ranged over as a map of field name -> rule
-		if tmp[v]!=nil {
-			m++
+		for k1,v1:=range v{
+			if tmp[k1]==nil { // field absent from the record: nothing to count
+				continue
+			}
+			dict :=*qu.ObjToMap(v1) // rule for this field; NOTE(review): dereferences the helper's result, presumably panics if v1 is not a map - confirm
+			element_type := qu.ObjToString(dict["type"])
+			if element_type=="int" { // integer rule: counted when the value is strictly greater than "large"
+				temp_num:=qu.IntAll(dict["large"])
+				if qu.IntAll(tmp[k1])>temp_num {
+					m++
+					core_value = core_value+k1+"-"
+				}
+			}else if element_type=="float" { // float rule: same comparison on float64 values
+				temp_num:=qu.Float64All(dict["large"])
+				if qu.Float64All(tmp[k1])>temp_num {
+					m++
+					core_value = core_value+k1+"-"
+				}
+			}else if element_type=="string" { // string rule: counted when len() of the value is strictly greater than "length" (len on a Go string counts bytes)
+				temp_length:=qu.IntAll(dict["length"])
+				if len(qu.ObjToString(tmp[k1]))>temp_length {
+					m++
+					core_value = core_value+k1+"-"
+				}
+			}else { // any other "type" value is ignored
+
+			}
 		}
 	}
+
 	for _,v:=range other_element{ // same rule evaluation as the core loop, counted in n
-		if tmp[v]!=nil {
-			n++
+		for k1,v1:=range v{
+			if tmp[k1]==nil {
+				continue
+			}
+			dict :=*qu.ObjToMap(v1)
+			element_type := qu.ObjToString(dict["type"])
+			if element_type=="int" {
+				temp_num:=qu.IntAll(dict["large"])
+				if qu.IntAll(tmp[k1])>temp_num {
+					n++
+					other_value = other_value+k1+"-"
+				}
+			}else if element_type=="float" {
+				temp_num:=qu.Float64All(dict["large"])
+				if qu.Float64All(tmp[k1])>temp_num {
+					n++
+					other_value = other_value+k1+"-"
+				}
+			}else if element_type=="string" {
+				temp_length:=qu.IntAll(dict["length"])
+				if len(qu.ObjToString(tmp[k1]))>temp_length {
+					n++
+					other_value = other_value+k1+"-"
+				}
+			}else {
+
+			}
 		}
 	}
-	return calculateScore(m,n)
+
+
+	for _,v:=range deduct_element{
+		if qu.ObjToString(tmp[v])=="" { // the field's string form is empty: one more deduction
+			z--
+			deduct_value = deduct_value+v+"-"
+		}
+	}
+
+	total,core_s,other_s,deduct_s:=calculateScore(m,n,z) // z is zero or negative at this point
+	return total,map[string]interface{}{
+		"coreElement":map[string]interface{}{
+			"key":core_value,
+			"core_score":core_s,
+		},
+		"otherElement":map[string]interface{}{
+			"key":other_value,
+			"other_score":other_s,
+		},
+		"deductElement":map[string]interface{}{
+			"key":deduct_value,
+			"deduct_score":deduct_s,
+		},
+	}
 }
 
-func calculateScore(core_num int,other_num int) int  {
-	/*
+func calculateScore(core_num int,other_num int,deduct_num int) (int,int,int,int)  {
 
-    "total_score": 100,
-    "core_max": 70,
-    "core_each": 10,
-    "other_max": 30,
-    "other_each": 5
-	*/
 	m :=core_each*core_num
 	if m>core_max {
 		m = core_max
@@ -36,10 +113,11 @@ func calculateScore(core_num int,other_num int) int  {
 		n = other_max
 	}
 
-	t :=m+n
+	z := deduct_each*deduct_num
+	t :=m+n+z
 	if t > total_score {
 		t=total_score
 	}
 
-	return t
+	return t,m,n,z
 }

+ 2 - 2
data_quality/src/sendmail.go

@@ -24,12 +24,12 @@ type udpNode struct {
 
 func checkMapJob() {
 	//阿里云内网无法发送邮件
-	jkmail, _ := Sysconfig["jkmail"].(map[string]interface{})
+	jkmail, _ := sysconfig["jkmail"].(map[string]interface{})
 	if jkmail != nil {
 		tomail, _ = jkmail["to"].(string)
 		api, _ = jkmail["api"].(string)
 	}
-	log.Println("start checkMapJob", tomail, Sysconfig["jkmail"])
+	log.Println("start checkMapJob", tomail, sysconfig["jkmail"])
 	for {
 		udptaskmap.Range(func(k, v interface{}) bool {
 			now := time.Now().Unix()

+ 2 - 0
udpfilterdup/src/main.go

@@ -414,6 +414,8 @@ func historyTaskDay() {
 			break
 		}
 
+		time.Sleep(5 * time.Minute)
+
 		//开始判重
 		q = map[string]interface{}{
 			"_id": map[string]interface{}{