Browse Source

数据质量-评分-重新制定规则

apple 5 years ago
parent
commit
26a330185c

+ 35 - 35
data_quality/src/config.json

@@ -1,62 +1,62 @@
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "192.168.3.207:27092",
-    "dbName": "extract_kf",
-    "collName": "zk_move",
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "result_20200715",
     "pool": 10,
     "site": {
-      "site_dbname": "extract_kf",
+      "site_dbname": "qfw",
       "site_coll": "site"
     }
   },
+  "qy_mongodb": {
+    "qy_addrName": "172.17.4.187:27081",
+    "qy_dbName": "mixdata",
+    "qy_collName": "qyxy_std",
+    "pool": 10
+  },
   "score_standard": {
     "total_score": 100,
-    "core_max": 70,
+    "core_max": 60,
     "core_each": 10,
-    "other_max": 30,
+    "other_max": 40,
     "other_each": 5,
     "deduct_each": 5
   },
-  "qy_mongodb": {
-    "qy_addrName": "192.168.3.207:27092",
-    "qy_dbName": "extract_kf",
-    "qy_collName": "zk",
-    "pool": 10
-  },
   "core_element":[
-    {"bidamount": {"type": "float", "large": 0}},
-    {"budget": {"type": "float", "large": 0}},
-    {"projectcode": {"type": "string", "length": 5}},
-    {"contractnumber": {"type": "string", "length": 5}},
-    {"title": {"type": "string", "length": 10}},
-    {"projectname": {"type": "string", "length": 10}},
-    {"buyer": {"type": "string", "length": 10}},
-    {"winner": {"type": "string", "length": 10}}
+    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
+    {"budget": {"type": "float", "min": 0,"max":50000000000}},
+    {"projectcode": {"type": "string", "min": 4,"max": 20}},
+    {"projectname": {"type": "string", "min": 4,"max": 40}},
+    {"buyer": {"type": "string", "min":4,"max": 15}},
+    {"winner": {"type": "string", "min": 4,"max": 15}}
   ],
   "other_element": [
-    {"bidopenaddress": {"type": "string","length": 0}},
-    {"winnertel": {"type": "string","length": 0}},
-    {"winnerperson": {"type": "string","length": 0}},
-    {"winneraddr": {"type": "string","length": 0}},
-    {"agencyaddr": {"type": "string","length": 0}},
-    {"buyeraddr": {"type": "string","length": 0}},
-    {"projectaddr": {"type": "string","length": 0}},
-    {"agencytel": {"type": "string","length": 0}},
-    {"agencyperson": {"type": "string","length": 0}},
-    {"buyerperson": {"type": "string","length": 0}},
-    {"agency": {"type": "string","length": 0}},
-    {"buyertel": {"type": "string","length": 0}},
-    {"bidopentime": {"type": "int","large": 0}},
-    {"signaturedate": {"type": "int","large": 0}}
+    {"agency": {"type": "string","min": 3,"max": 15}},
+    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
+    {"winneraddr": {"type": "string","min": 3,"max": 40}},
+    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
+    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
+    {"projectaddr": {"type": "string","min": 2,"max": 40}},
+    {"agencyperson": {"type": "string","min": 0,"max": 10}},
+    {"buyerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnertel": {"type": "string","min": 4,"max": 20}},
+    {"agencytel": {"type": "string","min": 4,"max": 20}},
+    {"buyertel": {"type": "string","min": 4,"max": 20}},
+    {"bidopentime": {"type": "int","min": 946656000}},
+    {"signaturedate": {"type": "int","min": 946656000}}
   ],
   "deduct_element": [
     "area",
     "city",
     "site",
     "toptype",
-    "subtype"
+    "subtype",
+    "title"
   ],
+  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
   "jkmail": {
     "to": "zhengkun@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail"

+ 23 - 24
data_quality/src/main.go

@@ -3,12 +3,12 @@ package main
 import (
 	"encoding/json"
 	"log"
+	mu "mfw/util"
 	"net"
 	"os"
 	"qfw/common/src/qfw/util"
 	qu "qfw/util"
 	"time"
-	mu "mfw/util"
 )
 
 
@@ -23,6 +23,7 @@ var (
 	core_element,other_element	[]map[string]interface{}	//要素
 	deduct_element 	[]string
 	total_score,core_max,core_each,other_max,other_each ,deduct_each int
+	specialaddr   string
 )
 
 func initSite()  {
@@ -77,6 +78,8 @@ func initMgo()  {
 	other_max = qu.IntAll(score_standard["other_max"])
 	other_each = qu.IntAll(score_standard["other_each"])
 	deduct_each = qu.IntAll(score_standard["deduct_each"])
+
+	specialaddr = sysconfig["specialaddr"].(string)
 }
 
 
@@ -86,6 +89,7 @@ func init() {
 	initMgo()
 	initSite()//加载站点
 	log.Println("采用udp模式")
+	//utf8.RuneCountInString()
 }
 
 
@@ -111,7 +115,6 @@ func main() {
 	}
 	mapinfo["gtid"] = sid
 	mapinfo["lteid"] = eid
-	mapinfo["stop"] = "true"
 	startTask([]byte{}, mapinfo)
 	time.Sleep(99999 * time.Hour)
 
@@ -176,7 +179,6 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 		element_score,element_reason:=dealWithElementRate(tmp)
 		error_score,abnormal_score,error_reason,abnormal_reason:=dealWithErrorRate(tmp)
 		//log.Println("元素分:",element_score,"错误分:",error_score,"异常分:",abnormal_score)
-
 		updateExtract = append(updateExtract, []map[string]interface{}{
 			map[string]interface{}{
 				"_id": tmp["_id"],
@@ -210,32 +212,29 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 
 	log.Println("task quality over - 总计数量",index)
 
-	time.Sleep(60 * time.Second)
+	time.Sleep(30 * time.Second)
 
 	//任务完成,开始发送广播通知下面节点
-	if mapInfo["stop"] == nil {
-		log.Println("评分统计完成-发送udp")
-		for _, to := range nextNode {
-			sid, _ := mapInfo["gtid"].(string)
-			eid, _ := mapInfo["lteid"].(string)
-			key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
-			by, _ := json.Marshal(map[string]interface{}{
-				"gtid":  sid,
-				"lteid": eid,
-				"stype": util.ObjToString(to["stype"]),
-				"key":   key,
-			})
-			addr := &net.UDPAddr{
-				IP:   net.ParseIP(to["addr"].(string)),
-				Port: util.IntAll(to["port"]),
-			}
-			node := &udpNode{by, addr, time.Now().Unix(), 0}
-			udptaskmap.Store(key, node)
-			udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
+	log.Println("评分统计完成-发送udp")
+	for _, to := range nextNode {
+		sid, _ := mapInfo["gtid"].(string)
+		eid, _ := mapInfo["lteid"].(string)
+		key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
+		by, _ := json.Marshal(map[string]interface{}{
+			"gtid":  sid,
+			"lteid": eid,
+			"stype": util.ObjToString(to["stype"]),
+			"key":   key,
+		})
+		addr := &net.UDPAddr{
+			IP:   net.ParseIP(to["addr"].(string)),
+			Port: util.IntAll(to["port"]),
 		}
+		node := &udpNode{by, addr, time.Now().Unix(), 0}
+		udptaskmap.Store(key, node)
+		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
 	}
 
 
 
-
 }

+ 122 - 36
data_quality/src/mark

@@ -1,56 +1,97 @@
-备份数据使用
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "172.17.4.85:27080",
-    "dbName": "qfw",
-    "collName": "result_20200715",
+    "addrName": "192.168.3.207:27092",
+    "dbName": "extract_kf",
+    "collName": "zk_move",
     "pool": 10,
     "site": {
-      "site_dbname": "qfw",
+      "site_dbname": "extract_kf",
       "site_coll": "site"
     }
   },
+  "score_standard": {
+    "total_score": 100,
+    "core_max": 60,
+    "core_each": 10,
+    "other_max": 40,
+    "other_each": 5,
+    "deduct_each": 5
+  },
+  "qy_mongodb": {
+    "qy_addrName": "192.168.3.207:27092",
+    "qy_dbName": "extract_kf",
+    "qy_collName": "zk",
+    "pool": 10
+  },
   "core_element":[
-    {"bidamount": {"type": "float", "large": 0}},
-    {"budget": {"type": "float", "large": 0}},
-    {"projectcode": {"type": "string", "length": 5}},
-    {"contractnumber": {"type": "string", "length": 5}},
-    {"title": {"type": "string", "length": 10}},
-    {"projectname": {"type": "string", "length": 10}},
-    {"buyer": {"type": "string", "length": 10}},
-    {"winner": {"type": "string", "length": 10}}
+    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
+    {"budget": {"type": "float", "min": 0,"max":50000000000}},
+    {"projectcode": {"type": "string", "min": 4,"max": 20}},
+    {"projectname": {"type": "string", "min": 4,"max": 40}},
+    {"buyer": {"type": "string", "min":4,"max": 15}},
+    {"winner": {"type": "string", "min": 4,"max": 15}}
   ],
   "other_element": [
-    {"bidopenaddress": {"type": "string","length": 0}},
-    {"winnertel": {"type": "string","length": 0}},
-    {"winnerperson": {"type": "string","length": 0}},
-    {"winneraddr": {"type": "string","length": 0}},
-    {"agencyaddr": {"type": "string","length": 0}},
-    {"buyeraddr": {"type": "string","length": 0}},
-    {"projectaddr": {"type": "string","length": 0}},
-    {"agencytel": {"type": "string","length": 0}},
-    {"agencyperson": {"type": "string","length": 0}},
-    {"buyerperson": {"type": "string","length": 0}},
-    {"agency": {"type": "string","length": 0}},
-    {"buyertel": {"type": "string","length": 0}},
-    {"bidopentime": {"type": "int","large": 0}},
-    {"signaturedate": {"type": "int","large": 0}}
+    {"agency": {"type": "string","min": 3,"max": 15}},
+    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
+    {"winneraddr": {"type": "string","min": 3,"max": 40}},
+    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
+    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
+    {"projectaddr": {"type": "string","min": 2,"max": 40}},
+    {"agencyperson": {"type": "string","min": 0,"max": 10}},
+    {"buyerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnertel": {"type": "string","min": 4,"max": 20}},
+    {"agencytel": {"type": "string","min": 4,"max": 20}},
+    {"buyertel": {"type": "string","min": 4,"max": 20}},
+    {"bidopentime": {"type": "int","min": 946656000}},
+    {"signaturedate": {"type": "int","min": 946656000}}
   ],
   "deduct_element": [
     "area",
     "city",
     "site",
     "toptype",
-    "subtype"
+    "subtype",
+    "title"
   ],
-  "score_standard": {
-    "total_score": 100,
-    "core_max": 70,
-    "core_each": 10,
-    "other_max": 30,
-    "other_each": 5,
-    "deduct_each": 5
+  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
+  "jkmail": {
+    "to": "zhengkun@topnet.net.cn",
+    "api": "http://10.171.112.160:19281/_send/_mail"
+  },
+  "nextNode": [
+
+  ]
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{
+  "udpport": ":17007",
+  "mongodb": {
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "result_20200715",
+    "pool": 10,
+    "site": {
+      "site_dbname": "qfw",
+      "site_coll": "site"
+    }
   },
   "qy_mongodb": {
     "qy_addrName": "172.17.4.187:27081",
@@ -58,7 +99,47 @@
     "qy_collName": "qyxy_std",
     "pool": 10
   },
-
+  "score_standard": {
+    "total_score": 100,
+    "core_max": 60,
+    "core_each": 10,
+    "other_max": 40,
+    "other_each": 5,
+    "deduct_each": 5
+  },
+  "core_element":[
+    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
+    {"budget": {"type": "float", "min": 0,"max":50000000000}},
+    {"projectcode": {"type": "string", "min": 4,"max": 20}},
+    {"projectname": {"type": "string", "min": 4,"max": 40}},
+    {"buyer": {"type": "string", "min":4,"max": 15}},
+    {"winner": {"type": "string", "min": 4,"max": 15}}
+  ],
+  "other_element": [
+    {"agency": {"type": "string","min": 3,"max": 15}},
+    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
+    {"winneraddr": {"type": "string","min": 3,"max": 40}},
+    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
+    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
+    {"projectaddr": {"type": "string","min": 2,"max": 40}},
+    {"agencyperson": {"type": "string","min": 0,"max": 10}},
+    {"buyerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnertel": {"type": "string","min": 4,"max": 20}},
+    {"agencytel": {"type": "string","min": 4,"max": 20}},
+    {"buyertel": {"type": "string","min": 4,"max": 20}},
+    {"bidopentime": {"type": "int","min": 946656000}},
+    {"signaturedate": {"type": "int","min": 946656000}}
+  ],
+  "deduct_element": [
+    "area",
+    "city",
+    "site",
+    "toptype",
+    "subtype",
+    "title"
+  ],
+  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
   "jkmail": {
     "to": "zhengkun@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail"
@@ -77,3 +158,8 @@
 
 
 
+
+
+
+
+

+ 2 - 2
data_quality/src/scoreExpError.go

@@ -217,7 +217,7 @@ func codesAnalysis(projectcode string,contractnumber string) bool {
 			return false
 		}
 		//符合-8长度-日期格式 yyyyMMdd
-		if !regAnalysis(projectcode) {
+		if !regAnalysis(projectcode) &&len(projectcode)==8 {
 			return false
 		}
 	}
@@ -227,7 +227,7 @@ func codesAnalysis(projectcode string,contractnumber string) bool {
 			return false
 		}
 
-		if !regAnalysis(contractnumber) {
+		if !regAnalysis(contractnumber)  && len(projectcode)==8  {
 			return false
 		}
 	}

+ 117 - 48
data_quality/src/scoreLogic.go

@@ -1,19 +1,102 @@
 package main
+
 import (
 	qu "qfw/util"
+	"strings"
+	"unicode/utf8"
 )
 var element_reason map[string]interface{}
 
 func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}) {
 
-	//score_standard 打分标准
+	//score_standard 打分标准  要素打分 - 需慎重
 	element_reason = map[string]interface{}{}
 	m,n,z :=0,0,0
 	core_value,other_value,deduct_value :="","",""
 
+	//第一次-配置验证
+	coreArr,otherArr:=filterConfiguration(tmp)
+	//细节过滤-验证
+	new_coreArr,new_otherArr:=[]string{},[]string{}
+	for _,v:=range coreArr{
+		if v=="projectname"||v=="buyer"||v=="winner" {
+			if isChinese(qu.ObjToString(tmp[v])) {
+				new_coreArr = append(new_coreArr,v)
+			}
+		}else {
+			new_coreArr = append(new_coreArr,v)
+		}
+	}
+
+	for _,v:=range otherArr{
+		if v=="agencyperson"||v=="buyerperson"||v=="winnerperson" {
+			if isChinese(qu.ObjToString(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else if v=="agencytel"||v=="buyertel"||v=="winnertel" {
+			if !isChinese(qu.ObjToString(tmp[v])) {
+				if isTelephone(qu.ObjToString(tmp[v])) {
+					new_otherArr = append(new_otherArr,v)
+				}
+			}
+		}else if v=="bidopentime"||v=="signaturedate" {
+			if isTimestamp(qu.Int64All(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else if v=="bidopenaddress"||v=="winneraddr"||v=="agencyaddr"||
+			v=="buyeraddr" {
+			if isPlaceAddr(qu.ObjToString(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else if v=="projectaddr" {
+			if isChinese(qu.ObjToString(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else {
+			new_otherArr = append(new_otherArr,v)
+		}
+	}
+
+	core_value = strings.Join(new_coreArr, ",")
+	m = len(new_coreArr)
 
-	//要素打分 - 需慎重 core_element other_element deduct_element
+	other_value = strings.Join(new_otherArr, ",")
+	n = len(new_otherArr)
 
+
+
+	//扣分项
+	for _,v:=range deduct_element{
+		if qu.ObjToString(tmp[v])=="" {
+			z--
+			if deduct_value == "" {
+				deduct_value = v
+			}else {
+				deduct_value = deduct_value+","+v
+			}
+		}
+	}
+
+	total,core_s,other_s,deduct_s:=calculateScore(m,n,z)
+	return total,map[string]interface{}{
+		"coreElement":map[string]interface{}{
+			"key":core_value,
+			"core_score":core_s,
+		},
+		"otherElement":map[string]interface{}{
+			"key":other_value,
+			"other_score":other_s,
+		},
+		"deductElement":map[string]interface{}{
+			"key":deduct_value,
+			"deduct_score":deduct_s,
+		},
+	}
+}
+
+func filterConfiguration(tmp map[string]interface{})([]string,[]string)  {
+	coreArr ,otherArr:= []string{},[]string{}
+	//核心要素   int - 时间-   float-金额区间  string-字符串长度
 	for _,v:=range core_element{
 		for k1,v1:=range v{
 			if tmp[k1]==nil {
@@ -21,23 +104,25 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 			}
 			dict :=*qu.ObjToMap(v1)
 			element_type := qu.ObjToString(dict["type"])
+
 			if element_type=="int" {
-				temp_num:=qu.IntAll(dict["large"])
-				if qu.IntAll(tmp[k1])>temp_num {
-					m++
-					core_value = core_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				if qu.IntAll(tmp[k1])>min {
+					coreArr = append(coreArr,k1)
 				}
 			}else if element_type=="float" {
-				temp_num:=qu.Float64All(dict["large"])
-				if qu.Float64All(tmp[k1])>temp_num {
-					m++
-					core_value = core_value+k1+"-"
+				min:=qu.Float64All(dict["min"])
+				max:=qu.Float64All(dict["max"])
+				if qu.Float64All(tmp[k1])>min && qu.Float64All(tmp[k1])<max{
+					coreArr = append(coreArr,k1)
 				}
 			}else if element_type=="string" {
-				temp_length:=qu.IntAll(dict["length"])
-				if len(qu.ObjToString(tmp[k1]))>temp_length {
-					m++
-					core_value = core_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				max:=qu.IntAll(dict["max"])
+
+				if utf8.RuneCountInString(qu.ObjToString(tmp[k1]))>min &&
+					utf8.RuneCountInString(qu.ObjToString(tmp[k1]))<max{
+					coreArr = append(coreArr,k1)
 				}
 			}else {
 
@@ -45,6 +130,7 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 		}
 	}
 
+
 	for _,v:=range other_element{
 		for k1,v1:=range v{
 			if tmp[k1]==nil {
@@ -52,23 +138,25 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 			}
 			dict :=*qu.ObjToMap(v1)
 			element_type := qu.ObjToString(dict["type"])
+
 			if element_type=="int" {
-				temp_num:=qu.IntAll(dict["large"])
-				if qu.IntAll(tmp[k1])>temp_num {
-					n++
-					other_value = other_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				if qu.IntAll(tmp[k1])>min {
+					otherArr = append(otherArr,k1)
 				}
 			}else if element_type=="float" {
-				temp_num:=qu.Float64All(dict["large"])
-				if qu.Float64All(tmp[k1])>temp_num {
-					n++
-					other_value = other_value+k1+"-"
+				min:=qu.Float64All(dict["min"])
+				max:=qu.Float64All(dict["max"])
+				if qu.Float64All(tmp[k1])>min && qu.Float64All(tmp[k1])<max{
+					otherArr = append(otherArr,k1)
 				}
 			}else if element_type=="string" {
-				temp_length:=qu.IntAll(dict["length"])
-				if len(qu.ObjToString(tmp[k1]))>temp_length {
-					n++
-					other_value = other_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				max:=qu.IntAll(dict["max"])
+
+				if utf8.RuneCountInString(qu.ObjToString(tmp[k1]))>min &&
+					utf8.RuneCountInString(qu.ObjToString(tmp[k1]))<max{
+					otherArr = append(otherArr,k1)
 				}
 			}else {
 
@@ -76,30 +164,11 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 		}
 	}
 
+	return coreArr,otherArr
+}
+
 
-	for _,v:=range deduct_element{
-		if qu.ObjToString(tmp[v])=="" {
-			z--
-			deduct_value = deduct_value+v+"-"
-		}
-	}
 
-	total,core_s,other_s,deduct_s:=calculateScore(m,n,z)
-	return total,map[string]interface{}{
-		"coreElement":map[string]interface{}{
-			"key":core_value,
-			"core_score":core_s,
-		},
-		"otherElement":map[string]interface{}{
-			"key":other_value,
-			"other_score":other_s,
-		},
-		"deductElement":map[string]interface{}{
-			"key":deduct_value,
-			"deduct_score":deduct_s,
-		},
-	}
-}
 
 func calculateScore(core_num int,other_num int,deduct_num int) (int,int,int,int)  {
 

+ 73 - 0
data_quality/src/scoreMethod.go

@@ -0,0 +1,73 @@
+package main
+
+import (
+	"regexp"
+	"time"
+	"unicode"
+)
+
+func isPlaceAddr(str string) bool {
+
+	if isTelephone(str) {
+		return false
+	}
+	regx,_ := regexp.Compile(specialaddr)
+	result:=regx.FindString(str)
+	if result !="" {
+		return true
+	}
+	return false
+}
+
+
+
// isTimestamp reports whether i, a Unix time in seconds, lies earlier than
// 180 days after the current time.
//
// There is no lower bound: zero and negative values are accepted here.
// NOTE(review): callers appear to rely on the configured "min" check in
// filterConfiguration to reject implausibly old values — confirm.
func isTimestamp(i int64) bool {
	const (
		secondsPerDay   = 86400
		maxDaysInFuture = 180
	)
	latest := time.Now().Unix() + secondsPerDay*maxDaysInFuture
	return i < latest
}
+
// Phone-number patterns, compiled once instead of on every call.
var (
	// mobilePhoneRe matches an 11-digit number starting with 1 followed by a
	// digit from 3 to 9. The previous class "[3|4|5|6|7|8|9]" also accepted a
	// literal '|' as the second character.
	mobilePhoneRe = regexp.MustCompile(`^1[3-9]\d{9}$`)
	// landlinePhoneRe matches 7 or 8 digits with an optional 2-4 digit
	// prefix separated by a hyphen.
	landlinePhoneRe = regexp.MustCompile(`^(\d{2,4}-)?\d{7,8}$`)
)

// isTelephone reports whether the whole of str is either a mobile number
// (see mobilePhoneRe) or a landline number (see landlinePhoneRe).
func isTelephone(str string) bool {
	return mobilePhoneRe.MatchString(str) || landlinePhoneRe.MatchString(str)
}
+
+
+
+
// isChinese reports whether str contains at least one Han character.
// An empty string yields false.
func isChinese(str string) bool {
	for _, r := range str {
		if unicode.Is(unicode.Han, r) {
			return true
		}
	}
	return false
}
+
// isChineseChar reports whether str contains at least one Han character or
// one of a fixed set of full-width Chinese punctuation marks.
//
// The punctuation set used to be tested by compiling a regular expression for
// every rune of the input; a rune switch checks the same characters without
// any compilation.
func isChineseChar(str string) bool {
	for _, r := range str {
		if unicode.Is(unicode.Han, r) {
			return true
		}
		switch r {
		case '\u3002', '\uff1b', '\uff0c', '\uff1a', '\u201c', '\u201d',
			'\uff08', '\uff09', '\u3001', '\uff1f', '\u300a', '\u300b':
			return true
		}
	}
	return false
}

+ 2 - 2
udpfilterdup/src/config.json

@@ -5,8 +5,8 @@
         "addr": "192.168.3.207:27092",
         "pool": 5,
         "db": "extract_kf",
-        "extract": "zk_move",
-        "extract_back": "zk_move",
+        "extract": "zk_zk_test",
+        "extract_back": "zk_zk_test",
         "site": {
             "dbname": "extract_kf",
             "coll": "site"

+ 7 - 5
udpfilterdup/src/main.go

@@ -16,6 +16,7 @@ import (
 	"os"
 	"qfw/util"
 	"regexp"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -124,11 +125,7 @@ func main() {
 //测试组人员使用
 func mainT() {
 
-	//testRepairData11()
-	//return
-
 	if TimingTask {
-		log.Println("新历史任务测试开始")
 		go historyTaskDay()
 		time.Sleep(99999 * time.Hour)
 	} else {
@@ -742,7 +739,12 @@ func historyTaskDay() {
 //判断是否在当前id段落
 func judgeIsCurIds (gtid string,lteid string,curid string) bool {
 
-
+	gt_time, _ := strconv.ParseInt(gtid[:8], 16, 64)
+	lte_time, _ := strconv.ParseInt(lteid[:8], 16, 64)
+	cur_time, _ := strconv.ParseInt(curid[:8], 16, 64)
+	if cur_time>gt_time&&cur_time<=lte_time {
+		return true
+	}
 	return false
 }