Explorar o código

重构-城市-统一清洗check程序

zhengkun hai 3 anos
pai
achega
f5506419ad

+ 1 - 1
data_monitoring/listen_data/src/main.go

@@ -114,7 +114,7 @@ func main()  {
 	}
 	save_mgo.InitPool()
 
-	decodeJyUrl()
+	exportFieldScoreIDBIdding()
 
 	return
 

+ 137 - 4
data_monitoring/listen_data/src/zkmethod.go

@@ -29,6 +29,7 @@ var task chan struct{} = make(chan struct{}, 1)
 var rpre *regexp.Regexp = regexp.MustCompile("https://www.jianyu360.com/article/content/")
 var rsuf *regexp.Regexp = regexp.MustCompile("(.html).*")
 var province,all_province,city,brief_city string
+var Url = "https://www.jianyu360.com/article/content/%s.html"
 
 //添加插件chrom-task任务表
 func addChromPluginTaskData()  {
@@ -56,8 +57,6 @@ func addChromPluginTaskData()  {
 		{"name":"宜宾市公共资源交易信息网","code":"sc_ybsggzyjyxxw_gqzb","path":"jyb/20210830/sc_ybsggzyjyxxw_gqzb_bggg"},
 		{"name":"宜宾市公共资源交易信息网","code":"sc_ybsggzyjyxxw_gqzb","path":"jyb/20210830/sc_ybsggzyjyxxw_gqzb_jggg"},
 		{"name":"宜宾市公共资源交易信息网","code":"sc_ybsggzyjyxxw_gqzb","path":"jyb/20210830/sc_ybsggzyjyxxw_gqzb_zbgg"},
-
-
 	}
 	for _,tmp:=range taskArr{
 		tmp["updatetime"] = qu.Int64All(0)
@@ -74,14 +73,19 @@ func decodeJyUrl()  {
 	待修复
 	gd_gdsdzhcgzxpt_wsjj_cjgg
 
-
+	科大讯飞
+	60d595e51a75b8f446ddc2a8
+	60fe53a61a75b8f446150a55
+	60b5f32b8a2adb30a598e041
+	612933b21a75b8f44654fa38
 	*/
 
-	test := "AqQY1wEfzIoLD84JHdzZGUgCjM/DSZgXGNkPB4rICEgfGdzYidUCYE="
+	test := "AbyY1wEdDxYOyYsNHtxZ3IkCCQCIDFjcWhwKCgkPT0efGdzfl5UCjI%3D"
 	var Decode  = qu.CommonDecodeArticle("content", test)
 	log.Debug(Decode[0])
 	return
 }
+
 //加密
 func encodeJyUrl()  {
 	var Url = "https://www.jianyu360.com/article/content/%s.html"
@@ -89,6 +93,135 @@ func encodeJyUrl()  {
 	log.Debug(Encode)
 }
 
+
+
+
+
+
+// exportFieldScoreIDBIdding joins the machine-scored collection
+// "zktest_fieldscore_data_new" with the manually checked collection
+// "zktest_fieldscore_data_new_ck" on _id and saves one summary row per scored
+// document into collection "111".
+//
+// For every field in fieldArr the row carries two entries:
+//   - "<field>_s": the score read from field_score[<field>]["score"], or -1
+//     when that field has no score entry;
+//   - "<field>_r": "正确"/"错误" when the checked document marks the field
+//     (ck_<field> > 0) and the two values agree/differ, "未标" when the field
+//     was not marked, and "" when there is no score at all.
+//
+// NOTE(review): neither iterator is closed or inspected for errors after the
+// loop; if these are mgo-style iterators an it.Close() check would surface
+// read failures — confirm against the save_mgo wrapper.
+func exportFieldScoreIDBIdding() {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q, total := map[string]interface{}{}, 0
+
+	// Load every manually checked document into memory, keyed by its string id.
+	ck_data := make(map[string]map[string]interface{})
+	it := sess.DB(save_mgo.DbName).C("zktest_fieldscore_data_new_ck").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		ck_data[BsonTOStringId(tmp["_id"])] = tmp
+		// A fresh map per iteration so stored documents are not overwritten.
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("结束:", total, len(ck_data))
+
+	fieldArr := []string{"buyer", "s_winner", "budget", "bidamount", "projectname", "projectcode"}
+	total = 0
+	it = sess.DB(save_mgo.DbName).C("zktest_fieldscore_data_new").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		// ck is a nil map when the document was never checked; reads from it
+		// simply yield nil, which the helpers below turn into zero values.
+		ck := ck_data[BsonTOStringId(tmp["_id"])]
+
+		// Check the pointer before dereferencing it: a document without a
+		// usable field_score must not abort the whole export.
+		var field_score map[string]interface{}
+		if p := qu.ObjToMap(tmp["field_score"]); p != nil {
+			field_score = *p
+		}
+
+		dict := map[string]interface{}{
+			"_id":     tmp["_id"],
+			"href":    tmp["href"],
+			"subtype": tmp["subtype"],
+		}
+		for _, v := range fieldArr {
+			// Score of the current field; -1 means "no score recorded".
+			score := int64(-1)
+			if p := qu.ObjToMap(field_score[v]); p != nil && *p != nil {
+				score = qu.Int64All((*p)["score"])
+			}
+
+			desc := "未标"
+			if qu.Int64All(ck[fmt.Sprintf("ck_%s", v)]) > 0 {
+				// The field was reviewed: compare both sides, numerically for
+				// the money fields and as strings for everything else.
+				var same bool
+				if v == "budget" || v == "bidamount" {
+					same = qu.Int64All(tmp[v]) == qu.Int64All(ck[v])
+				} else {
+					same = qu.ObjToString(tmp[v]) == qu.ObjToString(ck[v])
+				}
+				if same {
+					desc = "正确"
+				} else {
+					desc = "错误"
+				}
+			}
+			if score == -1 {
+				// Without a score the verdict column is left empty.
+				desc = ""
+			}
+			dict[fmt.Sprintf("%s_s", v)] = score
+			dict[fmt.Sprintf("%s_r", v)] = desc
+		}
+
+		save_mgo.Save("111", dict)
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("结束:", total, len(ck_data))
+}
+// exporMulSiteData samples documents from the "bidding" collection, newest
+// _id first, and copies them into "zktest_fieldscore_data" with an extra
+// "jyhref" entry built from Url and the encoded document id.
+//
+// Sampling limits:
+//   - at most 500 documents overall (isok);
+//   - at most 300 award-type documents (中标/成交/合同, isZhong);
+//   - at most 200 tender-type documents (招标/邀标/询价/竞谈/单一/竞价, isZhao);
+//   - at most 2 documents per site, counted across both groups.
+//
+// Documents with any other subtype are skipped.
+//
+// NOTE(review): the iterator is never closed or checked for errors — confirm
+// whether the save_mgo wrapper expects the caller to do so.
+func exporMulSiteData() {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q, total := map[string]interface{}{}, 0
+	it := sess.DB(save_mgo.DbName).C("bidding").Find(&q).Sort("-_id").Iter()
+	siteMap, isok, isZhao, isZhong := make(map[string]int), 0, 0, 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("current index", total)
+		}
+
+		if isok >= 500 {
+			break
+		}
+		site := qu.ObjToString(tmp["site"])
+		tmpid := BsonTOStringId(tmp["_id"])
+		subtype := qu.ObjToString(tmp["subtype"])
+
+		// dict aliases tmp: the saved row is the source document plus jyhref.
+		dict := tmp
+		dict["jyhref"] = fmt.Sprintf(Url, qu.CommonEncodeArticle("content", tmpid))
+
+		switch subtype {
+		case "中标", "成交", "合同":
+			num_site := qu.IntAll(siteMap[site])
+			if num_site < 2 && isZhong < 300 {
+				isok++
+				isZhong++
+				siteMap[site] = num_site + 1
+				save_mgo.Save("zktest_fieldscore_data", dict)
+			}
+		case "招标", "邀标", "询价", "竞谈", "单一", "竞价":
+			num_site := qu.IntAll(siteMap[site])
+			if num_site < 2 && isZhao < 200 {
+				isok++
+				// Count the tender-type quota; without this increment the
+				// 200-row cap never applies and this group can fill the
+				// whole sample.
+				isZhao++
+				siteMap[site] = num_site + 1
+				save_mgo.Save("zktest_fieldscore_data", dict)
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+
+	log.Debug("is over ", total)
+	log.Debug("is site ", len(siteMap))
+}
 //记录字段-来源
 func dealWithFieldSourceData()  {
 	dict1 := make(map[string]interface{},0)

+ 51 - 4
data_monitoring/vps_server/src/config.json

@@ -1,13 +1,60 @@
 {
   "port": "7811",
   "save_mgodb": {
-    "addr": "192.168.3.207:27092",
-    "db": "zhengkun",
+    "addr": "172.17.4.87:27080",
+    "db": "editor",
     "coll": "monitor_other",
     "pool": 5
   },
   "vpsIDs" : [
-    "专用-常州"
+    "专用-蚌埠",
+    "专用-合肥",
+    "专用-莱芜",
+    "专用-芜湖",
+    "专用-南京",
+    "专用-南通",
+    "专用-秦皇岛",
+    "专用-三明",
+    "专用-驻马店",
+    "专用-淮北",
+    "专用-菏泽",
+    "专用-淮南",
+    "专用-徐州",
+    "专用-宣城",
+    "数据-莆田",
+    "数据-池州",
+    "数据-大连",
+    "数据-黄山",
+    "数据-荆州",
+    "数据-聊城",
+    "数据-洛阳",
+    "数据-宁德",
+    "数据-厦门",
+    "数据-宿迁",
+    "数据-信阳",
+    "数据-珠海",
+    "数据-泉州",
+    "数据-苏州",
+    "数据-锦州",
+    "数据-漳州",
+    "数据-丽水",
+    "数据-无锡",
+    "数据-衢州",
+    "附件-北京",
+    "附件-亳州",
+    "附件-济南",
+    "附件-绍兴",
+    "附件-石家庄",
+    "附件-扬州",
+    "附件-阜阳",
+    "附件-漯河",
+    "附件-荆门",
+    "附件-景德镇",
+    "附件-上海",
+    "附件-福州",
+    "附件-天津",
+    "附件-合肥",
+    "附件-宁德"
   ],
   "during": 10,
   "isErr" : 3,
@@ -20,7 +67,7 @@
     "pwd":      "xomkphsjsamybdbj"
   },
   "jkmail": {
-    "to": "zhaoyujian@topnet.net.cn",
+    "to": "zhaoyujian@topnet.net.cn,zhangjinkun@topnet.net.cn",
     "api": "http://172.17.145.179:19281/_send/_mail"
   }
 }

+ 62 - 4
data_quality/src/config.json

@@ -1,9 +1,9 @@
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "127.0.0.1:27017",
-    "dbName": "zhengkun",
-    "collName": "zktest_fieldscore_data",
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "zktest_fieldscore_data_new",
     "pool": 10
   },
   "qy_mongodb": {
@@ -14,5 +14,63 @@
     "qy_password": "zk@123123",
     "pool": 10
   },
-  "isTest": true
+  "ext_from": {
+    "title": 98,
+    "detail": 100,
+    "ff": 100,
+    "jsondata": 100,
+    "winnerorder": 98,
+    "package": 98
+  },
+  "ext_type": {
+    "colon": 100,
+    "space": 100,
+    "regexp": 98,
+    "table": 100,
+    "title": 98
+  },
+
+  "buyer_score":{
+    "单位库": 1,
+    "前缀校验": 1,
+    "后缀校验": 1,
+    "非纯中文": 1,
+    "切词": 2,
+    "单位对比": 2,
+    "黑名单":2
+  },
+
+  "s_winner_score":{
+    "单位库": 1,
+    "前缀校验": 1,
+    "后缀校验": 1,
+    "非纯中文": 1,
+    "分包量": 1,
+    "切词": 2,
+    "单位对比": 2,
+    "黑名单": 2
+  },
+
+  "budget_score":{
+    "行业范围": 1,
+    "间隔异常": 2
+  },
+
+  "bidamount_score":{
+    "行业范围": 1,
+    "间隔异常": 2
+  },
+
+  "projectname_score":{
+    "长度过长":1,
+    "中文比例": 2
+  },
+
+  "projectcode_score":{
+    "符号过多": 1,
+    "连续中文": 2
+  },
+
+
+  "isTest": false
 }

+ 45 - 12
data_quality/src/main.go

@@ -16,9 +16,14 @@ var (
 	sysconfig    			map[string]interface{} //配置文件
 	mgo,qy_mgo          	*MongodbSim            //mongodb操作对象
 	udpclient    			mu.UdpClient           //udp对象
-	udplock 				sync.Mutex         	   //udp锁
+	udplock,dataLock 		sync.Mutex         	   //udp锁
 	coll_name,qy_coll_name	string				   //表名
 	isTest					bool				   //是否测试
+	Ext_Type,Ext_From		map[string]interface{}	//抽取来源,方式分
+	buyer_score,s_winner_score	map[string]interface{}
+	budget_score,bidamount_score	map[string]interface{}
+	projectname_score,projectcode_score		map[string]interface{}
+
 )
 
 //mgo-配置等
@@ -44,10 +49,26 @@ func initMgo()  {
 	coll_name = mconf["collName"].(string)
 	qy_coll_name = qy_mconf["qy_collName"].(string)
 }
+// initScore copies the scoring sections of sysconfig into the package-level
+// score tables. Each section must be present and be a JSON object; otherwise
+// the type assertion panics at start-up.
+func initScore() {
+	// Target variables listed in the order they are filled.
+	sections := []struct {
+		key string
+		dst *map[string]interface{}
+	}{
+		{"ext_type", &Ext_Type},
+		{"ext_from", &Ext_From},
+		{"buyer_score", &buyer_score},
+		{"s_winner_score", &s_winner_score},
+		{"budget_score", &budget_score},
+		{"bidamount_score", &bidamount_score},
+		{"projectname_score", &projectname_score},
+		{"projectcode_score", &projectcode_score},
+	}
+	for _, sec := range sections {
+		*sec.dst = sysconfig[sec.key].(map[string]interface{})
+	}
+}
 // init runs at package start-up: it reads the configuration file into
 // sysconfig and then calls initMgo and initScore, which read from it.
 func init() {
 	qu.ReadConfig(&sysconfig)// load the configuration file
 	initMgo()
+	initScore()
 }
 
 func mainT() {
@@ -59,6 +80,7 @@ func mainT() {
 }
 //调试流程
 func main() {
+
 	sid := "1f0000000000000000000000"
 	eid := "9f0000000000000000000000"
 	log.Println(sid, "---", eid)
@@ -89,24 +111,35 @@ func startFieldScoreTask(mapInfo map[string]interface{}) {
 	updateFieldScore,total := [][]map[string]interface{}{},0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%1000 == 0 {
-			log.Println("当前数量:", total,"~",tmp["_id"])
+			log.Println("当前数量:", total)
 		}
+
+		//验证初始分字段
+		source:= *qu.ObjToMap(tmp["field_source"])
+		f_s := dealWithFieldSourceScore(source)
+
 		//更新集合
 		update_dict := make(map[string]interface{},0)
-
 		subtype := qu.ObjToString(tmp["subtype"])
-		//buyer_s := buyerFieldScore(tmp)
-		//update_dict["buyer"] = buyer_s
-		budget_s := budgetFieldScore(tmp)
+
+
+		buyer_s := buyerFieldScore(tmp,f_s["buyer"])
+		update_dict["buyer"] = buyer_s
+
+		budget_s := budgetFieldScore(tmp,f_s["budget"])
 		update_dict["budget"] = budget_s
 
-		if subtype=="中标"||subtype=="成交"||subtype=="合同" {
-			//winner_s := winnerFieldScore(tmp)
-			//update_dict["winner"] = winner_s
-			bidamount_s := bidamountFieldScore(tmp)
-			update_dict["bidamount"] = bidamount_s
+		projectname_s := projectnameFieldScore(tmp,f_s["projectname"])
+		update_dict["projectname"] = projectname_s
 
+		projectcode_s := projectcodeFieldScore(tmp,f_s["projectcode"])
+		update_dict["projectcode"] = projectcode_s
 
+		if subtype=="中标"||subtype=="成交"||subtype=="合同" {
+			s_winner_s := winnerFieldScore(tmp,f_s["s_winner"])
+			update_dict["s_winner"] = s_winner_s
+			bidamount_s := bidamountFieldScore(tmp,f_s["bidamount"])
+			update_dict["bidamount"] = bidamount_s
 		}
 
 
@@ -116,7 +149,7 @@ func startFieldScoreTask(mapInfo map[string]interface{}) {
 			},
 			map[string]interface{}{
 				"$set": map[string]interface{}{
-					"score": update_dict,
+					"field_score": update_dict,
 				},
 			},
 		})

+ 75 - 2
data_quality/src/method.go

@@ -5,6 +5,7 @@ import (
 	"github.com/go-ego/gse"
 	qu "qfw/util"
 	"regexp"
+	"strings"
 	"unicode"
 	"unicode/utf8"
 )
@@ -22,8 +23,8 @@ var GSE *gse.Segmenter  = &gse.Segmenter{}
 
 
 //编号
-var codeUnConReg *regexp.Regexp = regexp.MustCompile("(null|勘察|测试|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|\\([0-9]{1}\\)|[,,。、::“”‘’_\"])")
-var codeUnLenReg *regexp.Regexp = regexp.MustCompile("[\\u4e00-\\u9fa5]{9,}")
+var codeUnConReg *regexp.Regexp = regexp.MustCompile("(null|勘察|测试|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天)")
+var codeUnLenReg *regexp.Regexp = regexp.MustCompile("([\u4e00-\u9fa5]{9,})")
 
 
 
@@ -77,6 +78,19 @@ func qyNameIsExistsQYXY(name string) bool{
 
 	return true
 }
+// buyerNameIsExists reports whether the "buyer_enterprise" collection holds a
+// record whose buyer_name equals name. A result counts as a hit only when it
+// is non-nil and has at least two entries (presumably to ignore a record that
+// carries nothing but its id — confirm against the qy_mgo.FindOne contract).
+func buyerNameIsExists(name string) bool {
+	query := map[string]interface{}{"buyer_name": name}
+	record := qy_mgo.FindOne("buyer_enterprise", query)
+	return !(record == nil || len(record) < 2)
+}
+
 
 //包含非中文
 func isUnHan(str string) bool {
@@ -161,4 +175,63 @@ func isRegTimeDateCode(str string) bool {
 		return true
 	}
 	return false
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// dealWithFieldSourceScore derives the initial score of each tracked field
+// from how that field was extracted.
+//
+// source maps a field name to its extraction record, which is read for the
+// keys "ext_from" and "ext_type". The returned map always has an entry for
+// every tracked field:
+//   - 100 when the field has no usable extraction record;
+//   - Ext_From[ext_from] when ext_type is empty or ext_from normalizes to
+//     "winnerorder", "package" or "jsondata";
+//   - otherwise the integer average of Ext_From[ext_from] and
+//     Ext_Type[ext_type].
+//
+// A name missing from the config tables contributes whatever qu.Int64All
+// returns for a nil value.
+func dealWithFieldSourceScore(source map[string]interface{}) map[string]int64 {
+	fieldArr := []string{"buyer", "s_winner", "budget", "bidamount", "projectname", "projectcode"}
+	score := make(map[string]int64, len(fieldArr))
+	for _, key := range fieldArr {
+		score[key] = int64(100)
+
+		// Check the pointer before dereferencing it: fields without an
+		// extraction record keep the default instead of crashing the task.
+		p := qu.ObjToMap(source[key])
+		if p == nil || *p == nil {
+			continue
+		}
+		ext := *p
+
+		// Normalize ext_from so that its variants hit the configured keys.
+		ext_from := normalizedExtFromName(qu.ObjToString(ext["ext_from"]))
+		ext_type := qu.ObjToString(ext["ext_type"])
+
+		dataLock.Lock()
+		fromScore := qu.Int64All(Ext_From[ext_from])
+		typeScore := qu.Int64All(Ext_Type[ext_type])
+		dataLock.Unlock()
+
+		switch {
+		case ext_type == "", ext_from == "winnerorder", ext_from == "package", ext_from == "jsondata":
+			// These sources are scored by origin alone.
+			score[key] = fromScore
+		default:
+			score[key] = (fromScore + typeScore) / 2
+		}
+	}
+
+	return score
+}
+
+// normalizedExtFromName maps an extraction-source name onto its canonical
+// form: anything containing "order" becomes "winnerorder", anything else
+// containing "JsonData" becomes "jsondata", and every other name is returned
+// unchanged. The "order" rule takes precedence when both substrings occur.
+func normalizedExtFromName(str string) string {
+	switch {
+	case strings.Contains(str, "order"):
+		return "winnerorder"
+	case strings.Contains(str, "JsonData"):
+		return "jsondata"
+	}
+	return str
+}

+ 16 - 12
data_quality/src/scorebidamount.go

@@ -1,41 +1,44 @@
 package main
 import (
+	"fmt"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	qu "qfw/util"
 )
 
-func bidamountFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+func bidamountFieldScore(tmp map[string]interface{},score int64) (map[string]interface{}) {
 
 	bidamount := qu.Float64All(tmp["bidamount"])
 	budget := qu.Float64All(tmp["budget"])
-	score := int64(100)
-	reason:=""
+	reason,desc:=fmt.Sprintf("%d初始分",score),""
 
 	/*错误项*/
 	if (bidamount < 10.0 || bidamount > 20000000000.0 ) {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"大小错误",
+			"reason":"错误:大小",
 		}
 	}
 
-	//间隔比例-预算存在情况下
+	/*减分项
+	    "行业范围": 1,
+	    "间隔异常": 2
+	*/
+
 	if budget > 0.0 {
 		proportion := bidamount/budget
 		if (proportion>=0.1&&proportion<=0.3)||(proportion>=3&&proportion<=5){
-			reason+="~间隔-2"
-			score -= 2
+			desc = "间隔异常"
+			reason+="~"+desc
+			score -= qu.Int64All(bidamount_score[desc])
 		}
 		if proportion>=0.001&&proportion<0.1{
 			return map[string]interface{}{
 				"score":int64(0),
-				"reason":"比例错误",
+				"reason":"错误:比例",
 			}
 		}
 	}
 
-
-	//行业区间-减分项
 	class := make(map[string]interface{},0)
 	if topscopeclass, ok := tmp["topscopeclass"].(primitive.A); ok&&len(topscopeclass)>0 {
 		for _,v := range topscopeclass{
@@ -43,8 +46,9 @@ func bidamountFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
 			new_value := value[:len(value)-1]
 			if class[new_value]==nil {//校验行业
 				if !checkingClassMoney(bidamount,new_value) {
-					reason+="~"+new_value+"-1"
-					score-=1
+					desc = "行业范围"
+					reason+="~"+desc
+					score -= qu.Int64All(bidamount_score[desc])
 				}
 				class[new_value]=1
 			}

+ 18 - 18
data_quality/src/scorebudget.go

@@ -1,46 +1,45 @@
 package main
 
 import (
+	"fmt"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	qu "qfw/util"
 )
 
-func budgetFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+func budgetFieldScore(tmp map[string]interface{},score int64) (map[string]interface{}) {
 
 	budget := qu.Float64All(tmp["budget"])
 	bidamount := qu.Float64All(tmp["bidamount"])
-	score := int64(100)
-	reason:=""
+	reason,desc:=fmt.Sprintf("%d初始分",score),""
 
 	/*错误项*/
-	if (budget<10.0||budget>20000000000.0) && budget>0.0 {
+	if (budget<10.0||budget>20000000000.0)  {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"大小错误",
+			"reason":"错误:大小",
 		}
 	}
-	//间隔比例-预算存在情况下
+
+	/*减分项
+		"行业范围": 1,
+	    "间隔异常": 2
+	*/
+
 	if bidamount > 0.0 && budget >0.0 {
 		proportion := bidamount/budget
 		if (proportion>=0.1&&proportion<=0.3)||(proportion>=3&&proportion<=5){
-			reason+="~间隔-2"
-			score -= 2
+			desc = "间隔异常"
+			reason+="~"+desc
+			score -= qu.Int64All(budget_score[desc])
 		}
 		if proportion>=0.001&&proportion<0.1{
 			return map[string]interface{}{
 				"score":int64(0),
-				"reason":"比例错误",
+				"reason":"错误:比例",
 			}
 		}
 	}
 
-	//空值减分
-	if budget==0.0 {
-		reason+="~空值-1"
-		score -= 1
-	}
-
-	//行业区间-减分项
 	class := make(map[string]interface{},0)
 	if topscopeclass, ok := tmp["topscopeclass"].(primitive.A); budget>0.0&&ok&&len(topscopeclass)>0 {
 		for _,v := range topscopeclass{
@@ -48,8 +47,9 @@ func budgetFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
 			new_value := value[:len(value)-1]
 			if class[new_value]==nil {//校验行业
 				if !checkingClassMoney(budget,new_value) {
-					reason+="~"+new_value+"-1"
-					score-=1
+					desc = "行业范围"
+					reason+="~"+desc
+					score -= qu.Int64All(budget_score[desc])
 				}
 				class[new_value]=1
 			}

+ 55 - 30
data_quality/src/scorebuyer.go

@@ -1,64 +1,89 @@
 package main
 
 import (
+	"fmt"
 	qu "qfw/util"
 	"unicode/utf8"
 )
 
 
-func buyerFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+func buyerFieldScore(tmp map[string]interface{},score int64) (map[string]interface{}) {
 	buyer := qu.ObjToString(tmp["buyer"])
 	agency := qu.ObjToString(tmp["agency"])
 	winner := qu.ObjToString(tmp["winner"])
-	score := int64(100)
-	reason:=""
+	reason,desc:=fmt.Sprintf("%d初始分",score),""
+
 	/*错误项*/
-	if (utf8.RuneCountInString(buyer) > 0 && utf8.RuneCountInString(buyer) < 4 ) ||
-		buyer=="" || !isHan(buyer) {
+	if  utf8.RuneCountInString(buyer) < 4 || !isHan(buyer) ||
+		utf8.RuneCountInString(buyer) >= 20{
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"错误:长度",
+		}
+	}
+	countArr := isCharCount(buyer)
+	if countArr[0] < 4 {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"错误项",
+			"reason":"错误:中文少",
 		}
 	}
-	/*减分项*/
-	//1、企业库匹配
-	if !isTest && !qyNameIsExistsQYXY(buyer)  {
-		reason+="~企业-1"
-		score -= 1
+
+	/*减分项
+		"单位库": 1,
+	    "前缀校验": 1,
+	    "后缀校验": 1,
+	    "非纯中文": 1,
+	    "切词": 2,
+	    "单位对比": 2,
+	    "黑名单":2
+	*/
+
+	if !isTest && !buyerNameIsExists(buyer)  {
+		desc = "单位库"
+		reason+="~"+desc
+		score -= qu.Int64All(buyer_score[desc])
 	}
-	//2、前缀校验
+
+	if isUnHan(buyer){
+		desc = "非纯中文"
+		reason+="~"+desc
+		score -= qu.Int64All(buyer_score[desc])
+	}
+
 	if specHeadReg.MatchString(buyer) || !unHanHeadReg.MatchString(buyer) {
-		reason+="~前缀-2"
-		score -= 2
+		desc = "前缀校验"
+		reason+="~"+desc
+		score -= qu.Int64All(buyer_score[desc])
 	}
-	//3、后缀校验
+
 	if unConReg.MatchString(buyer) || unEndReg.MatchString(buyer) {
 		if unenableReg1.MatchString(buyer) || unenableReg2.MatchString(buyer) {
-			reason+="~略特殊-2"
-			score -= 2
+			desc = "黑名单"
+			reason+="~"+desc
+			score -= qu.Int64All(buyer_score[desc])
 		}
 	}else {
-		reason+="~后缀-2"
-		score -= 2
+		desc = "后缀校验"
+		reason+="~"+desc
+		score -= qu.Int64All(buyer_score[desc])
 	}
-	//4、与其他单位比对
+
+	//与其他单位比对
 	if buyer==agency || buyer==winner {
-		reason+="~其他单位-2"
-		score -= 2
-	}
-	//5、中英文结合
-	if isUnHan(buyer){
-		reason+="~非纯中文-2"
-		score -= 2
+		desc = "单位对比"
+		reason+="~"+desc
+		score -= qu.Int64All(buyer_score[desc])
 	}
 
-	//6、切词首部比对-
+	//切词首部比对-
 	buyer_jb_arr := GSE.Cut(buyer, true)
 	if len(buyer_jb_arr)>0 && buyer_jb_arr!=nil {
 		head_char := qu.ObjToString(buyer_jb_arr[0])
 		if utf8.RuneCountInString(head_char) == 1{
-			reason+="~分词-2"
-			score -= 2
+			desc = "切词"
+			reason+="~"+desc
+			score -= qu.Int64All(buyer_score[desc])
 		}
 	}
 

+ 26 - 23
data_quality/src/scoreprojectcode.go

@@ -1,61 +1,64 @@
 package main
 
 import (
+	"fmt"
 	qu "qfw/util"
 	"unicode/utf8"
 )
 
-func projectcodeFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+func projectcodeFieldScore(tmp map[string]interface{},score int64) (map[string]interface{}) {
 
 	projectcode := qu.ObjToString(tmp["projectcode"])
-	score := int64(100)
-	reason:=""
+	reason,desc:=fmt.Sprintf("%d初始分",score),""
 
 	/*错误项*/
 	l := utf8.RuneCountInString(projectcode)
 	if l > 19 || l < 5 {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"长度-错误",
+			"reason":"错误:长度",
 		}
 	}
 
 	if isRegTimeDateCode(projectcode) {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"日期格式-错误",
+			"reason":"错误:日期格式",
 		}
 	}
 
-	if !isAlphanumeric(projectcode) {
+	if codeUnConReg.MatchString(projectcode) {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"不含字母数字-错误",
+			"reason":"错误:黑名单",
 		}
 	}
 
-	/*减分项*/
-	//空值
-	if projectcode=="" {
-		reason+="~空值-1"
-		score -= 1
+
+	if !isAlphanumeric(projectcode) {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"错误:不含字母数字",
+		}
 	}
 
-	//符号数量
+	/*减分项
+		"符号过多": 1,
+	    "连续中文": 2,
+	*/
+
 	countArr := isCharCount(projectcode)
-	if countArr[3] > 2 {
-		reason+="~符号-1"
-		score -= 1
-	}
-	//包含关键词
-	if codeUnConReg.MatchString(projectcode) {
-		reason+="~黑名单-2"
-		score -= 2
+	if countArr[3] > 6 {
+		desc = "符号过多"
+		reason+="~"+desc
+		score -= qu.Int64All(projectcode_score[desc])
 	}
+
 	//连续中文长度超过X个
 	if codeUnLenReg.MatchString(projectcode) {
-		reason+="~连续中文长度-2"
-		score -= 2
+		desc = "连续中文"
+		reason+="~"+desc
+		score -= qu.Int64All(projectcode_score[desc])
 	}
 
 

+ 22 - 24
data_quality/src/scoreprojectname.go

@@ -1,47 +1,45 @@
 package main
 
 import (
+	"fmt"
 	qu "qfw/util"
 	"unicode/utf8"
 )
-func projectnameFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+func projectnameFieldScore(tmp map[string]interface{},score int64) (map[string]interface{}) {
 	projectname := qu.ObjToString(tmp["projectname"])
-	score := int64(100)
-	reason:=""
+	reason,desc:=fmt.Sprintf("%d初始分",score),""
 
 	/*错误项*/
-	if utf8.RuneCountInString(projectname) <= 6  || !isHan(projectname) {
+	if utf8.RuneCountInString(projectname) <= 6  {
 		return map[string]interface{}{
 			"score":int64(0),
-			"reason":"错误",
+			"reason":"错误:长度",
 		}
 	}
 
-	/*减分项*/
-
-	//长度过长
-	if utf8.RuneCountInString(projectname) >= 25 {
-		reason+="~过长-1"
-		score -= 1
+	if !isHan(projectname) {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"错误:无中文",
+		}
 	}
 
+	/*减分项
+		"长度过长":1,
+	    "中文比例": 2
+	*/
 
 
-	//标题切词与名称比对-待开发
-
-
-
-
-	//符号数量
-	countArr := isCharCount(projectname)
-	if countArr[3] > 3 {
-		reason+="~符号-2"
-		score -= 2
+	if utf8.RuneCountInString(projectname) >= 40 {
+		desc = "长度过长"
+		reason+="~"+desc
+		score -= qu.Int64All(projectname_score[desc])
 	}
-	//中文-长度-比例
+
 	if !isHanLenToLittle(projectname) {
-		reason+="~中文比例-2"
-		score -= 2
+		desc = "中文比例"
+		reason+="~"+desc
+		score -= qu.Int64All(projectname_score[desc])
 	}
 
 	return map[string]interface{}{

+ 58 - 32
data_quality/src/scorewinner.go

@@ -1,76 +1,102 @@
 package main
 
 import (
+	"fmt"
 	qu "qfw/util"
 	"strings"
 	"unicode/utf8"
 )
 
-func winnerFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+func winnerFieldScore(tmp map[string]interface{},score int64) (map[string]interface{}) {
 	s_winner := qu.ObjToString(tmp["s_winner"])
 	buyer := qu.ObjToString(tmp["buyer"])
 	agency := qu.ObjToString(tmp["agency"])
 	package_map:=*qu.ObjToMap(tmp["package"])
-	score := int64(100)
-	reason := ""
+	reason,desc:=fmt.Sprintf("%d初始分",score),""
+
 	s_winner_arr := strings.Split(s_winner,",")
 	if len(s_winner_arr)!=len(package_map) && len(package_map)>0 {
-		reason+="~分包量-1"
-		score -= 1
+		desc = "分包量"
+		reason+="~"+desc
+		score -= qu.Int64All(s_winner_score[desc])
 	}
 
 	for _,winner:=range s_winner_arr{
 		/*错误项*/
-		if (utf8.RuneCountInString(winner) > 0 && utf8.RuneCountInString(winner) < 4 ) ||
-			winner=="" || !isHan(winner) {
+		if  utf8.RuneCountInString(winner) < 4 || !isHan(winner) ||
+			utf8.RuneCountInString(winner) > 20{
+			return map[string]interface{}{
+				"score":int64(0),
+				"reason":"错误",
+			}
+		}
+		countArr := isCharCount(winner)
+		if countArr[0] < 4 {
 			return map[string]interface{}{
 				"score":int64(0),
-				"reason":"错误项",
+				"reason":"错误:中文少",
 			}
 		}
 
-		/*减分项*/
-		//1、企业库匹配
+		/*减分项
+		    "单位库": 1,
+		    "前缀校验": 1,
+		    "后缀校验": 1,
+		    "非纯中文": 1,
+		    "分包量": 1,
+		    "切词": 2,
+		    "单位对比": 2,
+		    "黑名单": 2
+		*/
+
 		if !isTest && !qyNameIsExistsQYXY(winner){
-			reason+="~企业-1"
-			score -= 1
+			desc = "单位库"
+			reason+="~"+desc
+			score -= qu.Int64All(s_winner_score[desc])
+		}
+
+		if isUnHan(winner){
+			desc = "非纯中文"
+			reason+="~"+desc
+			score -= qu.Int64All(s_winner_score[desc])
 		}
-		//2、前缀校验
+
 		if specHeadReg.MatchString(winner) || !unHanHeadReg.MatchString(winner) {
-			reason+="~前缀-2"
-			score -= 2
+			desc = "前缀校验"
+			reason+="~"+desc
+			score -= qu.Int64All(s_winner_score[desc])
 		}
-		//3、后缀校验
+
 		if unConReg.MatchString(winner) || unEndReg.MatchString(winner) {
 			if unenableReg1.MatchString(winner) || unenableReg2.MatchString(winner) {
-				reason+="~略特殊-2"
-				score -= 2
+				desc = "黑名单"
+				reason+="~"+desc
+				score -= qu.Int64All(s_winner_score[desc])
 			}
 		}else {
-			reason+="~后缀-2"
-			score -= 2
+			desc = "后缀校验"
+			reason+="~"+desc
+			score -= qu.Int64All(s_winner_score[desc])
 		}
-		//4、与其他单位比对
+
 		if (winner==agency || winner==buyer) && winner !="" {
-			reason+="~其他单位-2"
-			score -= 2
-		}
-		//5、中英文结合
-		if isUnHan(winner){
-			reason+="~非纯中文-2"
-			score -= 2
+			desc = "单位对比"
+			reason+="~"+desc
+			score -= qu.Int64All(s_winner_score[desc])
 		}
-
-		//6、切词首部比对-影响性能
+		//切词首部比对-影响性能
 		winner_jb_arr := GSE.Cut(winner, true)
 		if len(winner_jb_arr)>0 && winner_jb_arr!=nil {
 			head_char := qu.ObjToString(winner_jb_arr[0])
 			if utf8.RuneCountInString(head_char) == 1{
-				reason+="~分词-2"
-				score -= 2
+				desc = "切词"
+				reason+="~"+desc
+				score -= qu.Int64All(s_winner_score[desc])
 			}
 		}
 	}
+
+
 	return map[string]interface{}{
 		"score":score,
 		"reason":reason,

+ 2 - 2
src/jy/extract/extract.go

@@ -2488,8 +2488,8 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	//	tmp["bidendtime"] = bidendtime_str
 	//}
 
-	//jyhref:= fmt.Sprintf(JYUrl, qu.CommonEncodeArticle("content", BsonTOStringId(tmp["_id"])))
-	//tmp["jyhref"] = jyhref
+	jyhref:= fmt.Sprintf(JYUrl, qu.CommonEncodeArticle("content", qu.BsonIdToSId(tmp["_id"])))
+	tmp["jytest_href"] = jyhref
 
 	return tmp
 }

+ 1 - 1
src/jy/extract/extractudp.go

@@ -65,7 +65,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
 					//新版本控制抽取
 					ExtractByUdp(sid, eid, ra)
-					log.Debug("抽取完成udp通知抽取id段",udpinfo, sid, "~", eid)
+					log.Debug("抽取完成udp通知抽取id段-控制台",udpinfo, sid, "~", eid)
 					Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
 
 

+ 1 - 1
src/jy/pretreated/analystep.go

@@ -14,7 +14,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 //投标文件格式
-var yjReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|主要人员相关资料|唱标记录|否决投标的?情况说明")
+var yjReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|主要人员相关资料|唱标记录|标的名称|否决投标的?情况说明")
 var hisReg = regexp.MustCompile("(开标记录|类似业绩|历史业绩|填报项目业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</td>)")
 var hisReg2 = regexp.MustCompile("(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(.*原因及其依据.*[::]?)?[\n]?.*?[\n]?(</tr>|</table>|</td>)")
 

+ 0 - 23
udp_city/src/config.json

@@ -1,23 +0,0 @@
-{
-  "mgodb": "SJZY_Rbid_ProG:SJZY%408Pro3gR79aM@172.17.4.187:27082,172.17.145.163:27083",
-  "dbsize": 3,
-  "dbname": "mixdata",
-
-  "mgodb_bidding": "172.17.4.85:27080",
-  "dbname_bidding": "qfw",
-  "findDb":"result_20210109",
-  "udpport": "1799",
-  "nextNode": [
-    {
-      "addr": "127.0.0.1",
-      "port": 1781,
-      "stype":"hangye",
-      "memo": "生kv招标分类"
-    }
-
-
-
-
-
-  ]
-}

+ 0 - 219
udp_city/src/main.go

@@ -1,219 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"gopkg.in/mgo.v2/bson"
-	"log"
-	mu "mfw/util"
-	mgo "mongodbutil"
-	"net"
-	qu "qfw/util"
-	"strings"
-)
-
-var Udpclient mu.UdpClient //udp对象
-var nextNodes []map[string]interface{}
-var Config map[string]interface{}
-var PageSize = 5000 //查询分页
-var biddingFields = `{"buyer":1,"modifyinfo":1,"area":1,"province":1,"city":1,"district":1}`
-var qyxyFields = `{"company_code":1,"province":1,"city":1,"district":1}`
-var findDb string
-var cc chan bool = make(chan bool, 5)
-
-func init() {
-	qu.ReadConfig(&Config)
-	if len(Config) == 0 {
-		log.Fatal("读取配置文件失败", Config)
-	}
-	findDb = qu.ObjToString(Config["findDb"])
-	initCap := qu.IntAll(Config["dbsize"])
-	addr := qu.ObjToString(Config["mgodb"])
-	dbname := qu.ObjToString(Config["dbname"])
-	cc = make(chan bool, 3)
-	mgo.Mgo = mgo.MgoFactory(initCap, initCap*3, 120, addr, dbname)
-	mgo.Mgo_Bidding = mgo.MgoFactory(initCap, initCap*3, 120, qu.ObjToString(Config["mgodb_bidding"]), qu.ObjToString(Config["dbname_bidding"]))
-	nextNodes = qu.ObjArrToMapArr(Config["nextNode"].([]interface{}))
-	Udpclient = mu.UdpClient{Local: ":" + qu.ObjToString(Config["udpport"]), BufSize: 1024}
-	log.Println("udp run ", Config["udpport"])
-	Udpclient.Listen(processUdpMsg)
-}
-
-func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
-	switch act {
-	case mu.OP_TYPE_DATA:
-		var rep map[string]interface{}
-		err := json.Unmarshal(data, &rep)
-		if err != nil {
-			log.Println(err)
-		} else {
-			sid, _ := rep["gtid"].(string)
-			eid, _ := rep["lteid"].(string)
-			if sid == "" || eid == "" {
-				log.Println("err", "sid=", sid, ",eid=", eid)
-				return
-			} else {
-				go Udpclient.WriteUdp(data, mu.OP_NOOP, ra)
-				log.Println("udp通知抽取id段", sid, " ", eid)
-
-				getCity(sid, eid, qu.ObjToString(rep["stype"]))
-				log.Println("udp通知抽取完成,eid", eid)
-				for _, m := range nextNodes {
-					by, _ := json.Marshal(map[string]interface{}{
-						"gtid":  sid,
-						"lteid": eid,
-						"stype": qu.ObjToString(m["stype"]),
-					})
-					err := Udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{
-						IP:   net.ParseIP(m["addr"].(string)),
-						Port: qu.IntAll(m["port"]),
-					})
-					if err != nil {
-						log.Println(err)
-					}
-				}
-			}
-		}
-	case mu.OP_NOOP: //下个节点回应
-		log.Println(string(data))
-	}
-}
-
-func getCity(sid, eid, rep string) {
-	index := 0
-	var unum int64
-	query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(sid), "$lte": bson.ObjectIdHex(eid)}}
-	count := mgo.Mgo_Bidding.Count(findDb, query)
-	log.Println("查询条件为:", query, "查询条数:", count)
-	pageNum := (count + PageSize - 1) / PageSize
-	limit := PageSize
-	if count < PageSize {
-		limit = count
-	}
-	table := findDb
-	for i := 0; i < pageNum; i++ {
-		query = bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(sid), "$lte": bson.ObjectIdHex(eid)}}
-		log.Printf("page=%d,query=%v,db=%v\n", i+1, query, table)
-		list, _ := mgo.Mgo_Bidding.Find(table, query, map[string]interface{}{
-			"_id": 1,
-		}, biddingFields, false, 0, limit)
-		for _, v := range *list {
-			if qu.ObjToString(v["district"]) != "" && qu.ObjToString(v["city"]) != "" && qu.ObjToString(v["area"]) != "" && qu.ObjToString(v["area"]) != "全国" {
-				index++
-				continue
-			}
-			if qu.ObjToString(v["buyer"]) == "" {
-				index++
-				continue
-			}
-
-			_id := qu.BsonIdToSId(v["_id"])
-			cc <- true
-			go func(v map[string]interface{}) {
-				rdata := cityMarshal(v)
-				if len(rdata) > 0 {
-					umap := make(map[string]interface{})
-					if v["modifyinfo"] == nil {
-						umap["modifyinfo"] = make(map[string]interface{})
-					} else {
-						umap["modifyinfo"] = v["modifyinfo"]
-					}
-					for rk, rv := range rdata {
-						umap[rk] = rv
-						umap["modifyinfo"].(map[string]interface{})[rk] = "企业信息"
-					}
-					unum++
-					log.Println(unum, ",id:", _id, umap)
-					mgo.Mgo_Bidding.UpdateById(table, v["_id"], map[string]interface{}{
-						"$set": umap,
-					})
-				}
-				<-cc
-			}(v)
-			index++
-			if index%1000 == 0 {
-				log.Println("index:", index, ",页码:", i+1, ",_id:", _id)
-			}
-			sid = _id
-			if sid >= eid {
-				break
-			}
-		}
-	}
-}
-
-func cityMarshal(data map[string]interface{}) map[string]string {
-	buyer := qu.ObjToString(data["buyer"])
-	bidarea := qu.ObjToString(data["area"])
-	bidcity := qu.ObjToString(data["city"])
-	biddistrict := qu.ObjToString(data["district"])
-	rdata := make(map[string]string)
-	tmp, _ := mgo.Mgo.FindOneByField("qyxy", `{"company_name":"`+buyer+`"}`, qyxyFields)
-	if tmp == nil || (*tmp) == nil {
-		return rdata
-	}
-	company_code := fmt.Sprint((*tmp)["company_code"])
-	if len(company_code) > 5 {
-		province_city_district, _ := mgo.Mgo.FindOne("address", `{"code":"`+company_code[:6]+`"}`)
-		remarks := fmt.Sprint((*province_city_district)["Remarks"])
-		if remarks == "" || remarks == "废除" || remarks == "已作废" {
-		} else if province_city_district != nil && (*province_city_district) != nil {
-			codeprovince := qu.ObjToString((*province_city_district)["province"])
-			codecity := qu.ObjToString((*province_city_district)["city"])
-			codedistrict := qu.ObjToString((*province_city_district)["district"])
-			if bidarea == "" || bidarea == "全国" {
-				if codeprovince != "" {
-					rdata["area"] = codeprovince
-					if codecity != "" && codecity != codeprovince {
-						rdata["city"] = codecity
-						if codedistrict != "" && codedistrict != codecity {
-							rdata["district"] = codedistrict
-						}
-					}
-				}
-			} else if bidcity == "" && codecity != "" && bidarea == codeprovince {
-				if codecity != bidarea {
-					rdata["city"] = codecity
-					if codedistrict != "" && codecity != codedistrict {
-						rdata["district"] = codedistrict
-					}
-				}
-			} else if biddistrict == "" && codedistrict != "" && bidarea == codeprovince && codecity == bidcity {
-				rdata["district"] = codedistrict
-			}
-			return rdata
-		}
-	}
-
-	entprovince := qu.ObjToString((*tmp)["province"])
-	entprovince = strings.TrimRight(entprovince, "省")
-	entprovince = strings.TrimRight(entprovince, "市")
-	entcity := qu.ObjToString((*tmp)["city"])
-	entdistrict := qu.ObjToString((*tmp)["district"])
-
-	//新增特殊处理-港澳台数据
-	if bidarea == "" || bidarea=="香港" || bidarea=="澳门" || bidarea=="台湾" || bidarea == "全国" {
-		if entprovince != "" {
-			rdata["area"] = entprovince
-			if entcity != "" && entcity != entprovince {
-				rdata["city"] = entcity
-				if entdistrict != "" && entdistrict != entcity {
-					rdata["district"] = entdistrict
-				}
-			}
-		}
-	} else if bidcity == "" && entcity != "" && entprovince == bidarea {
-		rdata["city"] = entcity
-		if entdistrict != "" && entcity != entdistrict {
-			rdata["district"] = entdistrict
-		}
-	} else if biddistrict == "" && entdistrict != "" && entprovince == bidarea && bidcity == entcity {
-		rdata["district"] = entdistrict
-	}
-
-	return rdata
-}
-func main() {
-	c := make(chan bool)
-	<-c
-}

+ 0 - 378
udp_city/src/mongodbutil/mongodbutil.go

@@ -1,378 +0,0 @@
-package mongodbutil
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"log"
-	"qfw/util"
-	"runtime"
-	"strings"
-
-	. "gopkg.in/mgo.v2/bson"
-)
-
-//统计
-func (m *Pool) Count(c string, query interface{}) int {
-	defer util.Catch()
-	sess := m.Get()
-	//log.Println("count:", m.Size, m.MongodbAddr, m.DB, sess, m.GetMgoConn(), m)
-	var n int = 0
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		var err error
-		n, err = coll.Find(ObjToM(query)).Count()
-		if nil != err {
-			log.Println("CountError", err)
-		}
-	}
-	return n
-}
-
-//统计
-func (m *Pool) CountByErr(c string, query interface{}) (int, error) {
-	defer util.Catch()
-	sess := m.Get()
-	//log.Println("count:", m.Size, m.MongodbAddr, m.DB, sess, m.GetMgoConn(), m)
-	var n int = 0
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		var err error
-		n, err = coll.Find(ObjToM(query)).Count()
-		if nil != err {
-			return 0, err
-		} else {
-			return n, nil
-		}
-
-	}
-	return n, errors.New("no sess")
-}
-
-func (m *Pool) Update(c string, query interface{}, set interface{}, upsert bool, multi bool) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := true
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		var err error
-		if upsert {
-			_, err = coll.Upsert(ObjToM(query), ObjToM(set))
-		} else {
-			if multi {
-				_, err = coll.UpdateAll(ObjToM(query), ObjToM(set))
-			} else {
-				err = coll.Update(ObjToM(query), ObjToM(set))
-			}
-		}
-		if nil != err {
-			log.Println("UpdateError", err)
-			b = false
-		}
-	}
-	return b
-}
-
-func (m *Pool) UpdateById(c string, id interface{}, set interface{}) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := false
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		var q interface{}
-		if sid, ok := id.(string); ok {
-			q = M{"_id": util.StringTOBsonId(sid)}
-		} else {
-			q = M{"_id": id}
-		}
-		err := coll.Update(q, ObjToM(set))
-		if nil != err {
-			log.Println("UpdateByIdError", err)
-			b = false
-		} else {
-			b = true
-		}
-	}
-	return b
-}
-
-//批量更新
-
-func (m *Pool) UpdateBulkAll(db, c string, doc ...[]map[string]interface{}) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := true
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(db).C(c)
-		bulk := coll.Bulk()
-		for _, v := range doc {
-			if len(v) == 2 {
-				bulk.Update(v[0], v[1])
-			}
-		}
-		_, err := bulk.Run()
-		if nil != err {
-			log.Println("BulkError", err)
-			b = false
-		}
-	} else {
-		b = false
-	}
-	return b
-}
-
-func (m *Pool) UpdateBulk(c string, doc ...[]map[string]interface{}) bool {
-	return m.UpdateBulkAll(m.DB, c, doc...)
-}
-
-//批量更新
-func (m *Pool) UpSertBulk(c string, doc ...[]map[string]interface{}) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := true
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		bulk := coll.Bulk()
-		for _, v := range doc {
-			if len(v) == 2 {
-				bulk.Upsert(v[0], v[1])
-			}
-		}
-		_, err := bulk.Run()
-		if nil != err {
-			log.Println("BulkUpsertError", err)
-			b = false
-		}
-	} else {
-		b = false
-	}
-	return b
-}
-
-//批量插入
-func (m *Pool) SaveBulk(c string, doc ...map[string]interface{}) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := true
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		bulk := coll.Bulk()
-		for _, v := range doc {
-			bulk.Insert(v)
-		}
-		_, err := bulk.Run()
-		if nil != err {
-			log.Println("BulkError", err)
-			b = false
-		}
-	} else {
-		b = false
-	}
-	return b
-}
-
-func (m *Pool) Save(c string, doc interface{}) string {
-	defer util.Catch()
-	sess := m.Get()
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		obj := ObjToM(doc)
-		id := NewObjectId()
-		(*obj)["_id"] = id
-		err := coll.Insert(obj)
-		if nil != err {
-			log.Println("SaveError", err)
-			return ""
-		}
-		return (strings.Split(fmt.Sprintf("%s", id), `"`)[1])
-	}
-	return ""
-}
-
-//查询单条对象
-func (m *Pool) FindOne(c string, query interface{}) (*map[string]interface{}, bool) {
-	return m.FindOneByField(c, query, nil)
-}
-
-//查询单条对象
-func (m *Pool) FindOneByField(c string, query interface{}, fields interface{}) (*map[string]interface{}, bool) {
-	defer util.Catch()
-	res, ok := m.Find(c, query, nil, fields, true, -1, -1)
-	if nil != res && len(*res) > 0 {
-		return &((*res)[0]), ok
-	}
-	return nil, ok
-}
-
-//查询单条对象
-func (m *Pool) FindById(c string, query string, fields interface{}) (*map[string]interface{}, bool) {
-	defer util.Catch()
-	sess := m.Get()
-	var res map[string]interface{}
-	b := false
-	if sess != nil {
-		defer m.Close(sess)
-		res = make(map[string]interface{})
-		coll := sess.DB(m.DB).C(c)
-		var err error
-		err = coll.FindId(ObjectIdHex(query)).Select(ObjToOth(fields)).One(&res)
-		if nil != err {
-			log.Println("FindByIdError", err,query)
-		}
-		b = true
-	}
-	return &res, b
-}
-
-//底层查询方法
-func (m *Pool) Find(c string, query interface{}, order interface{}, fields interface{}, single bool, start int, limit int) (*[]map[string]interface{}, bool) {
-	defer util.Catch()
-	sess := m.Get()
-	var res []map[string]interface{}
-	b := false
-	if sess != nil {
-		defer m.Close(sess)
-		res = make([]map[string]interface{}, 1)
-		coll := sess.DB(m.DB).C(c)
-		var err error
-		if single {
-			err = coll.Find(ObjToM(query)).Select(ObjToOth(fields)).Sort(ObjToArr(order)...).One(&res[0])
-		} else if start > -1 {
-			err = coll.Find(ObjToM(query)).Select(ObjToOth(fields)).Sort(ObjToArr(order)...).Skip(start).Limit(limit).All(&res)
-		} else {
-			err = coll.Find(ObjToM(query)).Select(ObjToOth(fields)).Sort(ObjToArr(order)...).All(&res)
-		}
-		if nil != err {
-			//log.Println("FindError", err)
-		}
-		b = true
-	}
-	return &res, b
-}
-
-//删除对象
-func (m *Pool) Del(c string, query interface{}) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := false
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		_, err := coll.RemoveAll(ObjToM(query))
-		if nil != err {
-			log.Println("DelError", err)
-			b = false
-		} else {
-			b = true
-		}
-	}
-	return b
-}
-
-func (m *Pool) GetObjectId(str string) ObjectId {
-	return ObjectIdHex(str)
-}
-
-func ObjToOth(query interface{}) *M {
-	return ObjToMQ(query, false)
-}
-func ObjToM(query interface{}) *M {
-	return ObjToMQ(query, true)
-}
-
-//obj(string,M)转M,查询用到
-func ObjToMQ(query interface{}, isQuery bool) *M {
-	data := make(M)
-	defer func() {
-		if r := recover(); r != nil {
-			log.Println("[E]", r)
-			for skip := 1; ; skip++ {
-				_, file, line, ok := runtime.Caller(skip)
-				if !ok {
-					break
-				}
-				go log.Printf("%v,%v\n", file, line)
-			}
-		}
-	}()
-	if s2, ok2 := query.(*map[string]interface{}); ok2 {
-		data = M(*s2)
-	} else if s3, ok3 := query.(*M); ok3 {
-		return s3
-	} else if s, ok := query.(string); ok {
-		json.Unmarshal([]byte(strings.Replace(s, "'", "\"", -1)), &data)
-		if ss, oks := data["_id"]; oks && isQuery {
-			switch ss.(type) {
-			case string:
-				data["_id"] = ObjectIdHex(ss.(string))
-			case map[string]interface{}:
-				tmp := ss.(map[string]interface{})
-				for k, v := range tmp {
-					tmp[k] = ObjectIdHex(v.(string))
-				}
-				data["_id"] = tmp
-			}
-
-		}
-	} else if s1, ok1 := query.(map[string]interface{}); ok1 {
-		data = s1
-	} else if s4, ok4 := query.(M); ok4 {
-		data = s4
-	} else {
-		data = nil
-	}
-	return &data
-}
-
-//对象转数组
-func ObjToArr(obj interface{}) []string {
-	if s, ok := obj.(string); ok {
-		if strings.ContainsAny(s, "{") {
-			//暂时简单支持此种写法
-			var temp = make(M)
-			var str = []string{}
-			json.Unmarshal([]byte(s), &temp)
-			for k, v := range temp {
-				m := util.IntAll(v)
-				if m > 0 {
-					str = append(str, k)
-				} else {
-					str = append(str, "-"+k)
-				}
-			}
-			return str
-		} else {
-			return strings.Split(s, ",")
-		}
-	} else if s1, ok1 := obj.([]string); ok1 {
-		return s1
-	} else {
-		return []string{}
-	}
-}
-
-//删除表
-func (m *Pool) DelColl(c string) bool {
-	defer util.Catch()
-	sess := m.Get()
-	b := true
-	if sess != nil {
-		defer m.Close(sess)
-		coll := sess.DB(m.DB).C(c)
-		err := coll.DropCollection()
-		if err != nil {
-			b = false
-		}
-	} else {
-		b = false
-	}
-	return b
-}

+ 0 - 117
udp_city/src/mongodbutil/pool.go

@@ -1,117 +0,0 @@
-package mongodbutil
-
-import (
-	"log"
-	"sync"
-	"time"
-
-	mgo "gopkg.in/mgo.v2"
-)
-
-var Mgo *Pool
-var Mgo_Bidding *Pool
-
-type Pool struct {
-	mu      sync.RWMutex
-	initCap int
-	maxCap  int
-	timeout int64
-	ch      chan *mgosess
-	addr    string
-	DB      string
-	live    int
-}
-type mgosess struct {
-	sess      *mgo.Session
-	timestamp int64
-}
-
-func MgoFactory(initCap int, maxCap int, timeout int64, addr, DB string) *Pool {
-	p := &Pool{sync.RWMutex{}, initCap, maxCap, timeout, make(chan *mgosess, maxCap), addr, DB, 0}
-	p.init()
-	go p.gc()
-	return p
-}
-
-func (p *Pool) GetLive() int {
-	return p.live
-}
-
-func (p *Pool) init() {
-	for i := 0; i < p.initCap; i++ {
-		sess, err := mgo.DialWithTimeout(p.addr, time.Duration(p.timeout)*time.Second)
-		if sess != nil && sess.Ping() == nil {
-			p.live++
-			p.ch <- &mgosess{sess, time.Now().Unix()}
-		} else {
-			log.Println(err.Error())
-		}
-	}
-}
-
-func (p *Pool) Get() (sess *mgo.Session) {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-	if p.live > 0 {
-		select {
-		case mgos := <-p.ch:
-			if mgos.sess.Ping() == nil {
-				sess = mgos.sess
-			} else {
-				p.live--
-			}
-		case <-time.After(100 * time.Millisecond):
-		}
-	}
-	if sess == nil && p.live < p.maxCap {
-		s, err := mgo.DialWithTimeout(p.addr, 10*time.Second)
-		if s != nil && s.Ping() == nil {
-			p.live++
-			sess = s
-		} else {
-			log.Println(err.Error())
-		}
-	}
-	return
-}
-
-func (p *Pool) Close(sess *mgo.Session) {
-	if sess != nil {
-		if sess.Ping() == nil {
-			p.mu.Lock()
-			select {
-			case p.ch <- &mgosess{sess, time.Now().Unix()}:
-			default:
-				p.live--
-			}
-			p.mu.Unlock()
-		} else {
-			p.live--
-		}
-	}
-}
-
-func (p *Pool) gc() {
-	p.mu.Lock()
-	size := len(p.ch)
-	if size > p.initCap {
-		tn := time.Now().Unix()
-		init1 := 0
-		for i := 0; i < size; i++ {
-			select {
-			case c := <-p.ch:
-				if tn-c.timestamp < p.timeout || init1 < p.initCap {
-					p.ch <- c
-					init1++
-				} else {
-					c.sess.Close()
-					p.live--
-				}
-			default:
-			}
-		}
-	}
-	p.mu.Unlock()
-	//log.Println("size:", size, "live:", p.live)
-	time.AfterFunc(time.Duration(p.timeout)*time.Second, p.gc)
-}

+ 57 - 0
udp_datacheck/src/check_bidamount.go

@@ -0,0 +1,57 @@
+package main
+
+import (
+	qu "qfw/util"
+	"regexp"
+)
+
+var classScopeReg *regexp.Regexp = regexp.MustCompile("(建筑工程|交通工程|市政设施)")
+
+// getCheckDataBidamount reviews the winning-bid amount ("bidamount") of one
+// record and queues a corrected value when it looks inflated by a unit error.
+//
+// tmp is the record under review; update_check collects the fields that will
+// be written back for it. Only records from the two whitelisted crawlers are
+// touched: when their bidamount exceeds 1,000,000,000 it is divided by 10000,
+// stored in update_check["bidamount"], and the reason "爬虫倍率" is recorded
+// under update_check["modifyinfo"]["bidamount"].
+func getCheckDataBidamount(tmp map[string]interface{}, update_check *map[string]interface{}) {
+	// The whitelist is keyed on the crawler code carried by the record.
+	spidercode := qu.ObjToString(tmp["spidercode"])
+
+	// Whitelisted sources (author's notes):
+	//   cq_cqsggzyjyzx_zfcg_zbgs   fits the rule, 9 records
+	//   ln_lnzfcgw_gggs_jggg       fits the rule, 555 records
+	//   a_zgzfcgw_dfgg_zongb_new   China Government Procurement site, poor fit
+	//   a_zgzfcgw_bid_tender_new   China Government Procurement site, poor fit
+	if spidercode != "cq_cqsggzyjyzx_zfcg_zbgs" && spidercode != "ln_lnzfcgw_gggs_jggg" {
+		return
+	}
+
+	bidamount := qu.Float64All(tmp["bidamount"])
+	if bidamount <= 1000000000.0 {
+		return
+	}
+
+	// Extend the modification log an earlier check may already have started;
+	// fall back to a fresh map when it is absent or not a map.
+	modifyinfo := make(map[string]interface{})
+	if prev := qu.ObjToMap((*update_check)["modifyinfo"]); prev != nil && *prev != nil {
+		modifyinfo = *prev
+	}
+
+	(*update_check)["bidamount"] = bidamount / float64(10000)
+	modifyinfo["bidamount"] = "爬虫倍率"
+	(*update_check)["modifyinfo"] = modifyinfo
+
+	// Per-industry split: too coarse and error-prone, left disabled.
+	//if topscopeclass, ok := tmp["topscopeclass"].(primitive.A);(bidamount>0.0&&ok&&len(topscopeclass)>0){
+	//	isTrue := false
+	//	for _,v := range topscopeclass{
+	//		if classScopeReg.MatchString(qu.ObjToString(v)) {
+	//			isTrue = true
+	//			break
+	//		}
+	//	}
+	//	if !isTrue && bidamount>1000000000.0 {
+	//		new_bidamount := bidamount/float64(10000)
+	//		(*update_check)["bidamount"] = new_bidamount
+	//		modifyinfo["bidamount"] = "行业倍率"
+	//		(*update_check)["modifyinfo"] = modifyinfo
+	//		return
+	//	}
+	//}
+}
+

+ 260 - 0
udp_datacheck/src/check_city.go

@@ -0,0 +1,260 @@
+package main
+
+import (
+	"fmt"
+	qu "qfw/util"
+	"regexp"
+	"strings"
+)
+
+var cityEndReg *regexp.Regexp = regexp.MustCompile("(区|县)$")
+
+// getCheckDataCity reviews the area/city/district fields of one record and
+// copies any corrections, together with their modification log, into
+// update_check.
+//
+// Records that already carry a full area/city/district triple, or that have
+// no buyer, are only validated against the standard dictionaries. Otherwise
+// the buyer is looked up through cityMarshal first and the merged result is
+// validated against the dictionaries afterwards.
+func getCheckDataCity(tmp map[string]interface{}, update_check *map[string]interface{}) {
+	area := qu.ObjToString(tmp["area"])
+	city := qu.ObjToString(tmp["city"])
+	district := qu.ObjToString(tmp["district"])
+	buyer := qu.ObjToString(tmp["buyer"])
+
+	// checkAgainstStandard validates the record's own values and queues
+	// whatever the dictionaries say must change.
+	checkAgainstStandard := func() {
+		fixes := standardCheckCity(area, city, district)
+		if len(fixes) == 0 {
+			return
+		}
+		copyUpdateData(updateLogging(tmp, fixes, "标准信息"), update_check)
+	}
+
+	complete := district != "" && city != "" && area != "" && area != "全国"
+	if complete || buyer == "" {
+		checkAgainstStandard()
+		return
+	}
+
+	// Try to fill the gaps from the enterprise collection.
+	entFixes := cityMarshal(tmp)
+	if len(entFixes) == 0 {
+		checkAgainstStandard()
+		return
+	}
+
+	// Overlay the enterprise values on the record's own values.
+	mergedArea, mergedCity, mergedDistrict := area, city, district
+	if entFixes["area"] != "" {
+		mergedArea = qu.ObjToString(entFixes["area"])
+	}
+	if entFixes["city"] != "" {
+		mergedCity = qu.ObjToString(entFixes["city"])
+	}
+	if entFixes["district"] != "" {
+		mergedDistrict = qu.ObjToString(entFixes["district"])
+	}
+
+	pending := updateLogging(tmp, entFixes, "企业信息")
+	// Dictionary corrections on the merged triple override the enterprise values.
+	for field, value := range standardCheckCity(mergedArea, mergedCity, mergedDistrict) {
+		pending[field] = value
+		pending["modifyinfo"].(map[string]interface{})[field] = fmt.Sprintf("企业标准信息~%s~%s", qu.ObjToString(tmp[field]), value)
+	}
+	copyUpdateData(pending, update_check)
+}
+
+// cityMarshal looks the record's buyer up in the enterprise collection and
+// returns the area/city/district values that can be filled in from it.
+//
+// The result only contains the keys ("area", "city", "district") that should
+// be set; it is empty when the buyer is unknown or nothing can be added.
+// The administrative code stored with the company is tried first; the
+// company_area/company_city/company_district fields are the fallback.
+func cityMarshal(data map[string]interface{}) map[string]string {
+	buyer := qu.ObjToString(data["buyer"])
+	bidarea := qu.ObjToString(data["area"])
+	bidcity := qu.ObjToString(data["city"])
+	biddistrict := qu.ObjToString(data["district"])
+	rdata := make(map[string]string)
+
+	tmp := qy_mgo.FindOne(qy_coll_name, map[string]interface{}{
+		"company_name": buyer,
+	})
+	// FindOne yields an empty map on a miss; a usable hit has at least two fields.
+	if len(tmp) < 2 {
+		return rdata
+	}
+
+	company_code := fmt.Sprint(tmp["company_code"])
+	if len(company_code) > 5 {
+		province_city_district := qy_mgo.FindOne("address", map[string]interface{}{
+			"code": company_code[:6],
+		})
+		// A missing Remarks field formats as "<nil>" and therefore counts as valid.
+		remarks := fmt.Sprint(province_city_district["Remarks"])
+		revoked := remarks == "" || remarks == "废除" || remarks == "已作废"
+		// FindOne never returns nil, so a miss has to be detected by length;
+		// otherwise an unknown code would skip the enterprise fallback below.
+		if len(province_city_district) > 0 && !revoked {
+			codeprovince := qu.ObjToString(province_city_district["province"])
+			codecity := qu.ObjToString(province_city_district["city"])
+			codedistrict := qu.ObjToString(province_city_district["district"])
+			if bidarea == "" || bidarea == "全国" {
+				if codeprovince != "" {
+					rdata["area"] = codeprovince
+					if codecity != "" && codecity != codeprovince {
+						rdata["city"] = codecity
+						if codedistrict != "" && codedistrict != codecity {
+							rdata["district"] = codedistrict
+						}
+					}
+				}
+			} else if bidcity == "" && codecity != "" && bidarea == codeprovince {
+				if codecity != bidarea {
+					rdata["city"] = codecity
+					if codedistrict != "" && codecity != codedistrict {
+						rdata["district"] = codedistrict
+					}
+				}
+			} else if biddistrict == "" && codedistrict != "" && bidarea == codeprovince && codecity == bidcity {
+				rdata["district"] = codedistrict
+			}
+			return rdata
+		}
+	}
+
+	entprovince := qu.ObjToString(tmp["company_area"])
+	//entprovince = strings.TrimRight(entprovince, "省")
+	//entprovince = strings.TrimRight(entprovince, "市")
+	entcity := qu.ObjToString(tmp["company_city"])
+	entdistrict := qu.ObjToString(tmp["company_district"])
+
+	// Hong Kong, Macao and Taiwan are handled like a missing area.
+	if bidarea == "" || bidarea == "香港" || bidarea == "澳门" || bidarea == "台湾" || bidarea == "全国" {
+		if entprovince != "" {
+			rdata["area"] = entprovince
+			if entcity != "" && entcity != entprovince {
+				rdata["city"] = entcity
+				if entdistrict != "" && entdistrict != entcity {
+					rdata["district"] = entdistrict
+				}
+			}
+		}
+	} else if bidcity == "" && entcity != "" && entprovince == bidarea {
+		rdata["city"] = entcity
+		if entdistrict != "" && entcity != entdistrict {
+			rdata["district"] = entdistrict
+		}
+	} else if biddistrict == "" && entdistrict != "" && entprovince == bidarea && bidcity == entcity {
+		rdata["district"] = entdistrict
+	}
+
+	return rdata
+}
+// standardCheckCity validates an area/city/district triple against
+// ProvinceDict, CityDict and DistrictDict and returns the fields that must be
+// rewritten. An empty result means nothing has to change; a key mapped to ""
+// means the field must be cleared.
+//
+// The check runs from the most specific level upwards: district, then city,
+// then province. "全国", "香港", "澳门" and "台湾" are never corrected.
+func standardCheckCity(area string, city string, district string) map[string]string {
+
+	rdata := make(map[string]string, 0)
+	if area == "全国" || area == "香港" || area == "澳门" || area == "台湾" {
+		return rdata
+	}
+	// Step 1: district.
+	if district != "" {
+		districtArr := DistrictDict[district]
+		if districtArr == nil { // unknown name: retry with the 区/县 suffix swapped
+			alias_district := aliasDataDistrict(district)
+			if alias_district == "" {
+				rdata["district"] = ""
+			} else {
+				alias_districtArr := DistrictDict[alias_district]
+				if alias_districtArr == nil {
+					rdata["district"] = ""
+				} else {
+					for _, v := range alias_districtArr {
+						if city == v.C_Name && area == v.P_Name {
+							rdata["district"] = alias_district
+							return rdata
+						}
+					}
+				}
+			}
+		} else {
+			isTrue := false
+			for _, v := range districtArr {
+				if city == v.C_Name && area == v.P_Name {
+					isTrue = true
+					break
+				}
+			}
+			if isTrue { // exact match
+				return rdata
+			}
+			// No exact match: a district name that is unique nationwide
+			// determines its city and province.
+			if len(districtArr) == 1 {
+				rdata["area"] = districtArr[0].P_Name
+				rdata["city"] = districtArr[0].C_Name
+				rdata["district"] = districtArr[0].D_Name
+				return rdata
+			}
+		}
+	}
+
+	// Step 2: district check was inconclusive, validate the city.
+	if city != "" {
+		cityArr := CityDict[city]
+		if cityArr == nil { // not a known city: treat the value as a district name
+			// NOTE(review): the match requires v.C_Name == city although the
+			// entries were looked up by district name, so it only fires when a
+			// district is named like its city — confirm this is intended.
+			for _, v := range DistrictDict[city] {
+				if city == v.C_Name && area == v.P_Name {
+					// Use the matched entry, not the first one of the list.
+					rdata["area"] = v.P_Name
+					rdata["city"] = v.C_Name
+					rdata["district"] = v.D_Name
+					return rdata
+				}
+			}
+		} else {
+			isTrue := false
+			for _, v := range cityArr {
+				if area == v.P_Name {
+					isTrue = true
+					break
+				}
+			}
+			if isTrue { // exact match
+				return rdata
+			}
+			// No exact match: a city name that is unique determines its province.
+			if len(cityArr) == 1 {
+				rdata["area"] = cityArr[0].P_Name
+				rdata["city"] = cityArr[0].C_Name
+				rdata["district"] = ""
+				return rdata
+			}
+		}
+	}
+
+	// Step 3: province. An unknown province resets the whole triple.
+	if ProvinceDict[area] == nil {
+		rdata["area"] = "全国"
+		rdata["city"] = ""
+		rdata["district"] = ""
+	}
+
+	return rdata
+}
+
+// updateLogging turns a set of field corrections into an update document.
+//
+// The returned map holds every key/value of rdata plus a "modifyinfo" map in
+// which each corrected field is described as "<desc>~<old value>~<new value>".
+// An existing tmp["modifyinfo"] map is extended in place; when it is missing,
+// nil, or not a map[string]interface{}, a fresh map is used instead of
+// panicking on the type assertion.
+func updateLogging(tmp map[string]interface{}, rdata map[string]string, desc string) map[string]interface{} {
+	modifyinfo, ok := tmp["modifyinfo"].(map[string]interface{})
+	if !ok || modifyinfo == nil {
+		modifyinfo = make(map[string]interface{})
+	}
+	umap := map[string]interface{}{"modifyinfo": modifyinfo}
+	for rk, rv := range rdata {
+		umap[rk] = rv
+		modifyinfo[rk] = fmt.Sprintf("%s~%s~%s", desc, qu.ObjToString(tmp[rk]), rv)
+	}
+	return umap
+}
+
+// copyUpdateData merges every entry of tmp into the map update_check points
+// to, overwriting keys that already exist there.
+func copyUpdateData(tmp map[string]interface{}, update_check *map[string]interface{}) {
+	for field := range tmp {
+		(*update_check)[field] = tmp[field]
+	}
+}
+
+// aliasDataDistrict returns the alternative spelling of a district name with
+// its trailing "县" and "区" swapped, or "" when the name ends in neither.
+// Only the final character is exchanged; the same character appearing earlier
+// in the name is left untouched.
+func aliasDataDistrict(district string) string {
+	switch {
+	case strings.HasSuffix(district, "县"):
+		return strings.TrimSuffix(district, "县") + "区"
+	case strings.HasSuffix(district, "区"):
+		return strings.TrimSuffix(district, "区") + "县"
+	}
+	return ""
+}
+

+ 20 - 0
udp_datacheck/src/config.json

@@ -0,0 +1,20 @@
+{
+  "udpport": ":11109",
+  "mongodb": {
+    "addrName": "127.0.0.1:27017",
+    "dbName": "zhengkun",
+    "collName": "zktest_check_data",
+    "pool": 10
+  },
+  "qy_mongodb": {
+    "qy_addrName": "127.0.0.1:27017",
+    "qy_dbName": "mixdata",
+    "qy_collName": "qyxy_std",
+    "qy_username": "",
+    "qy_password": "",
+    "pool": 10
+  },
+  "jy_collName": "address_jy_2021",
+  "check_thread" : 4,
+  "nextNode": []
+}

+ 289 - 0
udp_datacheck/src/main.go

@@ -0,0 +1,289 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	mu "mfw/util"
+	"net"
+	qu "qfw/util"
+	"sync"
+	"time"
+)
+
+// Province is one entry of ProvinceDict; P_Name is the province name.
+type Province struct {
+	P_Name    string
+}
+// City is one entry of CityDict: a city name (C_Name) together with the
+// province (P_Name) it was loaded with.
+type City struct {
+	P_Name string
+	C_Name string
+}
+// District is one entry of DistrictDict: a district name (D_Name) together
+// with the city (C_Name) and province (P_Name) it was loaded with.
+type District struct {
+	P_Name string
+	C_Name string
+	D_Name string
+}
+
+var (
+	Sysconfig    map[string]interface{} 			//配置文件
+	mconf        map[string]interface{} 			//mongodb配置信息
+	data_mgo,qy_mgo   *MongodbSim            		//mongodb操作对象
+	udpclient 	mu.UdpClient 						//udp对象
+	nextNodes 	[]map[string]interface{}			//节点信息
+	coll_name,qy_coll_name,jy_coll_name 	string	//表名
+	check_lock 		sync.Mutex   					//更新锁
+	check_thread	int								//线程数
+	UpdateTask		 *updateInfo					//更新池
+
+	ProvinceDict	map[string][]Province				//省份-map
+	CityDict		map[string][]City					//城市-map
+	DistrictDict	map[string][]District				//区县-map
+)
+
+// initCheckCity loads the administrative-division collection (jy_coll_name)
+// into ProvinceDict, CityDict and DistrictDict. Rows that carry a town_code
+// are excluded by the query. Each remaining row is filed by its most specific
+// level: district when district_code > 0, else city when city_code > 0, else
+// province. Every dictionary maps a name to all rows sharing that name.
+func initCheckCity() {
+	ProvinceDict = make(map[string][]Province, 0)
+	CityDict = make(map[string][]City, 0)
+	DistrictDict = make(map[string][]District, 0)
+
+	q := map[string]interface{}{
+		"town_code": map[string]interface{}{"$exists": 0},
+	}
+	sess := qy_mgo.GetMgoConn()
+	defer qy_mgo.DestoryMongoConn(sess)
+	it := sess.DB(qy_mgo.DbName).C(jy_coll_name).Find(&q).Iter()
+
+	total := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Println("当前数量:", total)
+		}
+		district_code := qu.IntAll(tmp["district_code"])
+		city_code := qu.IntAll(tmp["city_code"])
+		province := qu.ObjToString(tmp["province"])
+		switch {
+		case district_code > 0:
+			city := qu.ObjToString(tmp["city"])
+			district := qu.ObjToString(tmp["district"])
+			// append on a missing key starts a new one-element list.
+			DistrictDict[district] = append(DistrictDict[district], District{province, city, district})
+		case city_code > 0:
+			city := qu.ObjToString(tmp["city"])
+			CityDict[city] = append(CityDict[city], City{province, city})
+		default:
+			ProvinceDict[province] = append(ProvinceDict[province], Province{province})
+		}
+		// Decode the next row into a fresh map so no keys leak between rows.
+		tmp = make(map[string]interface{})
+	}
+	log.Println(fmt.Sprintf("城市配置加载完毕...省~%d 市~%d 区~%d", len(ProvinceDict), len(CityDict), len(DistrictDict)))
+}
+
+// initMgo builds the two MongoDB handles (data_mgo for the data under review,
+// qy_mgo for the enterprise/address data) and reads the collection names,
+// next-node list and worker count from Sysconfig.
+//
+// Configuration values are type-asserted without checks, so a missing or
+// mistyped key aborts start-up with a panic.
+func initMgo() {
+	// Assign the package-level mconf instead of shadowing it with ":=".
+	mconf = Sysconfig["mongodb"].(map[string]interface{})
+	log.Println(mconf)
+	data_mgo = &MongodbSim{
+		MongodbAddr: mconf["addrName"].(string),
+		DbName:      mconf["dbName"].(string),
+		Size:        qu.IntAllDef(mconf["pool"], 10),
+	}
+	data_mgo.InitPool()
+
+	qy_mconf := Sysconfig["qy_mongodb"].(map[string]interface{})
+	qy_mgo = &MongodbSim{
+		MongodbAddr: qy_mconf["qy_addrName"].(string),
+		DbName:      qy_mconf["qy_dbName"].(string),
+		Size:        qu.IntAllDef(qy_mconf["pool"], 10),
+		UserName:    qy_mconf["qy_username"].(string),
+		Password:    qy_mconf["qy_password"].(string),
+	}
+	qy_mgo.InitPool()
+
+	coll_name = mconf["collName"].(string)
+	qy_coll_name = qy_mconf["qy_collName"].(string)
+
+	jy_coll_name = Sysconfig["jy_collName"].(string)
+	nextNodes = qu.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
+	check_thread = qu.IntAll(Sysconfig["check_thread"])
+
+	log.Println("mgo 等配置,加载完毕...")
+}
+
+// init loads the configuration file, connects to MongoDB, loads the city
+// dictionaries and starts the background goroutine that flushes queued
+// updates.
+func init() {
+	qu.ReadConfig(&Sysconfig) // load the configuration file
+	if len(Sysconfig) == 0 {
+		log.Fatal("读取配置文件失败", Sysconfig)
+	}
+	// Sysconfig contains the qy_mongodb password, so only the fact that it
+	// was loaded is logged, never its content.
+	log.Println("配置文件加载完毕...")
+	initMgo()       // MongoDB handles and settings
+	initCheckCity() // province/city/district dictionaries
+
+	// update pool
+	UpdateTask = newUpdatePool()
+	go UpdateTask.updateData()
+}
+
+func main()  {
+	updport := Sysconfig["udpport"].(string)
+	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+	log.Println("Udp服务监听", updport)
+
+
+
+	//临时调试
+	sid := "1fffffffffffffffffffffff"
+	eid := "9fffffffffffffffffffffff"
+	startCheckData(sid,eid)
+
+
+
+
+	time.Sleep(99999 * time.Hour)
+
+}
+
+
+//开始审查数据
+func startCheckData(sid, eid string) {
+	log.Println("开始审查数据...")
+	defer qu.Catch()
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gt":  StringTOBsonId(sid),
+			"$lte": StringTOBsonId(eid),
+		},
+	}
+	log.Println("查询条件:",q)
+
+	check_pool := make(chan bool, check_thread)
+	check_wg := &sync.WaitGroup{}
+
+	sess := data_mgo.GetMgoConn()
+	defer data_mgo.DestoryMongoConn(sess)
+	it := sess.DB(data_mgo.DbName).C(coll_name).Find(&q).Iter()
+	total,isRepair := 0,0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Println("当前数量:", total,isRepair)
+		}
+		update_id := map[string]interface{}{"_id":tmp["_id"]}
+		check_pool <- true
+		check_wg.Add(1)
+		go func(tmp map[string]interface{},update_id map[string]interface{}) {
+			defer func() {
+				<-check_pool
+				check_wg.Done()
+			}()
+			//更新-
+			update_check := make(map[string]interface{},0)
+			//审查-城市
+			getCheckDataCity(tmp,&update_check)
+			//审查-中标金额
+			getCheckDataBidamount(tmp,&update_check)
+
+			if len(update_check)>0 {
+				isRepair++
+				//正式
+				//UpdateTask.updatePool <- []map[string]interface{}{
+				//	update_id,
+				//	map[string]interface{}{
+				//		"$set": update_check,
+				//	},
+				//}
+				//if update_check["bidamount"]!=nil {
+				//	log.Println("金额变化~",tmp["_id"])
+				//}
+				//测试
+				//UpdateTask.updatePool <- []map[string]interface{}{
+				//	update_id,
+				//	map[string]interface{}{
+				//		"$set": map[string]interface{}{
+				//			"modifyinfo":update_check["modifyinfo"],
+				//		},
+				//	},
+				//}
+			}
+		}(tmp,update_id)
+		tmp = make(map[string]interface{})
+	}
+	check_wg.Wait()
+
+	log.Println("check is over - 总计数量",total,isRepair)
+}
+
+
+
+
+
+
+
+
+// processUdpMsg is the UDP callback. A data message carrying "gtid" and
+// "lteid" is acknowledged, the id range is checked synchronously, and the
+// same range is then forwarded to every configured next node. A no-op
+// message (the reply of a next node) is only logged.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA:
+		var rep map[string]interface{}
+		if err := json.Unmarshal(data, &rep); err != nil {
+			log.Println(err)
+			return
+		}
+		sid, _ := rep["gtid"].(string)
+		eid, _ := rep["lteid"].(string)
+		if sid == "" || eid == "" {
+			log.Println("err", "sid=", sid, ",eid=", eid)
+			return
+		}
+
+		// Acknowledge right away, then do the work.
+		go udpclient.WriteUdp(data, mu.OP_NOOP, ra)
+		log.Println("udp通知抽取id段-审查数据", sid, " ", eid)
+		startCheckData(sid, eid)
+		log.Println("udp通知审查数据完成,eid", eid)
+
+		for _, node := range nextNodes {
+			payload, _ := json.Marshal(map[string]interface{}{
+				"gtid":  sid,
+				"lteid": eid,
+				"stype": qu.ObjToString(node["stype"]),
+			})
+			target := &net.UDPAddr{
+				IP:   net.ParseIP(node["addr"].(string)),
+				Port: qu.IntAll(node["port"]),
+			}
+			if err := udpclient.WriteUdp(payload, mu.OP_TYPE_DATA, target); err != nil {
+				log.Println(err)
+			}
+		}
+	case mu.OP_NOOP: // reply from a next node
+		log.Println(string(data))
+	}
+}
+
+

+ 328 - 0
udp_datacheck/src/mgo.go

@@ -0,0 +1,328 @@
+package main
+
+import (
+	"context"
+	"log"
+	"time"
+
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+)
+
+// MgoSess accumulates the parameters of one find query (database,
+// collection, filter, sort, projection, limit, skip) through its chained
+// setters; Iter executes it on the client held by M.
+type MgoSess struct {
+	Db     string
+	Coll   string
+	Query  interface{}
+	Sorts  []string
+	fields interface{}
+	limit  int64
+	skip   int64
+	M      *MongodbSim
+}
+
+// MgoIter wraps a driver cursor; Cursor is nil when the query could not be
+// started.
+type MgoIter struct {
+	Cursor *mongo.Cursor
+}
+
+// Next decodes the next document of the cursor into result and reports
+// whether one was available. On exhaustion, on a cursor error, or on a decode
+// error the cursor is closed and false is returned; cursor and decode errors
+// are logged so they are not mistaken for a normal end of data.
+func (mt *MgoIter) Next(result interface{}) bool {
+	if mt.Cursor == nil {
+		return false
+	}
+	// The driver API expects a non-nil context.
+	ctx := context.Background()
+	if !mt.Cursor.Next(ctx) {
+		if err := mt.Cursor.Err(); err != nil {
+			log.Println("mgo cur err", err.Error())
+		}
+		mt.Cursor.Close(ctx)
+		return false
+	}
+	if err := mt.Cursor.Decode(result); err != nil {
+		log.Println("mgo cur err", err.Error())
+		mt.Cursor.Close(ctx)
+		return false
+	}
+	return true
+}
+
+// DB sets the database name and returns ms for chaining.
+func (ms *MgoSess) DB(name string) *MgoSess {
+	ms.Db = name
+	return ms
+}
+
+// C sets the collection name and returns ms for chaining.
+func (ms *MgoSess) C(name string) *MgoSess {
+	ms.Coll = name
+	return ms
+}
+
+// Find sets the query filter and returns ms for chaining.
+func (ms *MgoSess) Find(q interface{}) *MgoSess {
+	ms.Query = q
+	return ms
+}
+
+// Select sets the projection and returns ms for chaining.
+func (ms *MgoSess) Select(fields interface{}) *MgoSess {
+	ms.fields = fields
+	return ms
+}
+
+// Limit sets the maximum number of documents; Iter ignores values <= 0.
+func (ms *MgoSess) Limit(limit int64) *MgoSess {
+	ms.limit = limit
+	return ms
+}
+// Skip sets the number of documents to skip; Iter ignores values <= 0.
+func (ms *MgoSess) Skip(skip int64) *MgoSess {
+	ms.skip = skip
+	return ms
+}
+
+// Sort sets the sort keys; a "-" prefix means descending, "+" or no prefix
+// means ascending (interpreted by Iter).
+func (ms *MgoSess) Sort(sorts ...string) *MgoSess {
+	ms.Sorts = sorts
+	return ms
+}
+
+// Iter runs the accumulated query and returns an iterator over its result.
+// When the query cannot be started the error is logged and the returned
+// iterator has a nil Cursor, so its Next reports false immediately.
+//
+// Sort keys are applied in the order they were given: "-field" sorts
+// descending, "+field" or "field" ascending; empty keys are ignored.
+func (ms *MgoSess) Iter() *MgoIter {
+	it := &MgoIter{}
+	if ms.M == nil || ms.M.C == nil {
+		log.Println("mgo find err", "no mongodb client")
+		return it
+	}
+	find := options.Find()
+	if ms.skip > 0 {
+		find.SetSkip(ms.skip)
+	}
+	if ms.limit > 0 {
+		find.SetLimit(ms.limit)
+	}
+	find.SetBatchSize(100)
+	if len(ms.Sorts) > 0 {
+		// bson.D keeps key order; a bson.M would randomise a multi-key sort.
+		sort := bson.D{}
+		for _, k := range ms.Sorts {
+			if k == "" {
+				continue
+			}
+			field, order := k, 1
+			switch k[0] {
+			case '-':
+				field, order = k[1:], -1
+			case '+':
+				field = k[1:]
+			}
+			sort = append(sort, bson.E{Key: field, Value: order})
+		}
+		if len(sort) > 0 {
+			find.SetSort(sort)
+		}
+	}
+	if ms.fields != nil {
+		find.SetProjection(ms.fields)
+	}
+	cur, err := ms.M.C.Database(ms.Db).Collection(ms.Coll).Find(ms.M.Ctx, ms.Query, find)
+	if err != nil {
+		log.Println("mgo find err", err.Error())
+	} else {
+		it.Cursor = cur
+	}
+	return it
+}
+
+// MongodbSim bundles one MongoDB client with its settings. MongodbAddr,
+// DbName, Size and optionally UserName/Password are set by the caller;
+// InitPool fills C, Ctx, ShortCtx and pool. pool is a semaphore of capacity
+// Size used by Open/Close around each helper call.
+type MongodbSim struct {
+	MongodbAddr string
+	Size        int
+	//	MinSize     int
+	DbName   string
+	C        *mongo.Client
+	Ctx      context.Context
+	ShortCtx context.Context
+	pool     chan bool
+	UserName string
+	Password string
+}
+
+// GetMgoConn returns a new query builder bound to m. It does not take a slot
+// from the pool.
+func (m *MongodbSim) GetMgoConn() *MgoSess {
+	//m.Open()
+	return &MgoSess{M: m}
+}
+
+// DestoryMongoConn detaches ms from its MongodbSim so it can no longer be
+// used for queries. A nil ms is ignored.
+func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
+	//m.Close()
+	if ms == nil {
+		return
+	}
+	ms.M = nil
+}
+
+// InitPool creates the MongoDB client for m.MongodbAddr and prepares the
+// helper state (pool semaphore, contexts). Credentials are applied only when
+// both UserName and Password are set. A connection error is logged and leaves
+// m.C nil.
+func (m *MongodbSim) InitPool() {
+	opts := options.Client()
+	opts.SetConnectTimeout(3 * time.Second)
+	opts.ApplyURI("mongodb://" + m.MongodbAddr)
+	opts.SetMaxPoolSize(uint64(m.Size))
+	opts.SetMaxConnIdleTime(2 * time.Hour)
+	if m.UserName != "" && m.Password != "" {
+		opts.SetAuth(options.Credential{
+			Username: m.UserName,
+			Password: m.Password,
+		})
+	}
+	m.pool = make(chan bool, m.Size)
+
+	// Long-lived contexts must not carry a deadline measured from start-up:
+	// they would expire for good once it passed, and their cancel functions
+	// could never be called. Callers that need a time limit should derive
+	// one per operation with context.WithTimeout.
+	m.Ctx = context.Background()
+	m.ShortCtx = context.Background()
+
+	connectCtx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
+	defer cancel()
+	client, err := mongo.Connect(connectCtx, opts)
+	if err != nil {
+		log.Println("mgo init error:", err.Error())
+	} else {
+		m.C = client
+		log.Println("init success")
+	}
+}
+
+// Open takes one slot of the pool semaphore, blocking while all Size slots
+// are in use.
+func (m *MongodbSim) Open() {
+	m.pool <- true
+}
+// Close gives back one slot taken by Open.
+func (m *MongodbSim) Close() {
+	<-m.pool
+}
+
+//批量插入
+func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	var writes []mongo.WriteModel
+	for _, d := range doc {
+		write := mongo.NewUpdateOneModel()
+		write.SetFilter(d[0])
+		write.SetUpdate(d[1])
+		write.SetUpsert(true)
+		writes = append(writes, write)
+	}
+	r, e := coll.BulkWrite(m.Ctx, writes)
+	if e != nil {
+		log.Println("mgo upsert error:", e.Error())
+		return nil, false
+	}
+	//	else {
+	//		if r.UpsertedCount != int64(len(doc)) {
+	//			log.Println("mgo upsert uncomplete:uc/dc", r.UpsertedCount, len(doc))
+	//		}
+	//		return true
+	//	}
+	return r.UpsertedIDs, true
+}
+
+// SaveBulk inserts all given documents into collection c with one bulk
+// write and reports whether it succeeded; a failure is logged.
+func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+	var models []mongo.WriteModel
+	for i := range doc {
+		models = append(models, mongo.NewInsertOneModel().SetDocument(doc[i]))
+	}
+	if _, err := m.C.Database(m.DbName).Collection(c).BulkWrite(m.Ctx, models); err != nil {
+		log.Println("mgo savebulk error:", err.Error())
+		return false
+	}
+	return true
+}
+
+// Save inserts doc into collection c and returns the id of the new document,
+// or nil when the insert fails.
+func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
+	m.Open()
+	defer m.Close()
+	res, err := m.C.Database(m.DbName).Collection(c).InsertOne(m.Ctx, doc)
+	if err == nil {
+		return res.InsertedID
+	}
+	return nil
+}
+
+// UpdateById applies the update document doc to the document of collection c
+// whose _id equals the hex id, and reports whether the call succeeded.
+func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+	filter := map[string]interface{}{"_id": StringTOBsonId(id)}
+	_, err := m.C.Database(m.DbName).Collection(c).UpdateOne(m.Ctx, filter, doc)
+	return err == nil
+}
+
+// DeleteById removes the document of collection c whose _id equals the hex
+// id and returns the number of deleted documents (0 on error).
+func (m *MongodbSim) DeleteById(c, id string) int64 {
+	m.Open()
+	defer m.Close()
+	filter := map[string]interface{}{"_id": StringTOBsonId(id)}
+	res, err := m.C.Database(m.DbName).Collection(c).DeleteOne(m.Ctx, filter)
+	if err == nil {
+		return res.DeletedCount
+	}
+	return 0
+}
+
+// Delete removes every document of collection c matching query and returns
+// the number of deleted documents (0 on error).
+func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
+	m.Open()
+	defer m.Close()
+	res, err := m.C.Database(m.DbName).Collection(c).DeleteMany(m.Ctx, query)
+	if err == nil {
+		return res.DeletedCount
+	}
+	return 0
+}
+
+// FindById returns the document of collection c whose _id equals the hex id.
+// The decode error is ignored, so the result is an empty, non-nil map when
+// nothing could be decoded.
+func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	filter := map[string]interface{}{"_id": StringTOBsonId(id)}
+	found := make(map[string]interface{})
+	m.C.Database(m.DbName).Collection(c).FindOne(m.Ctx, filter).Decode(&found)
+	return found
+}
+
+// FindOne returns the first document of collection c matching query. The
+// decode error is ignored, so the result is an empty, non-nil map when
+// nothing could be decoded.
+func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	found := make(map[string]interface{})
+	m.C.Database(m.DbName).Collection(c).FindOne(m.Ctx, query).Decode(&found)
+	return found
+}
+
+// Find returns all documents of collection c matching query, using the given
+// sort and projection. A query or decoding failure is logged and returned to
+// the caller; it no longer terminates the process.
+func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	op := options.Find().SetSort(sort).SetProjection(fields)
+	r, err := coll.Find(m.Ctx, query, op)
+	if err != nil {
+		log.Println("mgo find error:", err.Error())
+		return nil, err
+	}
+	var results []map[string]interface{}
+	if err = r.All(m.Ctx, &results); err != nil {
+		log.Println("mgo find error:", err.Error())
+		return nil, err
+	}
+	return results, nil
+}
+
+// NewObjectId creates a new ObjectID for use as _id.
+func NewObjectId() primitive.ObjectID {
+	return primitive.NewObjectID()
+}
+
+// StringTOBsonId converts a hex string into an ObjectID. The conversion
+// error is discarded, so the caller cannot tell an invalid string apart from
+// a valid one.
+func StringTOBsonId(id string) primitive.ObjectID {
+	objectId, _ := primitive.ObjectIDFromHex(id)
+	return objectId
+}
+
+// BsonTOStringId returns the hex form of id. It panics when id is not a
+// primitive.ObjectID, because the type assertion is unchecked.
+func BsonTOStringId(id interface{}) string {
+	return id.(primitive.ObjectID).Hex()
+}

+ 55 - 0
udp_datacheck/src/update_task.go

@@ -0,0 +1,55 @@
+package main
+
+import (
+	"log"
+	"time"
+)
+
+// updateInfo holds the queue of pending update batches and the flush threshold.
+type updateInfo struct {
+	updatePool chan []map[string]interface{} // channel carrying data waiting to be updated
+	saveSize   int                           // number of queued items that triggers a bulk write
+}
+
+// sp is a one-slot semaphore: updateData sends to it before starting a bulk
+// write goroutine and that goroutine receives from it when it finishes, so at
+// most one bulk write is in flight at a time.
+var sp = make(chan bool, 1)
+
+// newUpdatePool builds an updateInfo with a 5000-slot update channel and a
+// save size of 200.
+func newUpdatePool() *updateInfo {
+	return &updateInfo{
+		updatePool: make(chan []map[string]interface{}, 5000),
+		saveSize:   200,
+	}
+}
+
+
+// updateData loops forever draining update.updatePool. Received items are
+// buffered and written with data_mgo.UpSertBulk either when saveSize items
+// have accumulated or when no item has arrived for 5 seconds and the buffer
+// is non-empty.
+func (update *updateInfo) updateData() {
+	log.Println("开始不断监听--待更新数据")
+	batch := make([][]map[string]interface{}, update.saveSize)
+	filled := 0
+	// flush hands the buffered items to a background bulk upsert and starts a
+	// fresh buffer. The send on sp blocks until the previous write finishes.
+	flush := func() {
+		sp <- true
+		go func(pending [][]map[string]interface{}) {
+			defer func() {
+				<-sp
+			}()
+			data_mgo.UpSertBulk(coll_name, pending...)
+		}(batch[:filled])
+		batch = make([][]map[string]interface{}, update.saveSize)
+		filled = 0
+	}
+	for {
+		select {
+		case item := <-update.updatePool:
+			batch[filled] = item
+			filled++
+			if filled == update.saveSize {
+				flush()
+			}
+		case <-time.After(5 * time.Second): // when idle, check every 5 seconds
+			if filled > 0 {
+				flush()
+			}
+		}
+	}
+}

+ 29 - 0
udp_datacheck/src/zk_mark

@@ -0,0 +1,29 @@
+
+
+{
+  "udpport": ":11109",
+  "mongodb": {
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "zktest_check_data",
+    "pool": 10
+  },
+  "qy_mongodb": {
+    "qy_addrName": "172.17.145.163:27083,172.17.4.187:27082",
+    "qy_dbName": "mixdata",
+    "qy_collName": "qyxy_std",
+    "qy_username": "zhengkun",
+    "qy_password": "zk@123123",
+    "pool": 10
+  },
+  "jy_collName": "address_jy_2021",
+  "check_thread" : 4,
+  "nextNode": [
+    {
+      "addr": "127.0.0.1",
+      "port": 1781,
+      "stype":"hangye",
+      "memo": "生kv招标分类"
+    }
+  ]
+}

+ 2 - 2
udps/main.go

@@ -22,8 +22,8 @@ func main() {
 	flag.IntVar(&p, "p", 6601, "端口")
 	flag.IntVar(&tmptime, "tmptime", 0, "时间查询")
 	flag.StringVar(&tmpkey, "tmpkey", "", "时间字段")
-	flag.StringVar(&id1, "gtid", "614168ea1a75b8f44678a39b", "gtid")
-	flag.StringVar(&id2, "lteid", "6142e5741a75b8f4467b3276", "lteid")
+	flag.StringVar(&id1, "gtid", "114168ea1a75b8f44678a39b", "gtid")
+	flag.StringVar(&id2, "lteid", "9142e5741a75b8f4467b3276", "lteid")
 	flag.StringVar(&ids, "ids", "", "id1,id2")
 	flag.StringVar(&stype, "stype", "biddingall", "stype,传递类型")
 	flag.StringVar(&bkey, "bkey", "", "bkey,加上此参数表示不生关键词和摘要")