Browse Source

数据清洗

zhengkun 2 days ago
parent
commit
ffb69d04cb
10 changed files with 557 additions and 115 deletions
  1. 2 3
      sensitive/src/util/udpdata.go
  2. 33 11
      src/check.go
  3. 117 56
      src/check_ai.go
  4. 283 0
      src/check_other.go
  5. 1 0
      src/check_pub.go
  6. 8 8
      src/config.json
  7. 51 19
      src/initvar.go
  8. 20 3
      src/main.go
  9. 41 14
      src/mgo.go
  10. 1 1
      src/udprocess.go

+ 2 - 3
sensitive/src/util/udpdata.go

@@ -48,9 +48,9 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 			sid, _ := rep["gtid"].(string)
 			eid, _ := rep["lteid"].(string)
 			stype := qu.ObjToString(rep["stype"])
+			key := qu.ObjToString(rep["key"])
 			if stype == "monitor" {
 				log.Println("收到监测......")
-				key := qu.ObjToString(rep["key"])
 				udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
 				return
 			}
@@ -58,10 +58,9 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 				log.Println("err", "sid=", sid, ",eid=", eid)
 				return
 			}
-			go udpclient.WriteUdp([]byte("get:"+sid+"_"+eid), mu.OP_NOOP, ra)
+			go udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
 			log.Println("Udp回应上节点~id段")
 			QuerySensitiveWords(sid, eid)
-			log.Println("...计划发送udp~统计下一节点...")
 		}
 	case mu.OP_NOOP: //下个节点回应
 		log.Println(string(data))

+ 33 - 11
src/check.go

@@ -27,6 +27,10 @@ func startCheckData(sid, eid string) {
 		if total%10000 == 0 {
 			log.Println("当前数量:", total, tmp["_id"])
 		}
+		if qu.ObjToString(tmp["site"]) == "剑鱼信息发布平台" {
+			tmp = make(map[string]interface{})
+			continue
+		}
 		update_id := map[string]interface{}{"_id": tmp["_id"]}
 		check_pool <- true
 		check_wg.Add(1)
@@ -39,17 +43,22 @@ func startCheckData(sid, eid string) {
 			update_info := make(map[string]interface{}, 0)
 			//审查-发布时间
 			getCheckDataPub(tmp, update_info)
-			//审查-单位信息
-			getCheckDataUnit(tmp, update_info)
 			//是否删除
-			is_unset := false
-			if update_ai {
-				is_unset = ChooseCheckDataAI(tmp, &update_info)
-				if update_info["com_package"] == nil { //构建单包信息···
-					com_package := CreatSingleFieldInfo(tmp, update_info)
-					update_info["com_package"] = com_package
-				}
+			is_unset := ChooseCheckDataAI(tmp, &update_info)
+			if update_info["com_package"] == nil { //构建单包信息···
+				com_package := CreatSingleFieldInfo(tmp, update_info)
+				update_info["com_package"] = com_package
 			}
+
+			//补充规则大全····
+			getReplenishRules(tmp, update_info)
+			//最终单位修正与选取
+			getTheBestUnit(tmp, update_info)
+			//校验金额与折扣率(删除金额)
+			un_bid := getVerifyBidDiscount(tmp, update_info)
+			//校验是否科学计数法影响金额
+			getScientificNotation(tmp, update_info)
+
 			//最终更新
 			if len(update_info) > 0 {
 				UpdateTask.updatePool <- []map[string]interface{}{
@@ -63,6 +72,12 @@ func startCheckData(sid, eid string) {
 					{"$unset": Unset_Check},
 				}
 			}
+			if un_bid && !is_unset {
+				UpdateTask.updatePool <- []map[string]interface{}{
+					update_id,
+					{"$unset": map[string]interface{}{"bidamount": 1}},
+				}
+			}
 		}(tmp, update_id)
 		tmp = make(map[string]interface{})
 	}
@@ -78,7 +93,9 @@ func CreatSingleFieldInfo(tmp map[string]interface{}, update_info map[string]int
 	package_id = strings.ReplaceAll(package_id, "-", "")
 	com_package["package_id"] = package_id
 	com_package["name"] = qu.ObjToString(tmp["projectname"])
-
+	com_package["projectcode"] = qu.ObjToString(tmp["projectcode"])
+	com_package["packagecode"] = qu.ObjToString(tmp["packagecode"])
+	com_package["contractcode"] = qu.ObjToString(tmp["contractcode"])
 	if update_info["budget"] != nil {
 		com_package["budget"] = update_info["budget"]
 	} else {
@@ -86,11 +103,16 @@ func CreatSingleFieldInfo(tmp map[string]interface{}, update_info map[string]int
 			com_package["budget"] = tmp["budget"]
 		}
 	}
+	toptype := qu.ObjToString(update_info["toptype"])
 	subtype := qu.ObjToString(update_info["subtype"])
+	if toptype == "" {
+		toptype = qu.ObjToString(tmp["toptype"])
+	}
 	if subtype == "" {
 		subtype = qu.ObjToString(tmp["subtype"])
 	}
-	if subtype == "单一" || subtype == "中标" || subtype == "成交" || subtype == "合同" {
+	//中标类
+	if toptype == "结果" || toptype == "其它" || subtype == "单一" {
 		if update_info["bidamount"] != nil {
 			com_package["bidamount"] = update_info["bidamount"]
 		} else {

+ 117 - 56
src/check_ai.go

@@ -22,13 +22,15 @@ func ChooseCheckDataAI(tmp map[string]interface{}, update_info *map[string]inter
 	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
 	//最终金额修正与选取
 	ChooseTheBestAmountField(tmp, update_info)
-	//最终单位修正与选取
-	ChooseTheBestUnitField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
+
+	//其他字段选取
+	ChooseTheBestOtherField(ai_zhipu, tmp, update_info, &ext_ai_record)
+
 	//字段记录
 	(*update_info)["ext_ai_record"] = ext_ai_record
 
 	//跨分类是否删除结果类字段
-	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+	if s_toptype == "结果" || s_toptype == "其它" || s_subtype == "单一" {
 
 	} else {
 		for k, _ := range Unset_Check {
@@ -37,7 +39,6 @@ func ChooseCheckDataAI(tmp map[string]interface{}, update_info *map[string]inter
 			}
 		}
 	}
-
 	return false
 }
 
@@ -71,15 +72,21 @@ func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]int
 
 // 选取字段
 func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) {
-	//基础字段···
 	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
 		(*update_info)["buyer"] = s_buyer
 		(*ext_ai_record)["buyer"] = tmp["buyer"]
-		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
-			delete((*update_info), "buyer")
-			delete((*ext_ai_record), "buyer")
+	}
+	if s_toptype == "拟建" {
+		if s_owner := qu.ObjToString(ai_zhipu["s_owner"]); s_owner != "" {
+			(*update_info)["owner"] = s_owner
+			(*ext_ai_record)["owner"] = tmp["owner"]
 		}
 	}
+	//基础字段···
+	if s_agency := qu.ObjToString(ai_zhipu["s_agency"]); s_agency != "" {
+		(*update_info)["agency"] = s_agency
+		(*ext_ai_record)["agency"] = tmp["agency"]
+	}
 	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
 		(*update_info)["projectname"] = s_projectname
 		(*ext_ai_record)["projectname"] = tmp["projectname"]
@@ -92,25 +99,16 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 		(*update_info)["budget"] = s_budget
 		(*ext_ai_record)["budget"] = tmp["budget"]
 	}
-	//地域字段···
-	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
+	//规则地域字段···
+	o_area, o_city, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["city"]), qu.ObjToString(tmp["district"])
 	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
 	if s_area != "" && s_area != "全国" {
+		//部分站点可以排除···更新逻辑
 		(*update_info)["area"] = s_area
 		if s_city != "" {
 			(*update_info)["city"] = s_city
 			if o_district != "" {
-				//判断抽取的区县是否合理···
-				isT := false
-				if ds := S_DistrictDict[o_district]; ds != nil {
-					for _, v := range ds {
-						if v.C_Name == s_city && v.P_Name == s_area {
-							isT = true
-							break
-						}
-					}
-				}
-				if !isT {
+				if s_city != o_city || s_area != o_area {
 					(*update_info)["district"] = ""
 				}
 			}
@@ -124,8 +122,17 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 		(*ext_ai_record)["city"] = tmp["city"]
 		(*ext_ai_record)["district"] = tmp["district"]
 	}
+
+	//对于地域的补充校验
+	site := qu.ObjToString(tmp["site"])
+	if (site == "军队采购网新网址" || site == "军队采购网") && o_area != "全国" && o_area != "" {
+		delete((*update_info), "area")
+		delete((*update_info), "city")
+		delete((*update_info), "district")
+	}
+
 	//先用外围字段替换
-	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" || s_subtype == "单一" {
+	if s_toptype == "结果" || s_toptype == "其它" || s_subtype == "单一" {
 		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
 			(*update_info)["bidamount"] = s_bidamount
 			(*ext_ai_record)["bidamount"] = tmp["bidamount"]
@@ -170,8 +177,6 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 		//		}
 		//	}
 		//}
-	} else {
-
 	}
 }
 
@@ -190,7 +195,7 @@ func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string
 				(*update_info)["budget"] = p_budget
 				(*ext_ai_record)["budget"] = tmp["budget"]
 			}
-			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+			if s_toptype == "结果" || s_toptype == "其它" || s_subtype == "单一" {
 				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
 					(*update_info)["s_winner"] = p_winner
 					(*update_info)["winner"] = p_winner
@@ -215,7 +220,7 @@ func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string
 				(*update_info)["budget"] = p_budget
 				(*ext_ai_record)["budget"] = tmp["budget"]
 			}
-			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+			if s_toptype == "结果" || s_toptype == "其它" || s_subtype == "单一" {
 				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" && (*update_info)["s_winner"] == nil {
 					(*update_info)["s_winner"] = p_winner
 					(*update_info)["winner"] = p_winner
@@ -259,39 +264,95 @@ func ChooseTheBestAmountField(tmp map[string]interface{}, update_info *map[strin
 	}
 }
 
-// 核对单位值
-func ChooseTheBestUnitField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) {
-	buyer := qu.ObjToString((*update_info)["buyer"])
-	s_winner := qu.ObjToString((*update_info)["s_winner"])
-	agency := qu.ObjToString((*update_info)["agency"])
-	if buyer == "" {
-		buyer = qu.ObjToString(tmp["buyer"])
+// 选择其它字段
+func ChooseTheBestOtherField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) {
+	if s_purchasinglist := qu.ObjToMap(ai_zhipu["s_purchasinglist"]); s_purchasinglist != nil {
+		if purchasinglist := IsMarkInterfaceMap((*s_purchasinglist)["purchasinglist"]); len(purchasinglist) > 0 {
+			(*update_info)["purchasinglist"] = purchasinglist
+		}
+		if purchasinglist_label := qu.ObjToMap((*s_purchasinglist)["purchasinglist_label"]); purchasinglist_label != nil {
+			(*update_info)["purchasinglist_label"] = purchasinglist_label
+		}
+		if jycodes := qu.ObjToString((*s_purchasinglist)["jycodes"]); jycodes != "" {
+			(*update_info)["jycodes"] = jycodes
+		}
+		if purchasing := qu.ObjToString((*s_purchasinglist)["purchasing"]); purchasing != "" {
+			(*update_info)["purchasing"] = purchasing
+		}
 	}
-	if s_winner == "" {
-		s_winner = qu.ObjToString(tmp["s_winner"])
+	//新增字段
+	if biddingcode := qu.ObjToString(ai_zhipu["s_biddingcode"]); biddingcode != "" {
+		(*update_info)["biddingcode"] = biddingcode
 	}
-	if agency == "" {
-		agency = qu.ObjToString(tmp["agency"])
+	if packagecode := qu.ObjToString(ai_zhipu["s_packagecode"]); packagecode != "" {
+		(*update_info)["packagecode"] = packagecode
 	}
 
-	if buyer == s_winner && buyer != "" {
-		if strings.Contains(buyer, "公司") {
-			(*update_info)["buyer"] = ""
-			(*ext_ai_record)["buyer"] = tmp["buyer"]
-		} else {
-			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
-				(*update_info)["s_winner"] = ""
-				(*update_info)["winner"] = ""
-				(*ext_ai_record)["s_winner"] = tmp["s_winner"]
-				(*ext_ai_record)["winner"] = tmp["winner"]
-			}
-		}
+	//替换字段
+	if contractcode := qu.ObjToString(ai_zhipu["s_contractcode"]); contractcode != "" {
+		(*update_info)["contractcode"] = contractcode
+		(*ext_ai_record)["contractcode"] = tmp["contractcode"]
 	}
-	if agency != "" {
-		if agency == buyer || agency == s_winner {
-			(*update_info)["agency"] = ""
-			(*ext_ai_record)["agency"] = tmp["buyer"]
-		}
+	if bidopenaddress := qu.ObjToString(ai_zhipu["s_bidopenaddress"]); bidopenaddress != "" {
+		(*update_info)["bidopenaddress"] = bidopenaddress
+		(*ext_ai_record)["bidopenaddress"] = tmp["bidopenaddress"]
+	}
+	//折扣
+	if biddiscount := qu.Float64All(ai_zhipu["s_biddiscount"]); biddiscount > 0.0 {
+		(*update_info)["biddiscount"] = biddiscount
+		(*ext_ai_record)["biddiscount"] = tmp["biddiscount"]
+	}
+	//时间戳
+	if bidopentime := qu.Int64All(ai_zhipu["s_bidopentime"]); bidopentime > 0 {
+		(*update_info)["bidopentime"] = bidopentime
+		(*ext_ai_record)["bidopentime"] = tmp["bidopentime"]
+	}
+	if bidendtime := qu.Int64All(ai_zhipu["s_bidendtime"]); bidendtime > 0 {
+		(*update_info)["bidendtime"] = bidendtime
+		(*ext_ai_record)["bidendtime"] = tmp["bidendtime"]
+	}
+	if docstarttime := qu.Int64All(ai_zhipu["s_docstarttime"]); docstarttime > 0 {
+		(*update_info)["docstarttime"] = docstarttime
+		(*ext_ai_record)["docstarttime"] = tmp["docstarttime"]
+	}
+	if docendtime := qu.Int64All(ai_zhipu["s_docendtime"]); docendtime > 0 {
+		(*update_info)["docendtime"] = docendtime
+		(*ext_ai_record)["docendtime"] = tmp["docendtime"]
+	}
+	//
+	if signstarttime := qu.Int64All(ai_zhipu["s_signstarttime"]); signstarttime > 0 {
+		(*update_info)["signstarttime"] = signstarttime
+		(*ext_ai_record)["signstarttime"] = tmp["signstarttime"]
+	}
+	if signendtime := qu.Int64All(ai_zhipu["s_signendtime"]); signendtime > 0 {
+		(*update_info)["signendtime"] = signendtime
+		(*ext_ai_record)["signendtime"] = tmp["signendtime"]
+	}
+
+	//联系方式
+	if buyerperson := qu.ObjToString(ai_zhipu["s_buyerperson"]); buyerperson != "" {
+		(*update_info)["buyerperson"] = buyerperson
+		(*ext_ai_record)["buyerperson"] = tmp["buyerperson"]
+	}
+	if buyertel := qu.ObjToString(ai_zhipu["s_buyertel"]); buyertel != "" {
+		(*update_info)["buyertel"] = buyertel
+		(*ext_ai_record)["buyertel"] = tmp["buyertel"]
+	}
+	if agencyperson := qu.ObjToString(ai_zhipu["s_agencyperson"]); agencyperson != "" {
+		(*update_info)["agencyperson"] = agencyperson
+		(*ext_ai_record)["agencyperson"] = tmp["agencyperson"]
+	}
+	if agencytel := qu.ObjToString(ai_zhipu["s_agencytel"]); agencytel != "" {
+		(*update_info)["agencytel"] = agencytel
+		(*ext_ai_record)["agencytel"] = tmp["agencytel"]
+	}
+	if winnerperson := qu.ObjToString(ai_zhipu["s_winnerperson"]); winnerperson != "" {
+		(*update_info)["winnerperson"] = winnerperson
+		(*ext_ai_record)["winnerperson"] = tmp["winnerperson"]
+	}
+	if winnertel := qu.ObjToString(ai_zhipu["s_winnertel"]); winnertel != "" {
+		(*update_info)["winnertel"] = winnertel
+		(*ext_ai_record)["winnertel"] = tmp["winnertel"]
 	}
 }
 
@@ -347,7 +408,7 @@ func staffInfo(pkg map[string]interface{}) bool {
 
 // 核对ai分包
 func staffAiPackageInfo(com_package []map[string]interface{}, s_toptype string, s_subtype string) []map[string]interface{} {
-	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+	if s_toptype == "结果" || s_toptype == "其它" || s_subtype == "单一" {
 		return com_package
 	}
 	new_com_package := []map[string]interface{}{}
@@ -379,7 +440,7 @@ func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]inte
 			}
 		}
 	}
-	//2、招标、结果错误校正
+	//2、招标、结果错误校正 (对于开标记录···有异常)
 	if toptype_ai != "结果" && toptype_rule == "结果" {
 		//return toptype_rule,subtype_rule//默认规则为准
 		if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {

+ 283 - 0
src/check_other.go

@@ -0,0 +1,283 @@
+package main
+
+import (
+	qu "qfw/util"
+	"regexp"
+	"strings"
+	"unicode/utf8"
+)
+
+var ruleReg1 = regexp.MustCompile("^(中部|西部)管道公司$") // whole-string match: one of the two listed region prefixes followed by the fixed company suffix
+var ruleReg2 = regexp.MustCompile("^(东北|华南|华北|华中|苏北)[分]?公司$") // whole-string match: one of the five listed region prefixes, an optional branch marker, then the company suffix
+var ruleReg3 = regexp.MustCompile("([分]?公司)$") // anchored at the end only: captures the trailing company suffix with its optional branch marker
+
+// getReplenishRules runs every loaded supplementary rule against one record.
+//
+// tmp is the stored record and update is the pending change set for it; each
+// rule handler may add, overwrite or delete keys in update. Rules whose "rule"
+// name is not recognised are ignored.
+func getReplenishRules(tmp map[string]interface{}, update map[string]interface{}) {
+	// initBuyer rebuilds ReplensihRules while holding ruleLock (at start-up and
+	// from the daily cron job), and this function is called from the checker
+	// goroutines. Take the slice under the same lock so a record is never
+	// processed against an empty or half-filled rule list during a reload.
+	ruleLock.Lock()
+	rules := ReplensihRules
+	ruleLock.Unlock()
+
+	for _, v := range rules {
+		switch qu.ObjToString(v["rule"]) {
+		case "补充前缀地域":
+			RuleInfo1(tmp, update, v)
+		case "省市映射规则", "省市前缀规则":
+			RuleInfo2(tmp, update, v)
+		case "单位全称映射":
+			RuleInfo3(tmp, update, v)
+		case "地域修正规则":
+			RuleInfo4(tmp, update, v)
+		case "省级简称拼接", "市级全称拼接":
+			RuleInfo5(tmp, update, v)
+		}
+	}
+}
+
+// RuleInfo1 fills in a missing buyer from a site-level rule.
+//
+// It does nothing when the record already has a buyer (in tmp or in update),
+// when the record's site differs from the rule's "site", or when the rule has
+// no "buyer" value. Otherwise it writes update["buyer"]:
+//   - rule "prefix" != 1: the rule's buyer as-is;
+//   - rule "prefix" == 1: the rule's buyer followed by the record's area and a
+//     branch-company suffix, or the bare buyer when the area is empty or the
+//     nationwide placeholder.
+func RuleInfo1(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	if qu.ObjToString(tmp["buyer"]) != "" || qu.ObjToString(update["buyer"]) != "" {
+		return
+	}
+	// Compare against the rule's "site" field. The earlier code looked the rule
+	// up by the record's site name (info[site]), which only matched when the
+	// site was empty; the other site-scoped rules all read info["site"].
+	if qu.ObjToString(tmp["site"]) != qu.ObjToString(info["site"]) {
+		return
+	}
+	buyer := qu.ObjToString(info["buyer"])
+	if buyer == "" {
+		return
+	}
+	if qu.IntAll(info["prefix"]) != 1 {
+		update["buyer"] = buyer
+		return
+	}
+
+	// Prefer the area chosen in this pass over the stored one, unless it is
+	// empty or the nationwide placeholder.
+	area := qu.ObjToString(tmp["area"])
+	if picked := qu.ObjToString(update["area"]); picked != "" && picked != "全国" {
+		area = picked
+	}
+	switch area {
+	case "", "全国":
+		update["buyer"] = buyer
+	case "内蒙古":
+		// This area name needs its administrative suffix before the branch suffix.
+		update["buyer"] = buyer + area + "自治区" + "分公司"
+	default:
+		update["buyer"] = buyer + area + "分公司"
+	}
+}
+
+// RuleInfo2 expands a three-character buyer abbreviation.
+//
+// Two rule names are handled, selected by info["rule"]:
+//   - mapping rule: when "area-city-buyer" equals info["name"], the buyer is
+//     replaced by info["s_name"] (if non-empty);
+//   - prefix rule: when the buyer equals info["name"], the city (or, failing
+//     that, a non-nationwide area) is prepended to the buyer.
+//
+// Area, city and buyer are taken from update when set there, otherwise from tmp.
+func RuleInfo2(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	// The length gate is applied to the stored buyer, before any override from update.
+	if utf8.RuneCountInString(qu.ObjToString(tmp["buyer"])) != 3 {
+		return
+	}
+	effective := func(field string) string {
+		if v := qu.ObjToString(update[field]); v != "" {
+			return v
+		}
+		return qu.ObjToString(tmp[field])
+	}
+	area, city, buyer := effective("area"), effective("city"), effective("buyer")
+	name := qu.ObjToString(info["name"])
+
+	switch qu.ObjToString(info["rule"]) {
+	case "省市映射规则":
+		if area+"-"+city+"-"+buyer != name {
+			return
+		}
+		if full := qu.ObjToString(info["s_name"]); full != "" {
+			update["buyer"] = full
+		}
+	case "省市前缀规则":
+		if buyer != name {
+			return
+		}
+		if city != "" {
+			update["buyer"] = city + buyer
+		} else if area != "" && area != "全国" {
+			update["buyer"] = area + buyer
+		}
+	}
+}
+
+// RuleInfo3 maps a buyer to its full name for one site: when the record's site
+// and current buyer both equal the rule's "site" and "buyer", update["buyer"]
+// is set to the rule's "name" (if non-empty). The current buyer is the one in
+// update when set, otherwise the one in tmp.
+func RuleInfo3(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	current := qu.ObjToString(update["buyer"])
+	if current == "" {
+		current = qu.ObjToString(tmp["buyer"])
+	}
+	if qu.ObjToString(tmp["site"]) != qu.ObjToString(info["site"]) {
+		return
+	}
+	if current != qu.ObjToString(info["buyer"]) {
+		return
+	}
+	if full := qu.ObjToString(info["name"]); full != "" {
+		update["buyer"] = full
+	}
+}
+
+// RuleInfo4 discards a pending region change when both sides match the rule:
+// the pending "area-city" from update must equal info["s_name"] and the stored
+// "area-city-district" from tmp must equal info["name"]. On a match the area,
+// city and district keys are removed from update.
+func RuleInfo4(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	pending := strings.Join([]string{
+		qu.ObjToString(update["area"]),
+		qu.ObjToString(update["city"]),
+	}, "-")
+	if pending != qu.ObjToString(info["s_name"]) {
+		return
+	}
+	stored := strings.Join([]string{
+		qu.ObjToString(tmp["area"]),
+		qu.ObjToString(tmp["city"]),
+		qu.ObjToString(tmp["district"]),
+	}, "-")
+	if stored != qu.ObjToString(info["name"]) {
+		return
+	}
+	for _, field := range []string{"area", "city", "district"} {
+		delete(update, field)
+	}
+}
+
+// RuleInfo5 prepends the rule's "name" to a regional buyer name for one site.
+//
+// The current buyer (update first, then tmp) must be non-empty and the record's
+// site must equal the rule's "site". Two rule names are handled:
+//   - province-level rule: applies when the buyer matches ruleReg1 or ruleReg2,
+//     or when the buyer minus its ruleReg3 suffix is a key of S_ProvinceDict;
+//   - city-level rule: applies when the buyer minus its ruleReg3 suffix (tried
+//     as-is, then with the city marker appended if the first lookup is nil)
+//     resolves to exactly one S_CityDict entry. With "prefix" == 1 that entry's
+//     P_Name and a fixed company suffix are inserted between name and buyer.
+func RuleInfo5(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	buyer := qu.ObjToString(update["buyer"])
+	if buyer == "" {
+		buyer = qu.ObjToString(tmp["buyer"])
+	}
+	if buyer == "" || qu.ObjToString(tmp["site"]) != qu.ObjToString(info["site"]) {
+		return
+	}
+	name := qu.ObjToString(info["name"])
+
+	switch qu.ObjToString(info["rule"]) {
+	case "省级简称拼接":
+		if ruleReg1.MatchString(buyer) || ruleReg2.MatchString(buyer) {
+			update["buyer"] = name + buyer
+			return
+		}
+		suffix := ruleReg3.FindString(buyer)
+		if suffix == "" {
+			return
+		}
+		if S_ProvinceDict[strings.TrimSuffix(buyer, suffix)] != nil {
+			update["buyer"] = name + buyer
+		}
+	case "市级全称拼接":
+		suffix := ruleReg3.FindString(buyer)
+		if suffix == "" {
+			return
+		}
+		stem := strings.TrimSuffix(buyer, suffix)
+		citys := S_CityDict[stem]
+		if citys == nil {
+			// Only fall back to the marked form when the bare stem is unknown.
+			citys = S_CityDict[stem+"市"]
+		}
+		if len(citys) != 1 {
+			return
+		}
+		if qu.IntAll(info["prefix"]) == 1 {
+			update["buyer"] = name + citys[0].P_Name + "有限公司" + buyer
+		} else {
+			update["buyer"] = name + buyer
+		}
+	}
+}
+
+/*******************************************
+*******************************************
+*******************************************
+*******************************************
+*******************************************/
+// getVerifyBidDiscount detects a bid amount that merely repeats the discount
+// rate. The effective amount and discount are the positive values in update,
+// falling back to tmp. When both are positive and equal, the pending
+// "bidamount" is removed from update.
+//
+// It returns true only in that case and only if tmp itself holds a positive
+// bidamount, i.e. when the stored field also has to be removed by the caller.
+func getVerifyBidDiscount(tmp map[string]interface{}, update map[string]interface{}) bool {
+	storedAmount := qu.Float64All(tmp["bidamount"])
+
+	amount := storedAmount
+	if v := qu.Float64All(update["bidamount"]); v > 0 {
+		amount = v
+	}
+	discount := qu.Float64All(tmp["biddiscount"])
+	if v := qu.Float64All(update["biddiscount"]); v > 0 {
+		discount = v
+	}
+
+	if !(discount > 0 && amount > 0 && amount == discount) {
+		return false
+	}
+	delete(update, "bidamount")
+	return storedAmount > 0
+}
+
+// getTheBestUnit resolves clashes between buyer, winner and agency. Each value
+// is read from update_info when set there, otherwise from tmp.
+//
+//   - buyer equals winner: if the name contains the company marker the buyer is
+//     blanked, otherwise s_winner and winner are blanked;
+//   - agency equals the buyer or the winner: the agency is blanked.
+//
+// Blanking means writing "" into update_info; the comparison for the agency
+// uses the values as resolved before any blanking.
+func getTheBestUnit(tmp map[string]interface{}, update_info map[string]interface{}) {
+	resolve := func(field string) string {
+		if v := qu.ObjToString(update_info[field]); v != "" {
+			return v
+		}
+		return qu.ObjToString(tmp[field])
+	}
+	buyer, winner, agency := resolve("buyer"), resolve("s_winner"), resolve("agency")
+
+	if buyer != "" && buyer == winner {
+		if strings.Contains(buyer, "公司") {
+			update_info["buyer"] = ""
+		} else {
+			update_info["s_winner"] = ""
+			update_info["winner"] = ""
+		}
+	}
+	if agency != "" && (agency == buyer || agency == winner) {
+		update_info["agency"] = ""
+	}
+}
+
+// getScientificNotation repairs amounts affected by scientific notation.
+//
+// Nothing happens unless tmp["e_bidamount"] is positive. The target field is
+// "bidamount" for the four result-type subtypes listed below and "budget" for
+// every other subtype (subtype read from update first, then tmp). If the
+// target's effective value (positive value in update, else tmp) lies strictly
+// between 0 and 50, update[target] is set to e_bidamount.
+// NOTE(review): other checks in this commit classify result notices by toptype
+// rather than this subtype list - confirm the difference is intended.
+func getScientificNotation(tmp map[string]interface{}, update map[string]interface{}) {
+	sciAmount := qu.Float64All(tmp["e_bidamount"])
+	if sciAmount <= 0.0 {
+		return
+	}
+
+	subtype := qu.ObjToString(update["subtype"])
+	if subtype == "" {
+		subtype = qu.ObjToString(tmp["subtype"])
+	}
+	target := "budget"
+	switch subtype {
+	case "合同", "中标", "成交", "单一":
+		target = "bidamount"
+	}
+
+	current := qu.Float64All(tmp[target])
+	if v := qu.Float64All(update[target]); v > 0.0 {
+		current = v
+	}
+	if current > 0.0 && current < 50.0 {
+		update[target] = sciAmount
+	}
+}

+ 1 - 0
src/check_pub.go

@@ -5,6 +5,7 @@ import (
 )
 
 func getCheckDataPub(tmp map[string]interface{}, update map[string]interface{}) {
+
 	publishtime := qu.IntAll(tmp["publishtime"])
 	bidopentime := qu.IntAll(tmp["bidopentime"])
 	bidendtime := qu.IntAll(tmp["bidendtime"])

+ 8 - 8
src/config.json

@@ -1,21 +1,21 @@
 {
   "udpport": ":1799",
-  "udpport_ai":":1792",
+  "udpport_ai":":1799",
   "update_ai": true,
   "mongodb": {
-    "addrName": "127.0.0.1:27017",
-    "dbName": "zhengkun",
-    "collName": "extract_test",
+    "addrName": "127.0.0.1:12001",
+    "dbName": "qfw",
+    "collName": "result_20220219",
     "username": "",
     "password": "",
     "pool": 10
   },
   "qy_mongodb": {
-    "qy_addrName": "127.0.0.1:27017",
-    "qy_dbName": "qfw",
+    "qy_addrName": "127.0.0.1:12005",
+    "qy_dbName": "mixdata",
     "qy_collName": "qyxy_std",
-    "qy_username": "",
-    "qy_password": "",
+    "qy_username": "zhengkun",
+    "qy_password": "zk@123123",
     "pool": 10
   },
   "address_name": "address_jy_2022",

+ 51 - 19
src/initvar.go

@@ -42,8 +42,13 @@ var (
 	//监控相关
 	responselock     sync.Mutex
 	lastNodeResponse int64
-	update_ai        bool
 	dataprocess      map[string]interface{}
+
+	//补充规则全···
+	ReplensihRules []map[string]interface{}
+
+	ruleLock sync.Mutex
+	IsLocal  bool
 )
 
 // mgo-配置等
@@ -57,7 +62,11 @@ func initMgo() {
 		UserName:    mconf["username"].(string),
 		Password:    mconf["password"].(string),
 	}
-	data_mgo.InitPool()
+	if IsLocal {
+		data_mgo.InitPoolDirect()
+	} else {
+		data_mgo.InitPool()
+	}
 
 	qy_mconf := Sysconfig["qy_mongodb"].(map[string]interface{})
 	qy_mgo = &MongodbSim{
@@ -67,34 +76,57 @@ func initMgo() {
 		UserName:    qy_mconf["qy_username"].(string),
 		Password:    qy_mconf["qy_password"].(string),
 	}
-	qy_mgo.InitPool()
+	if IsLocal {
+		qy_mgo.InitPoolDirect()
+	} else {
+		qy_mgo.InitPool()
+	}
 
-	bid_mgo = &MongodbSim{
-		MongodbAddr: "172.17.189.140:27080,172.17.189.141:27081",
-		DbName:      "qfw",
-		Size:        10,
-		UserName:    "zhengkun",
-		Password:    "zk@123123",
+	if !IsLocal {
+		bid_mgo = &MongodbSim{
+			MongodbAddr: "172.31.31.202:27081,172.20.45.128:27080",
+			DbName:      "qfw",
+			Size:        10,
+			UserName:    "zhengkun",
+			Password:    "zk@123123",
+		}
+		bid_mgo.InitPool()
+	} else {
+		bid_mgo = &MongodbSim{
+			MongodbAddr: "127.0.0.1:12005",
+			DbName:      "qfw",
+			Size:        10,
+			UserName:    "zhengkun",
+			Password:    "zk@123123",
+		}
+		bid_mgo.InitPoolDirect()
 	}
-	bid_mgo.InitPool()
 
 	coll_name = mconf["collName"].(string)
 	address_name = Sysconfig["address_name"].(string)
 	nextNode = qu.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
 	check_thread = qu.IntAll(Sysconfig["check_thread"])
-	update_ai = Sysconfig["update_ai"].(bool)
-	if !update_ai {
-		dataprocess = map[string]interface{}{
-			"dataprocess": 4,
-		}
-	} else {
-		dataprocess = map[string]interface{}{
-			"dataprocess_ai": 3,
-		}
+	dataprocess = map[string]interface{}{
+		"dataprocess": 5,
 	}
 	log.Println("mgo 等配置,加载完毕...")
 }
 
+// initBuyer (re)loads the supplementary rules from the "bidding_bc" collection
+// into ReplensihRules, keeping only documents whose "isopen" is 1. It is called
+// once at start-up and again from the daily cron job.
+//
+// If the query fails, the previously loaded rules are left in place instead of
+// being replaced by an empty list.
+func initBuyer() {
+	// Query before touching the shared slice so ruleLock is not held across the
+	// database round-trip.
+	dataArr, err := bid_mgo.Find("bidding_bc", map[string]interface{}{}, nil, nil)
+	if err != nil {
+		log.Println("补充校验规则加载失败···", err)
+		return
+	}
+	rules := make([]map[string]interface{}, 0, len(dataArr))
+	for _, v := range dataArr {
+		if qu.IntAll(v["isopen"]) == 1 {
+			rules = append(rules, v)
+		}
+	}
+
+	// Publish the finished list in one step.
+	ruleLock.Lock()
+	ReplensihRules = rules
+	ruleLock.Unlock()
+	log.Println("补充校验规则加载完毕···", len(rules))
+}
+
 // 初始化城市
 func initCheckCity() {
 	//初始化-城市配置

+ 20 - 3
src/main.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"github.com/cron"
 	"log"
 	mu "mfw/util"
 	qu "qfw/util"
@@ -14,7 +15,18 @@ func init() {
 	if len(Sysconfig) == 0 {
 		log.Fatal("读取配置文件失败", Sysconfig)
 	}
-	initMgo() //初始化mgo
+	IsLocal = false
+	initMgo()       //初始化mgo
+	initBuyer()     //加载采购单位
+	initCheckCity() //加载地域信息
+
+	//定时更新站点信息
+	c := cron.New()
+	c.AddFunc("0 0 10 * * ?", func() {
+		initBuyer()
+	})
+	c.Start()
+
 	//更新池
 	UpdateTask = newUpdatePool()
 	go UpdateTask.updateData()
@@ -34,14 +46,19 @@ func main() {
 	go checkMailJob()
 	go lastUdpJob()
 
-	//test()
 	lock := make(chan bool)
 	<-lock
 }
 
 func test() {
 	log.Println("测试修正...")
-	startCheckData("100000000000000000000000", "900000000000000000000000")
+	//update := map[string]interface{}{}
+	//RuleInfo2(map[string]interface{}{"area": "天津", "city": "天津市", "buyer": "天航局"}, update, map[string]interface{}{"rule": "省市映射规则", "name": "天津-天津市-天航局", "s_name": "中交天津航道局有限公司"})
+	//RuleInfo5(map[string]interface{}{"site": "国家石油天然气管网集团有限公司数字供应链平台", "buyer": "华北公司"}, update, map[string]interface{}{"site": "国家石油天然气管网集团有限公司数字供应链平台", "rule": "省级简称拼接"})
+	//RuleInfo5(map[string]interface{}{"site": "中国移动采购与招标网", "buyer": "郑州市分公司"}, update, map[string]interface{}{"site": "中国移动采购与招标网", "rule": "市级全称拼接"})
+
+	startCheckData("6850f0745f834436f0b3c191", "6850f0745f834436f0b3c192")
+
 	lock := make(chan bool)
 	<-lock
 }

+ 41 - 14
src/mgo.go

@@ -142,22 +142,49 @@ func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
 	ms = nil
 }
 
-func (m *MongodbSim) InitPool() {
+func (m *MongodbSim) InitPoolDirect() {
 	opts := options.Client()
 	opts.SetConnectTimeout(3 * time.Second)
 	opts.ApplyURI("mongodb://" + m.MongodbAddr)
 	opts.SetMaxPoolSize(uint64(m.Size))
+	opts.SetDirect(true)
 	m.pool = make(chan bool, m.Size)
 
-	if m.UserName !="" && m.Password !="" {
+	if m.UserName != "" && m.Password != "" {
 		cre := options.Credential{
-			Username:m.UserName,
-			Password:m.Password,
+			Username:   m.UserName,
+			Password:   m.Password,
+			AuthSource: "admin",
 		}
 		opts.SetAuth(cre)
 	}
 
+	opts.SetMaxConnIdleTime(2 * time.Hour)
+	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
+	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
+	client, err := mongo.Connect(m.ShortCtx, opts)
+	if err != nil {
+		log.Println("mgo init error:", err.Error())
+	} else {
+		m.C = client
+		log.Println("init success")
+	}
+}
 
+func (m *MongodbSim) InitPool() {
+	opts := options.Client()
+	opts.SetConnectTimeout(3 * time.Second)
+	opts.ApplyURI("mongodb://" + m.MongodbAddr)
+	opts.SetMaxPoolSize(uint64(m.Size))
+	m.pool = make(chan bool, m.Size)
+
+	if m.UserName != "" && m.Password != "" {
+		cre := options.Credential{
+			Username: m.UserName,
+			Password: m.Password,
+		}
+		opts.SetAuth(cre)
+	}
 
 	opts.SetMaxConnIdleTime(2 * time.Hour)
 	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
@@ -178,7 +205,7 @@ func (m *MongodbSim) Close() {
 	<-m.pool
 }
 
-//批量插入
+// 批量插入
 func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
 	m.Open()
 	defer m.Close()
@@ -205,7 +232,7 @@ func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[
 	return r.UpsertedIDs, true
 }
 
-//批量插入
+// 批量插入
 func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
 	m.Open()
 	defer m.Close()
@@ -224,7 +251,7 @@ func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
 	return true
 }
 
-//保存
+// 保存
 func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
 	m.Open()
 	defer m.Close()
@@ -236,7 +263,7 @@ func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
 	return r.InsertedID
 }
 
-//更新by Id
+// 更新by Id
 func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
 	m.Open()
 	defer m.Close()
@@ -248,7 +275,7 @@ func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
 	return true
 }
 
-//删除by id
+// 删除by id
 func (m *MongodbSim) DeleteById(c, id string) int64 {
 	m.Open()
 	defer m.Close()
@@ -260,7 +287,7 @@ func (m *MongodbSim) DeleteById(c, id string) int64 {
 	return r.DeletedCount
 }
 
-//通过条件删除
+// 通过条件删除
 func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
 	m.Open()
 	defer m.Close()
@@ -272,7 +299,7 @@ func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
 	return r.DeletedCount
 }
 
-//findbyid
+// findbyid
 func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
 	m.Open()
 	defer m.Close()
@@ -283,7 +310,7 @@ func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
 	return v
 }
 
-//findone
+// findone
 func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
 	m.Open()
 	defer m.Close()
@@ -294,7 +321,7 @@ func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]
 	return v
 }
 
-//find
+// find
 func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
 	m.Open()
 	defer m.Close()
@@ -313,7 +340,7 @@ func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields i
 	return results, nil
 }
 
-//创建_id
+// 创建_id
 func NewObjectId() primitive.ObjectID {
 	return primitive.NewObjectID()
 }

+ 1 - 1
src/udprocess.go

@@ -52,7 +52,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 func sendNextNode(sid string, eid string) {
 	//更新记录状态
 	updateProcessUdpIdsInfo(sid, eid)
-	log.Println("判重任务完成...发送下节点udp...")
+	log.Println("清洗任务完成...发送下节点udp...")
 	for _, to := range nextNode {
 		key := sid + "-" + eid + "-" + qu.ObjToString(to["stype"])
 		by, _ := json.Marshal(map[string]interface{}{