فهرست منبع

备份新版-ai抽取清洗合并

zhengkun 10 ماه پیش
والد
کامیت
3665603dbf
13 فایلهای تغییر یافته به همراه 628 افزوده شده و 834 حذف شده
  1. 63 0
      src/check.go
  2. 218 52
      src/check_ai.go
  3. 0 57
      src/check_bidamount.go
  4. 0 50
      src/check_category.go
  5. 0 286
      src/check_city.go
  6. 8 8
      src/check_publishtime.go
  7. 8 4
      src/config.json
  8. 158 0
      src/initvar.go
  9. 9 376
      src/main.go
  10. 1 1
      src/mark
  11. 0 0
      src/udpmail.go
  12. 72 0
      src/udprocess.go
  13. 91 0
      src/udptask.go

+ 63 - 0
src/check.go

@@ -0,0 +1,63 @@
+package main
+
+import (
+	"log"
+	qu "qfw/util"
+	"sync"
+)
+
+// startCheckData reviews every document whose _id lies in (sid, eid], queues the resulting $set/$unset updates, and calls sendNextNode once all workers have finished.
+func startCheckData(sid, eid string) {
+	defer qu.Catch() // NOTE(review): presumably recovers panics for this goroutine only; the workers started below would not be covered — confirm
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gt":  StringTOBsonId(sid),
+			"$lte": StringTOBsonId(eid),
+		},
+	}
+	check_pool := make(chan bool, check_thread)
+	check_wg := &sync.WaitGroup{}
+	sess := data_mgo.GetMgoConn()
+	defer data_mgo.DestoryMongoConn(sess)
+	it := sess.DB(data_mgo.DbName).C(coll_name).Find(&q).Iter()
+	total := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Println("当前数量:", total, tmp["_id"])
+		}
+		update_id := map[string]interface{}{"_id": tmp["_id"]}
+		check_pool <- true // blocks while check_thread workers are already running
+		check_wg.Add(1)
+		go func(tmp map[string]interface{}, update_id map[string]interface{}) {
+			defer func() {
+				<-check_pool
+				check_wg.Done()
+			}()
+			// Fields to $set on this document.
+			update_info := make(map[string]interface{}, 0)
+			// Review: publish time.
+			getCheckDataPublishtime(tmp, update_info)
+			is_unset := false
+			if update_ai { // Review: merge the large-model output with the extracted values.
+				is_unset = getCheckDataAI(tmp, update_info)
+			}
+			// Finally decide whether anything has to be written back.
+			if len(update_info) > 0 {
+				UpdateTask.updatePool <- []map[string]interface{}{
+					update_id,
+					{"$set": update_info},
+				}
+			}
+			if is_unset {
+				UpdateTask.updatePool <- []map[string]interface{}{
+					update_id,
+					{"$unset": unset_check},
+				}
+			}
+		}(tmp, update_id)
+		tmp = make(map[string]interface{}) // fresh map per iteration; the previous one is now owned by the worker
+	}
+	check_wg.Wait()
+	log.Println("data_clean is over ", total)
+	sendNextNode(sid, eid)
+}

+ 218 - 52
src/check_ai.go

@@ -1,53 +1,41 @@
 package main
 
-import qu "qfw/util"
-
-// 大模型与抽取数据合并计算
-func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) {
-	if tmp["ai_zhipu"] == nil {
-		return
-	}
-	//记录抽取原值
-	ext_ai := map[string]interface{}{}
-	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
-	//分类字段···
-	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
-	s_toptype, s_subtype = CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
-	if s_toptype != "" && s_subtype != "" {
-		(*update_check)["toptype"] = s_toptype
-		(*update_check)["subtype"] = s_subtype
-		ext_ai["toptype"] = tmp["toptype"]
-		ext_ai["subtype"] = tmp["subtype"]
-	} else {
-		s_toptype = qu.ObjToString(tmp["toptype"])
-		s_subtype = qu.ObjToString(tmp["subtype"])
-	}
+import (
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	qu "qfw/util"
+	"strings"
+)
 
+// 选取字段
+func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
 	//基础字段···
 	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
-		(*update_check)["buyer"] = s_buyer
-		ext_ai["buyer"] = tmp["buyer"]
+		update_info["buyer"] = s_buyer
+		ext_ai_record["buyer"] = tmp["buyer"]
+		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
+			delete(update_info, "buyer")
+			delete(ext_ai_record, "buyer")
+		}
 	}
 	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
-		(*update_check)["projectname"] = s_projectname
-		ext_ai["projectname"] = tmp["projectname"]
+		update_info["projectname"] = s_projectname
+		ext_ai_record["projectname"] = tmp["projectname"]
 	}
 	if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
-		(*update_check)["projectcode"] = s_projectcode
-		ext_ai["projectcode"] = tmp["projectcode"]
+		update_info["projectcode"] = s_projectcode
+		ext_ai_record["projectcode"] = tmp["projectcode"]
 	}
 	if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
-		(*update_check)["budget"] = s_budget
-		ext_ai["budget"] = tmp["budget"]
+		update_info["budget"] = s_budget
+		ext_ai_record["budget"] = tmp["budget"]
 	}
-
 	//地域字段···
 	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
 	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
 	if s_area != "" && s_area != "全国" {
-		(*update_check)["area"] = s_area
+		update_info["area"] = s_area
 		if s_city != "" {
-			(*update_check)["city"] = s_city
+			update_info["city"] = s_city
 			if o_district != "" {
 				//判断抽取的区县是否合理···
 				isT := false
@@ -60,32 +48,194 @@ func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interfa
 					}
 				}
 				if !isT {
-					(*update_check)["district"] = ""
+					update_info["district"] = ""
 				}
 			}
 		} else {
 			if o_area != s_area {
-				(*update_check)["city"] = ""
-				(*update_check)["district"] = ""
+				update_info["city"] = ""
+				update_info["district"] = ""
 			}
 		}
-		ext_ai["area"] = tmp["area"]
-		ext_ai["city"] = tmp["city"]
-		ext_ai["district"] = tmp["district"]
+		ext_ai_record["area"] = tmp["area"]
+		ext_ai_record["city"] = tmp["city"]
+		ext_ai_record["district"] = tmp["district"]
 	}
+	//先用外围字段替换
+	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" || s_subtype == "单一" {
+		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
+			update_info["bidamount"] = s_bidamount
+			ext_ai_record["bidamount"] = tmp["bidamount"]
+		}
+		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
+			update_info["s_winner"] = s_winner
+			ext_ai_record["s_winner"] = tmp["s_winner"]
+			update_info["winner"] = s_winner
+			ext_ai_record["winner"] = tmp["winner"]
+			//对于winner来说...规则值有包含关系,采用规则值
+			if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+				if strings.Contains(s_winner, winner) {
+					delete(update_info, "winner")
+					delete(ext_ai_record, "winner")
+				}
+			}
+		}
+		//旧版弃用
+		//isRulePkg := false
+		//if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
+		//	if !staffInfo(pkg) {
+		//		isRulePkg = true
+		//	}
+		//}
+		//if isRulePkg { //优先采用大模型分包-值替换
+		//	if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
+		//		if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
+		//			if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
+		//				(*update_check)["s_winner"] = p_winner
+		//				(*update_check)["winner"] = p_winner
+		//				ext_ai_record["s_winner"] = tmp["s_winner"]
+		//				ext_ai_record["winner"] = tmp["winner"]
+		//			}
+		//			if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
+		//				(*update_check)["bidamount"] = p_bidamount
+		//				ext_ai_record["bidamount"] = tmp["bidamount"]
+		//			}
+		//			if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
+		//				(*update_check)["package"] = s_package
+		//				ext_ai_record["package"] = tmp["package"]
+		//			}
+		//		}
+		//	}
+		//}
+	} else {
 
-	//中标字段···
-	isRulePkg := false
-	if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
-		if !staffInfo(pkg) {
-			isRulePkg = true
+	}
+}
+
+// ChooseTheBestClassField picks the toptype/subtype pair to use, records replacements in update_info and ext_ai_record, and returns the chosen pair.
+func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) (string, string) {
+	// Classification fields from ai_zhipu, then corrected by CheckClassByOtherFileds.
+	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
+	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
+	if ns_toptype != s_toptype || ns_subtype != s_subtype {
+		ext_ai_record["s_toptype"] = ns_toptype
+		ext_ai_record["s_subtype"] = ns_subtype
+	}
+	// Adopt the corrected values.
+	s_toptype, s_subtype = ns_toptype, ns_subtype
+	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" { // keep the types already on tmp for these two toptypes
+		s_toptype = qu.ObjToString(tmp["toptype"])
+		s_subtype = qu.ObjToString(tmp["subtype"])
+	} else {
+		if s_toptype != "" && s_subtype != "" {
+			update_info["toptype"] = s_toptype
+			update_info["subtype"] = s_subtype
+			ext_ai_record["toptype"] = tmp["toptype"]
+			ext_ai_record["subtype"] = tmp["subtype"]
+		} else {
+			s_toptype = qu.ObjToString(tmp["toptype"])
+			s_subtype = qu.ObjToString(tmp["subtype"])
 		}
 	}
+	return s_toptype, s_subtype
+}
 
-	if isRulePkg {
+// ChooseTheBestPackageField copies com_package from ai_zhipu["s_pkg"] into update_info and lets the package's budget/winner/bidamount override the top-level values; s_toptype is not read.
+func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+	// New package handling: com_package from the large model is trusted by default.
+	if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
+		com_package := (*s_pkg)["com_package"]
+		update_info["com_package"] = com_package
+		// Replace the top-level fields when the package carries a value.
+		if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 {
+			update_info["budget"] = p_budget
+			ext_ai_record["budget"] = tmp["budget"]
+		}
+		if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+			if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
+				update_info["s_winner"] = p_winner
+				update_info["winner"] = p_winner
+				ext_ai_record["s_winner"] = tmp["s_winner"]
+				ext_ai_record["winner"] = tmp["winner"]
+			}
+			if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
+				update_info["bidamount"] = p_bidamount
+				ext_ai_record["bidamount"] = tmp["bidamount"]
+			}
+		}
+	}
+}
 
+// ChooseTheBestAmountField re-picks budget/bidamount through filterAmount when the chosen value and the original tmp value differ by a factor of exactly 10000.
+func ChooseTheBestAmountField(tmp map[string]interface{}, update_info map[string]interface{}) {
+	if r_budget := qu.Float64All(update_info["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
+		if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
+			if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { // NOTE(review): exact float equality; a quotient that rounds to anything other than 10000.0 is skipped — confirm this is intended
+				update_info["budget"] = filterAmount(r_budget, o_budget)
+			}
+		}
+	}
+	if r_bidamount := qu.Float64All(update_info["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+		if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
+			if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 { // same exact-equality comparison as for budget
+				update_info["bidamount"] = filterAmount(r_bidamount, o_bidamount)
+			}
+		}
+	}
+}
+
+// getCheckDataAI merges tmp["ai_zhipu"] with the existing fields into update_info; it returns true when the chosen subtype is not a result type and tmp still holds a field listed in unset_check.
+func getCheckDataAI(tmp map[string]interface{}, update_info map[string]interface{}) bool {
+	if tmp["ai_zhipu"] == nil {
+		return false
+	}
+	// Original extracted values, kept as a record of what was replaced.
+	ext_ai_record := map[string]interface{}{}
+	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"]) // NOTE(review): dereferenced without a nil check, while ChooseTheBestPackageField nil-checks the same helper — confirm ObjToMap cannot return nil here
+	// Choose the classification.
+	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, ext_ai_record)
+	// Choose the core fields.
+	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
+	// Choose the package fields.
+	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
+	// Final amount correction and selection.
+	ChooseTheBestAmountField(tmp, update_info)
+
+	// Store the record of replaced values.
+	update_info["ext_ai_record"] = ext_ai_record
+
+	// For a non-result subtype, report whether any result-only field is still present on tmp.
+	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+
+	} else {
+		for k, _ := range unset_check {
+			if tmp[k] != nil {
+				return true
+			}
+		}
 	}
 
+	return false
+}
+
+// filterAmount returns the larger of f1 and f2 unless that larger value exceeds 100000000.0, in which case the smaller one is returned; equal inputs return f1.
+func filterAmount(f1 float64, f2 float64) float64 {
+	// Pick a plausible amount ...
+	if f1 > f2 {
+		if f1 > 100000000.0 {
+			return f2
+		} else {
+			return f1
+		}
+	} else if f1 < f2 {
+		if f2 > 100000000.0 {
+			return f1
+		} else {
+			return f2
+		}
+	} else {
+		return f1
+	}
 }
 
 // 核算分包信息
@@ -123,9 +273,12 @@ func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]inte
 	subtype_rule := qu.ObjToString(data["subtype"])
 	//1、结果类 中标和成交错误校正
 	s_winner := qu.ObjToString(data["s_winner"])
-	winnerorder, _ := data["winnerorder"].([]interface{})
+	winnerorder := IsMarkInterfaceMap(data["winnerorder"])
 	if toptype_ai == "结果" && toptype_rule == "结果" {
-		if (subtype_ai == "中标" && subtype_rule == "成交") || (subtype_ai == "成交" && subtype_rule == "中标") {
+		if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { //规则、大模型都错
+			return "结果", "中标"
+		}
+		if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
 			if len(winnerorder) > 0 { //有中标候选人->中标
 				return toptype_ai, "中标"
 			}
@@ -137,15 +290,12 @@ func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]inte
 	//2、招标、结果错误校正
 	if toptype_ai != "结果" && toptype_rule == "结果" {
 		//return toptype_rule,subtype_rule//默认规则为准
-		if len(winnerorder) > 0 { //有中标候选人->中标
-			//return toptype_rule, "中标"//这里subtype是否返回"中标"?
-			return toptype_rule, subtype_rule //默认规则是正确的
-		} else if s_winner != "" || data["bidamount"] != nil {
+		if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
 			return toptype_rule, subtype_rule
 		} else {
 			return toptype_ai, subtype_ai
 		}
-	} else if toptype_ai == "结果" && toptype_rule != "结果" {
+	} else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { //结果-变更
 		//return toptype_rule,subtype_rule//默认规则为准
 		if len(winnerorder) > 0 { //有中标候选人->中标
 			return toptype_ai, "中标" //这里subtype返回"中标",避免ai识别错误
@@ -157,3 +307,19 @@ func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]inte
 	}
 	return toptype_ai, subtype_ai
 }
+
+func IsMarkInterfaceMap(t interface{}) []map[string]interface{} { // converts t to []map[string]interface{}; any other dynamic type yields an empty, non-nil slice
+	p_list := []map[string]interface{}{}
+	if list_3, ok_3 := t.([]map[string]interface{}); ok_3 { // already the target type: return it as is
+		p_list = list_3
+		return p_list
+	}
+	if yl_list_1, ok_1 := t.(primitive.A); ok_1 { // array type from the mongo-driver bson/primitive package
+		p_list = qu.ObjArrToMapArr(yl_list_1)
+	} else {
+		if yl_list_2, ok_2 := t.([]interface{}); ok_2 { // plain []interface{}
+			p_list = qu.ObjArrToMapArr(yl_list_2)
+		}
+	}
+	return p_list
+}

+ 0 - 57
src/check_bidamount.go

@@ -1,57 +0,0 @@
-package main
-
-import (
-	qu "qfw/util"
-	"regexp"
-)
-
-var classScopeReg *regexp.Regexp = regexp.MustCompile("(建筑工程|交通工程|市政设施)")
-
-//中标金额
-func getCheckDataBidamount(tmp map[string]interface{},update_check *map[string]interface{})  {
-
-	modifycheck := make(map[string]interface{},0)
-	if (*update_check)["modifycheck"] != nil {
-		modifycheck  = *qu.ObjToMap((*update_check)["modifycheck"])
-	}
-	bidamount := qu.Float64All(tmp["bidamount"])
-	spidercode := qu.ObjToString("spidercode")
-
-	//指定网站-数据
-	/*
-	cq_cqsggzyjyzx_zfcg_zbgs  	符合标准	9条
-	ln_lnzfcgw_gggs_jggg		符合标准	555条
-	a_zgzfcgw_dfgg_zongb_new	中国政府采购网	不太符合
-	a_zgzfcgw_bid_tender_new	中国政府采购网	不太符合
-	*/
-	if spidercode=="cq_cqsggzyjyzx_zfcg_zbgs" || spidercode == "ln_lnzfcgw_gggs_jggg" {
-		if bidamount>1000000000.0 {
-			new_bidamount := bidamount/float64(10000)
-			(*update_check)["bidamount"] = new_bidamount
-			modifycheck["bidamount"] = "爬虫倍率"
-			(*update_check)["modifycheck"] = modifycheck
-			return
-		}
-	}
-
-
-
-	//按照行业-划分-太粗糙易出错-省略
-	//if topscopeclass, ok := tmp["topscopeclass"].(primitive.A);(bidamount>0.0&&ok&&len(topscopeclass)>0){
-	//	isTrue := false
-	//	for _,v := range topscopeclass{
-	//		if classScopeReg.MatchString(qu.ObjToString(v)) {
-	//			isTrue = true
-	//			break
-	//		}
-	//	}
-	//	if !isTrue && bidamount>1000000000.0 {
-	//		new_bidamount := bidamount/float64(10000)
-	//		(*update_check)["bidamount"] = new_bidamount
-	//		modifycheck["bidamount"] = "行业倍率"
-	//		(*update_check)["modifycheck"] = modifycheck
-	//		return
-	//	}
-	//}
-}
-

+ 0 - 50
src/check_category.go

@@ -1,50 +0,0 @@
-package main
-
-import (
-	"github.com/go-xweb/log"
-	qu "qfw/util"
-)
-
-//逻辑校验分类数据~
-func getCheckDataCategory(tmp map[string]interface{},update_check *map[string]interface{}) bool {
-	modifycheck := make(map[string]interface{},0)
-	if (*update_check)["modifycheck"] != nil {
-		modifycheck  = *qu.ObjToMap((*update_check)["modifycheck"])
-	}
-	log.Debug(BsonTOStringId(tmp["_id"]))
-	toptype := qu.ObjToString(tmp["toptype"])
-	subtype := qu.ObjToString(tmp["subtype"])
-	winner := qu.ObjToString(tmp["winner"])
-	is_del := false
-	if toptype=="招标" && subtype!="单一" {
-		if winner!="" { //校验~合理性
-			if confirmValidEnt(winner) {
-				(*update_check)["toptype"] = "结果"
-				if tmp["winnerorder"]!=nil {
-					(*update_check)["subtype"] = "中标"
-				}else {
-					(*update_check)["subtype"] = "成交"
-				}
-				modifycheck["top_subtype"] = "分类修复"
-				(*update_check)["modifycheck"] = modifycheck
-			}else {
-				is_del = true
-				delete(*update_check,"bidamount")
-			}
-		}else {
-			is_del = true
-			delete(*update_check,"bidamount")
-		}
-	}
-	return is_del
-}
-
-func confirmValidEnt(name string) bool {
-	tmp := qy_mgo.FindOne(qy_coll_name, map[string]interface{}{
-		"company_name":name,
-	})
-	if tmp != nil && len(tmp)> 2 {
-		return true
-	}
-	return false
-}

+ 0 - 286
src/check_city.go

@@ -1,286 +0,0 @@
-package main
-
-import (
-	"fmt"
-	qu "qfw/util"
-	"regexp"
-	"strings"
-)
-
-var cityEndReg *regexp.Regexp = regexp.MustCompile("(区|县|市)$")
-var ErrBuyerReg *regexp.Regexp = regexp.MustCompile("^(成都东部新区)")
-
-func getCheckDataCity(tmp map[string]interface{}, update_check *map[string]interface{}) {
-
-	area := qu.ObjToString(tmp["area"])
-	city := qu.ObjToString(tmp["city"])
-	district := qu.ObjToString(tmp["district"])
-	buyer := qu.ObjToString(tmp["buyer"])
-
-	if buyer != "" && ErrBuyerReg.MatchString(buyer) && area == "浙江" {
-		(*update_check)["area"] = "四川"
-		(*update_check)["city"] = "成都市"
-		(*update_check)["district"] = ""
-		return
-	}
-
-	if (district != "" && city != "" && area != "" && area != "全国") || buyer == "" {
-		//标准城市-校验
-		rdata := standardCheckCity(area, city, district)
-		if len(rdata) > 0 {
-			umap := updateLogging(tmp, rdata, "标准信息")
-			copyUpdateData(umap, update_check)
-		}
-		return
-	}
-	rdata := cityMarshal(tmp) //企业表-补城市
-	if len(rdata) > 0 {
-		new_area, new_city, new_district := area, city, district
-		if rdata["area"] != "" {
-			new_area = qu.ObjToString(rdata["area"])
-		}
-		if rdata["city"] != "" {
-			new_city = qu.ObjToString(rdata["city"])
-		}
-		if rdata["district"] != "" {
-			new_district = qu.ObjToString(rdata["district"])
-		}
-		umap := updateLogging(tmp, rdata, "企业信息")
-		n_rdata := standardCheckCity(new_area, new_city, new_district)
-		if len(n_rdata) > 0 {
-			for rk, rv := range n_rdata {
-				umap[rk] = rv
-				umap["modifycheck"].(map[string]interface{})[rk] = fmt.Sprintf("企业标准信息~%s~%s", qu.ObjToString(tmp[rk]), rv)
-			}
-		}
-		copyUpdateData(umap, update_check)
-	} else {
-		n_rdata := standardCheckCity(area, city, district)
-		if len(n_rdata) > 0 {
-			umap := updateLogging(tmp, n_rdata, "标准信息")
-			copyUpdateData(umap, update_check)
-		}
-	}
-}
-
-//企业表校验
-func cityMarshal(data map[string]interface{}) map[string]string {
-	buyer := qu.ObjToString(data["buyer"])
-	bidarea := qu.ObjToString(data["area"])
-	bidcity := qu.ObjToString(data["city"])
-	biddistrict := qu.ObjToString(data["district"])
-	rdata := make(map[string]string)
-	query_name := map[string]interface{}{
-		"company_name": buyer,
-	}
-	tmp := qy_mgo.FindOne(qy_coll_name, query_name)
-	if tmp == nil || len(tmp) < 2 {
-		return rdata
-	}
-	company_code := fmt.Sprint(tmp["company_code"])
-	if len(company_code) > 5 {
-		province_city_district := qy_mgo.FindOne("address", map[string]interface{}{
-			"code": company_code[:6],
-		})
-		remarks := fmt.Sprint((province_city_district)["Remarks"])
-		if remarks == "" || remarks == "废除" || remarks == "已作废" {
-
-		} else if province_city_district != nil {
-			codeprovince := qu.ObjToString((province_city_district)["province"])
-			codecity := qu.ObjToString((province_city_district)["city"])
-			codedistrict := qu.ObjToString((province_city_district)["district"])
-			if bidarea == "" || bidarea == "全国" {
-				if codeprovince != "" {
-					rdata["area"] = codeprovince
-					if codecity != "" && codecity != codeprovince {
-						rdata["city"] = codecity
-						if codedistrict != "" && codedistrict != codecity {
-							rdata["district"] = codedistrict
-						}
-					}
-				}
-			} else if bidcity == "" && codecity != "" && bidarea == codeprovince {
-				if codecity != bidarea {
-					rdata["city"] = codecity
-					if codedistrict != "" && codecity != codedistrict {
-						rdata["district"] = codedistrict
-					}
-				}
-			} else if biddistrict == "" && codedistrict != "" && bidarea == codeprovince && codecity == bidcity {
-				rdata["district"] = codedistrict
-			}
-
-			if rdata["city"] != "" || rdata["district"] != "" {
-				return rdata
-			}
-		}
-	}
-
-	entprovince := qu.ObjToString(tmp["company_area"])
-	entcity := qu.ObjToString(tmp["company_city"])
-	entdistrict := qu.ObjToString(tmp["company_district"])
-
-	//新增特殊处理-港澳台数据
-	if bidarea == "" || bidarea == "香港" || bidarea == "澳门" || bidarea == "台湾" || bidarea == "全国" {
-		if entprovince != "" {
-			rdata["area"] = entprovince
-			if entcity != "" && entcity != entprovince {
-				rdata["city"] = entcity
-				if entdistrict != "" && entdistrict != entcity {
-					rdata["district"] = entdistrict
-				}
-			}
-		}
-	} else if bidcity == "" && entcity != "" && entprovince == bidarea {
-		rdata["city"] = entcity
-		if entdistrict != "" && entcity != entdistrict {
-			rdata["district"] = entdistrict
-		}
-	} else if biddistrict == "" && entdistrict != "" && entprovince == bidarea && bidcity == entcity {
-		rdata["district"] = entdistrict
-	}
-
-	return rdata
-}
-
-//标准校验
-func standardCheckCity(area string, city string, district string) map[string]string {
-
-	rdata := make(map[string]string, 0)
-	if area == "香港" || area == "澳门" || area == "台湾" || (area == "全国" && (city == "" && district == "")) {
-		return rdata
-	}
-	//第一步:区校验
-	if district != "" {
-		districtArr := S_DistrictDict[district]
-		if districtArr == nil { //涉及了 个别别名相关的数据
-			trim_arr := aliasDataDistrict(district) //拆分后缀
-			if len(trim_arr) > 0 {
-				for _, alias_district := range trim_arr {
-					alias_districtArr := S_DistrictDict[alias_district]
-					for _, v := range alias_districtArr {
-						if city == v.C_Name && area == v.P_Name {
-							rdata["district"] = alias_district
-							return rdata
-						}
-					}
-				}
-			}
-			rdata["district"] = ""
-		} else {
-			isTrue := false
-			for _, v := range districtArr {
-				if city == v.C_Name && area == v.P_Name {
-					isTrue = true
-					break
-				}
-			}
-			if isTrue { //完全匹配
-				return rdata
-			} else { //未完全匹配
-				if len(districtArr) == 1 {
-					rdata["area"] = districtArr[0].P_Name
-					rdata["city"] = districtArr[0].C_Name
-					rdata["district"] = districtArr[0].D_Name
-					return rdata
-				} else {
-					rdata["district"] = ""
-				}
-			}
-		}
-	}
-
-	//第二步:区校验-失败   市-校验
-	if city != "" {
-		cityArr := S_CityDict[city]
-		if cityArr == nil {
-			//把市当成区,匹配三级   - 存在优化空间- city:郑州  别名
-			districtArr := S_DistrictDict[city]
-			for _, v := range districtArr {
-				if city == v.C_Name && area == v.P_Name {
-					rdata["area"] = districtArr[0].P_Name
-					rdata["city"] = districtArr[0].C_Name
-					rdata["district"] = districtArr[0].D_Name
-					return rdata
-				}
-			}
-			rdata["city"] = ""
-		} else {
-			isTrue := false
-			for _, v := range cityArr {
-				if area == v.P_Name {
-					isTrue = true
-					break
-				}
-			}
-			if isTrue { //完全匹配
-				return rdata
-			} else { //未完全匹配
-				if len(cityArr) == 1 {
-					rdata["area"] = cityArr[0].P_Name
-					rdata["city"] = cityArr[0].C_Name
-					rdata["district"] = ""
-					return rdata
-				} else {
-					rdata["city"] = ""
-				}
-			}
-		}
-	}
-
-	//第三步:省份校验
-	if S_ProvinceDict[area] == nil {
-		rdata["area"] = "全国"
-		rdata["city"] = ""
-		rdata["district"] = ""
-	}
-
-	return rdata
-}
-
-//更新日志
-func updateLogging(tmp map[string]interface{}, rdata map[string]string, desc string) map[string]interface{} {
-	umap := make(map[string]interface{})
-	if tmp["modifycheck"] == nil {
-		umap["modifycheck"] = make(map[string]interface{})
-	} else {
-		umap["modifycheck"] = tmp["modifycheck"]
-	}
-	for rk, rv := range rdata {
-		umap[rk] = rv
-		umap["modifycheck"].(map[string]interface{})[rk] = fmt.Sprintf("%s~%s~%s", desc, qu.ObjToString(tmp[rk]), rv)
-	}
-	return umap
-}
-
-func copyUpdateData(tmp map[string]interface{}, update_check *map[string]interface{}) {
-	for k, v := range tmp {
-		(*update_check)[k] = v
-	}
-}
-
-//拆分三级县
-func aliasDataDistrict(district string) []string {
-	arr := []string{}
-	if cityEndReg.MatchString(district) {
-		str := cityEndReg.FindString(district)
-		strings.TrimRight(district, str)
-		if str == "县" {
-			arr = append(arr, fmt.Sprintf("%s区", strings.TrimRight(district, str)))
-			arr = append(arr, fmt.Sprintf("%s市", strings.TrimRight(district, str)))
-		} else if str == "区" {
-			arr = append(arr, fmt.Sprintf("%s县", strings.TrimRight(district, str)))
-			arr = append(arr, fmt.Sprintf("%s市", strings.TrimRight(district, str)))
-		} else if str == "市" {
-			arr = append(arr, fmt.Sprintf("%s县", strings.TrimRight(district, str)))
-			arr = append(arr, fmt.Sprintf("%s区", strings.TrimRight(district, str)))
-		} else {
-
-		}
-	} else { //未找到 district- 区县市  例: district : 金水
-		arr = append(arr, fmt.Sprintf("%s区", district))
-		arr = append(arr, fmt.Sprintf("%s县", district))
-		arr = append(arr, fmt.Sprintf("%s市", district))
-	}
-	return arr
-}

+ 8 - 8
src/check_publishtime.go

@@ -4,7 +4,7 @@ import (
 	qu "qfw/util"
 )
 
-func getCheckDataPublishtime(tmp map[string]interface{},update_check *map[string]interface{}) {
+func getCheckDataPublishtime(tmp map[string]interface{}, update map[string]interface{}) {
 	publishtime := qu.IntAll(tmp["publishtime"])
 	bidopentime := qu.IntAll(tmp["bidopentime"])
 	bidendtime := qu.IntAll(tmp["bidendtime"])
@@ -12,18 +12,18 @@ func getCheckDataPublishtime(tmp map[string]interface{},update_check *map[string
 	if (bidopentime > 0 || bidendtime > 0) && publishtime > 0 {
 		//验证-是否超过周期16天
 		bid_time := 0
-		if bidopentime>0 {
+		if bidopentime > 0 {
 			bid_time = bidopentime
-		}else {
+		} else {
 			bid_time = bidendtime
 		}
-		if bidendtime<bidopentime && bidendtime>0 && bidopentime>0  {
+		if bidendtime < bidopentime && bidendtime > 0 && bidopentime > 0 {
 			bid_time = bidendtime
 		}
-		if publishtime-bid_time>16*86400 && publishtime>0 && bid_time>0{ //需要修复-异常时间数据
-			(*update_check)["dataging"]  =  1
-			(*update_check)["publishtime"] = bid_time
-			(*update_check)["pt_modify"] = publishtime
+		if publishtime-bid_time > 16*86400 && publishtime > 0 && bid_time > 0 { //需要修复-异常时间数据
+			update["dataging"] = 1
+			update["publishtime"] = bid_time
+			update["pt_modify"] = publishtime
 		}
 	}
 }

+ 8 - 4
src/config.json

@@ -1,20 +1,24 @@
 {
-  "udpport": ":1166",
+  "udpport": ":1799",
+  "udpport_ai":":1792",
+  "update_ai": true,
   "mongodb": {
     "addrName": "127.0.0.1:27017",
     "dbName": "zhengkun",
-    "collName": "zzzzz_uncity_new",
+    "collName": "zktest_info_0930_new",
+    "username": "",
+    "password": "",
     "pool": 10
   },
   "qy_mongodb": {
     "qy_addrName": "127.0.0.1:27017",
-    "qy_dbName": "mixdata",
+    "qy_dbName": "qfw",
     "qy_collName": "qyxy_std",
     "qy_username": "",
     "qy_password": "",
     "pool": 10
   },
-  "jy_collName": "address_jy_2022",
+  "address_name": "address_jy_2022",
   "check_thread" : 1,
   "nextNode": []
 }

+ 158 - 0
src/initvar.go

@@ -0,0 +1,158 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	mu "mfw/util"
+	qu "qfw/util"
+	"sync"
+)
+
+type S_Province struct { // province-level entry of the region lookup tables
+	P_Name string // province name
+}
+type S_City struct { // city-level entry
+	P_Name string // province name
+	C_Name string // city name
+}
+type S_District struct { // district-level entry
+	P_Name string // province name
+	C_Name string // city name
+	D_Name string // district name
+}
+
+var (
+	Sysconfig               map[string]interface{} // configuration file contents
+	mconf                   map[string]interface{} // mongodb configuration
+	data_mgo, qy_mgo        *MongodbSim
+	bid_mgo                 *MongodbSim              // mongodb handle
+	udpclient               mu.UdpClient             // udp client
+	nextNode                []map[string]interface{} // next-node information
+	coll_name, address_name string
+	check_thread            int                     // number of worker threads
+	UpdateTask              *updateInfo             // update pool
+	S_ProvinceDict          map[string][]S_Province // province map
+	S_CityDict              map[string][]S_City     // city map
+	S_DistrictDict          map[string][]S_District // district map
+	// Fields to delete.
+	unset_check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
+	// Task locks.
+	udplock, getasklock sync.Mutex
+	taskList            []map[string]interface{}
+	// Monitoring.
+	responselock     sync.Mutex
+	lastNodeResponse int64
+	update_ai        bool
+	dataprocess      map[string]interface{}
+)
+
+// initMgo builds the three MongoDB pools and loads the remaining settings from Sysconfig; a missing or mistyped key panics on the unchecked type assertions.
+func initMgo() {
+	mconf = Sysconfig["mongodb"].(map[string]interface{})
+	log.Println(mconf)
+	data_mgo = &MongodbSim{
+		MongodbAddr: mconf["addrName"].(string),
+		DbName:      mconf["dbName"].(string),
+		Size:        qu.IntAllDef(mconf["pool"], 10),
+		UserName:    mconf["username"].(string),
+		Password:    mconf["password"].(string),
+	}
+	data_mgo.InitPool()
+
+	qy_mconf := Sysconfig["qy_mongodb"].(map[string]interface{})
+	qy_mgo = &MongodbSim{
+		MongodbAddr: qy_mconf["qy_addrName"].(string),
+		DbName:      qy_mconf["qy_dbName"].(string),
+		Size:        qu.IntAllDef(qy_mconf["pool"], 10),
+		UserName:    qy_mconf["qy_username"].(string),
+		Password:    qy_mconf["qy_password"].(string),
+	}
+	qy_mgo.InitPool()
+
+	bid_mgo = &MongodbSim{ // NOTE(review): hosts and credentials are hard-coded here instead of being read from Sysconfig — consider moving them to the config file
+		MongodbAddr: "172.17.189.140:27080,172.17.189.141:27081",
+		DbName:      "qfw",
+		Size:        10,
+		UserName:    "zhengkun",
+		Password:    "zk@123123",
+	}
+	bid_mgo.InitPool()
+
+	coll_name = mconf["collName"].(string)
+	address_name = Sysconfig["address_name"].(string)
+	nextNode = qu.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
+	check_thread = qu.IntAll(Sysconfig["check_thread"])
+	update_ai = Sysconfig["update_ai"].(bool)
+	if !update_ai { // dataprocess holds a different key/value depending on update_ai
+		dataprocess = map[string]interface{}{
+			"dataprocess": 4,
+		}
+	} else {
+		dataprocess = map[string]interface{}{
+			"dataprocess_ai": 3,
+		}
+	}
+	log.Println("mgo 等配置,加载完毕...")
+}
+
+// initCheckCity loads the address_name collection (rows without town_code) into the province, city and district lookup maps, each keyed by the region's own name.
+func initCheckCity() {
+	// Initialize the region lookup tables.
+	S_ProvinceDict = make(map[string][]S_Province, 0)
+	S_CityDict = make(map[string][]S_City, 0)
+	S_DistrictDict = make(map[string][]S_District, 0)
+	q := map[string]interface{}{
+		"town_code": map[string]interface{}{
+			"$exists": 0,
+		},
+	}
+	sess := qy_mgo.GetMgoConn()
+	defer qy_mgo.DestoryMongoConn(sess)
+	it := sess.DB(qy_mgo.DbName).C(address_name).Find(&q).Iter()
+	total := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Println("当前数量:", total)
+		}
+		district_code := qu.IntAll(tmp["district_code"])
+		city_code := qu.IntAll(tmp["city_code"])
+		if district_code > 0 { // district-level row
+			province := qu.ObjToString(tmp["province"])
+			city := qu.ObjToString(tmp["city"])
+			district := qu.ObjToString(tmp["district"])
+			data := S_District{province, city, district}
+			if S_DistrictDict[district] == nil {
+				S_DistrictDict[district] = []S_District{data}
+			} else {
+				arr := S_DistrictDict[district]
+				arr = append(arr, data)
+				S_DistrictDict[district] = arr
+			}
+		} else {
+			if city_code > 0 { // city-level row
+				province := qu.ObjToString(tmp["province"])
+				city := qu.ObjToString(tmp["city"])
+				data := S_City{province, city}
+				if S_CityDict[city] == nil {
+					S_CityDict[city] = []S_City{data}
+				} else {
+					arr := S_CityDict[city]
+					arr = append(arr, data)
+					S_CityDict[city] = arr
+				}
+			} else { // neither code is positive: stored as a province row
+				province := qu.ObjToString(tmp["province"])
+				data := S_Province{province}
+				if S_ProvinceDict[province] == nil {
+					S_ProvinceDict[province] = []S_Province{data}
+				} else {
+					arr := S_ProvinceDict[province]
+					arr = append(arr, data)
+					S_ProvinceDict[province] = arr
+				}
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+	log.Println(fmt.Sprintf("城市配置加载完毕...省~%d 市~%d 区~%d", len(S_ProvinceDict), len(S_CityDict), len(S_DistrictDict)))
+}

+ 9 - 376
src/main.go

@@ -1,157 +1,12 @@
 package main
 
 import (
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
 	"log"
 	mu "mfw/util"
-	"net"
-	"net/http"
 	qu "qfw/util"
-	"strings"
-	"sync"
 	"time"
 )
 
-type S_Province struct {
-	P_Name string
-}
-type S_City struct {
-	P_Name string
-	C_Name string
-}
-type S_District struct {
-	P_Name string
-	C_Name string
-	D_Name string
-}
-
-var (
-	Sysconfig                             map[string]interface{} //配置文件
-	mconf                                 map[string]interface{} //mongodb配置信息
-	data_mgo, qy_mgo                      *MongodbSim
-	bid_mgo                               *MongodbSim              //mongodb操作对象
-	udpclient                             mu.UdpClient             //udp对象
-	nextNode                              []map[string]interface{} //节点信息
-	coll_name, qy_coll_name, jy_coll_name string
-	check_lock                            sync.Mutex              //更新锁
-	check_thread                          int                     //线程数
-	UpdateTask                            *updateInfo             //更新池
-	S_ProvinceDict                        map[string][]S_Province //省份-map
-	S_CityDict                            map[string][]S_City     //城市-map
-	S_DistrictDict                        map[string][]S_District //区县-map
-	//删除字段
-	unset_dict          = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
-	udplock, getasklock sync.Mutex
-	taskList            []map[string]interface{}
-	//监控相关
-	responselock     sync.Mutex
-	lastNodeResponse int64
-)
-
-// 初始化城市
-func initCheckCity() {
-	//初始化-城市配置
-	S_ProvinceDict = make(map[string][]S_Province, 0)
-	S_CityDict = make(map[string][]S_City, 0)
-	S_DistrictDict = make(map[string][]S_District, 0)
-	q := map[string]interface{}{
-		"town_code": map[string]interface{}{
-			"$exists": 0,
-		},
-	}
-	sess := qy_mgo.GetMgoConn()
-	defer qy_mgo.DestoryMongoConn(sess)
-	it := sess.DB(qy_mgo.DbName).C(jy_coll_name).Find(&q).Iter()
-	total := 0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
-			log.Println("当前数量:", total)
-		}
-		district_code := qu.IntAll(tmp["district_code"])
-		city_code := qu.IntAll(tmp["city_code"])
-		if district_code > 0 {
-			province := qu.ObjToString(tmp["province"])
-			city := qu.ObjToString(tmp["city"])
-			district := qu.ObjToString(tmp["district"])
-			data := S_District{province, city, district}
-			if S_DistrictDict[district] == nil {
-				S_DistrictDict[district] = []S_District{data}
-			} else {
-				arr := S_DistrictDict[district]
-				arr = append(arr, data)
-				S_DistrictDict[district] = arr
-			}
-		} else {
-			if city_code > 0 {
-				province := qu.ObjToString(tmp["province"])
-				city := qu.ObjToString(tmp["city"])
-				data := S_City{province, city}
-				if S_CityDict[city] == nil {
-					S_CityDict[city] = []S_City{data}
-				} else {
-					arr := S_CityDict[city]
-					arr = append(arr, data)
-					S_CityDict[city] = arr
-				}
-			} else {
-				province := qu.ObjToString(tmp["province"])
-				data := S_Province{province}
-				if S_ProvinceDict[province] == nil {
-					S_ProvinceDict[province] = []S_Province{data}
-				} else {
-					arr := S_ProvinceDict[province]
-					arr = append(arr, data)
-					S_ProvinceDict[province] = arr
-				}
-			}
-		}
-		tmp = make(map[string]interface{})
-	}
-	log.Println(fmt.Sprintf("城市配置加载完毕...省~%d 市~%d 区~%d", len(S_ProvinceDict), len(S_CityDict), len(S_DistrictDict)))
-}
-
-// mgo-配置等
-func initMgo() {
-	mconf = Sysconfig["mongodb"].(map[string]interface{})
-	log.Println(mconf)
-	data_mgo = &MongodbSim{
-		MongodbAddr: mconf["addrName"].(string),
-		DbName:      mconf["dbName"].(string),
-		Size:        qu.IntAllDef(mconf["pool"], 10),
-	}
-	data_mgo.InitPool()
-
-	qy_mconf := Sysconfig["qy_mongodb"].(map[string]interface{})
-	qy_mgo = &MongodbSim{
-		MongodbAddr: qy_mconf["qy_addrName"].(string),
-		DbName:      qy_mconf["qy_dbName"].(string),
-		Size:        qu.IntAllDef(qy_mconf["pool"], 10),
-		UserName:    qy_mconf["qy_username"].(string),
-		Password:    qy_mconf["qy_password"].(string),
-	}
-	qy_mgo.InitPool()
-
-	bid_mgo = &MongodbSim{
-		MongodbAddr: "172.17.189.140:27080,172.17.189.141:27081",
-		DbName:      "qfw",
-		Size:        10,
-		UserName:    "zhengkun",
-		Password:    "zk@123123",
-	}
-	bid_mgo.InitPool()
-
-	coll_name = mconf["collName"].(string)
-	qy_coll_name = qy_mconf["qy_collName"].(string)
-
-	jy_coll_name = Sysconfig["jy_collName"].(string)
-	nextNode = qu.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
-	check_thread = qu.IntAll(Sysconfig["check_thread"])
-
-	log.Println("mgo 等配置,加载完毕...")
-}
-
 // 初始化
 func init() {
 	qu.ReadConfig(&Sysconfig) //加载配置文件
@@ -159,9 +14,7 @@ func init() {
 	if len(Sysconfig) == 0 {
 		log.Fatal("读取配置文件失败", Sysconfig)
 	}
-	initMgo()       //初始化mgo
-	initCheckCity() //初始化城市
-
+	initMgo() //初始化mgo
 	//更新池
 	UpdateTask = newUpdatePool()
 	go UpdateTask.updateData()
@@ -170,6 +23,9 @@ func init() {
 func main() {
 	lastNodeResponse = time.Now().Unix()
 	updport := Sysconfig["udpport"].(string)
+	if Sysconfig["update_ai"].(bool) {
+		updport = Sysconfig["udpport_ai"].(string)
+	}
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
 	udpclient.Listen(processUdpMsg)
 	log.Println("Udp服务监听", updport)
@@ -182,232 +38,9 @@ func main() {
 	<-lock
 }
 
-// 开始审查数据
-func startCheckData(sid, eid string) {
-	defer qu.Catch()
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  StringTOBsonId(sid),
-			"$lte": StringTOBsonId(eid),
-		},
-	}
-	check_pool := make(chan bool, check_thread)
-	check_wg := &sync.WaitGroup{}
-	sess := data_mgo.GetMgoConn()
-	defer data_mgo.DestoryMongoConn(sess)
-	it := sess.DB(data_mgo.DbName).C(coll_name).Find(&q).Iter()
-	total, isRepair := 0, 0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Println("当前数量:", total, isRepair, tmp["_id"])
-		}
-		update_id := map[string]interface{}{"_id": tmp["_id"]}
-		check_pool <- true
-		check_wg.Add(1)
-		go func(tmp map[string]interface{}, update_id map[string]interface{}) {
-			defer func() {
-				<-check_pool
-				check_wg.Done()
-			}()
-			//更新-
-			update_check := make(map[string]interface{}, 0)
-
-			//审查-城市-迁移
-			//getCheckDataCity(tmp, &update_check)
-			//审查-金额-迁移
-			//getCheckDataBidamount(tmp, &update_check)
-			//审查-分类-弃用
-			//getCheckDataCategory(tmp,&update_check)
-
-			//审查-发布时间
-			getCheckDataPublishtime(tmp, &update_check)
-			//审查-大模型与抽取
-			getCheckDataAI(tmp, &update_check)
-
-			//最终计算是否清洗
-			update_dict := make(map[string]interface{}, 0)
-			if len(update_check) > 0 {
-				update_dict["$set"] = update_check
-			}
-			if len(update_dict) > 0 { //注意事项~更新key不能与删除key同时存在
-				isRepair++
-				UpdateTask.updatePool <- []map[string]interface{}{
-					update_id,
-					update_dict,
-				}
-			}
-		}(tmp, update_id)
-		tmp = make(map[string]interface{})
-	}
-	check_wg.Wait()
-	log.Println("data_clean is over ", total, "~", isRepair)
-	sendNextNode(sid, eid)
-}
-
-// udp监听
-func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
-	switch act {
-	case mu.OP_TYPE_DATA:
-		var rep map[string]interface{}
-		err := json.Unmarshal(data, &rep)
-		if err != nil {
-			log.Println(err)
-		} else {
-			sid, _ := rep["gtid"].(string)
-			eid, _ := rep["lteid"].(string)
-			stype := qu.ObjToString(rep["stype"])
-			key := qu.ObjToString(rep["key"])
-			if stype == "monitor" {
-				log.Println("收到监测......")
-				udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
-				return
-			}
-			if sid == "" || eid == "" {
-				log.Println("err", "sid=", sid, ",eid=", eid)
-				return
-			} else {
-				lastNodeResponse = time.Now().Unix()
-				udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
-				udplock.Lock()
-				taskList = append(taskList, map[string]interface{}{
-					"sid": sid,
-					"eid": eid,
-				}) //插入任务
-				log.Println("udp收到任务...数量:", len(taskList), "具体任务:", taskList)
-				udplock.Unlock()
-			}
-		}
-	case mu.OP_NOOP: //下个节点回应
-		log.Println("下节点回应:", string(data))
-		udptaskmap.Delete(string(data))
-	}
-}
-
-// 发送下阶段节点~
-func sendNextNode(sid string, eid string) {
-	//更新记录状态
-	updateProcessUdpIdsInfo(sid, eid)
-	log.Println("判重任务完成...发送下节点udp...")
-	for _, to := range nextNode {
-		key := sid + "-" + eid + "-" + qu.ObjToString(to["stype"])
-		by, _ := json.Marshal(map[string]interface{}{
-			"gtid":  sid,
-			"lteid": eid,
-			"stype": qu.ObjToString(to["stype"]),
-			"key":   key,
-		})
-		addr := &net.UDPAddr{
-			IP:   net.ParseIP(to["addr"].(string)),
-			Port: qu.IntAll(to["port"]),
-		}
-		node := &udpNode{by, addr, time.Now().Unix(), 0}
-		udptaskmap.Store(key, node)
-		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
-	}
-}
-
-// 更新流程记录id段落
-func updateProcessUdpIdsInfo(sid string, eid string) {
-	query := map[string]interface{}{
-		"gtid": map[string]interface{}{
-			"$gte": sid,
-		},
-		"lteid": map[string]interface{}{
-			"$lte": eid,
-		},
-	}
-	task_coll := "bidding_processing_ids"
-	datas, _ := bid_mgo.Find(task_coll, query, nil, nil)
-	if len(datas) > 0 {
-		log.Println("开始更新流程段落记录~~", len(datas), "段")
-		for _, v := range datas {
-			up_id := BsonTOStringId(v["_id"])
-			if up_id != "" {
-				update := map[string]interface{}{
-					"$set": map[string]interface{}{
-						"dataprocess": 4,
-						"updatetime":  time.Now().Unix(),
-					},
-				}
-				bid_mgo.UpdateById(task_coll, up_id, update)
-				log.Println("流程段落记录~~更新完毕~", update)
-			}
-		}
-	} else {
-		log.Println("未查询到记录id段落~", query)
-	}
-}
-
-func httpDo(detail string) (e error) {
-	client := &http.Client{}
-	req, err := http.NewRequest("POST", "http://127.0.0.1:9991/get",
-		strings.NewReader("detail="+detail))
-	if err != nil {
-		return err
-	}
-	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-	resp, err := client.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-	body, err := ioutil.ReadAll(resp.Body)
-	if err != nil {
-		return err
-	}
-	log.Println("put ", string(body))
-	return nil
-}
-
-// 监听-获取-分发清洗任务
-func getRepeatTask() {
-	for {
-		if len(taskList) > 0 {
-			getasklock.Lock()
-			len_list := len(taskList)
-			if len_list > 1 {
-				first_id := qu.ObjToString(taskList[0]["sid"])
-				end_id := qu.ObjToString(taskList[len_list-1]["eid"])
-				if first_id != "" && end_id != "" {
-					taskList = taskList[len_list:]
-					log.Println("合并段落~正常~", first_id, "~", end_id, "~剩余任务池~", len(taskList), taskList)
-					startCheckData(first_id, end_id)
-				} else {
-					log.Println("合并段落~错误~正常取段落~~~")
-					mapInfo := taskList[0]
-					if mapInfo != nil {
-						taskList = taskList[1:]
-						log.Println("获取任务段处理中~~~剩余任务池~~~", len(taskList), taskList)
-						sid := qu.ObjToString(mapInfo["sid"])
-						eid := qu.ObjToString(mapInfo["eid"])
-						startCheckData(sid, eid)
-					}
-				}
-			} else {
-				mapInfo := taskList[0]
-				if mapInfo != nil {
-					taskList = taskList[1:]
-					log.Println("获取任务段处理中~~~剩余任务池~~~", len(taskList), taskList)
-					sid := qu.ObjToString(mapInfo["sid"])
-					eid := qu.ObjToString(mapInfo["eid"])
-					startCheckData(sid, eid)
-				}
-			}
-			getasklock.Unlock()
-		} else {
-			time.Sleep(10 * time.Second)
-		}
-	}
-}
-
-func lastUdpJob() {
-	for {
-		responselock.Lock()
-		if time.Now().Unix()-lastNodeResponse >= 1800 {
-			lastNodeResponse = time.Now().Unix() //重置时间
-			sendErrMailApi("数据清洗~发现处理流程超时~给予告警", fmt.Sprintf("半小时左右~无新段落数据进入清洗增量流程...相关人员检查..."))
-		}
-		responselock.Unlock()
-		time.Sleep(300 * time.Second)
-	}
+// test is a manual entry point: it runs one check pass over a fixed, very
+// wide id range and then parks the calling goroutine forever.
+func test() {
+	log.Println("测试修正...")
+	startCheckData("100000000000000000000000", "900000000000000000000000")
+	// Block indefinitely; nothing ever wakes this goroutine.
+	select {}
+}

+ 1 - 1
src/mark

@@ -23,7 +23,7 @@
       "addr": "127.0.0.1",
       "port": 1781,
       "stype":"hangye",
-      "memo": "生kv招标分类"
+      "memo": "行业分类"
     }
   ]
 }

+ 0 - 0
src/udptaskmap.go → src/udpmail.go


+ 72 - 0
src/udprocess.go

@@ -0,0 +1,72 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	mu "mfw/util"
+	"net"
+	qu "qfw/util"
+	"time"
+)
+
+// processUdpMsg handles a single incoming UDP packet.
+//
+// OP_TYPE_DATA packets carry a JSON object. A packet whose stype is "monitor"
+// is answered with its key and otherwise ignored. Any other packet must carry
+// both gtid and lteid; it is acknowledged with its key and appended to
+// taskList as a {sid, eid} task.
+//
+// OP_NOOP packets are acknowledgements from a downstream node: the payload is
+// the key to remove from udptaskmap.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA:
+		var rep map[string]interface{}
+		if err := json.Unmarshal(data, &rep); err != nil {
+			log.Println(err)
+			return
+		}
+		sid, _ := rep["gtid"].(string)
+		eid, _ := rep["lteid"].(string)
+		stype := qu.ObjToString(rep["stype"])
+		key := qu.ObjToString(rep["key"])
+		if stype == "monitor" {
+			log.Println("收到监测......")
+			udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+			return
+		}
+		if sid == "" || eid == "" {
+			log.Println("err", "sid=", sid, ",eid=", eid)
+			return
+		}
+		// lastUdpJob reads and resets this timestamp under responselock,
+		// so the write here must take the same lock to avoid a data race.
+		responselock.Lock()
+		lastNodeResponse = time.Now().Unix()
+		responselock.Unlock()
+		udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+		udplock.Lock()
+		taskList = append(taskList, map[string]interface{}{
+			"sid": sid,
+			"eid": eid,
+		}) // enqueue the task
+		log.Println("udp收到任务...数量:", len(taskList), "具体任务:", taskList)
+		udplock.Unlock()
+	case mu.OP_NOOP: // acknowledgement from the next node
+		log.Println("下节点回应:", string(data))
+		udptaskmap.Delete(string(data))
+	}
+}
+
+// sendNextNode records the finished id range via updateProcessUdpIdsInfo and
+// then notifies every configured downstream node over UDP.
+//
+// Each notification is stored in udptaskmap under its key before it is sent;
+// processUdpMsg deletes the entry when the node acknowledges with that key.
+// A node entry without a usable addr is logged and skipped, so one bad
+// config entry no longer panics and prevents the remaining nodes from being
+// notified.
+func sendNextNode(sid string, eid string) {
+	// Update the processing-record status first.
+	updateProcessUdpIdsInfo(sid, eid)
+	log.Println("判重任务完成...发送下节点udp...")
+	for _, to := range nextNode {
+		// NOTE(review): assumes qu.ObjToString yields "" for a missing or
+		// non-string value — confirm against qfw/util.
+		host := qu.ObjToString(to["addr"])
+		if host == "" {
+			log.Println("下节点配置缺少addr,跳过:", to)
+			continue
+		}
+		stype := qu.ObjToString(to["stype"])
+		key := sid + "-" + eid + "-" + stype
+		by, err := json.Marshal(map[string]interface{}{
+			"gtid":  sid,
+			"lteid": eid,
+			"stype": stype,
+			"key":   key,
+		})
+		if err != nil {
+			log.Println("下节点消息序列化失败:", key, err)
+			continue
+		}
+		addr := &net.UDPAddr{
+			IP:   net.ParseIP(host),
+			Port: qu.IntAll(to["port"]),
+		}
+		node := &udpNode{by, addr, time.Now().Unix(), 0}
+		udptaskmap.Store(key, node)
+		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
+	}
+}

+ 91 - 0
src/udptask.go

@@ -0,0 +1,91 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	qu "qfw/util"
+	"time"
+)
+
+// 更新流程记录id段落
+func updateProcessUdpIdsInfo(sid string, eid string) {
+	query := map[string]interface{}{
+		"gtid": map[string]interface{}{
+			"$gte": sid,
+		},
+		"lteid": map[string]interface{}{
+			"$lte": eid,
+		},
+	}
+	task_coll := "bidding_processing_ids"
+	dataprocess["updatetime"] = time.Now().Unix()
+	datas, _ := bid_mgo.Find(task_coll, query, nil, nil)
+	if len(datas) > 0 {
+		log.Println("开始更新流程段落记录~~", len(datas), "段")
+		for _, v := range datas {
+			up_id := BsonTOStringId(v["_id"])
+			if up_id != "" {
+				update := map[string]interface{}{
+					"$set": dataprocess,
+				}
+				bid_mgo.UpdateById(task_coll, up_id, update)
+				log.Println("流程段落记录~~更新完毕~", update)
+			}
+		}
+	} else {
+		log.Println("未查询到记录id段落~", query)
+	}
+}
+
+// getRepeatTask is the long-running consumer of taskList. It repeatedly takes
+// the next id range from the queue and runs startCheckData on it, sleeping ten
+// seconds whenever the queue is empty.
+//
+// getasklock is still held around each run, as before. The queue itself is
+// now accessed under udplock — the lock processUdpMsg appends under — and
+// that lock is released before the potentially long check run starts, so
+// incoming UDP tasks are never blocked by processing.
+func getRepeatTask() {
+	for {
+		sid, eid, ok := popTaskRange()
+		if !ok {
+			time.Sleep(10 * time.Second)
+			continue
+		}
+		getasklock.Lock()
+		startCheckData(sid, eid)
+		getasklock.Unlock()
+	}
+}
+
+// popTaskRange removes work from taskList and returns the id range to check.
+//
+// With several queued tasks it merges them into one range — the first task's
+// sid to the last task's eid — and empties the queue. If either end of that
+// merged range is empty, or only one task is queued, it pops just the first
+// task. ok is false when there is nothing to process.
+func popTaskRange() (sid, eid string, ok bool) {
+	udplock.Lock()
+	defer udplock.Unlock()
+	pending := len(taskList)
+	if pending == 0 {
+		return "", "", false
+	}
+	if pending > 1 {
+		first_id := qu.ObjToString(taskList[0]["sid"])
+		end_id := qu.ObjToString(taskList[pending-1]["eid"])
+		if first_id != "" && end_id != "" {
+			taskList = taskList[pending:]
+			log.Println("合并段落~正常~", first_id, "~", end_id, "~剩余任务池~", len(taskList), taskList)
+			return first_id, end_id, true
+		}
+		log.Println("合并段落~错误~正常取段落~~~")
+	}
+	mapInfo := taskList[0]
+	taskList = taskList[1:]
+	if mapInfo == nil {
+		// Drop a nil entry instead of leaving it at the head, where it
+		// would be re-read forever without ever being consumed.
+		return "", "", false
+	}
+	log.Println("获取任务段处理中~~~剩余任务池~~~", len(taskList), taskList)
+	return qu.ObjToString(mapInfo["sid"]), qu.ObjToString(mapInfo["eid"]), true
+}
+
+// lastUdpJob is a watchdog loop. Every 300 seconds it checks whether
+// lastNodeResponse is at least 1800 seconds old; if so it resets the
+// timestamp and sends an alert mail through sendErrMailApi.
+func lastUdpJob() {
+	for {
+		// Decide and reset under the lock, but send the mail after
+		// releasing it so a slow mail call cannot stall other users of
+		// responselock.
+		responselock.Lock()
+		timedOut := time.Now().Unix()-lastNodeResponse >= 1800
+		if timedOut {
+			lastNodeResponse = time.Now().Unix() // restart the timeout window
+		}
+		responselock.Unlock()
+		if timedOut {
+			sendErrMailApi("数据清洗~发现处理流程超时~给予告警", fmt.Sprintf("半小时左右~无新段落数据进入清洗增量流程...相关人员检查..."))
+		}
+		time.Sleep(300 * time.Second)
+	}
+}