Browse Source

大模型适配新分包结构。。。清洗

zhengkun 9 months ago
parent
commit
bffd750ef0
3 changed files with 215 additions and 114 deletions
  1. 48 2
      src/check.go
  2. 166 111
      src/check_ai.go
  3. 1 1
      src/initvar.go

+ 48 - 2
src/check.go

@@ -1,8 +1,10 @@
 package main
 
 import (
+	"github.com/uuid"
 	"log"
 	qu "qfw/util"
+	"strings"
 	"sync"
 )
 
@@ -39,7 +41,11 @@ func startCheckData(sid, eid string) {
 			getCheckDataPublishtime(tmp, update_info)
 			is_unset := false
 			if update_ai { //审查-大模型与抽取
-				is_unset = getCheckDataAI(tmp, update_info)
+				is_unset = ChooseCheckDataAI(tmp, &update_info)
+				if update_info["com_package"] == nil { //构建单包信息···
+					com_package := CreatSingleFieldInfo(tmp, update_info)
+					update_info["com_package"] = com_package
+				}
 			}
 			//最终计算是否清洗
 			if len(update_info) > 0 {
@@ -51,7 +57,7 @@ func startCheckData(sid, eid string) {
 			if is_unset {
 				UpdateTask.updatePool <- []map[string]interface{}{
 					update_id,
-					{"$unset": unset_check},
+					{"$unset": Unset_Check},
 				}
 			}
 		}(tmp, update_id)
@@ -61,3 +67,43 @@ func startCheckData(sid, eid string) {
 	log.Println("data_clean is over ", total)
 	sendNextNode(sid, eid)
 }
+// CreatSingleFieldInfo builds a one-element package list from tmp and update_info, preferring update_info values where both are set.
+func CreatSingleFieldInfo(tmp map[string]interface{}, update_info map[string]interface{}) []map[string]interface{} {
+	pkgArr := []map[string]interface{}{}
+	com_package := map[string]interface{}{}
+	package_id := uuid.New().String()
+	package_id = strings.ReplaceAll(package_id, "-", "") // the id is stored without dashes
+	com_package["package_id"] = package_id
+	com_package["name"] = qu.ObjToString(tmp["projectname"]) // NOTE(review): always read from tmp; update_info["projectname"] is not consulted — confirm intended
+
+	if update_info["budget"] != nil {
+		com_package["budget"] = update_info["budget"]
+	} else {
+		if tmp["budget"] != nil {
+			com_package["budget"] = tmp["budget"]
+		}
+	}
+	subtype := qu.ObjToString(update_info["subtype"])
+	if subtype == "" {
+		subtype = qu.ObjToString(tmp["subtype"])
+	}
+	if subtype == "单一" || subtype == "中标" || subtype == "成交" || subtype == "合同" { // bidamount and winner are copied only for these four subtypes
+		if update_info["bidamount"] != nil {
+			com_package["bidamount"] = update_info["bidamount"]
+		} else {
+			if tmp["bidamount"] != nil {
+				com_package["bidamount"] = tmp["bidamount"]
+			}
+		}
+
+		if update_info["winner"] != nil {
+			com_package["winner"] = update_info["winner"]
+		} else {
+			if tmp["winner"] != nil {
+				com_package["winner"] = tmp["winner"]
+			}
+		}
+	}
+	pkgArr = append(pkgArr, com_package)
+	return pkgArr
+}

+ 166 - 111
src/check_ai.go

@@ -6,36 +6,98 @@ import (
 	"strings"
 )
 
+// ChooseCheckDataAI merges the ai_zhipu values found on tmp into update_info and reports whether tmp still holds a field listed in Unset_Check that should be unset.
+func ChooseCheckDataAI(tmp map[string]interface{}, update_info *map[string]interface{}) bool {
+	if tmp["ai_zhipu"] == nil {
+		return false
+	}
+	// Holds the original extracted values that get replaced.
+	ext_ai_record := map[string]interface{}{}
+	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"]) // NOTE(review): dereferenced without a nil check; panics if ObjToMap returns nil here — confirm its contract
+	// Choose the classification.
+	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, &ext_ai_record)
+	// Choose the core fields.
+	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
+	// Choose the package data.
+	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, &ext_ai_record)
+	// Final amount correction and selection.
+	ChooseTheBestAmountField(tmp, update_info)
+
+	// Store the record of replaced values alongside the update.
+	(*update_info)["ext_ai_record"] = ext_ai_record
+
+	// Cross-classification check: decide whether the result-type fields must be removed.
+	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+
+	} else {
+		for k, _ := range Unset_Check {
+			if tmp[k] != nil { // tmp still carries a field named in Unset_Check
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+// ChooseTheBestClassField picks the toptype/subtype pair to use, writes a changed pair into update_info (recording the old values in ext_ai_record), and returns the pair.
+func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) (string, string) {
+	// Classification fields.
+	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
+	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
+	if ns_toptype != s_toptype || ns_subtype != s_subtype { // the check returned a different pair; keep it in the record
+		(*ext_ai_record)["s_toptype"] = ns_toptype
+		(*ext_ai_record)["s_subtype"] = ns_subtype
+	}
+	// Assignment.
+	s_toptype, s_subtype = ns_toptype, ns_subtype
+	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" { // for these two toptypes the values already on tmp are returned and nothing is written
+		s_toptype = qu.ObjToString(tmp["toptype"])
+		s_subtype = qu.ObjToString(tmp["subtype"])
+	} else {
+		if s_toptype != "" && s_subtype != "" { // both parts present: write them and remember the previous values
+			(*update_info)["toptype"] = s_toptype
+			(*update_info)["subtype"] = s_subtype
+			(*ext_ai_record)["toptype"] = tmp["toptype"]
+			(*ext_ai_record)["subtype"] = tmp["subtype"]
+		} else { // incomplete pair: fall back to the values on tmp
+			s_toptype = qu.ObjToString(tmp["toptype"])
+			s_subtype = qu.ObjToString(tmp["subtype"])
+		}
+	}
+	return s_toptype, s_subtype
+}
+
 // 选取字段
-func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) {
 	//基础字段···
 	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
-		update_info["buyer"] = s_buyer
-		ext_ai_record["buyer"] = tmp["buyer"]
+		(*update_info)["buyer"] = s_buyer
+		(*ext_ai_record)["buyer"] = tmp["buyer"]
 		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
-			delete(update_info, "buyer")
-			delete(ext_ai_record, "buyer")
+			delete((*update_info), "buyer")
+			delete((*ext_ai_record), "buyer")
 		}
 	}
 	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
-		update_info["projectname"] = s_projectname
-		ext_ai_record["projectname"] = tmp["projectname"]
+		(*update_info)["projectname"] = s_projectname
+		(*ext_ai_record)["projectname"] = tmp["projectname"]
 	}
 	if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
-		update_info["projectcode"] = s_projectcode
-		ext_ai_record["projectcode"] = tmp["projectcode"]
+		(*update_info)["projectcode"] = s_projectcode
+		(*ext_ai_record)["projectcode"] = tmp["projectcode"]
 	}
 	if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
-		update_info["budget"] = s_budget
-		ext_ai_record["budget"] = tmp["budget"]
+		(*update_info)["budget"] = s_budget
+		(*ext_ai_record)["budget"] = tmp["budget"]
 	}
 	//地域字段···
 	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
 	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
 	if s_area != "" && s_area != "全国" {
-		update_info["area"] = s_area
+		(*update_info)["area"] = s_area
 		if s_city != "" {
-			update_info["city"] = s_city
+			(*update_info)["city"] = s_city
 			if o_district != "" {
 				//判断抽取的区县是否合理···
 				isT := false
@@ -48,35 +110,35 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 					}
 				}
 				if !isT {
-					update_info["district"] = ""
+					(*update_info)["district"] = ""
 				}
 			}
 		} else {
 			if o_area != s_area {
-				update_info["city"] = ""
-				update_info["district"] = ""
+				(*update_info)["city"] = ""
+				(*update_info)["district"] = ""
 			}
 		}
-		ext_ai_record["area"] = tmp["area"]
-		ext_ai_record["city"] = tmp["city"]
-		ext_ai_record["district"] = tmp["district"]
+		(*ext_ai_record)["area"] = tmp["area"]
+		(*ext_ai_record)["city"] = tmp["city"]
+		(*ext_ai_record)["district"] = tmp["district"]
 	}
 	//先用外围字段替换
 	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" || s_subtype == "单一" {
 		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
-			update_info["bidamount"] = s_bidamount
-			ext_ai_record["bidamount"] = tmp["bidamount"]
+			(*update_info)["bidamount"] = s_bidamount
+			(*ext_ai_record)["bidamount"] = tmp["bidamount"]
 		}
 		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
-			update_info["s_winner"] = s_winner
-			ext_ai_record["s_winner"] = tmp["s_winner"]
-			update_info["winner"] = s_winner
-			ext_ai_record["winner"] = tmp["winner"]
+			(*update_info)["s_winner"] = s_winner
+			(*ext_ai_record)["s_winner"] = tmp["s_winner"]
+			(*update_info)["winner"] = s_winner
+			(*ext_ai_record)["winner"] = tmp["winner"]
 			//对于winner来说...规则值有包含关系,采用规则值
 			if winner := qu.ObjToString(tmp["winner"]); winner != "" {
 				if strings.Contains(s_winner, winner) {
-					delete(update_info, "winner")
-					delete(ext_ai_record, "winner")
+					delete((*update_info), "winner")
+					delete((*ext_ai_record), "winner")
 				}
 			}
 		}
@@ -112,112 +174,90 @@ func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s
 	}
 }
 
-// 选取分类
-func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) (string, string) {
-	//分类字段···
-	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
-	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
-	if ns_toptype != s_toptype || ns_subtype != s_subtype {
-		ext_ai_record["s_toptype"] = ns_toptype
-		ext_ai_record["s_subtype"] = ns_subtype
-	}
-	//赋值···
-	s_toptype, s_subtype = ns_toptype, ns_subtype
-	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
-		s_toptype = qu.ObjToString(tmp["toptype"])
-		s_subtype = qu.ObjToString(tmp["subtype"])
-	} else {
-		if s_toptype != "" && s_subtype != "" {
-			update_info["toptype"] = s_toptype
-			update_info["subtype"] = s_subtype
-			ext_ai_record["toptype"] = tmp["toptype"]
-			ext_ai_record["subtype"] = tmp["subtype"]
-		} else {
-			s_toptype = qu.ObjToString(tmp["toptype"])
-			s_subtype = qu.ObjToString(tmp["subtype"])
-		}
-	}
-	return s_toptype, s_subtype
-}
-
 // 选取分包
-func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info *map[string]interface{}, ext_ai_record *map[string]interface{}) { // chooses package data and package-level field overrides from ai_zhipu["s_pkg"]
 	//新分包判定···com_package - 默认大模型分包可信
 	if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
-		com_package := (*s_pkg)["com_package"]
-		update_info["com_package"] = com_package
-		//是否替换外围字段···
-		if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 {
-			update_info["budget"] = p_budget
-			ext_ai_record["budget"] = tmp["budget"]
-		}
-		if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
-			if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
-				update_info["s_winner"] = p_winner
-				update_info["winner"] = p_winner
-				ext_ai_record["s_winner"] = tmp["s_winner"]
-				ext_ai_record["winner"] = tmp["winner"]
+		com_package := IsMarkInterfaceMap((*s_pkg)["com_package"]) // presumably converts the raw value into a slice of package maps — confirm against IsMarkInterfaceMap
+		// Decide whether to replace the top-level fields, depending on whether the Zhipu top-level values were already extracted.
+		if len(com_package) > 1 { // multiple packages: package-level values overwrite the top-level fields
+			// Validate the package structure.
+			new_com_package := staffAiPackageInfo(com_package, s_toptype, s_subtype)
+			(*update_info)["com_package"] = new_com_package
+
+			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 {
+				(*update_info)["budget"] = p_budget
+				(*ext_ai_record)["budget"] = tmp["budget"]
+			}
+			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
+					(*update_info)["s_winner"] = p_winner
+					(*update_info)["winner"] = p_winner
+					(*ext_ai_record)["s_winner"] = tmp["s_winner"]
+					(*ext_ai_record)["winner"] = tmp["winner"]
+
+					// For winner: when the rule-extracted value is contained in the model value, keep the rule value (used for deduplication).
+					if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+						if strings.Contains(p_winner, winner) {
+							delete((*update_info), "winner")
+							delete((*ext_ai_record), "winner")
+						}
+					}
+				}
+				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
+					(*update_info)["bidamount"] = p_bidamount
+					(*ext_ai_record)["bidamount"] = tmp["bidamount"]
+				}
 			}
-			if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
-				update_info["bidamount"] = p_bidamount
-				ext_ai_record["bidamount"] = tmp["bidamount"]
+		} else if len(com_package) == 1 { // single package: only fills fields not already set in update_info; com_package itself is not written here
+			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 && (*update_info)["budget"] == nil {
+				(*update_info)["budget"] = p_budget
+				(*ext_ai_record)["budget"] = tmp["budget"]
 			}
+			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" && (*update_info)["s_winner"] == nil {
+					(*update_info)["s_winner"] = p_winner
+					(*update_info)["winner"] = p_winner
+					(*ext_ai_record)["s_winner"] = tmp["s_winner"]
+					(*ext_ai_record)["winner"] = tmp["winner"]
+
+					// For winner: when the rule-extracted value is contained in the model value, keep the rule value (used for deduplication).
+					if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+						if strings.Contains(p_winner, winner) {
+							delete((*update_info), "winner")
+							delete((*ext_ai_record), "winner")
+						}
+					}
+				}
+				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 && (*update_info)["bidamount"] == nil {
+					(*update_info)["bidamount"] = p_bidamount
+					(*ext_ai_record)["bidamount"] = tmp["bidamount"]
+				}
+			}
+		} else { // no packages: nothing is written
+
 		}
 	}
 }
 
 // 选取金额
-func ChooseTheBestAmountField(tmp map[string]interface{}, update_info map[string]interface{}) {
-	if r_budget := qu.Float64All(update_info["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
+func ChooseTheBestAmountField(tmp map[string]interface{}, update_info *map[string]interface{}) { // re-selects budget/bidamount in update_info when they differ from tmp by a factor of exactly 10000
+	if r_budget := qu.Float64All((*update_info)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 { // only chosen values strictly between 0 and 1e9 are examined
 		if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
 			if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
-				update_info["budget"] = filterAmount(r_budget, o_budget)
+				(*update_info)["budget"] = filterAmount(r_budget, o_budget) // NOTE(review): the ratio test above is an exact float comparison; near-10000 ratios are not caught — confirm intended
 			}
 		}
 	}
-	if r_bidamount := qu.Float64All(update_info["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+	if r_bidamount := qu.Float64All((*update_info)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 { // same check as for budget, applied to bidamount
 		if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
 			if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
-				update_info["bidamount"] = filterAmount(r_bidamount, o_bidamount)
+				(*update_info)["bidamount"] = filterAmount(r_bidamount, o_bidamount) // filterAmount chooses between the two candidate amounts
 			}
 		}
 	}
 }
 
-// 大模型与抽取数据合并计算
-func getCheckDataAI(tmp map[string]interface{}, update_info map[string]interface{}) bool {
-	if tmp["ai_zhipu"] == nil {
-		return false
-	}
-	//记录抽取原值
-	ext_ai_record := map[string]interface{}{}
-	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
-	//选取分类
-	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, ext_ai_record)
-	//选取字段
-	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
-	//选取分包
-	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
-	//最终金额修正与选取
-	ChooseTheBestAmountField(tmp, update_info)
-
-	//字段记录
-	update_info["ext_ai_record"] = ext_ai_record
-
-	//跨分类是否删除结果类字段
-	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
-
-	} else {
-		for k, _ := range unset_check {
-			if tmp[k] != nil {
-				return true
-			}
-		}
-	}
-
-	return false
-}
-
 // 筛选金额
 func filterAmount(f1 float64, f2 float64) float64 {
 	//选取一个合适的金额 ...
@@ -268,6 +308,21 @@ func staffInfo(pkg map[string]interface{}) bool {
 	return true
 }
 
+// staffAiPackageInfo returns com_package unchanged for the four result subtypes; otherwise it returns the packages with "winner" and "bidamount" deleted. s_toptype is not used.
+func staffAiPackageInfo(com_package []map[string]interface{}, s_toptype string, s_subtype string) []map[string]interface{} {
+	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+		return com_package
+	}
+	new_com_package := []map[string]interface{}{}
+	for _, v := range com_package {
+		delete(v, "winner") // NOTE(review): v is the caller's map, so the input packages are modified in place — confirm callers expect this
+		delete(v, "bidamount")
+		new_com_package = append(new_com_package, v)
+	}
+	return new_com_package
+}
+
+// Verify the classification.
 func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
 	toptype_rule := qu.ObjToString(data["toptype"])
 	subtype_rule := qu.ObjToString(data["subtype"])

+ 1 - 1
src/initvar.go

@@ -35,7 +35,7 @@ var (
 	S_CityDict              map[string][]S_City     //城市-map
 	S_DistrictDict          map[string][]S_District //区县-map
 	//删除字段
-	unset_check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
+	Unset_Check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
 	//任务锁
 	udplock, getasklock sync.Mutex
 	taskList            []map[string]interface{}