Browse Source

分包结构构建

zhengkun 1 year ago
parent
commit
d16ee3fc7d
8 changed files with 213 additions and 219 deletions
  1. 34 0
      ai/ai_baidu.go
  2. 14 25
      clean/c_all.go
  3. 7 10
      extract/extract.go
  4. 2 1
      main.go
  5. 0 182
      prompt/prompt.go
  6. 97 0
      prompt/prompt_class.go
  7. 8 0
      prompt/prompt_field.go
  8. 51 1
      prompt/prompt_package.go

+ 34 - 0
ai/ai_baidu.go

@@ -41,3 +41,37 @@ func PostBaiDuAI(content string) map[string]interface{} {
 	json.Unmarshal([]byte(result), &res)
 	return res
 }
+
+// PostZhiPuInfo sends content to PostZhiPuAI for the peripheral fields, retrying until a non-empty map is returned or 5 attempts have been made; the last result (possibly empty) is returned.
+func PostZhiPuInfo(content string) map[string]interface{} {
+	zp, ok := map[string]interface{}{}, 0 // ok counts attempts; it is not a success flag
+	for {
+		ok++
+		if zp = PostZhiPuAI(content); len(zp) > 0 {
+			break
+		}
+		if ok >= 5 {
+			break
+		}
+	}
+	return zp
+}
+
+// PostZhiPuClassInfo sends content to PostClassZhiPuAI for the classification fields, up to 5 times; it returns the first non-empty map with true, or the last result with false when every attempt came back empty.
+func PostZhiPuClassInfo(content string) (map[string]interface{}, bool) {
+	zp := map[string]interface{}{}
+	times := 0
+	ok := false
+	for {
+		times++
+		zp = PostClassZhiPuAI(content)
+		if len(zp) > 0 {
+			ok = true
+			break
+		}
+		if times >= 5 {
+			break
+		}
+	}
+	return zp, ok
+}

+ 14 - 25
clean/c_all.go

@@ -3,7 +3,6 @@ package clean
 import (
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"regexp"
-	"unicode/utf8"
 )
 
 // 通用清洗方法
@@ -14,47 +13,36 @@ var (
 )
 
 func CleanFieldInfo(zhipu map[string]interface{}, pkg map[string]interface{}) map[string]interface{} {
-	update := map[string]interface{}{}
+	data := map[string]interface{}{} // cleaned values, stored under the s_-prefixed keys set below
 	if s_area, s_city := CleanRegion(qu.ObjToString(zhipu["省份"]), qu.ObjToString(zhipu["城市"])); s_area != "" || s_city != "" {
-		update["s_area"] = s_area
-		update["s_city"] = s_city
+		data["s_area"] = s_area
+		data["s_city"] = s_city
 	}
 	if s_buyer := CleanBuyer(qu.ObjToString(zhipu["采购单位"])); s_buyer != "" {
-		update["s_buyer"] = s_buyer
+		data["s_buyer"] = s_buyer
 	}
 	if s_pname := CleanPname(qu.ObjToString(zhipu["项目名称"])); s_pname != "" {
-		update["s_pname"] = s_pname
+		data["s_pname"] = s_pname
 	}
 	if s_pcode := CleanPcode(qu.ObjToString(zhipu["项目编号"])); s_pcode != "" {
-		update["s_pcode"] = s_pcode
+		data["s_pcode"] = s_pcode
 	}
 	if s_budget := CleanMoney(zhipu["预算金额"]); s_budget > 0.0 && s_budget < 1000000000.0 {
-		update["s_budget"] = s_budget
+		data["s_budget"] = s_budget
 	}
 	if s_bidamount := CleanMoney(zhipu["中标金额"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
-		update["s_bidamount"] = s_bidamount
+		data["s_bidamount"] = s_bidamount
 	}
 	if s_winner := CleanWinner(qu.ObjToString(zhipu["中标单位"])); s_winner != "" {
-		update["s_winner"] = s_winner
+		data["s_winner"] = s_winner
 	}
 	if s_biddiscount := CleanBiddiscount(qu.ObjToString(zhipu["投标折扣系数"])); s_biddiscount != "" {
-		update["s_biddiscount"] = s_biddiscount
+		data["s_biddiscount"] = s_biddiscount
 	}
 
-	// For sub-packages: let the package-level winner/amount override the top-level values
-	if len(pkg) > 0 {
-		if s_winner := qu.ObjToString(pkg["s_winner"]); s_winner != "" {
-			if utf8.RuneCountInString(s_winner) > utf8.RuneCountInString(qu.ObjToString(update["s_winner"])) {
-				update["s_winner"] = s_winner
-			}
-		}
-		if s_bidamount := qu.Float64All(pkg["s_bidamount"]); s_bidamount > 0.0 {
-			if s_bidamount > qu.Float64All(update["s_bidamount"]) {
-				update["s_bidamount"] = s_bidamount
-			}
-		}
-	}
-	return update
+	data["s_pkg"] = pkg // stored unconditionally and unmodified, even when pkg is nil or empty
+
+	return data
 }
 
 // 最终逻辑校验
@@ -67,6 +55,7 @@ func CleanFinallyInfo(data map[string]interface{}) map[string]interface{} {
 			delete(data, "s_budget")
 		}
 	}
+	//删除不删除均可···与抽取值进行合并时判断也行···
 	if s_subtype == "单一" || s_subtype == "合同" || s_subtype == "中标" || s_subtype == "成交" || (s_subtype == "" && s_toptype == "") {
 
 	} else {

+ 7 - 10
extract/extract.go

@@ -59,9 +59,9 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 		pkg = prompt.AcquireMultiplePackageInfo(detail)
 	}
 	//获取外围字段数据
-	info := prompt.AcquireExtractFieldInfo(detail)
+	f_info := prompt.AcquireExtractFieldInfo(detail)
 	//外围字段清洗
-	data := clean.CleanFieldInfo(info, pkg)
+	f_data := clean.CleanFieldInfo(f_info, pkg)
 	//获取分类字段数据
 	s_toptype, s_subtype := "", ""
 	if qu.ObjToString(v["toptype"]) == "拟建" {
@@ -70,16 +70,13 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 		s_toptype, s_subtype = prompt.AcquireClassInfo(detail, title)
 	}
 	if s_toptype != "" {
-		data["s_toptype"] = s_toptype
-		data["s_subtype"] = s_subtype
+		f_data["s_toptype"] = s_toptype
+		f_data["s_subtype"] = s_subtype
 	}
-	//临时···记录分包信息
-	data["ispkg"] = ispkg
-	data["pkg"] = pkg
-	//最终逻辑校验
-	data = clean.CleanFinallyInfo(data)
+	//此次最终校验
+	f_data = clean.CleanFinallyInfo(f_data)
 
-	return data
+	return f_data
 }
 
 // 暂时不启用...无限重试

+ 2 - 1
main.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"data_ai/extract"
 	"data_ai/udp"
 	"data_ai/ul"
 )
@@ -16,7 +17,7 @@ func main() {
 	//extract.TestPackageInfo()
 	//extract.ExtractFieldInfo("ai_294", "ai_294_zhipu_test")
 	//extract.ExtractFieldInfo("ai_41411", "ai_41411_zhipu")
-	//extract.TestSingleFieldInfo("zktest_ai_107", "6699292566cf0db42a555c57")
+	extract.TestSingleFieldInfo("ai_41411", "669e86d066cf0db42a652745")
 
 	lock := make(chan bool)
 	<-lock

+ 0 - 182
prompt/prompt.go

@@ -1,182 +0,0 @@
-package prompt
-
-import (
-	"data_ai/ai"
-	"data_ai/clean"
-	"data_ai/ul"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"strings"
-)
-
-// AcquireClassInfo asks the model for the top-level class of a notice, normalizes it via ul.ToptypeDict (falling back to ul.ToptypeRegs), and queries the subtype when the dict entry does not fix one; it returns ("", "") when a request fails or the top-level class cannot be normalized.
-func AcquireClassInfo(detail string, title string) (string, string) {
-	top_content := PromptToptypeFieldText(detail, title)
-	top_zp, ok := PostZhiPuClassInfo(top_content)
-	if !ok {
-		return "", ""
-	}
-	// Derive toptype and subtype from the recognized top-level class
-	var toptype, subtype string // normalized top-level and second-level classes
-	toptype_ai, _ := top_zp["信息分类"].(string)
-	tpInfo := ul.ToptypeDict[toptype_ai]
-	if tpInfo != nil {
-		toptype = tpInfo.Topetype
-		subtype = tpInfo.Subtype
-	} else { // the answer is not a canonical toptype, e.g. "询价公告" (inquiry notice, 66993a9d66cf0db42a5597af)
-		for top, reg := range ul.ToptypeRegs {
-			if reg.MatchString(toptype_ai) {
-				if tpInfo = ul.ToptypeDict[top]; tpInfo != nil { // for the pre-announcement, acceptance and contract notices in ToptypeRegs the subtype is taken directly
-					toptype = tpInfo.Topetype
-					subtype = tpInfo.Subtype
-					break
-				}
-			}
-		}
-	}
-	if toptype == "" || tpInfo == nil {
-		return "", ""
-	}
-	// Get the second-level class
-	sub_zp := map[string]interface{}{}
-	if subtype == "" {
-		sub_content := PromptSubtypeFieldText(detail, title, toptype, tpInfo)
-		sub_zp, ok = PostZhiPuClassInfo(sub_content)
-		if !ok {
-			return "", ""
-		}
-		subtype_ai, _ := sub_zp["信息分类"].(string)
-		subtype = tpInfo.SubtypeDict[subtype_ai]
-		if subtype == "" && tpInfo.SubtypeRegs != nil { // second-level class correction by regex
-			for sub, reg := range tpInfo.SubtypeRegs {
-				if reg.MatchString(subtype_ai) {
-					subtype = sub
-					break
-				}
-			}
-		}
-	}
-	// subtype may still be empty at this point; fall back to the default value
-	if subtype == "" {
-		subtype = "其它"
-	}
-	return toptype, subtype
-}
-
-// CheckClassByOtherFileds reconciles the AI classification (toptype_ai, subtype_ai) with the rule-based one in data["toptype"]/data["subtype"], using data["winnerorder"], data["s_winner"] and data["bidamount"] as evidence, and returns the pair to use.
-func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
-	toptype_rule := qu.ObjToString(data["toptype"])
-	subtype_rule := qu.ObjToString(data["subtype"])
-	// 1. Result class: correct confusion between "中标" and "成交"
-	s_winner := qu.ObjToString(data["s_winner"])
-	winnerorder, _ := data["winnerorder"].([]interface{})
-	if toptype_ai == "结果" && toptype_rule == "结果" {
-		if (subtype_ai == "中标" && subtype_rule == "成交") || (subtype_ai == "成交" && subtype_rule == "中标") {
-			if len(winnerorder) > 0 { // winning candidates present -> "中标"
-				return toptype_ai, "中标"
-			}
-			if s_winner != "" || data["bidamount"] != nil {
-				return toptype_ai, "成交"
-			}
-		}
-	}
-	// 2. Correct confusion between the tender and result classes
-	if toptype_ai != "结果" && toptype_rule == "结果" {
-		//return toptype_rule,subtype_rule // the rule result prevails by default
-		if len(winnerorder) > 0 { // winning candidates present -> "中标"
-			//return toptype_rule, "中标" // should subtype be "中标" here?
-			return toptype_rule, subtype_rule // the rule result is taken as correct
-		} else if s_winner != "" || data["bidamount"] != nil {
-			return toptype_rule, subtype_rule
-		} else {
-			return toptype_ai, subtype_ai
-		}
-	} else if toptype_ai == "结果" && toptype_rule != "结果" {
-		//return toptype_rule,subtype_rule // the rule result prevails by default
-		if len(winnerorder) > 0 { // winning candidates present -> "中标"
-			return toptype_ai, "中标" // force subtype "中标" here to guard against AI misclassification
-		} else if s_winner != "" || data["bidamount"] != nil {
-			return toptype_ai, "成交" // force subtype "成交" here to guard against AI misclassification
-		} else {
-			return toptype_ai, subtype_ai
-		}
-	}
-	return toptype_ai, subtype_ai
-}
-
-// AcquireExtractFieldInfo builds the peripheral-field prompt for detail with PromptFieldText and returns the map from PostZhiPuInfo unchanged.
-func AcquireExtractFieldInfo(detail string) map[string]interface{} {
-	content := PromptFieldText(detail)
-	zp := PostZhiPuInfo(content)
-	return zp
-}
-
-// AcquireIsPackageInfo reports whether the model's answer to the PromptIsPackageText prompt has the "分包" field equal to "是".
-func AcquireIsPackageInfo(detail string) bool {
-	content := PromptIsPackageText(detail)
-	zp := PostZhiPuInfo(content)
-	if qu.ObjToString(zp["分包"]) == "是" {
-		return true
-	}
-	return false
-}
-
-// AcquireMultiplePackageInfo requests the multi-package info for a notice and returns the raw model answer; everything after the early return below is unreachable.
-func AcquireMultiplePackageInfo(detail string) map[string]interface{} {
-	content := PromptMultiplePackageText(detail)
-	zp := PostZhiPuInfo(content)
-	return zp
-	// Format conversion is to come later... for now only two values are produced
-	pkg := map[string]interface{}{}
-	s_winner, s_bidamount := "", 0.0
-	win_arr, win_temp := []string{}, map[string]string{}
-	if score := qu.Float64All(zp["分包信息score"]); score >= 90.0 {
-		pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
-		for _, v := range pkginfo {
-			winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
-			bidamount := clean.CleanMoney((v["中标金额"]))
-			s_bidamount += bidamount
-			if win_temp[winner] == "" && winner != "" {
-				win_arr = append(win_arr, winner)
-				win_temp[winner] = winner
-			}
-		}
-		s_winner = strings.Join(win_arr, ",")
-		pkg["s_winner"] = s_winner
-		pkg["s_bidamount"] = s_bidamount
-	}
-	return pkg
-}
-
-// PostZhiPuInfo sends content to ai.PostZhiPuAI for the peripheral fields, retrying until a non-empty map is returned or 5 attempts have been made; the last result (possibly empty) is returned.
-func PostZhiPuInfo(content string) map[string]interface{} {
-	zp, ok := map[string]interface{}{}, 0 // ok counts attempts; it is not a success flag
-	for {
-		ok++
-		if zp = ai.PostZhiPuAI(content); len(zp) > 0 {
-			break
-		}
-		if ok >= 5 {
-			break
-		}
-	}
-	return zp
-}
-
-// PostZhiPuClassInfo sends content to ai.PostClassZhiPuAI for the classification fields, up to 5 times; it returns the first non-empty map with true, or the last result with false when every attempt came back empty.
-func PostZhiPuClassInfo(content string) (map[string]interface{}, bool) {
-	zp := map[string]interface{}{}
-	times := 0
-	ok := false
-	for {
-		times++
-		zp = ai.PostClassZhiPuAI(content)
-		if len(zp) > 0 {
-			ok = true
-			break
-		}
-		if times >= 5 {
-			break
-		}
-	}
-	return zp, ok
-}

+ 97 - 0
prompt/prompt_class.go

@@ -1,11 +1,108 @@
 package prompt
 
 import (
+	"data_ai/ai"
 	"data_ai/ul"
 	"fmt"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"unicode/utf8"
 )
 
+// AcquireClassInfo asks the model for the top-level class of a notice, normalizes it via ul.ToptypeDict (falling back to ul.ToptypeRegs), and queries the subtype when the dict entry does not fix one; it returns ("", "") when a request fails or the top-level class cannot be normalized.
+func AcquireClassInfo(detail string, title string) (string, string) {
+	top_content := PromptToptypeFieldText(detail, title)
+	top_zp, ok := ai.PostZhiPuClassInfo(top_content)
+	if !ok {
+		return "", ""
+	}
+	// Derive toptype and subtype from the recognized top-level class
+	var toptype, subtype string // normalized top-level and second-level classes
+	toptype_ai, _ := top_zp["信息分类"].(string)
+	tpInfo := ul.ToptypeDict[toptype_ai]
+	if tpInfo != nil {
+		toptype = tpInfo.Topetype
+		subtype = tpInfo.Subtype
+	} else { // the answer is not a canonical toptype, e.g. "询价公告" (inquiry notice, 66993a9d66cf0db42a5597af)
+		for top, reg := range ul.ToptypeRegs {
+			if reg.MatchString(toptype_ai) {
+				if tpInfo = ul.ToptypeDict[top]; tpInfo != nil { // for the pre-announcement, acceptance and contract notices in ToptypeRegs the subtype is taken directly
+					toptype = tpInfo.Topetype
+					subtype = tpInfo.Subtype
+					break
+				}
+			}
+		}
+	}
+	if toptype == "" || tpInfo == nil {
+		return "", ""
+	}
+	// Get the second-level class
+	sub_zp := map[string]interface{}{}
+	if subtype == "" {
+		sub_content := PromptSubtypeFieldText(detail, title, toptype, tpInfo)
+		sub_zp, ok = ai.PostZhiPuClassInfo(sub_content)
+		if !ok {
+			return "", ""
+		}
+		subtype_ai, _ := sub_zp["信息分类"].(string)
+		subtype = tpInfo.SubtypeDict[subtype_ai]
+		if subtype == "" && tpInfo.SubtypeRegs != nil { // second-level class correction by regex
+			for sub, reg := range tpInfo.SubtypeRegs {
+				if reg.MatchString(subtype_ai) {
+					subtype = sub
+					break
+				}
+			}
+		}
+	}
+	// subtype may still be empty at this point; fall back to the default value
+	if subtype == "" {
+		subtype = "其它"
+	}
+	return toptype, subtype
+}
+
+// CheckClassByOtherFileds reconciles the AI classification (toptype_ai, subtype_ai) with the rule-based one in data["toptype"]/data["subtype"], using data["winnerorder"], data["s_winner"] and data["bidamount"] as evidence, and returns the pair to use.
+func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
+	toptype_rule := qu.ObjToString(data["toptype"])
+	subtype_rule := qu.ObjToString(data["subtype"])
+	// 1. Result class: correct confusion between "中标" and "成交"
+	s_winner := qu.ObjToString(data["s_winner"])
+	winnerorder, _ := data["winnerorder"].([]interface{})
+	if toptype_ai == "结果" && toptype_rule == "结果" {
+		if (subtype_ai == "中标" && subtype_rule == "成交") || (subtype_ai == "成交" && subtype_rule == "中标") {
+			if len(winnerorder) > 0 { // winning candidates present -> "中标"
+				return toptype_ai, "中标"
+			}
+			if s_winner != "" || data["bidamount"] != nil { // NOTE(review): reads "bidamount" while the winner key is "s_winner" — confirm the intended key
+				return toptype_ai, "成交"
+			}
+		}
+	}
+	// 2. Correct confusion between the tender and result classes
+	if toptype_ai != "结果" && toptype_rule == "结果" {
+		//return toptype_rule,subtype_rule // the rule result prevails by default
+		if len(winnerorder) > 0 { // winning candidates present -> "中标"
+			//return toptype_rule, "中标" // should subtype be "中标" here?
+			return toptype_rule, subtype_rule // the rule result is taken as correct
+		} else if s_winner != "" || data["bidamount"] != nil {
+			return toptype_rule, subtype_rule
+		} else {
+			return toptype_ai, subtype_ai
+		}
+	} else if toptype_ai == "结果" && toptype_rule != "结果" {
+		//return toptype_rule,subtype_rule // the rule result prevails by default
+		if len(winnerorder) > 0 { // winning candidates present -> "中标"
+			return toptype_ai, "中标" // force subtype "中标" here to guard against AI misclassification
+		} else if s_winner != "" || data["bidamount"] != nil {
+			return toptype_ai, "成交" // force subtype "成交" here to guard against AI misclassification
+		} else {
+			return toptype_ai, subtype_ai
+		}
+	}
+	return toptype_ai, subtype_ai
+}
+
 func PromptToptypeFieldText(detail, title string) string {
 	if utf8.RuneCountInString(detail) > ul.MaxLen {
 		detail = string([]rune(detail)[:ul.MaxLen])

+ 8 - 0
prompt/prompt_field.go

@@ -1,10 +1,18 @@
 package prompt
 
 import (
+	"data_ai/ai"
 	"data_ai/ul"
 	"unicode/utf8"
 )
 
+// AcquireExtractFieldInfo builds the peripheral-field prompt for detail with PromptFieldText and returns the map from ai.PostZhiPuInfo unchanged.
+func AcquireExtractFieldInfo(detail string) map[string]interface{} {
+	content := PromptFieldText(detail)
+	zp := ai.PostZhiPuInfo(content)
+	return zp
+}
+
 // 提示词优选
 func PromptFieldText(detail string) string {
 	if utf8.RuneCountInString(detail) > ul.MaxLen {

+ 51 - 1
prompt/prompt_package.go

@@ -1,10 +1,61 @@
 package prompt
 
 import (
+	"data_ai/ai"
+	"data_ai/clean"
 	"data_ai/ul"
+	"fmt"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"strings"
 	"unicode/utf8"
 )
 
+// AcquireIsPackageInfo reports whether the model's answer to the PromptIsPackageText prompt has the "分包" field equal to "是".
+func AcquireIsPackageInfo(detail string) bool {
+	content := PromptIsPackageText(detail)
+	zp := ai.PostZhiPuInfo(content)
+	if qu.ObjToString(zp["分包"]) == "是" {
+		return true
+	}
+	return false
+}
+
+// AcquireMultiplePackageInfo requests the multi-package info for a notice; when "分包信息score" is at least 90 it returns "s_winner" (distinct cleaned winners joined by ","), "s_bidamount" (sum of cleaned amounts) and "s_pkg" (per-package entries), otherwise an empty map.
+func AcquireMultiplePackageInfo(detail string) map[string]interface{} {
+	content := PromptMultiplePackageText(detail)
+	zp := ai.PostZhiPuInfo(content)
+	// Convert the model answer into the output structure
+	ai_pkg := map[string]interface{}{}
+	s_winner, s_bidamount, s_pkg := "", 0.0, map[string]map[string]interface{}{}
+	win_arr, win_temp := []string{}, map[string]string{}
+	if score := qu.Float64All(zp["分包信息score"]); score >= 90.0 {
+		pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
+		for k, v := range pkginfo { // one iteration per reported package
+			key := fmt.Sprintf("%d", k+1)
+			name := qu.ObjToString(v["标段/包号"])
+			winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
+			bidamount := clean.CleanMoney((v["中标金额"]))
+			// Per-package structure, keyed by the 1-based loop index as a string
+			s_pkg[key] = map[string]interface{}{
+				"name":      name,
+				"winner":    winner,
+				"bidamount": bidamount,
+			}
+			// Accumulate the total amount and the de-duplicated list of non-empty winners
+			s_bidamount += bidamount
+			if win_temp[winner] == "" && winner != "" {
+				win_arr = append(win_arr, winner)
+				win_temp[winner] = winner
+			}
+		}
+		s_winner = strings.Join(win_arr, ",")
+		ai_pkg["s_winner"] = s_winner
+		ai_pkg["s_bidamount"] = s_bidamount
+		ai_pkg["s_pkg"] = s_pkg
+	}
+	return ai_pkg
+}
+
 // 分包判断-提问词
 func PromptIsPackageText(detail string) string {
 	if utf8.RuneCountInString(detail) > ul.MaxLen {
@@ -30,7 +81,6 @@ func PromptMultiplePackageText(detail string) string {
 请根据提供的招投标文件内容,精确提取并整理以下信息,形成一个便于统计的JSON:
 {
 "分包信息":[{
-"包项目名称":(此处填写包项目名),
 "标段/包号":(形式如下但不限于:一标段、包一、I包、I标段,不可以填写项目编号),
 "中标单位":(中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
 "中标金额":(中标金额数值及单位,多个金额时请进行计算。非单价,如果是单价,则等于单价*数量),