zhengkun vor 9 Monaten
Ursprung
Commit
ad6aaba495
7 geänderte Dateien mit 83 neuen und 32 gelöschten Zeilen
  1. 1 1
      ai/ai_zhipu.go
  2. 0 2
      clean/c_all.go
  3. 1 1
      config.json
  4. 61 17
      extract/extract.go
  5. 1 1
      main.go
  6. 18 10
      prompt/prompt_field.go
  7. 1 0
      ul/attr.go

+ 1 - 1
ai/ai_zhipu.go

@@ -133,7 +133,7 @@ func PostClassZhiPuAI(content string) map[string]interface{} {
 	return map[string]interface{}{}
 }
 
-// 智谱清言-分字段
+// 智谱清言-分字段
 func PostPackageZhiPuAI(content string) map[string]interface{} {
 	// API的URL
 	apiURL := "https://open.bigmodel.cn/api/paas/v4/chat/completions"

+ 0 - 2
clean/c_all.go

@@ -55,7 +55,5 @@ func CleanFieldInfo(zhipu map[string]interface{}, fns []string) map[string]inter
 		}
 	}
 
-	//对于采购单位与中标单位一致的数据进行逻辑清洗
-
 	return data
 }

+ 1 - 1
config.json

@@ -20,7 +20,7 @@
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
-    "dbname" : "qfw_ai",
+    "dbname" : "qfw",
     "username": "zhengkun",
     "password": "zk@123123"
   },

+ 61 - 17
extract/extract.go

@@ -75,7 +75,10 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	title := qu.ObjToString(v["title"])
 	dl := utf8.RuneCountInString(detail) //文本长度
 	//过滤数据···
-	if strings.Contains(title, "开标记录") || v["jyfb_data"] != nil || dl < 20 {
+	if strings.Contains(title, "开标记录") || v["jyfb_data"] != nil {
+		return map[string]interface{}{}
+	}
+	if dl < 20 {
 		return map[string]interface{}{}
 	}
 	//识别结构,短文本结构
@@ -85,12 +88,17 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	}
 	//文本格式转换
 	detail = ul.HttpConvertToMarkdown(detail)
-	//获取外围字段数据
-	f_info_1 := prompt.AcquireExtractFieldInfoFirst(detail, shorText)
-	f_info_2 := prompt.AcquireExtractFieldInfoFirst(detail, shorText)
+	//短文本判断是否有效性
+	if shorText {
+		if info := prompt.AcquireJudgeShortInfo(detail); info["结果"] != "是" {
+			return map[string]interface{}{}
+		}
+	}
+	//获取外围字段数据-拆分合并字段
+	f_info_1 := prompt.AcquireExtractFieldInfoFirst(detail)
+	f_info_2 := prompt.AcquireExtractFieldInfoFirst(detail)
 	f_info := MergeInfo([]map[string]interface{}{f_info_1, f_info_2})
 
-	//非短文本
 	if !shorText {
 		//获取分包信息
 		if pkg := prompt.AcquireNewMultiplePackageInfo(detail); len(pkg) > 0 {
@@ -100,28 +108,26 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 		s_toptype, s_subtype := prompt.AcquireClassInfo(detail, title, qu.ObjToString(v["toptype"]))
 		f_info["s_toptype"] = s_toptype
 		f_info["s_subtype"] = s_subtype
-	}
 
-	//调用标的物识别
-	if p_list := getPurList(v, detail, f_info); len(p_list) > 0 {
-		f_info["purchasinglist"] = p_list
+		//调用标的物识别
+		if p_list := getPurList(v, detail, f_info); len(p_list) > 0 {
+			f_info["purchasinglist"] = p_list
+		}
 	}
 
 	//字段清洗
 	fns := getpnsinfo(v) //获取附件名字
 	f_data = clean.CleanFieldInfo(f_info, fns)
 
-	//对于某些字段进行二级校验
-	if s_buyer := qu.ObjToString(f_data["s_buyer"]); s_buyer != "" {
-		if zp_buyer := prompt.AcquireBuyerInfo(s_buyer); zp_buyer["实体单位"] != nil {
-			if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
-				f_data["s_buyer"] = ns_buyer
-			}
-		}
-	}
+	//采购单位二级校验
+	CheckOutBuyerInfo(f_data)
+
+	//强制逻辑判断-
+	ForcedLogicDecideInfo(f_data)
 	return f_data
 }
 
+// 确认抽取范围
 func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
 	dict := map[string]interface{}{}
 	sess := ul.SourceMgo.GetMgoConn()
@@ -158,6 +164,7 @@ func getpnsinfo(tmp map[string]interface{}) []string {
 	return arr
 }
 
+// 获取正文数据
 func getDetailText(v map[string]interface{}, tmpid string) string {
 	detail := qu.ObjToString(v["detail"])
 	if ul.IsTool {
@@ -196,6 +203,17 @@ func getPurList(v map[string]interface{}, detail string, f_info map[string]inter
 	return []map[string]interface{}{}
 }
 
+// CheckOutBuyerInfo re-checks the buyer stored in f_data["s_buyer"] and, when a cleaned replacement is available, overwrites it in place.
+func CheckOutBuyerInfo(f_data map[string]interface{}) {
+	if s_buyer := qu.ObjToString(f_data["s_buyer"]); s_buyer != "" { // nothing to verify when no buyer is present
+		if zp_buyer := prompt.AcquireBuyerInfo(s_buyer); zp_buyer["实体单位"] != nil { // only the "实体单位" key of the result is used
+			if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" { // keep the old value if cleaning yields an empty string
+				f_data["s_buyer"] = ns_buyer
+			}
+		}
+	}
+}
+
 // 合并字段
 func MergeInfo(infos []map[string]interface{}) map[string]interface{} {
 	info := map[string]interface{}{}
@@ -207,6 +225,32 @@ func MergeInfo(infos []map[string]interface{}) map[string]interface{} {
 	return info
 }
 
+// ForcedLogicDecideInfo enforces hard consistency rules on f_data in place: buyer and winner must differ, and the agency must differ from both.
+func ForcedLogicDecideInfo(f_data map[string]interface{}) {
+	//Original note: "原则大模型" (presumably: the model output is the baseline and is only overridden below; TODO confirm)
+	//The different unit fields must not hold the same name
+	s_buyer := qu.ObjToString(f_data["s_buyer"])
+	s_winner := qu.ObjToString(f_data["s_winner"])
+	if s_buyer == s_winner && s_buyer != "" {
+		/*
+			1. If the shared name does not contain "公司", keep the buyer and clear the winner
+			2. If the shared name contains "公司", keep the winner and clear the buyer
+		*/
+		if strings.Contains(s_buyer, "公司") {
+			f_data["s_buyer"] = ""
+		} else {
+			f_data["s_winner"] = ""
+		}
+	}
+
+	//Agency: cleared when it equals the buyer or winner; the comparison uses the values read above, before any clearing
+	if s_agency := qu.ObjToString(f_data["s_agency"]); s_agency != "" {
+		if s_agency == s_buyer || s_agency == s_winner {
+			f_data["s_agency"] = ""
+		}
+	}
+}
+
 // 暂时不启用...无限重试
 func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
 	//log.Debug("开始重置更新...", len(arr))

+ 1 - 1
main.go

@@ -31,7 +31,7 @@ func main() {
 		return
 	}
 
-	extract.TestSingleFieldInfo("bidding", "664a2a4066cf0db42a39fb02")
+	//extract.TestSingleFieldInfo("bidding", "6722de29b25c3e1debe624c9")
 
 	lock := make(chan bool)
 	<-lock

+ 18 - 10
prompt/prompt_field.go

@@ -8,8 +8,14 @@ import (
 )
 
 var pmt_field_prefix = `
-	
-`
+	请帮我判断公告内容有效还是无效:
+	有效性:文中出现实体单位
+    无效性:文中没有出现实体单位
+	json形式回答,急着要结果,避免解释要干脆利落:
+	{
+	"结果":"(是/否)",
+	}
+	公告:` + `%s` + "\n结果JSON:"
 
 var pmt_field1 = `
 你是一个文本处理模型,专门用于分析文本提取信息,你具备以下能力:
@@ -34,6 +40,7 @@ var pmt_field1 = `
 
 请回答我的问题,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
 正文内容:` + "\n" + `%s` + "\n结果JSON:"
+
 var pmt_field2 = `
 你是一个文本处理模型,专门用于分析文本提取信息,你具备以下能力:
 1、实体识别抽取
@@ -57,22 +64,23 @@ var pmt_field2 = `
 请回答我的问题,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
 正文内容:` + "\n" + `%s` + "\n结果JSON:"
 
+// AcquireJudgeShortInfo passes detail and the pmt_field_prefix template to PromptFieldText, sends the result to ai.PostZhiPuInfo, and returns that call's map unchanged.
+func AcquireJudgeShortInfo(detail string) map[string]interface{} {
+	content := PromptFieldText(detail, pmt_field_prefix) // the template asks for a JSON answer whose "结果" field is 是 or 否
+	zp := ai.PostZhiPuInfo(content)
+	return zp
+}
+
 // AcquireExtractFieldInfoFirst runs the first field-extraction pass: it builds the prompt from detail with pmt_field1 and returns the ai.PostZhiPuInfo result.
-func AcquireExtractFieldInfoFirst(detail string, shorText bool) map[string]interface{} {
+func AcquireExtractFieldInfoFirst(detail string) map[string]interface{} {
 	content := PromptFieldText(detail, pmt_field1)
-	if shorText {
-		content = PromptFieldText(detail, pmt_field1)
-	}
 	zp := ai.PostZhiPuInfo(content)
 	return zp
 }
 
 // AcquireExtractFieldInfoSecond runs the second field-extraction pass: it builds the prompt from detail with pmt_field2 and returns the ai.PostZhiPuInfo result.
-func AcquireExtractFieldInfoSecond(detail string, shorText bool) map[string]interface{} {
+func AcquireExtractFieldInfoSecond(detail string) map[string]interface{} { // NOTE(review): the visible ResolveInfo hunk calls AcquireExtractFieldInfoFirst twice and never this function; confirm that is intended
 	content := PromptFieldText(detail, pmt_field2)
-	if shorText {
-		content = PromptFieldText(detail, pmt_field2)
-	}
 	zp := ai.PostZhiPuInfo(content)
 	return zp
 }

+ 1 - 0
ul/attr.go

@@ -18,6 +18,7 @@ var (
 	IsTool, IsFull, IsLocal bool
 	Reading                 int
 	FlashModel              string
+	SpecialTextReg          = regexp.MustCompile("(原网页|见附件|下载附件|(查看|访问)(源网|原网)|详情请下载附件!|详情请访问原网页!)")
 )
 
 type ExtReg struct {