소스 검색

备份-deepseek新版解析

zhengkun 6 달 전
부모
커밋
d23cee4619
14개의 변경된 파일, 823개의 추가 그리고 371개의 삭제
  1. 152 152
      ai/ai_deekseek.go
  2. 306 0
      clean/c_deepseek.go
  3. 3 2
      clean/c_money.go
  4. 13 1
      config.json
  5. 12 1
      extract/extension.go
  6. 83 1
      extract/extract.go
  7. 105 147
      main.go
  8. 9 6
      prompt/prompt_buyer.go
  9. 2 11
      prompt/prompt_class.go
  10. 90 0
      prompt/prompt_deepseek.go
  11. 5 21
      prompt/prompt_field.go
  12. 1 5
      prompt/prompt_package.go
  13. 16 19
      ul/attr.go
  14. 26 5
      ul/init.go

+ 152 - 152
ai/ai_deekseek.go

@@ -8,13 +8,12 @@ import (
 	"io/ioutil"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
-	"strings"
 	"time"
 )
 
 var req_retry_deepseek = 1
 
-// 通用外围
+// 通用提示语···
 func PostDeepSeekAI(content string) map[string]interface{} {
 	// API的URL
 	apiURL := "https://api.deepseek.com/chat/completions"
@@ -26,8 +25,9 @@ func PostDeepSeekAI(content string) map[string]interface{} {
 	})
 	//glm-4-air	glm-4-0520  glm-4-flash
 	requestData := map[string]interface{}{
-		"model":    "deepseek-chat",
-		"messages": messages,
+		"model":       "deepseek-chat",
+		"temperature": 1,
+		"messages":    messages,
 	}
 	jsonData, _ := json.Marshal(requestData)
 	// 创建HTTP请求
@@ -39,7 +39,7 @@ func PostDeepSeekAI(content string) map[string]interface{} {
 	// 设置请求头
 	req.Header.Add("Content-Type", "application/json")
 	req.Header.Add("Accept", "application/json")
-	req.Header.Add("Authorization", "Bearer sk-832c259e371743a0bb7b280cb86ad806")
+	req.Header.Add("Authorization", "Bearer sk-7e40165d06a04f3c87e2fc15e69df625")
 
 	client := &http.Client{}
 	client.Timeout = 120 * time.Second
@@ -71,125 +71,125 @@ func PostDeepSeekAI(content string) map[string]interface{} {
 }
 
 // 分类字段
-func PostClassDeepSeekAI(content string) map[string]interface{} {
-	// API的URL
-	apiURL := "https://api.deepseek.com/chat/completions"
-	// 构造请求数据
-	messages := []map[string]interface{}{}
-	messages = append(messages, map[string]interface{}{
-		"role":    "user",
-		"content": content,
-	})
-	//glm-4-air	glm-4-0520  glm-4-flash
-	requestData := map[string]interface{}{
-		"model":    "deepseek-chat",
-		"messages": messages,
-	}
-	jsonData, _ := json.Marshal(requestData)
-	// 创建HTTP请求
-	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
-	if err != nil {
-		log.Debug("Error: %s", err)
-		return map[string]interface{}{}
-	}
-	// 设置请求头
-	req.Header.Add("Content-Type", "application/json")
-	req.Header.Add("Accept", "application/json")
-	req.Header.Add("Authorization", "Bearer sk-832c259e371743a0bb7b280cb86ad806")
-
-	client := &http.Client{}
-	client.Timeout = 120 * time.Second
-	resp, err := client.Do(req)
-	if err != nil {
-		return map[string]interface{}{}
-	}
-	defer resp.Body.Close()
-
-	// 解析响应
-	body, _ := ioutil.ReadAll(resp.Body)
-	res := make(map[string]interface{})
-	json.Unmarshal(body, &res)
-	if res != nil {
-		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
-			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
-				result := qu.ObjToString((*message)["content"])
-				result = ul.Escape.ReplaceAllString(result, "")
-				if new_result := ul.SaveResultReg.FindString(result); new_result != "" {
-					result = new_result
-				}
-				dict := make(map[string]interface{})
-				json.Unmarshal([]byte(result), &dict)
-				return dict
-			}
-		}
-	}
-	return map[string]interface{}{}
-}
+//func PostClassDeepSeekAI(content string) map[string]interface{} {
+//	// API的URL
+//	apiURL := "https://api.deepseek.com/chat/completions"
+//	// 构造请求数据
+//	messages := []map[string]interface{}{}
+//	messages = append(messages, map[string]interface{}{
+//		"role":    "user",
+//		"content": content,
+//	})
+//	//glm-4-air	glm-4-0520  glm-4-flash
+//	requestData := map[string]interface{}{
+//		"model":    "deepseek-chat",
+//		"messages": messages,
+//	}
+//	jsonData, _ := json.Marshal(requestData)
+//	// 创建HTTP请求
+//	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
+//	if err != nil {
+//		log.Debug("Error: %s", err)
+//		return map[string]interface{}{}
+//	}
+//	// 设置请求头
+//	req.Header.Add("Content-Type", "application/json")
+//	req.Header.Add("Accept", "application/json")
+//	req.Header.Add("Authorization", "Bearer sk-832c259e371743a0bb7b280cb86ad806")
+//
+//	client := &http.Client{}
+//	client.Timeout = 120 * time.Second
+//	resp, err := client.Do(req)
+//	if err != nil {
+//		return map[string]interface{}{}
+//	}
+//	defer resp.Body.Close()
+//
+//	// 解析响应
+//	body, _ := ioutil.ReadAll(resp.Body)
+//	res := make(map[string]interface{})
+//	json.Unmarshal(body, &res)
+//	if res != nil {
+//		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
+//			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
+//				result := qu.ObjToString((*message)["content"])
+//				result = ul.Escape.ReplaceAllString(result, "")
+//				if new_result := ul.SaveResultReg.FindString(result); new_result != "" {
+//					result = new_result
+//				}
+//				dict := make(map[string]interface{})
+//				json.Unmarshal([]byte(result), &dict)
+//				return dict
+//			}
+//		}
+//	}
+//	return map[string]interface{}{}
+//}
 
 // 分包字段
-func PostPackageDeepSeekAI(content string) map[string]interface{} {
-	// API的URL
-	apiURL := "https://api.deepseek.com/chat/completions"
-	// 构造请求数据
-	messages := []map[string]interface{}{}
-	messages = append(messages, map[string]interface{}{
-		"role":    "system",
-		"content": "你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。",
-	})
-	messages = append(messages, map[string]interface{}{
-		"role":    "user",
-		"content": content,
-	})
-	//glm-4-air	glm-4-0520  glm-4-flash
-	requestData := map[string]interface{}{
-		"model":    "deepseek-chat",
-		"messages": messages,
-	}
-	jsonData, _ := json.Marshal(requestData)
-	// 创建HTTP请求
-	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
-	if err != nil {
-		log.Debug("Error: %s", err)
-		return map[string]interface{}{}
-	}
-	// 设置请求头
-	req.Header.Add("Content-Type", "application/json")
-	req.Header.Add("Accept", "application/json")
-	req.Header.Add("Authorization", "Bearer sk-832c259e371743a0bb7b280cb86ad806")
-
-	client := &http.Client{}
-	client.Timeout = 180 * time.Second
-	resp, err := client.Do(req)
-	if err != nil {
-		return map[string]interface{}{}
-	}
-	defer resp.Body.Close()
-
-	// 解析响应
-	body, _ := ioutil.ReadAll(resp.Body)
-	res := make(map[string]interface{})
-	json.Unmarshal(body, &res)
-	if res != nil {
-		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
-			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
-				result := qu.ObjToString((*message)["content"])
-				//最终正确的结果
-				arr := strings.Split(result, "最终正确的结果")
-				if len(arr) > 1 {
-					result = arr[1]
-				}
-				result = ul.Escape.ReplaceAllString(result, "")
-				if new_result := ul.SaveResultReg.FindString(result); new_result != "" {
-					result = new_result
-				}
-				dict := make(map[string]interface{})
-				json.Unmarshal([]byte(result), &dict)
-				return dict
-			}
-		}
-	}
-	return map[string]interface{}{}
-}
+//func PostPackageDeepSeekAI(content string) map[string]interface{} {
+//	// API的URL
+//	apiURL := "https://api.deepseek.com/chat/completions"
+//	// 构造请求数据
+//	messages := []map[string]interface{}{}
+//	messages = append(messages, map[string]interface{}{
+//		"role":    "system",
+//		"content": "你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。",
+//	})
+//	messages = append(messages, map[string]interface{}{
+//		"role":    "user",
+//		"content": content,
+//	})
+//	//glm-4-air	glm-4-0520  glm-4-flash
+//	requestData := map[string]interface{}{
+//		"model":    "deepseek-chat",
+//		"messages": messages,
+//	}
+//	jsonData, _ := json.Marshal(requestData)
+//	// 创建HTTP请求
+//	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
+//	if err != nil {
+//		log.Debug("Error: %s", err)
+//		return map[string]interface{}{}
+//	}
+//	// 设置请求头
+//	req.Header.Add("Content-Type", "application/json")
+//	req.Header.Add("Accept", "application/json")
+//	req.Header.Add("Authorization", "Bearer sk-832c259e371743a0bb7b280cb86ad806")
+//
+//	client := &http.Client{}
+//	client.Timeout = 180 * time.Second
+//	resp, err := client.Do(req)
+//	if err != nil {
+//		return map[string]interface{}{}
+//	}
+//	defer resp.Body.Close()
+//
+//	// 解析响应
+//	body, _ := ioutil.ReadAll(resp.Body)
+//	res := make(map[string]interface{})
+//	json.Unmarshal(body, &res)
+//	if res != nil {
+//		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
+//			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
+//				result := qu.ObjToString((*message)["content"])
+//				//最终正确的结果
+//				arr := strings.Split(result, "最终正确的结果")
+//				if len(arr) > 1 {
+//					result = arr[1]
+//				}
+//				result = ul.Escape.ReplaceAllString(result, "")
+//				if new_result := ul.SaveResultReg.FindString(result); new_result != "" {
+//					result = new_result
+//				}
+//				dict := make(map[string]interface{})
+//				json.Unmarshal([]byte(result), &dict)
+//				return dict
+//			}
+//		}
+//	}
+//	return map[string]interface{}{}
+//}
 
 /*****************************
 ******************************
@@ -213,35 +213,35 @@ func PostDeepSeekInfo(content string) map[string]interface{} {
 }
 
 // 请求多包字段...
-func PostDeepSeekPackageInfo(content string) map[string]interface{} {
-	zp, ok := map[string]interface{}{}, 0
-	for {
-		ok++
-		if zp = PostPackageDeepSeekAI(content); len(zp) > 0 {
-			break
-		}
-		if ok >= req_retry_deepseek {
-			break
-		}
-	}
-	return zp
-}
+//func PostDeepSeekPackageInfo(content string) map[string]interface{} {
+//	zp, ok := map[string]interface{}{}, 0
+//	for {
+//		ok++
+//		if zp = PostPackageDeepSeekAI(content); len(zp) > 0 {
+//			break
+//		}
+//		if ok >= req_retry_deepseek {
+//			break
+//		}
+//	}
+//	return zp
+//}
 
 // 请求质谱数据-分类字段
-func PostDeepSeekClassInfo(content string) (map[string]interface{}, bool) {
-	zp := map[string]interface{}{}
-	times := 0
-	ok := false
-	for {
-		times++
-		zp = PostClassDeepSeekAI(content)
-		if len(zp) > 0 {
-			ok = true
-			break
-		}
-		if times >= req_retry_deepseek {
-			break
-		}
-	}
-	return zp, ok
-}
+//func PostDeepSeekClassInfo(content string) (map[string]interface{}, bool) {
+//	zp := map[string]interface{}{}
+//	times := 0
+//	ok := false
+//	for {
+//		times++
+//		zp = PostClassDeepSeekAI(content)
+//		if len(zp) > 0 {
+//			ok = true
+//			break
+//		}
+//		if times >= req_retry_deepseek {
+//			break
+//		}
+//	}
+//	return zp, ok
+//}

+ 306 - 0
clean/c_deepseek.go

@@ -0,0 +1,306 @@
+package clean
+
+import (
+	"data_ai/ul"
+	"github.com/google/uuid"
+	"github.com/shopspring/decimal"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"strings"
+)
+
+// DSToptype is keyed by the first-level announcement category reported by the
+// model; each value translates the model's second-level category into the
+// subtype label used by this project. TopSubConvert performs the lookups.
+var DSToptype = map[string]map[string]string{
+	"招标公告": { // tender announcements
+		"单一来源公告": "单一",
+		"询价公告":   "询价",
+		"竞价公告":   "竞价",
+		"邀标公告":   "邀标",
+		"竞争谈判公告": "竞谈",
+		"变更公告":   "变更",
+		"招标公告":   "招标",
+	},
+	"结果公告": { // result announcements (the original note here read "other")
+		"中标公告":   "中标",
+		"候选公告":   "中标",
+		"成交公告":   "成交",
+		"变更公告":   "结果变更",
+		"结果其它公告": "其它",
+		"废标公告":   "废标",
+		"流标公告":   "流标",
+	},
+	"预告公告": { // advance-notice announcements
+		"预告公告":   "预告",
+		"预审公告":   "预审",
+		"预审结果公告": "预审结果",
+		"论证意见公告": "论证意见",
+		"需求公示公告": "需求公示",
+	},
+}
+
+// 清洗新模型····
+func CleanDeepSeekInfo(ai_info map[string]interface{}, tmp map[string]interface{}) map[string]interface{} {
+	fns := getfnsinfo(tmp)
+	zhipu := *qu.ObjToMap(ai_info["任务1"])
+	data := map[string]interface{}{}
+	//重点字段
+	if s_area, s_city := CleanRegion(qu.ObjToString(zhipu["省份"]), qu.ObjToString(zhipu["城市"])); s_area != "" || s_city != "" {
+		data["s_area"] = s_area
+		data["s_city"] = s_city
+	}
+	if s_buyer := CleanBuyer(qu.ObjToString(zhipu["采购单位"])); s_buyer != "" {
+		data["s_buyer"] = s_buyer
+	}
+	if s_pname := CleanPname(qu.ObjToString(zhipu["项目名称"])); s_pname != "" {
+		data["s_projectname"] = s_pname
+	}
+	if s_pcode := CleanPcode(qu.ObjToString(zhipu["项目编号"]), fns); s_pcode != "" {
+		data["s_projectcode"] = s_pcode
+	}
+	if s_biddingcode := CleanOtherCode(qu.ObjToString(zhipu["招标编号"])); s_biddingcode != "" {
+		data["s_biddingcode"] = s_biddingcode
+	}
+	if s_packagecode := CleanOtherCode(qu.ObjToString(zhipu["标段编号"])); s_packagecode != "" {
+		data["s_packagecode"] = s_packagecode
+	}
+	if s_contractcode := CleanOtherCode(qu.ObjToString(zhipu["合同编号"])); s_contractcode != "" {
+		data["s_contractcode"] = s_contractcode
+	}
+	if s_budget, s_budget_unit := CleanMoney([]interface{}{zhipu["预算金额"], ""}); s_budget > 0.0 && s_budget < 1000000000.0 {
+		if !s_budget_unit {
+			if n_s_budget := ConvertMoney(s_budget, qu.ObjToString(zhipu["预算金额单位"])); n_s_budget > 0.0 && n_s_budget < 1000000000.0 {
+				data["s_budget"] = n_s_budget
+			} else {
+				data["s_budget"] = s_budget
+			}
+		} else {
+			data["s_budget"] = s_budget
+		}
+	}
+	if s_bidamount, s_bidamount_unit := CleanMoney([]interface{}{zhipu["中标金额"], ""}); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
+		if !s_bidamount_unit {
+			if n_s_bidamount := ConvertMoney(s_bidamount, qu.ObjToString(zhipu["中标金额单位"])); n_s_bidamount > 0.0 && n_s_bidamount < 1000000000.0 {
+				data["s_bidamount"] = n_s_bidamount
+			} else {
+				data["s_bidamount"] = s_bidamount
+			}
+		} else {
+			data["s_bidamount"] = s_bidamount
+		}
+	}
+	if s_agency := CleanAgency(qu.ObjToString(zhipu["代理机构"])); s_agency != "" {
+		data["s_agency"] = s_agency
+	}
+	if s_winner := CleanWinner(qu.ObjToString(zhipu["中标单位"])); s_winner != "" {
+		data["s_winner"] = s_winner
+	}
+
+	//其他字段
+	if s_bidopenaddress := CleanOtherName(qu.ObjToString(zhipu["开标地点"])); s_bidopenaddress != "" {
+		data["s_bidopenaddress"] = s_bidopenaddress
+	}
+	if s_biddiscount := CleanDiscount(qu.ObjToString(zhipu["中标金额折扣率"])); s_biddiscount > 0.0 {
+		data["s_biddiscount"] = s_biddiscount
+	}
+
+	//时间相关
+	if s_bidopentime := CleanTime(qu.ObjToString(zhipu["开标日期"])); s_bidopentime > 0 {
+		data["s_bidopentime"] = s_bidopentime
+	}
+	if s_bidendtime := CleanTime(qu.ObjToString(zhipu["投标截止时间"])); s_bidendtime > 0 {
+		data["s_bidendtime"] = s_bidendtime
+	}
+	if s_docstarttime := CleanTime(qu.ObjToString(zhipu["招标文件获取开始时间"])); s_docstarttime > 0 {
+		data["s_docstarttime"] = s_docstarttime
+	}
+	if s_docendtime := CleanTime(qu.ObjToString(zhipu["招标文件获取结束时间"])); s_docendtime > 0 {
+		data["s_docendtime"] = s_docendtime
+	}
+
+	//联系方式方式
+	if s_buyerperson := CleanContactPerson(qu.ObjToString(zhipu["采购单位联系人"])); s_buyerperson != "" {
+		data["s_buyerperson"] = s_buyerperson
+	}
+	if s_buyertel := CleanContactTel(qu.ObjToString(zhipu["采购单位联系方式"])); s_buyertel != "" {
+		data["s_buyertel"] = s_buyertel
+	}
+	if s_agencyperson := CleanContactPerson(qu.ObjToString(zhipu["代理机构联系人"])); s_agencyperson != "" {
+		data["s_agencyperson"] = s_agencyperson
+	}
+	if s_agencytel := CleanContactTel(qu.ObjToString(zhipu["代理机构联系方式"])); s_agencytel != "" {
+		data["s_agencytel"] = s_agencytel
+	}
+	if s_winnerperson := CleanContactPerson(qu.ObjToString(zhipu["中标单位联系人"])); s_winnerperson != "" {
+		data["s_winnerperson"] = s_winnerperson
+	}
+	if s_winnertel := CleanContactTel(qu.ObjToString(zhipu["中标单位联系方式"])); s_winnertel != "" {
+		data["s_winnertel"] = s_winnertel
+	}
+	//标的物字段
+	if zhipu["s_purchasinglist"] != nil {
+		data["s_purchasinglist"] = zhipu["s_purchasinglist"]
+	}
+
+	//分类字段
+	//o_toptype, _ := qu.ObjToString(tmp["toptype"]), qu.ObjToString(tmp["subtype"])
+	//ts_type := *qu.ObjToMap(ai_info["任务2"])
+	//toptype_ai, subtype_ai := qu.ObjToString(ts_type["一级分类"]), qu.ObjToString(ts_type["二级分类"])
+	//new_toptype, new_subtype := TopSubConvert(o_toptype, toptype_ai, subtype_ai)
+	//if new_toptype != "" && new_subtype != "" {
+	//	data["s_toptype"] = new_toptype
+	//	data["s_subtype"] = new_subtype
+	//}
+
+	//解析分包···
+	p_info := *qu.ObjToMap(ai_info["任务2"])
+	if pkg := PackageConvert(p_info); len(pkg) > 0 {
+		data["s_pkg"] = pkg
+	}
+
+	return data
+}
+
+// 转换分包
+func PackageConvert(res map[string]interface{}) map[string]interface{} {
+	//转格式...
+	if res == nil {
+		return map[string]interface{}{}
+	}
+	ai_pkg := map[string]interface{}{}
+	s_winner, s_bidamount, s_budget, com_package := "", 0.0, 0.0, []map[string]interface{}{}
+	win_arr, win_temp := []string{}, map[string]string{}
+	pkginfo := ul.IsMarkInterfaceMap(res["分包信息"])
+	for _, v := range pkginfo {
+		name := qu.ObjToString(v["标段名称"])
+		code := qu.ObjToString(v["标段/包号"])
+		winner := CleanWinner(qu.ObjToString(v["中标单位"]))
+		bidamount, bidamount_unit := CleanMoney([]interface{}{v["中标金额"], ""})
+		budget, budget_unit := CleanMoney([]interface{}{v["预算金额"], ""})
+
+		if !bidamount_unit {
+			bidamount = ConvertMoney(bidamount, qu.ObjToString(v["中标金额单位"]))
+		}
+		if !budget_unit {
+			budget = ConvertMoney(budget, qu.ObjToString(v["预算金额单位"]))
+		}
+
+		if bidamount > 1000000000.0 {
+			bidamount = 0.0
+		}
+		if budget > 1000000000.0 {
+			budget = 0.0
+		}
+		//各种编号编号
+		projectcode := CleanPcode(qu.ObjToString(v["项目编号"]), []string{})
+		packagecode := CleanOtherCode(qu.ObjToString(v["标段编号"]))
+		contractcode := CleanOtherCode(qu.ObjToString(v["合同编号"]))
+
+		//分包信息结构
+		package_id := uuid.New().String()
+		package_id = strings.ReplaceAll(package_id, "-", "")
+		p := map[string]interface{}{
+			"package_id":   package_id,
+			"name":         name,
+			"code":         code,
+			"budget":       budget,
+			"winner":       winner,
+			"bidamount":    bidamount,
+			"projectcode":  projectcode,
+			"packagecode":  packagecode,
+			"contractcode": contractcode,
+		}
+
+		com_package = append(com_package, p)
+		//去重计算单位与总金额-精度丢失···
+		s_bid1 := decimal.NewFromFloat(s_bidamount)
+		s_bid2 := decimal.NewFromFloat(bidamount)
+		s_bid_add := s_bid1.Add(s_bid2)
+		s_bidamount, _ = s_bid_add.Float64()
+
+		s_bud1 := decimal.NewFromFloat(s_budget)
+		s_bud2 := decimal.NewFromFloat(budget)
+		s_bud_add := s_bud1.Add(s_bud2)
+		s_budget, _ = s_bud_add.Float64()
+
+		if win_temp[winner] == "" && winner != "" {
+			win_arr = append(win_arr, winner)
+			win_temp[winner] = winner
+		}
+	}
+	s_winner = strings.Join(win_arr, ",")
+	ai_pkg["s_winner"] = s_winner
+	ai_pkg["s_bidamount"] = s_bidamount
+	ai_pkg["s_budget"] = s_budget
+	ai_pkg["com_package"] = com_package
+	return ai_pkg
+}
+
+// 分类校验···
+func TopSubConvert(old_toptype string, toptype_ai string, subtype_ai string) (string, string) {
+	if old_toptype == "拟建" {
+		return "拟建", "拟建"
+	} else if old_toptype == "产权" {
+		return "产权", "产权"
+	} else if old_toptype == "采购意向" {
+		return "采购意向", "采购意向"
+	} else {
+
+	}
+	if toptype_ai == "采购意向公告" {
+		return "采购意向", "采购意向"
+	} else if toptype_ai == "合同公告" {
+		return "其它", "合同"
+	} else if toptype_ai == "验收公告" {
+		return "其它", "验收"
+	} else if toptype_ai == "违规公告" {
+		return "其它", "违规"
+	} else {
+
+	}
+
+	//映射大模型识别的分类
+	new_toptype, new_subtype := "", ""
+	Sub := DSToptype[toptype_ai]
+	if Sub != nil {
+		if toptype_ai == "招标公告" {
+			new_toptype = "招标"
+			if Sub[subtype_ai] != "" {
+				new_subtype = Sub[subtype_ai]
+			} else {
+				new_subtype = "招标"
+			}
+		} else if toptype_ai == "结果公告" {
+			new_toptype = "结果"
+			if Sub[subtype_ai] != "" {
+				new_subtype = Sub[subtype_ai]
+			} else {
+				new_subtype = "其它"
+			}
+		} else if toptype_ai == "预告公告" {
+			new_toptype = "预告"
+			if Sub[subtype_ai] != "" {
+				new_subtype = Sub[subtype_ai]
+			} else {
+				new_subtype = "预告"
+			}
+		} else {
+
+		}
+	}
+	return new_toptype, new_subtype
+}
+
+// getfnsinfo collects the non-empty "filename" values found under
+// tmp["projectinfo"]["attachments"]. It returns an empty (non-nil) slice when
+// either level is missing.
+func getfnsinfo(tmp map[string]interface{}) []string {
+	names := []string{}
+	projectinfo := qu.ObjToMap(tmp["projectinfo"])
+	if projectinfo == nil {
+		return names
+	}
+	attachments := qu.ObjToMap((*projectinfo)["attachments"])
+	if attachments == nil {
+		return names
+	}
+	for _, raw := range *attachments {
+		info := qu.ObjToMap(raw)
+		if info == nil {
+			continue
+		}
+		if filename := qu.ObjToString((*info)["filename"]); filename != "" {
+			names = append(names, filename)
+		}
+	}
+	return names
+}

+ 3 - 2
clean/c_money.go

@@ -195,7 +195,8 @@ func CleanMoney(data []interface{}) (float64, bool) {
 			ret = ret2[0]
 		}
 	}
-	f, _ := strconv.ParseFloat(strconv.FormatFloat(ret.(float64), 'f', 4, 64), 64)
+	f := util.Float64All(ret)
+	//f, _ := strconv.ParseFloat(strconv.FormatFloat(ret.(float64), 'f', 4, 64), 64)
 	//if f < 1 {
 	//	f = 0
 	//}
@@ -206,7 +207,7 @@ func CleanMoney(data []interface{}) (float64, bool) {
 	// 		f = f * 10000
 	// 	}
 	// }
-	data[0] = f
+	data[0] = util.Float64All(ret)
 	if f == 0 && !moneyUnitRegBool.MatchString(fmt.Sprint(tmpstr)) {
 		data = append(data, false)
 		return 0.0, isFindUnit

+ 13 - 1
config.json

@@ -2,8 +2,12 @@
   "udpport": ":1791",
   "bid_name": "bidding",
   "ext_name": "zktest_package_1011",
-  "reading": 500,
+  "reading": 100,
   "udp_max": 10000,
+  "len_max": 20000,
+  "byte_max": 50000,
+  "flash_model": "glm-4-flash",
+  "model_type": "deepseek",
   "smail": {
     "to": "zhengkun@topnet.net.cn,xuzhiheng@topnet.net.cn",
     "api": "http://172.17.145.179:19281/_send/_mail"
@@ -32,5 +36,13 @@
     "username": "zhengkun",
     "password": "zk@123123"
   },
+  "py_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12002",
+    "addr": "172.17.4.86:27080",
+    "dbname" : "jyqyfw",
+    "username": "",
+    "password": ""
+  },
   "nextNode": []
 }

+ 12 - 1
extract/extension.go

@@ -40,7 +40,7 @@ func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
 }
 
 // 获取附件名字信息
-func getpnsinfo(tmp map[string]interface{}) []string {
+func GetFnsInfo(tmp map[string]interface{}) []string {
 	arr := []string{}
 	if projectinfo := qu.ObjToMap(tmp["projectinfo"]); projectinfo != nil {
 		if attachments := qu.ObjToMap((*projectinfo)["attachments"]); attachments != nil {
@@ -132,6 +132,17 @@ func CheckOutBuyerInfo(f_data map[string]interface{}) {
 	}
 }
 
+// CheckOutDeepSeekBuyerInfo runs a second check on the buyer name.
+//
+// When f_data["s_buyer"] is non-empty it is sent to
+// prompt.AcquireDeepSeekBuyerInfo; if the reply has a "实体单位" value that is
+// still non-empty after clean.CleanBuyer, that value replaces
+// f_data["s_buyer"] in place. In every other case f_data is left untouched.
+func CheckOutDeepSeekBuyerInfo(f_data map[string]interface{}) {
+	s_buyer := qu.ObjToString(f_data["s_buyer"])
+	if s_buyer == "" {
+		return
+	}
+	zp_buyer := prompt.AcquireDeepSeekBuyerInfo(s_buyer)
+	if zp_buyer["实体单位"] == nil {
+		return
+	}
+	if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
+		f_data["s_buyer"] = ns_buyer
+	}
+}
+
 // 合并字段
 func MergeInfo(infos []map[string]interface{}) map[string]interface{} {
 	info := map[string]interface{}{}

+ 83 - 1
extract/extract.go

@@ -8,6 +8,7 @@ import (
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"strings"
 	"sync"
+	"time"
 	"unicode/utf8"
 )
 
@@ -121,7 +122,7 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	}
 
 	//字段清洗
-	fns := getpnsinfo(v) //获取附件名字
+	fns := GetFnsInfo(v) //获取附件名字
 	f_data = clean.CleanFieldInfo(f_info, fns, isTable)
 
 	//采购单位二级校验
@@ -165,3 +166,84 @@ func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
 	//}
 	//log.Debug("本轮重置更新结束......")
 }
+
+/*
+************************************************************
+************************************************************
+************************************************************
+支持新模型-deepseek的轮询查询
+*/
+
+// RunDeepSeek is the DeepSeek polling entry point.
+//
+// NOTE(review): as written it only extracts one hard-coded "bidding" record
+// and returns; the polling loop below the early return is unreachable (go vet
+// reports it). Confirm whether the short-circuit is a debugging leftover.
+//
+// The loop, if re-enabled, walks every document of
+// "zktest_sample_data_source_0", runs ExtractDeepSeekInfo on up to ul.Reading
+// documents concurrently, saves non-empty results to "zktest_deepseek_0122"
+// under the source _id, then sleeps 1800 seconds before the next round.
+func RunDeepSeek() {
+	log.Debug("执行轮询定时···deepseek···")
+	tmp := ul.BidMgo.FindById("bidding", "65ab0ff666cf0db42a81ecb5") // single hard-coded record; result of the extraction is discarded
+	ExtractDeepSeekInfo(tmp)
+	return // NOTE(review): everything below is dead code while this return stays
+	for {
+		pool_mgo := make(chan bool, ul.Reading) // bounds the number of in-flight extractions
+		wg_mgo := &sync.WaitGroup{}
+		sess := ul.BidMgo.GetMgoConn()
+		defer ul.BidMgo.DestoryMongoConn(sess) // NOTE(review): defer inside an endless loop never runs, so one session per round would stay open
+		q, total, isok := map[string]interface{}{}, 0, 0
+		it := sess.DB(ul.BidMgo.DbName).C("zktest_sample_data_source_0").Find(&q).Iter()
+		for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+			if total%1000 == 0 {
+				log.Debug("cur ai index ", total)
+			}
+			isok++ // counts dispatched records, not successful extractions
+			pool_mgo <- true
+			wg_mgo.Add(1)
+			go func(tmp map[string]interface{}) {
+				defer func() {
+					<-pool_mgo
+					wg_mgo.Done()
+				}()
+				// Extract the DeepSeek fields and store them.
+				data := ExtractDeepSeekInfo(tmp)
+				if len(data) > 0 {
+					data["_id"] = tmp["_id"]
+					ul.BidMgo.Save("zktest_deepseek_0122", data)
+				}
+			}(tmp)
+			tmp = make(map[string]interface{}) // fresh map so the next it.Next does not write into the one handed to the goroutine
+		}
+		wg_mgo.Wait()
+		log.Debug("新模型deepseek处理完毕······", isok)
+		time.Sleep(time.Second * 1800)
+	}
+}
+
+// ExtractDeepSeekInfo runs the DeepSeek field extraction for one record and
+// returns the cleaned s_* field map.
+//
+// An empty map is returned when NotInProgressInfo filters the record out, or
+// when the body text is shorter than 100 runes and the short-text judgement
+// prompt does not answer "是". Otherwise the title and body are converted to
+// Markdown, sent to the field-extraction prompt, cleaned, and passed through
+// the buyer re-check and ForcedLogicDecideInfo.
+func ExtractDeepSeekInfo(tmp map[string]interface{}) map[string]interface{} {
+	// Basic information.
+	tmpid := ul.BsonTOStringId(tmp["_id"])
+	title := qu.ObjToString(tmp["title"])
+	old_detail := getDetailText(tmp, tmpid) // body text
+
+	// Filter out records that should not be processed.
+	if NotInProgressInfo(title, old_detail, tmp) {
+		return map[string]interface{}{}
+	}
+
+	// Short bodies get an extra validity check before extraction.
+	isShort := utf8.RuneCountInString(old_detail) < 100
+
+	// Convert to Markdown.
+	new_detail := ul.HttpConvertToMarkdown(title + "\n" + old_detail)
+
+	if isShort {
+		verdict := prompt.AcquireJudgeDeepSeekShortInfo(new_detail)
+		if verdict["结果"] != "是" {
+			return map[string]interface{}{}
+		}
+	}
+
+	// Extract, clean, re-check, then apply the forced rules.
+	f_data := clean.CleanDeepSeekInfo(prompt.AcquireExtractFieldDeepSeekInfo(new_detail), tmp)
+	CheckOutDeepSeekBuyerInfo(f_data)
+	ForcedLogicDecideInfo(f_data)
+
+	return f_data
+}

+ 105 - 147
main.go

@@ -1,28 +1,31 @@
 package main
 
 import (
+	"data_ai/extract"
 	"data_ai/tool"
 	"data_ai/udp"
 	"data_ai/ul"
 	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
-	"github.com/gogf/gf/v2/util/gconv"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"strings"
 	"sync"
-	"unicode/utf8"
 )
 
 func init() {
-	ul.IsLocal = false //是否本地
-	ul.IsTool = false  //是否工具
-	ul.IsFull = false  //是否全量
+	ul.IsLocal = true //是否本地
+	ul.IsTool = false //是否工具
+	ul.IsFull = false //是否全量
 	if ul.IsTool {
 		ul.InitToolVar()
 	} else {
 		ul.InitGlobalVar()
 		if !ul.IsFull {
-			udp.InitProcessVar()
+			if ul.ModelType == "deepseek" {
+				go extract.RunDeepSeek()
+			} else {
+				udp.InitProcessVar()
+			}
 		}
 	}
 	if !ul.IsTool {
@@ -39,11 +42,51 @@ func main() {
 		tool.StartToolInfo()
 		return
 	}
-	//extract.TestSingleFieldInfo("bidding", "677cf41c3309c0998bb6ddda")
+	update1()
 	lock := make(chan bool)
 	<-lock
 }
 
+// update1 rewrites the "detail" field of every document in
+// "zktest_sample_data" to "<title>\n<detail>" taken from the document with the
+// same _id in "zktest_sample_data_source_0".
+//
+// Documents with no matching source record are skipped and logged; previously
+// their detail was overwritten with a bare "\n".
+func update1() {
+	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
+	for _, v := range dataArr {
+		tmpid := ul.BsonTOStringId(v["_id"])
+		data := ul.BidMgo.FindById("zktest_sample_data_source_0", tmpid)
+		if len(data) == 0 {
+			// Nothing to copy from; keep the existing detail intact.
+			log.Debug("source record missing, skipped: ", tmpid)
+			continue
+		}
+		title := qu.ObjToString(data["title"])
+		detail := qu.ObjToString(data["detail"])
+		ul.BidMgo.UpdateById("zktest_sample_data", tmpid, map[string]interface{}{
+			"$set": map[string]interface{}{
+				"detail": title + "\n" + detail,
+			},
+		})
+	}
+	log.Debug("is over ...")
+}
+
+// export1 copies, for every _id in "zktest_sample_data", the matching
+// "bidding" document into "zktest_sample_data_source_4".
+//
+// Work is dispatched to goroutines but pool_mgo has capacity 1, so at most one
+// copy runs at a time. Records that cannot be found in "bidding" are logged
+// and not saved.
+func export1() {
+	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
+	pool_mgo := make(chan bool, 1)
+	wg_mgo := &sync.WaitGroup{}
+	for _, v := range dataArr {
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			// Use the argument, not the loop variable: before Go 1.22 the
+			// loop variable is shared and may already point at the next
+			// element when this goroutine runs.
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			data := ul.BidMgo.FindById("bidding", tmpid)
+			if len(data) == 0 {
+				log.Debug("异常", tmpid)
+				return // do not store an empty document
+			}
+			ul.BidMgo.Save("zktest_sample_data_source_4", data)
+		}(v)
+	}
+	wg_mgo.Wait()
+	log.Debug("is over ...")
+}
+
 // 对比程序
 func compare1() {
 	fields := map[string]string{
@@ -58,80 +101,76 @@ func compare1() {
 		"budget":      "float",
 		"bidamount":   "float",
 	}
-	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
-	dataArr1, _ := ul.BidMgo.Find("zktest_sample_data_source_1", map[string]interface{}{}, nil, map[string]interface{}{})
-	dataArr2, _ := ul.BidMgo.Find("zktest_sample_data_source_2", map[string]interface{}{}, nil, map[string]interface{}{})
-	dataArr3, _ := ul.BidMgo.Find("zktest_sample_data_source_3", map[string]interface{}{}, nil, map[string]interface{}{})
-
-	biaozhu := creat(dataArr, false) //标注数据···
-	source1 := creat(dataArr1, true)
-	source2 := creat(dataArr2, true)
-	source3 := creat(dataArr3, true)
-
-	log.Debug("数据源:", len(biaozhu))
-	log.Debug("对比源:", len(source1))
-	log.Debug("对比源:", len(source2))
-	log.Debug("对比源:", len(source3))
-	dataArr = nil
+	dataArr1, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
+	dataArr2, _ := ul.BidMgo.Find("zktest_deepseek_0122", map[string]interface{}{}, nil, map[string]interface{}{})
+	biaozhu := creat(dataArr1, false) //标注数据···
+	deepseek := creat(dataArr2, true)
 	dataArr1 = nil
 	dataArr2 = nil
-	dataArr3 = nil
 	//计数
-	tj1 := duibi(fields, biaozhu, source1)
-	tj2 := duibi(fields, biaozhu, source2)
-	tj3 := duibi(fields, biaozhu, source3)
-
+	tj := duibi(fields, biaozhu, deepseek)
 	log.Debug("...................")
 	arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
 	for _, v := range arr {
-		t1, s1 := tj1[v]["total"], tj1[v]["same"]
-		t2, s2 := tj2[v]["total"], tj2[v]["same"]
-		t3, s3 := tj3[v]["total"], tj3[v]["same"]
+		t1, s1 := tj[v]["total"], tj[v]["same"]
 		f1 := fmt.Sprintf("模型flash~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t1, s1, (float64(s1)/float64(t1))*100.0, "%")
-		f2 := fmt.Sprintf("模型air~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%")
-		f3 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t3, s3, (float64(s3)/float64(t3))*100.0, "%")
 		log.Debug(f1)
-		log.Debug(f2)
-		log.Debug(f3)
 	}
 }
 
 // 构建数据
-func creat(dataArr []map[string]interface{}, is_zhipu bool) map[string]map[string]interface{} {
+func creat(dataArr []map[string]interface{}, deepseek bool) map[string]map[string]interface{} {
 	dict := map[string]map[string]interface{}{}
 	for _, biaozhu := range dataArr {
-		if is_zhipu {
-			ai_zhipu := *qu.ObjToMap(biaozhu["ai_zhipu"])
-			if len(ai_zhipu) > 0 {
-
-			} else {
-				continue
-			}
+		if deepseek {
+			tmpid := ul.BsonTOStringId(biaozhu["_id"])
+			toptype := qu.ObjToString(biaozhu["s_toptype"])
+			subtype := qu.ObjToString(biaozhu["s_subtype"])
+			area := qu.ObjToString(biaozhu["s_area"])
+			city := qu.ObjToString(biaozhu["s_city"])
+			projectname := qu.ObjToString(biaozhu["s_projectname"])
+			projectcode := qu.ObjToString(biaozhu["s_projectcode"])
+			budget := qu.Float64All(biaozhu["s_budget"])
+			bidamount := qu.Float64All(biaozhu["s_bidamount"])
+			buyer := qu.ObjToString(biaozhu["s_buyer"])
+			s_winner := qu.ObjToString(biaozhu["s_winner"])
+			info := map[string]interface{}{}
+			info["toptype"] = toptype
+			info["subtype"] = subtype
+			info["area"] = area
+			info["city"] = city
+			info["projectname"] = projectname
+			info["projectcode"] = projectcode
+			info["budget"] = budget
+			info["bidamount"] = bidamount
+			info["buyer"] = buyer
+			info["s_winner"] = s_winner
+			dict[tmpid] = info
+		} else {
+			tmpid := ul.BsonTOStringId(biaozhu["_id"])
+			toptype := qu.ObjToString(biaozhu["toptype"])
+			subtype := qu.ObjToString(biaozhu["subtype"])
+			area := qu.ObjToString(biaozhu["area"])
+			city := qu.ObjToString(biaozhu["city"])
+			projectname := qu.ObjToString(biaozhu["projectname"])
+			projectcode := qu.ObjToString(biaozhu["projectcode"])
+			budget := qu.Float64All(biaozhu["budget"])
+			bidamount := qu.Float64All(biaozhu["bidamount"])
+			buyer := qu.ObjToString(biaozhu["buyer"])
+			s_winner := qu.ObjToString(biaozhu["s_winner"])
+			info := map[string]interface{}{}
+			info["toptype"] = toptype
+			info["subtype"] = subtype
+			info["area"] = area
+			info["city"] = city
+			info["projectname"] = projectname
+			info["projectcode"] = projectcode
+			info["budget"] = budget
+			info["bidamount"] = bidamount
+			info["buyer"] = buyer
+			info["s_winner"] = s_winner
+			dict[tmpid] = info
 		}
-		tmpid := ul.BsonTOStringId(biaozhu["_id"])
-		toptype := qu.ObjToString(biaozhu["toptype"])
-		subtype := qu.ObjToString(biaozhu["subtype"])
-		area := qu.ObjToString(biaozhu["area"])
-		city := qu.ObjToString(biaozhu["city"])
-		projectname := qu.ObjToString(biaozhu["projectname"])
-		projectcode := qu.ObjToString(biaozhu["projectcode"])
-		budget := qu.Float64All(biaozhu["budget"])
-		bidamount := qu.Float64All(biaozhu["bidamount"])
-		buyer := qu.ObjToString(biaozhu["buyer"])
-		s_winner := qu.ObjToString(biaozhu["s_winner"])
-
-		info := map[string]interface{}{}
-		info["toptype"] = toptype
-		info["subtype"] = subtype
-		info["area"] = area
-		info["city"] = city
-		info["projectname"] = projectname
-		info["projectcode"] = projectcode
-		info["budget"] = budget
-		info["bidamount"] = bidamount
-		info["buyer"] = buyer
-		info["s_winner"] = s_winner
-		dict[tmpid] = info
 	}
 	return dict
 }
@@ -183,88 +222,7 @@ func duibi(fields map[string]string, biaozhu map[string]map[string]interface{},
 	return tj
 }
 
-func export1() {
-	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
-	pool_mgo := make(chan bool, 1)
-	wg_mgo := &sync.WaitGroup{}
-	for _, v := range dataArr {
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(v["_id"])
-			data := ul.BidMgo.FindById("bidding", tmpid)
-			if len(data) == 0 || data == nil {
-				log.Debug("异常")
-			}
-			ul.BidMgo.Save("zktest_sample_data_source_3", data)
-			//ul.BidMgo.Save("zktest_sample_data_source_2", data)
-		}(v)
-	}
-	wg_mgo.Wait()
-	log.Debug("is over ...")
-}
-
 // 测试调试数据
-func test1() {
-	q, total := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  ul.StringTOBsonId("677638b13309c0998ba2488f"),
-			"$lte": ul.StringTOBsonId("6777a3d23309c0998ba89797"),
-		},
-	}, 0
-	count, _ := ul.BidMgo.Count("bidding", q)
-	log.Debug("查询数量:", count)
-
-	pool_mgo := make(chan bool, 10)
-	wg_mgo := &sync.WaitGroup{}
-
-	sess := ul.BidMgo.GetMgoConn()
-	defer ul.BidMgo.DestoryMongoConn(sess)
-
-	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
-			log.Debug("cur ai index ", total)
-		}
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			//tmpid := ul.BsonTOStringId(tmp["_id"])
-			s1, s2 := gconv.String(tmp["budget"]), gconv.String(tmp["bidamount"])
-			r1, r2 := strings.Split(s1, "."), strings.Split(s2, ".")
-			if len(r1) == 2 {
-				l1 := utf8.RuneCountInString(r1[1])
-				if l1 > 3 {
-					//log.Debug("问题金额:", tmpid, "~", s1)
-					ul.BidMgo.Save("zktest_err_0106", tmp)
-					return
-				}
-			}
-
-			if len(r2) == 2 {
-				l2 := utf8.RuneCountInString(r2[1])
-				if l2 > 3 {
-					//log.Debug("问题金额:", tmpid, "~", s2)
-					ul.BidMgo.Save("zktest_err_0106", tmp)
-					return
-				}
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
-	}
-	wg_mgo.Wait()
-	log.Debug("is over ...", total)
-
-}
-
 func c(s string) string {
 	s = strings.ReplaceAll(s, "(", "(")
 	s = strings.ReplaceAll(s, ")", ")")

+ 9 - 6
prompt/prompt_buyer.go

@@ -2,18 +2,21 @@ package prompt
 
 import (
 	"data_ai/ai"
-	"data_ai/ul"
 )
 
 // 获取外围抽取字段
 func AcquireBuyerInfo(detail string) map[string]interface{} {
 	content := PromptBuyerText(detail)
 	res := map[string]interface{}{}
-	if ul.ModelType == "DeepSeek" {
-		res = ai.PostDeepSeekInfo(content)
-	} else {
-		res = ai.PostZhiPuInfo(content)
-	}
+	res = ai.PostZhiPuInfo(content)
+	return res
+}
+
+// AcquireDeepSeekBuyerInfo builds the buyer prompt for detail with PromptBuyerText and returns the map produced by ai.PostDeepSeekInfo.
+func AcquireDeepSeekBuyerInfo(detail string) map[string]interface{} {
+	content := PromptBuyerText(detail)
+	res := map[string]interface{}{}
+	res = ai.PostDeepSeekInfo(content)
 	return res
 }
 

+ 2 - 11
prompt/prompt_class.go

@@ -42,12 +42,7 @@ func AcquireClassInfo(detail string, title string, top string) (string, string)
 	}
 	top_content := PromptToptypeFieldText(detail, title)
 	top_zp, ok := map[string]interface{}{}, false
-	if ul.ModelType == "DeepSeek" {
-		top_zp, ok = ai.PostDeepSeekClassInfo(top_content)
-	} else {
-		top_zp, ok = ai.PostZhiPuClassInfo(top_content)
-	}
-
+	top_zp, ok = ai.PostZhiPuClassInfo(top_content)
 	if !ok {
 		return "", ""
 	}
@@ -76,11 +71,7 @@ func AcquireClassInfo(detail string, title string, top string) (string, string)
 	sub_zp := map[string]interface{}{}
 	if subtype == "" {
 		sub_content := PromptSubtypeFieldText(detail, title, toptype, tpInfo)
-		if ul.ModelType == "DeepSeek" {
-			sub_zp, ok = ai.PostDeepSeekClassInfo(sub_content)
-		} else {
-			sub_zp, ok = ai.PostZhiPuClassInfo(sub_content)
-		}
+		sub_zp, ok = ai.PostZhiPuClassInfo(sub_content)
 		if !ok {
 			return "", ""
 		}

+ 90 - 0
prompt/prompt_deepseek.go

@@ -0,0 +1,90 @@
+package prompt
+
+import (
+	"data_ai/ai"
+	"data_ai/ul"
+	"fmt"
+)
+
+// pmt_deepseek is the fmt template for the combined DeepSeek request: task 1
+// (field extraction) and task 2 (package/lot extraction) in a single prompt.
+// It contains exactly one %s verb, which receives the announcement text.
+var pmt_deepseek = `
+# 任务1(信息抽取)
+你是一位高效的信息整理专家,任务是精炼并准确抽取输入内容中的关键信息,避免任何非必要的文本,并以JSON格式输出。若信息缺失,填写"无"。
+提取规则:
+1、可推导上下文信息
+2、排除代理机构和中标单位干扰,如果入围供应商、入围中标人、中标候选人等存在多个时,选择第一名为中标单位,忽略其他排名。
+3、金额按照原文输出,不要转换量级,输出可以带单位如xx万元/xx元
+4、你在识别"项目的预算金额"的时候,一定不要识别业绩相关的内容。(合同内容如果没有明确指出甲方的预算金额,请不要识别)如果有多个预算金额存在,优先取预算金额含税总价。如果识别出的预算金额含有单位比如万元等,请务必提取完整。如果不能准确识别出"项目的预算金额,"请填写"无";
+5、你在识别"项目的中标金额"的时候,一定不要识别业绩相关的内容。优先使用合同的金额,合同的总价当做"项目的中标金额"。如果有多个中标金额存在,优先取中标金额的含税总价。如果原文没有明确的中标金额,可以选取第一名中标候选人的投标报价(金额单位请提取完整)。如果识别出项目的中标金额含有单位比如万元等,请务必提取完整。如果不能准确识别出"项目的中标金额",请填写"无";
+6、项目编号和标段编号同时存在时,优先选项目编号;不要使用证书编号当做项目编号的值;
+JSON字段:
+{"任务1":{
+"项目编号":"项目编号",
+"采购单位": "采购方/甲方/委托人",
+"中标单位": "中标方/供应商(多个用逗号分隔)",
+"预算金额": "项目预算金额",
+"中标金额": "中标金额",
+"中标金额单位": "中标金额单位",
+"预算金额单位": "预算金额单位",
+}}
+
+任务2 (分包提取)
+# 分包任务介绍
+对于招投标信息,部分项目会被拆分成多个包或标段,现在你需要根据抽取字段要求,从输入的内容提取相应的数据信息并严格输出JSON格式。若字段信息缺失,填写"无"。
+任务要求:
+    - 入围供应商、入围中标人、中标候选人存在多个时,选择第一名为中标单位
+    - 如果输入内容只有一个项目,那么则输出单个项目对应信息,如果一个项目被拆分成多个,那么输出多个被拆分的包或者标段
+    - 注意一个项目中存在多个标的物不能算作多包
+    - 注意面向企业采购金额不是中标金额,而是预算金额,不可以使用预算金额填充
+JSON字段:
+{"任务2":{
+"分包信息":[
+{
+"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"),
+"标段/包号":(标包编号,可以来自标段名称,如果不存在写"无",比如:一标段、二标段、包一、I包、I标段、标包一、标包编号等,不可以填写"项目编号"或"标的编号"),
+"中标单位":(中标单位名称,要求:中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
+"预算金额":(预算金额、整体预留项目金额、最高限价、面向中小企业采购金额,要求:不可以用中标金额来填充),
+"中标金额":(项目中标后的成交金额、合同签订金额,如果不存在则使用“无”代替,要求:不能使用预算金额填充。多个金额时请进行计算,非单价,如果是单价,则等于单价*数量。),
+"中标金额单位":"对应金额的单位,如果不存在输出“无”",
+"预算金额单位":"对应金额的单位,如果不存在输出“无”",
+"项目编号":(项目编号,通常具有唯一性,它能够准确无误地标识一个特定的项目,不要使用证书编号当做项目编号的值),
+"标段编号":(通常为标段的唯一识别码,由数字、字母或其组合构成,不要使用纯汉字),
+"合同编号":(通常为合同提供了一个独一无二的标识,不要使用证书编号当做项目编号的值),
+},
+....
+],
+}}
+
+# 输入内容:
+%s
+
+# 合并上面两个任务,输出最终JSON结果:
+{"任务1":{...},
+"任务2":{...}
+}
+`
+
+// AcquireExtractFieldDeepSeekInfo fills the combined DeepSeek template
+// (pmt_deepseek) with detail via PromptDeepSeekFieldText and returns the map
+// that ai.PostDeepSeekInfo yields for the resulting prompt.
+func AcquireExtractFieldDeepSeekInfo(detail string) map[string]interface{} {
+	return ai.PostDeepSeekInfo(PromptDeepSeekFieldText(detail, pmt_deepseek))
+}
+
+// PromptDeepSeekFieldText renders prompt (an fmt template with a single %s)
+// with detail as its argument.
+//
+// When the rendered text is longer than ul.MaxByte bytes, detail is passed
+// through truncatedText with a target of len(detail) minus the overflow and
+// the template is rendered once more with the shortened text.
+// NOTE(review): truncatedText is defined elsewhere; whether its second
+// argument counts bytes or runes is not visible here — confirm.
+func PromptDeepSeekFieldText(detail string, prompt string) string {
+	rendered := fmt.Sprintf(prompt, detail)
+	overflow := len(rendered) - ul.MaxByte
+	if overflow <= 0 {
+		// Already within the size limit: nothing to trim.
+		return rendered
+	}
+	shortened := truncatedText(detail, len(detail)-overflow)
+	return fmt.Sprintf(prompt, shortened)
+}
+
+// AcquireJudgeDeepSeekShortInfo renders the short-text judgement prompt
+// (pmt_field_prefix) for detail with PromptFieldText and returns the map
+// produced by ai.PostDeepSeekAI.
+func AcquireJudgeDeepSeekShortInfo(detail string) map[string]interface{} {
+	return ai.PostDeepSeekAI(PromptFieldText(detail, pmt_field_prefix))
+}

+ 5 - 21
prompt/prompt_field.go

@@ -71,7 +71,7 @@ var pmt_field2 = `
 你在识别"开标地点"时,输出开标的具体地址;
 你在识别"招标文件获取开始时间"时,输出招标文件的的具体开始时间,输出格式为:YYYY-MM-DD HH:MM:SS,如果格式不对,请转化为:YYYY-MM-DD HH:MM:SS;
 你在识别"招标文件获取结束时间"时,输出招标文件的的具体结束时间,输出格式为:YYYY-MM-DD HH:MM:SS,如果格式不对,请转化为:YYYY-MM-DD HH:MM:SS;
-你在识别"中标金额折扣率"时,输出中标费用的上浮率或者下浮率或者折扣率,输出格式为(上浮率:xx% 或 下浮率:xx% 或 折扣率;xx% ),如果格式不对,请转化为:(上浮率:xx% 或 下浮率:xx% 或 折扣率;xx% ),不要带具体价格费用的数据,没有识别出来,请填写"无",如果识别到多个值,请填写"无";
+你在识别"中标金额折扣率"时,输出中标费用的上浮率或者下浮率或者折扣率,输出格式为(上浮率:xx%% 或 下浮率:xx%% 或 折扣率;xx%% ),如果格式不对,请转化为:(上浮率:xx%% 或 下浮率:xx%% 或 折扣率;xx%% ),不要带具体价格费用的数据,没有识别出来,请填写"无",如果识别到多个值,请填写"无";
 请将上述的识别结果、信息分类结果,按照JSON格式输出,
 严格按照json格式
 {
@@ -120,11 +120,7 @@ var pmt_field3 = `
 func AcquireJudgeShortInfo(detail string) map[string]interface{} {
 	content := PromptFieldText(detail, pmt_field_prefix)
 	res := map[string]interface{}{}
-	if ul.ModelType == "DeepSeek" {
-		res = ai.PostDeepSeekInfo(content)
-	} else {
-		res = ai.PostZhiPuInfo(content)
-	}
+	res = ai.PostZhiPuInfo(content)
 	return res
 }
 
@@ -132,11 +128,7 @@ func AcquireJudgeShortInfo(detail string) map[string]interface{} {
 func AcquireExtractFieldInfoFirst(detail string) map[string]interface{} {
 	content := PromptFieldText(detail, pmt_field1)
 	res := map[string]interface{}{}
-	if ul.ModelType == "DeepSeek" {
-		res = ai.PostDeepSeekInfo(content)
-	} else {
-		res = ai.PostZhiPuInfo(content)
-	}
+	res = ai.PostZhiPuInfo(content)
 	return res
 }
 
@@ -144,11 +136,7 @@ func AcquireExtractFieldInfoFirst(detail string) map[string]interface{} {
 func AcquireExtractFieldInfoSecond(detail string) map[string]interface{} {
 	content := PromptFieldText(detail, pmt_field2)
 	res := map[string]interface{}{}
-	if ul.ModelType == "DeepSeek" {
-		res = ai.PostDeepSeekInfo(content)
-	} else {
-		res = ai.PostZhiPuInfo(content)
-	}
+	res = ai.PostZhiPuInfo(content)
 	return res
 }
 
@@ -156,11 +144,7 @@ func AcquireExtractFieldInfoSecond(detail string) map[string]interface{} {
 func AcquireExtractFieldInfoThird(detail string) map[string]interface{} {
 	content := PromptFieldText(detail, pmt_field3)
 	res := map[string]interface{}{}
-	if ul.ModelType == "DeepSeek" {
-		res = ai.PostDeepSeekInfo(content)
-	} else {
-		res = ai.PostZhiPuInfo(content)
-	}
+	res = ai.PostZhiPuInfo(content)
 	return res
 }
 

+ 1 - 5
prompt/prompt_package.go

@@ -155,11 +155,7 @@ func PromptMultiplePackageText(detail string) string {
 func AcquireNewMultiplePackageInfo(detail string, isTable bool) map[string]interface{} {
 	content := PromptMultiplePackageText(detail)
 	res := map[string]interface{}{}
-	if ul.ModelType == "DeepSeek" {
-		res = ai.PostDeepSeekPackageInfo(content)
-	} else {
-		res = ai.PostZhiPuPackageInfo(content)
-	}
+	res = ai.PostZhiPuPackageInfo(content)
 	//转格式...
 	ai_pkg := map[string]interface{}{}
 	s_winner, s_bidamount, s_budget, com_package := "", 0.0, 0.0, []map[string]interface{}{}

+ 16 - 19
ul/attr.go

@@ -3,25 +3,22 @@ package ul
 import "regexp"
 
 var (
-	SourceMgo, QyxyMgo      *MongodbSim
-	BidMgo                  *MongodbSim
-	SysConfig               map[string]interface{}
-	ToolConfig              map[string]interface{}
-	Bid_Name, Ext_Name      string
-	TimeLayout              = "2006-01-02 15:04:05"
-	Url                     = "https://www.jianyu360.cn/article/content/%s.html"
-	CleanResultReg          = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
-	SaveResultReg           = regexp.MustCompile("([{].*[}])")
-	MaxLen                  = 20000
-	MaxByte                 = 50000
-	MaxUdp                  = 10000
-	RulesPname              = []*ExtReg{}
-	IsTool, IsFull, IsLocal bool
-	Reading                 int
-	FlashModel              string
-	SpecialTextReg          = regexp.MustCompile("(原网页|见附件|下载附件|(查看|访问)(源网|原网)|详情请下载附件!|详情请访问原网页!)")
-	Escape                  = regexp.MustCompile("(json|`|\n|\\\\)")
-	ModelType               string
+	SourceMgo, QyxyMgo               *MongodbSim
+	BidMgo, PyMgo                    *MongodbSim
+	SysConfig                        map[string]interface{}
+	ToolConfig                       map[string]interface{}
+	Bid_Name, Ext_Name               string
+	TimeLayout                       = "2006-01-02 15:04:05"
+	Url                              = "https://www.jianyu360.cn/article/content/%s.html"
+	CleanResultReg                   = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
+	SaveResultReg                    = regexp.MustCompile("([{].*[}])")
+	RulesPname                       = []*ExtReg{}
+	IsTool, IsFull, IsLocal          bool
+	FlashModel                       string
+	SpecialTextReg                   = regexp.MustCompile("(原网页|见附件|下载附件|(查看|访问)(源网|原网)|详情请下载附件!|详情请访问原网页!)")
+	Escape                           = regexp.MustCompile("(json|`|\n|\\\\)")
+	Reading, MaxLen, MaxByte, MaxUdp int
+	ModelType                        string
 )
 
 type ExtReg struct {

+ 26 - 5
ul/init.go

@@ -31,10 +31,11 @@ func InitToolVar() {
 // 其它属性
 func initOther() {
 	//glm-4-air glm-4-flash
-	FlashModel = "glm-4-air"
-	//ModelType = "DeepSeek"
-	ModelType = "Flash"
+	FlashModel = qu.ObjToString(SysConfig["flash_model"])
+	ModelType = qu.ObjToString(SysConfig["model_type"])
 	MaxUdp = qu.IntAllDef(SysConfig["udp_max"], 10000)
+	MaxLen = qu.IntAllDef(SysConfig["len_max"], 20000)
+	MaxByte = qu.IntAllDef(SysConfig["byte_max"], 50000)
 }
 
 // 初始化mgo
@@ -103,6 +104,26 @@ func initMgo() {
 	} else {
 		QyxyMgo.InitPool()
 	}
+
+	//企业数据
+	py_cfg := *qu.ObjToMap(SysConfig["py_mgo"])
+	py_local := py_cfg["local"].(bool)
+	py_addr := qu.ObjToString(py_cfg["addr"])
+	if py_local {
+		py_addr = qu.ObjToString(py_cfg["l_addr"])
+	}
+	PyMgo = &MongodbSim{
+		MongodbAddr: py_addr,
+		DbName:      qu.ObjToString(py_cfg["dbname"]),
+		Size:        10,
+		UserName:    qu.ObjToString(py_cfg["username"]),
+		Password:    qu.ObjToString(py_cfg["password"]),
+	}
+	if py_local {
+		PyMgo.InitPoolDirect()
+	} else {
+		PyMgo.InitPool()
+	}
 }
 
 // 初始化mgo
@@ -265,8 +286,8 @@ func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 func PostMarkDownText(html string) string {
 	url := "http://172.17.162.35:18811/md"
 	if IsLocal {
-		//url = "http://172.17.0.11:8888/md"
-		url = "http://192.168.3.13:8888/md"
+		url = "http://172.17.0.11:8888/md"
+		//url = "http://192.168.3.13:8888/md"
 	}
 	// 创建请求数据
 	jsonData, err := json.Marshal(map[string]interface{}{"html": html})