Browse Source

更新···0924

zhengkun 10 tháng trước cách đây
mục cha
commit
34a58d0739
18 tập tin đã thay đổi với 1112 bổ sung và 383 xóa
  1. 0 34
      ai/ai_baidu.go
  2. 125 6
      ai/ai_zhipu.go
  3. 2 2
      clean/c_all.go
  4. 381 25
      clean/c_money.go
  5. 2 1
      config.json
  6. 16 3
      extract/extract.go
  7. 289 165
      extract/test.go
  8. 1 7
      main.go
  9. 1 6
      prompt/prompt_buyer.go
  10. 33 28
      prompt/prompt_class.go
  11. 31 14
      prompt/prompt_field.go
  12. 139 61
      prompt/prompt_package.go
  13. 17 17
      tool.json
  14. 51 6
      tool/tool.go
  15. 2 2
      udp/udprocess.go
  16. 4 4
      udp/udptask.go
  17. 3 0
      ul/attr.go
  18. 15 2
      ul/init.go

+ 0 - 34
ai/ai_baidu.go

@@ -41,37 +41,3 @@ func PostBaiDuAI(content string) map[string]interface{} {
 	json.Unmarshal([]byte(result), &res)
 	return res
 }
-
-// 请求质谱数据外围字段...
-func PostZhiPuInfo(content string) map[string]interface{} {
-	zp, ok := map[string]interface{}{}, 0
-	for {
-		ok++
-		if zp = PostZhiPuAI(content); len(zp) > 0 {
-			break
-		}
-		if ok >= 2 {
-			break
-		}
-	}
-	return zp
-}
-
-// 请求质谱数据-分类字段
-func PostZhiPuClassInfo(content string) (map[string]interface{}, bool) {
-	zp := map[string]interface{}{}
-	times := 0
-	ok := false
-	for {
-		times++
-		zp = PostClassZhiPuAI(content)
-		if len(zp) > 0 {
-			ok = true
-			break
-		}
-		if times >= 2 {
-			break
-		}
-	}
-	return zp, ok
-}

+ 125 - 6
ai/ai_zhipu.go

@@ -9,9 +9,10 @@ import (
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
 	"strings"
+	"time"
 )
 
-// 智谱清言
+// 智谱清言-通用外围
 func PostZhiPuAI(content string) map[string]interface{} {
 	// API的URL
 	apiURL := "https://open.bigmodel.cn/api/paas/v4/chat/completions"
@@ -23,7 +24,7 @@ func PostZhiPuAI(content string) map[string]interface{} {
 	})
 	//glm-4-air	glm-4-0520  glm-4-flash
 	requestData := map[string]interface{}{
-		"model":       "glm-4-flash",
+		"model":       ul.FlashModel,
 		"messages":    messages,
 		"temperature": 0.1,
 		"max_tokens":  4096,
@@ -41,10 +42,9 @@ func PostZhiPuAI(content string) map[string]interface{} {
 	req.Header.Set("Authorization", "Bearer 3d84d30b7ab4c94dbf71853cb7e44719.hLLS4CA2MqVQs6kR")
 	// 发起请求 14543f0d69d6987c8782fd846e164f26.DXaoS1axLaMP892a
 	client := &http.Client{}
-	//client.Timeout = 10 * time.Second
+	client.Timeout = 180 * time.Second
 	resp, err := client.Do(req)
 	if err != nil {
-		log.Debug("Error: %s", err)
 		return map[string]interface{}{}
 	}
 	defer resp.Body.Close()
@@ -84,7 +84,7 @@ func PostClassZhiPuAI(content string) map[string]interface{} {
 	})
 	//glm-4-air	glm-4-0520  glm-4-flash
 	requestData := map[string]interface{}{
-		"model":       "glm-4-flash",
+		"model":       ul.FlashModel,
 		"messages":    messages,
 		"temperature": 0.2,
 		"top_p":       0.7,
@@ -103,12 +103,71 @@ func PostClassZhiPuAI(content string) map[string]interface{} {
 	req.Header.Set("Authorization", "Bearer 3d84d30b7ab4c94dbf71853cb7e44719.hLLS4CA2MqVQs6kR")
 	// 发起请求 14543f0d69d6987c8782fd846e164f26.DXaoS1axLaMP892a
 	client := &http.Client{}
-	//client.Timeout = 10 * time.Second
+	client.Timeout = 180 * time.Second
 	resp, err := client.Do(req)
+	if err != nil {
+		return map[string]interface{}{}
+	}
+	defer resp.Body.Close()
+
+	// 解析响应
+	body, _ := ioutil.ReadAll(resp.Body)
+	res := make(map[string]interface{})
+	json.Unmarshal(body, &res)
+	if res != nil {
+		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
+			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
+				result := qu.ObjToString((*message)["content"])
+				result = strings.ReplaceAll(result, "\n", "")
+				result = strings.ReplaceAll(result, "json", "")
+				result = strings.ReplaceAll(result, "`", "")
+				if new_result := ul.SaveResultReg.FindString(result); new_result != "" {
+					result = new_result
+				}
+				dict := make(map[string]interface{})
+				json.Unmarshal([]byte(result), &dict)
+				return dict
+			}
+		}
+	}
+	return map[string]interface{}{}
+}
+
+// PostPackageZhiPuAI queries Zhipu AI (智谱清言) for the multi-package fields.
+func PostPackageZhiPuAI(content string) map[string]interface{} {
+	// API的URL
+	apiURL := "https://open.bigmodel.cn/api/paas/v4/chat/completions"
+	// 构造请求数据
+	messages := []map[string]interface{}{}
+	messages = append(messages, map[string]interface{}{
+		"role":    "user",
+		"content": content,
+	})
+	//glm-4-air	glm-4-0520  glm-4-flash
+	requestData := map[string]interface{}{
+		"model":       ul.FlashModel,
+		"messages":    messages,
+		"temperature": 0.1,
+		"max_tokens":  4096,
+	}
+	jsonData, _ := json.Marshal(requestData)
+	// 创建HTTP请求
+	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
 	if err != nil {
 		log.Debug("Error: %s", err)
 		return map[string]interface{}{}
 	}
+	// 设置请求头
+	req.Header.Set("Content-Type", "application/json")
+	// 如果API需要认证,可以在这里设置认证信息
+	req.Header.Set("Authorization", "Bearer 3d84d30b7ab4c94dbf71853cb7e44719.hLLS4CA2MqVQs6kR")
+	// 发起请求 14543f0d69d6987c8782fd846e164f26.DXaoS1axLaMP892a
+	client := &http.Client{}
+	client.Timeout = 180 * time.Second
+	resp, err := client.Do(req)
+	if err != nil {
+		return map[string]interface{}{}
+	}
 	defer resp.Body.Close()
 
 	// 解析响应
@@ -119,6 +178,11 @@ func PostClassZhiPuAI(content string) map[string]interface{} {
 		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
 			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
 				result := qu.ObjToString((*message)["content"])
+				//最终正确的结果
+				arr := strings.Split(result, "最终正确的结果")
+				if len(arr) > 1 {
+					result = arr[1]
+				}
 				result = strings.ReplaceAll(result, "\n", "")
 				result = strings.ReplaceAll(result, "json", "")
 				result = strings.ReplaceAll(result, "`", "")
@@ -133,3 +197,58 @@ func PostClassZhiPuAI(content string) map[string]interface{} {
 	}
 	return map[string]interface{}{}
 }
+
+/*****************************
+******************************
+******************************
+******************************
+******************************
+******************************/
+// PostZhiPuInfo requests the general ("peripheral") fields from Zhipu AI.
+// It calls PostZhiPuAI at most twice and returns the first non-empty reply;
+// if both attempts come back empty, the reply of the second attempt is returned.
+func PostZhiPuInfo(content string) map[string]interface{} {
+	const maxAttempts = 2
+	var reply map[string]interface{}
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		reply = PostZhiPuAI(content)
+		if len(reply) > 0 {
+			return reply
+		}
+	}
+	return reply
+}
+
+// PostZhiPuPackageInfo requests the multi-package fields from Zhipu AI.
+// PostPackageZhiPuAI is tried up to two times; the first non-empty reply wins,
+// otherwise whatever the last attempt produced is handed back.
+func PostZhiPuPackageInfo(content string) map[string]interface{} {
+	reply := PostPackageZhiPuAI(content)
+	if len(reply) > 0 {
+		return reply
+	}
+	// One retry after an empty first reply.
+	return PostPackageZhiPuAI(content)
+}
+
+// PostZhiPuClassInfo requests the classification fields from Zhipu AI.
+// PostClassZhiPuAI is called at most twice. The boolean is true only when one
+// of the attempts returned a non-empty map; on failure the (empty) reply of the
+// last attempt is returned together with false.
+func PostZhiPuClassInfo(content string) (map[string]interface{}, bool) {
+	const maxAttempts = 2
+	var reply map[string]interface{}
+	for attempt := 0; attempt < maxAttempts; attempt++ {
+		if reply = PostClassZhiPuAI(content); len(reply) > 0 {
+			return reply, true
+		}
+	}
+	return reply, false
+}

+ 2 - 2
clean/c_all.go

@@ -27,10 +27,10 @@ func CleanFieldInfo(zhipu map[string]interface{}, fns []string) map[string]inter
 	if s_pcode := CleanPcode(qu.ObjToString(zhipu["项目编号"]), fns); s_pcode != "" {
 		data["s_projectcode"] = s_pcode
 	}
-	if s_budget := CleanMoney(zhipu["预算金额"]); s_budget > 0.0 && s_budget < 1000000000.0 {
+	if s_budget := CleanMoney([]interface{}{zhipu["预算金额"], ""}); s_budget > 0.0 && s_budget < 1000000000.0 {
 		data["s_budget"] = s_budget
 	}
-	if s_bidamount := CleanMoney(zhipu["中标金额"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
+	if s_bidamount := CleanMoney([]interface{}{zhipu["中标金额"], ""}); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
 		data["s_bidamount"] = s_bidamount
 	}
 	if s_winner := CleanWinner(qu.ObjToString(zhipu["中标单位"])); s_winner != "" {

+ 381 - 25
clean/c_money.go

@@ -7,6 +7,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"unicode/utf8"
 )
 
 var (
@@ -15,35 +16,390 @@ var (
 	numReg1   = regexp.MustCompile("([0-9\\.]+)")
 )
 
-// 清洗金额
-func CleanMoney(money interface{}) float64 {
-	tmpstr := ""
-	if _, ok := money.(float64); ok {
-		tmpstr = fmt.Sprintf("%f", money)
+// Patterns used by the money-cleaning code in this file. Every pattern is
+// compiled with MustCompile so that an invalid expression panics at package
+// load instead of being stored as a nil *regexp.Regexp.
+var (
+	// unpkvBidamountReg: leading "X+<number>元/每" prefix that CleanMoney strips.
+	unpkvBidamountReg = regexp.MustCompile("^([Xx]\\+[1-9\\.]+元/每)")
+	// specBidamountReg: "<mantissa>E<1-7>" notation that CleanMoney expands to a plain number.
+	specBidamountReg = regexp.MustCompile("^([0-9.]+)E([1-7])$")
+	// regUnitMoneyClean: "unit price ..., tax-inclusive total ..." text; CleanMoney keeps group 2 (the total).
+	regUnitMoneyClean = regexp.MustCompile("^(.*单价[0-9.]+元[/][袋|块])[,,](含税总价[0-9.]+[万元]+)[.。]$")
+	// blackMoneyClean: inputs that CleanMoney blanks out entirely.
+	blackMoneyClean = regexp.MustCompile("^([0-9.]+以下[万]?|分)$")
+	// impactMoneyClean: phrases containing 分 that CleanMoney removes before parsing.
+	impactMoneyClean = regexp.MustCompile("(分二串口|分站模块)")
+
+	// impactMoneyeplenish: capital-numeral text starting with "<digit>分"; CleanMoney prepends "零元" to it.
+	impactMoneyeplenish = regexp.MustCompile("^([壹贰叁肆伍陆柒捌玖]分)")
+
+	// resetAamountReg: doubled ".0"/".00" groups that CleanMoney collapses to ".0".
+	resetAamountReg = regexp.MustCompile("[.](0|00)[.](0|00)")
+	// regPercentMoney: percentage figures that CleanMoney deletes from the text.
+	regPercentMoney = regexp.MustCompile(`[0-9.]+[((]?[%|%][))]?`)
+	// regQianw: "<1-2 digits>千万", handled by numMoney as 万 scaled by 1000.
+	regQianw = regexp.MustCompile(`\d{1,2}千万`)
+	kxjsReg  = regexp.MustCompile("[0-9][E|e]{1}[-—+]{1}[0-9]{1,2}")
+
+	regOperator = regexp.MustCompile(`[*|+|)*)]`)
+	// regNumFloat: decimal numbers extracted by numMoney.
+	regNumFloat = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?`)
+	// regStrUnit: unit characters looked up in moneyUnit.
+	regStrUnit = regexp.MustCompile(`[元|万|亿]`)
+	// regStrJe: number followed by up to three unit/space characters.
+	// NOTE(review): `\d_+` in the fraction group looks like a typo for `\d+` — confirm before changing.
+	regStrJe = regexp.MustCompile(`([1-9]\d*|0)(\.\d_+)?[\s|元|万|亿]{0,3}`)
+	// regStrChar is the character class of capital-numeral money text; moneyRegChar is its compiled form.
+	regStrChar   = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
+	moneyRegChar = regexp.MustCompile(regStrChar)
+	contentUnit  = regexp.MustCompile(`(万元|单位/万)`)
+	// numCapitals: a run of 4 to 40 capital-numeral money characters.
+	numCapitals = regexp.MustCompile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
+
+	// moneyUnitRegBool: text that explicitly states a zero amount; CleanMoney accepts a 0 result only when it matches.
+	moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金额|中标价|成交价|成交价格|中标\(成交\)金额|投标报价|中标标价|成交结果)?[::\s]?(0|零|0.0|¥0)+(0|\.)*[\s]?(万|元|){0,2}[\s]?((人民币))?$`)
+
+	// cutAllSpace: whitespace removed by CutAllSpace.
+	cutAllSpace = regexp.MustCompile(`\s*`)
+)
+
+// spaces lists whitespace-like characters that CutAllSpace removes in addition to `\s`.
+var spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n", "\u0001"}
+
+var moneyClearSpidercode map[string]interface{}
+
+// moneyChar maps numeral and unit characters to their numeric value.
+// "点" maps to the string "." rather than a number; "〇" has no entry.
+var moneyChar = map[string]interface{}{
+	"一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
+	"六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
+	"百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
+	"零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
+}
+
+// NumChar is the set of lower-case Chinese numerals 一 through 十.
+// The ninth key was previously "久" (a homophone of nine), so "九" was missing
+// from the set; it is now the numeral itself.
+var NumChar = map[string]interface{}{
+	"一": 1, "二": 1, "三": 1, "四": 1, "五": 1, "六": 1, "七": 1, "八": 1, "九": 1, "十": 1,
+}
+
+// moneyUnit maps a unit character to its multiplier in 元.
+var moneyUnit = map[string]float64{
+	"元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
+}
+
+// init re-assigns the package-level patterns of this file. Each of them is
+// already compiled by its own declaration, so this repeats that work; the set
+// of assigned variables is kept as it was. MustCompile is used throughout so a
+// malformed pattern panics at start-up instead of being stored as nil with the
+// error discarded.
+func init() {
+	regOperator = regexp.MustCompile(`[*|+|)*)]`)
+	regNumFloat = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?`)
+	regStrUnit = regexp.MustCompile(`[元|万|亿]`)
+	regStrJe = regexp.MustCompile(`([1-9]\d*|0)(\.\d_+)?[\s|元|万|亿]{0,3}`)
+	regStrChar = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
+	moneyRegChar = regexp.MustCompile(regStrChar)
+	contentUnit = regexp.MustCompile(`(万元|单位/万)`)
+	numCapitals = regexp.MustCompile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
+
+	regQianw = regexp.MustCompile(`\d{1,2}千万`)
+	kxjsReg = regexp.MustCompile("[0-9][E|e]{1}[-—+]{1}[0-9]{1,2}")
+	regPercentMoney = regexp.MustCompile(`[0-9.]+[((]?[%|%][))]?`)
+}
+
+// CleanMoney converts data[0] (a float64 or amount text in digits or Chinese capital numerals) into a float64 rounded to 4 decimals, returning 0 when no amount is recognised; data[0] is overwritten in place and data needs at least two elements because capitalMoney and numMoney read data[1].
+func CleanMoney(data []interface{}) float64 {
+	//isfindUnit := true
+	tmpstr := (data)[0] // raw input value; indexing panics if data is empty
+	totmpstr := ""
+	if _, ok := tmpstr.(float64); ok {
+		totmpstr = fmt.Sprintf("%f", tmpstr)
 	} else {
-		tmpstr = util.ObjToString(money)
+		totmpstr = util.ObjToString(tmpstr)
 	}
 	// strip spaces
-	tmpstr = moneyReg1.ReplaceAllString(tmpstr, "")
-	//科学计数法
-	if moneyReg2.MatchString(tmpstr) {
-		price := util.Float64All(moneyReg2.ReplaceAllString(tmpstr, "${1}"))
-		if unit := util.Float64All(moneyReg2.ReplaceAllString(tmpstr, "${2}")); unit > 0.0 && price > 0.0 {
-			tmpstr = fmt.Sprintf("%f", math.Pow(10, unit)*price)
-		}
-	}
-	if num := numReg1.FindString(tmpstr); num != "" {
-		f_num := util.Float64All(num)
-		if strings.Contains(tmpstr, "万") {
-			f, _ := strconv.ParseFloat(strconv.FormatFloat(f_num*10000.0, 'f', 4, 64), 64)
-			return f
-		} else if strings.Contains(tmpstr, "亿") {
-			f, _ := strconv.ParseFloat(strconv.FormatFloat(f_num*10000.0*10000.0, 'f', 4, 64), 64)
-			return f
+	totmpstr = strings.ReplaceAll(totmpstr, " ", "")
+	(data)[0] = totmpstr
+
+	// special case: expand "<mantissa>E<exponent>" notation into a plain decimal
+	if specBidamountReg.MatchString(totmpstr) {
+		price := util.Float64All(specBidamountReg.ReplaceAllString(totmpstr, "${1}"))
+		if unit := util.Float64All(specBidamountReg.ReplaceAllString(totmpstr, "${2}")); unit > 0.0 && price > 0.0 {
+			totmpstr = fmt.Sprintf("%f", math.Pow(10, unit)*price)
+			(data)[0] = totmpstr
+		}
+	}
+	// abnormal prefix: drop a leading "X+<n>元/每" fragment
+	if unpkvBidamountReg.MatchString(totmpstr) {
+		totmpstr = unpkvBidamountReg.ReplaceAllString(totmpstr, "")
+		(data)[0] = totmpstr
+	}
+	if resetAamountReg.MatchString(totmpstr) { // collapse a doubled ".0"/".00" group to ".0"
+		totmpstr = resetAamountReg.ReplaceAllString(totmpstr, ".0")
+		(data)[0] = totmpstr
+	}
+	// explicit unit price plus total: keep only the tax-inclusive total (capture group 2)
+	if regUnitMoneyClean.MatchString(totmpstr) {
+		totmpstr = regUnitMoneyClean.ReplaceAllString(totmpstr, "$2")
+		(data)[0] = totmpstr
+	}
+
+	// special replacement: remove phrases matched by impactMoneyClean
+	if impactMoneyClean.MatchString(totmpstr) {
+		totmpstr = impactMoneyClean.ReplaceAllString(totmpstr, "")
+		(data)[0] = totmpstr
+	}
+
+	// capital-numeral text that starts with "<digit>分": prefix "零元"
+	if impactMoneyeplenish.MatchString(totmpstr) {
+		totmpstr = "零元" + totmpstr
+		(data)[0] = totmpstr
+	}
+
+	// blacklist: matching inputs are blanked
+	if blackMoneyClean.MatchString(totmpstr) {
+		totmpstr = ""
+		(data)[0] = totmpstr
+	}
+
+	// e.g. "未含税总价1454400.00元,税率6%,含税总价1541664.00元": percentage figures such as "6%" are removed
+	Percent := regPercentMoney.FindAllString(totmpstr, -1)
+	for _, v := range Percent {
+		totmpstr = strings.ReplaceAll(totmpstr, v, "")
+	}
+	totmpstr = strings.ReplaceAll(totmpstr, "_", "")
+	(data)[0] = totmpstr // percentage figures and underscores have been filtered out
+
+	if utf8.RuneCountInString(totmpstr) > 100 { // too long: treated as containing no valid amount
+		(data)[0] = 0
+		data = append(data, false) // NOTE(review): the appended flag is never read; only the float64 is returned
+		return 0.0
+	}
+
+	if utf8.RuneCountInString(totmpstr) > 20 {
+		if numCapitals.MatchString(totmpstr) {
+			tmpstr = numCapitals.FindString(totmpstr) // NOTE(review): only tmpstr changes; data[0] still holds the full text
+		} else if regStrJe.MatchString(totmpstr) {
+			tmpstr = regStrJe.FindString(totmpstr) // NOTE(review): same as above, data[0] is left untouched
 		} else {
-			f, _ := strconv.ParseFloat(strconv.FormatFloat(f_num, 'f', 4, 64), 64)
-			return f
+			(data)[0] = 0
+			data = append(data, false) // NOTE(review): result discarded, see above
+			return 0.0
+		}
+	}
+
+	ret := capitalMoney(data)[0] // capital-numeral reading first
+	if ret.(float64) < float64(10000) || ret.(float64) > float64(50000000000) { // outside this range, also try the digit reading
+		ret2, _ := numMoney(data) // numMoney overwrites data[0] with its numeric result
+		//isfindUnit = b
+		if ret2[0].(float64) > ret.(float64) { // keep the larger of the two readings
+			ret = ret2[0]
+		}
+	}
+	f, _ := strconv.ParseFloat(strconv.FormatFloat(ret.(float64), 'f', 4, 64), 64) // round to 4 decimal places
+	//if f < 1 {
+	//	f = 0
+	//}
+	// (disabled) if the amount is below 50, search the full text for a 万 unit
+	// if f < 50 && f > 0 && isfindUnit {
+	// 	rep := contentUnit.FindAllStringIndex(fmt.Sprint(data[1]), -1)
+	// 	if len(rep) > 0 {
+	// 		f = f * 10000
+	// 	}
+	// }
+	data[0] = f
+	if f == 0 && !moneyUnitRegBool.MatchString(fmt.Sprint(tmpstr)) { // a zero result counts only when the text explicitly states zero
+		data = append(data, false) // NOTE(review): result discarded, see above
+		return 0.0
+	}
+	data = append(data, true) // NOTE(review): result discarded, see above
+	if len(data) > 0 { // always true here: data was indexed and appended to above
+		return util.Float64All(data[0])
+	} else {
+		return 0.0
+	}
+}
+
+// numMoney extracts an amount written with Arabic digits from data[0].
+//
+// It collects every decimal number in the text together with the few bytes
+// around it, takes the first number whose surroundings contain a unit
+// character (元/万/亿) and multiplies it by that unit; when no unit is found
+// the last number is used as is. "N千万" is read as N万 scaled by 1000, and a
+// trailing "%" or "‰" scales the result by 0.01 or 0.001.
+//
+// The result is written to data[0] and data is returned. The boolean is false
+// exactly when the detected unit is 万 (10000) and true otherwise.
+func numMoney(data []interface{}) ([]interface{}, bool) {
+	// Render data[0] as text. A float64 keeps its fixed-point "%f" form; any
+	// other value (normally a string) is printed as is. Applying "%f" to a
+	// string produced "%!f(string=...)", which hid the %/‰ suffix below.
+	var tmp string
+	if f, ok := data[0].(float64); ok {
+		tmp = fmt.Sprintf("%f", f)
+	} else {
+		tmp = fmt.Sprint(data[0])
+	}
+	tmp = strings.ReplaceAll(tmp, "(不含税)", "")
+	// rate conversion for % and ‰ suffixes
+	flv := float64(1)
+	if strings.HasSuffix(tmp, "%") {
+		flv = 0.01
+	} else if strings.HasSuffix(tmp, "‰") {
+		flv = 0.001
+	}
+	repUnit := float64(1)
+	if regQianw.MatchString(tmp) {
+		tmp = strings.Replace(tmp, "千万", "万", -1)
+		repUnit = float64(1000)
+	}
+	tmp = replaceSymbol(tmp, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
+	tmp = replaceString(tmp, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
+	// CutAllSpace only touches element 0, so data[1] is not needed here.
+	tmp = fmt.Sprint(CutAllSpace([]interface{}{tmp})[0])
+
+	rets := regNumFloat.FindAllString(tmp, -1)
+	fnums := []float64{}
+	unitstrs := []string{}
+	pindex := 0 // byte offset just past the previous number
+	for k, v := range rets {
+		f, err := strconv.ParseFloat(v, 64)
+		if err != nil {
+			continue
+		}
+		fnums = append(fnums, f)
+		index := strings.Index(tmp, v) // first occurrence of the number text
+		start := index + len(v)
+		// Up to 3 bytes after the number (one CJK unit character).
+		end := start + 3
+		if end > len(tmp) {
+			end = len(tmp)
+		}
+		// Bytes before the number that may hold a leading unit.
+		before := ""
+		switch {
+		case k == 0:
+			if index-3 >= 0 {
+				before = tmp[index-3 : index]
+			}
+		case start >= pindex+3:
+			// Skip the 3 bytes already examined as the previous number's suffix.
+			pstart := pindex + 3
+			if pstart >= index {
+				pstart = index
+			}
+			before = tmp[pstart:index]
+		}
+		unitstrs = append(unitstrs, before+tmp[start:end])
+		pindex = start
+	}
+
+	unit := float64(0)
+	fnum := float64(0)
+	for k, v := range fnums {
+		fnum = v
+		for _, u := range regStrUnit.FindAllString(unitstrs[k], -1) {
+			if moneyUnit[u] != 0 {
+				unit = moneyUnit[u]
+				break
+			}
+		}
+		if unit != float64(0) { // stop at the first number that has a unit
+			break
+		}
+	}
+	fnum = fnum * repUnit
+	if unit == float64(0) {
+		data[0] = fnum * flv
+	} else {
+		data[0] = fnum * unit * flv
+	}
+	return data, unit != 10000
+}
+
+// capitalMoney converts Chinese capital-numeral money text found in data[0] to a float64 and returns []interface{}{amount, data[1]}; data itself is not modified, and data[1] is read so data needs at least two elements.
+func capitalMoney(data []interface{}) []interface{} {
+	nodes := []float64{} // completed 万/亿 sections, summed at the end
+	node := float64(0) // running value of the current section
+	tmp := float64(0) // last digit seen, waiting for its multiplier
+	decimals := 0.0 // 角/分 part accumulated after 元/圆/点
+	ishaspoint := false // whether a 元/圆/点 marker has been seen
+	fnum := float64(0) // last digit seen after the marker
+	end := false // set once 整/正/分 is reached; later characters are ignored
+	str := fmt.Sprint(data[0])
+	// take the first capital-numeral run
+
+	if strings.Contains(str, "壹") { // mixed notation: normalise 一 to 壹
+		str = strings.ReplaceAll(str, "一", "壹")
+	}
+
+	strmatch := numCapitals.FindAllStringSubmatch(str, -1)
+
+	if len(strmatch) > 0 { // otherwise the whole string is walked below
+		str = strmatch[0][0]
+	}
+	suffixUnit := float64(1)
+	if strings.HasSuffix(str, "万") || strings.HasSuffix(str, "万元") || strings.HasSuffix(str, "万元整") {
+		index := strings.LastIndex(str, "万")
+		str = str[0:index] // cut the trailing 万 unit and apply it as a multiplier instead
+		suffixUnit = float64(10000)
+	}
+	yy := false // whether a 亿/億 unit was seen
+	moneyRegChar.ReplaceAllStringFunc(str, func(key string) string { // used only to visit each matching character; the replaced string is discarded
+		if key == "元" || key == "圆" || key == "点" {
+			ishaspoint = true
+		}
+		if v, ok := moneyChar[key].(float64); ok && !end {
+			if ishaspoint && v > 10 { // ignore larger units that appear after the marker
+				return ""
+			}
+			//fmt.Println(key, v, fnum)
+			if v < 10 && v >= 0 {
+				if ishaspoint { // fractional part
+					if v >= 1 {
+						fnum = v
+					} else if v < 1 && v > 0 {
+						decimals += fnum * v
+					}
+				} else {
+					if tmp != float64(0) {
+						node += tmp
+					}
+					tmp = float64(v)
+				}
+			} else if v == 10000 || v == 100000000 { // section units 万 and 亿
+				if tmp != float64(0) {
+					node += tmp
+					tmp = float64(0)
+				}
+				nodes = append(nodes, node*float64(v))
+				if v == 100000000 {
+					yy = true
+				}
+				node = float64(0)
+			} else {
+				if v == 10 && tmp == 0 { // a bare 拾/十 counts as 1 x 10
+					tmp = 1
+				}
+				tmp = tmp * float64(v)
+				node += tmp
+				tmp = float64(0)
+			}
 		}
+		if key == "整" || key == "正" || key == "分" {
+			end = true
+		}
+		return ""
+	})
+	if yy { // with 亿 present the 万 suffix scales only the remaining section
+		nodes = append(nodes, node*suffixUnit, tmp)
+	} else {
+		nodes = append(nodes, node, tmp)
+	}
+	ret := float64(0)
+	for _, v := range nodes {
+		ret += v
+	}
+	if yy {
+		return []interface{}{(ret + decimals), data[1]}
+	} else {
+		return []interface{}{(ret + decimals) * suffixUnit, data[1]} // without 亿 the 万 suffix scales the whole amount
 	}
-	return 0.0
+}
+
+// replaceSymbol deletes every occurrence of each string in rep from con,
+// processing the entries of rep in order, and returns the result.
+func replaceSymbol(con string, rep []string) string {
+	cleaned := con
+	for i := 0; i < len(rep); i++ {
+		cleaned = strings.ReplaceAll(cleaned, rep[i], "")
+	}
+	return cleaned
+}
+
+// replaceString replaces every occurrence of ret[i] in con with rep[i], one
+// pair after another in index order. Entries of ret that have no counterpart
+// in rep are left alone.
+func replaceString(con string, ret, rep []string) string {
+	pairs := len(ret)
+	if len(rep) < pairs {
+		pairs = len(rep)
+	}
+	out := con
+	for i := 0; i < pairs; i++ {
+		out = strings.ReplaceAll(out, ret[i], rep[i])
+	}
+	return out
+}
+
+// CutAllSpace removes all whitespace from the text form of data[0]: first
+// everything matched by the cutAllSpace pattern, then each entry of spaces.
+// The cleaned string is stored back into data[0] and data is returned.
+func CutAllSpace(data []interface{}) []interface{} {
+	stripped := cutAllSpace.ReplaceAllString(fmt.Sprint(data[0]), "")
+	data[0] = replaceSymbol(stripped, spaces)
+	return data
 }

+ 2 - 1
config.json

@@ -3,6 +3,7 @@
   "bid_name": "bidding",
   "ext_name": "result_20220218",
   "reading": 500,
+  "udp_max": 10000,
   "smail": {
     "to": "zhengkun@topnet.net.cn,xuzhiheng@topnet.net.cn",
     "api": "http://172.17.145.179:19281/_send/_mail"
@@ -11,7 +12,7 @@
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
-    "dbname" : "qfw_ai",
+    "dbname" : "qfw",
     "username": "zhengkun",
     "password": "zk@123123"
   },

+ 16 - 3
extract/extract.go

@@ -6,6 +6,7 @@ import (
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"strings"
 	"sync"
 	"unicode/utf8"
 )
@@ -21,7 +22,11 @@ func ExtractFieldInfo(sid string, eid string) {
 	//先查询抽取表-确定大模型需要识别到范围
 	dict := ConfrimExtractInfo(q)
 	log.Debug("查询语句...", q, "~", len(dict))
-
+	if len(dict) >= ul.MaxUdp { //根据数量限制使用具体模型
+		ul.FlashModel = "glm-4-flashx"
+	} else {
+		ul.FlashModel = "glm-4-flash"
+	}
 	pool_mgo := make(chan bool, ul.Reading)
 	wg_mgo := &sync.WaitGroup{}
 
@@ -30,8 +35,8 @@ func ExtractFieldInfo(sid string, eid string) {
 	total, isok := 0, 0
 	it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%5000 == 0 {
-			log.Debug("cur index ", total)
+		if total%200 == 0 {
+			log.Debug("cur ai index ", total)
 		}
 		tmpid := ul.BsonTOStringId(tmp["_id"])
 		infoformat := qu.IntAll(tmp["infoformat"])
@@ -66,6 +71,12 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	detail := qu.ObjToString(v["detail"])
 	filetext := qu.ObjToString(v["filetext"]) //此处为附件信息···
 	title := qu.ObjToString(v["title"])
+	if strings.Contains(title, "开标记录") { //开标记录舍弃
+		return map[string]interface{}{}
+	}
+	if v["jyfb_data"] != nil { //剑鱼发布舍弃qi
+		return map[string]interface{}{}
+	}
 	fns := getpnsinfo(v) //获取附件名字
 	f_data := map[string]interface{}{}
 	if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
@@ -90,6 +101,8 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 		s_toptype, s_subtype = "拟建", "拟建"
 	} else if qu.ObjToString(v["toptype"]) == "产权" {
 		s_toptype, s_subtype = "产权", "产权"
+	} else if qu.ObjToString(v["toptype"]) == "采购意向" {
+		s_toptype, s_subtype = "采购意向", "采购意向"
 	} else {
 		s_toptype, s_subtype = prompt.AcquireClassInfo(detail, title)
 	}

+ 289 - 165
extract/test.go

@@ -11,10 +11,13 @@ import (
 	"os"
 	"strings"
 	"sync"
+	"time"
 	"unicode/utf8"
 )
 
+// 验证单条数据···
 func TestSingleFieldInfo(name string, tmpid string) {
+	now := time.Now().Unix()
 	tmp := ul.BidMgo.FindById(name, tmpid)
 	if len(tmp) == 0 || tmp == nil {
 		log.Debug("未查询到数据...", tmpid)
@@ -25,6 +28,240 @@ func TestSingleFieldInfo(name string, tmpid string) {
 	for k, v := range data {
 		log.Debug(k, "~", v)
 	}
+	log.Debug("耗时···", time.Now().Unix()-now)
+}
+
+// TestErrInfo0923 re-runs multi-package extraction for a fixed list of bidding
+// ids and stores one result document per id in the "zkzkzkzk" collection.
+//
+// Every stored document carries "s_id" and "ispkg": -1 when the bidding record
+// was not found, 0 when the record is skipped or yields no packages, 1 for a
+// single package and 2 for several. "pkg" is stored only for ispkg 1 and 2.
+func TestErrInfo0923() {
+	arr := []string{
+		"66e39b7bb25c3e1debf2cb66",
+		"66e39b7bb25c3e1debf2cb52",
+		"66e39b76b25c3e1debf2cb1c",
+		"66e39b71b25c3e1debf2ca58",
+		"66e39b71b25c3e1debf2ca47",
+		"66e39b71b25c3e1debf2ca3e",
+		"66e39b71b25c3e1debf2ca1d",
+		"66e39b62b25c3e1debf2c9a0",
+		"66e39b0db25c3e1debf2c788",
+		"66e39af3b25c3e1debf2c711",
+		"66e39ae5b25c3e1debf2c6ac",
+		"66e39ad5b25c3e1debf2c607",
+		"66e39ad5b25c3e1debf2c5fc",
+		"66e39ad1b25c3e1debf2c5e1",
+		"66e39acbb25c3e1debf2c56c",
+		"66e39ac6b25c3e1debf2c533",
+		"66e39ac1b25c3e1debf2c526",
+		"66e39a9eb25c3e1debf2c4e7",
+		"66e39a80b25c3e1debf2c441",
+		"66e39a5cb25c3e1debf2c357",
+		"66e39a30b25c3e1debf2c18f",
+		"66e39a12b25c3e1debf2c0cc",
+		"66e39a08b25c3e1debf2c050",
+		"66e39a08b25c3e1debf2bfce",
+		"66e3993fb25c3e1debf2b875",
+		"66e3990db25c3e1debf2b55b",
+		"66e398f1b25c3e1debf2b4bc",
+		"66e397ccb25c3e1debf2abed",
+		"66e397b9b25c3e1debf2ab81",
+		"66e3977db25c3e1debf2a7ae",
+		"66e396b3b25c3e1debf2a049",
+		"66e396b3b25c3e1debf29f97",
+		"66e3969eb25c3e1debf29e62",
+		"66e395feb25c3e1debf29abb",
+		"66e395e0b25c3e1debf298d2",
+		"66e395d6b25c3e1debf297e6",
+		"66e395ccb25c3e1debf296d1",
+		"66e39554b25c3e1debf29331",
+		"66e39517b25c3e1debf29160",
+		"66e394c7b25c3e1debf28f42",
+		"66e394bdb25c3e1debf28ef6",
+		"66e394b3b25c3e1debf28e48",
+		"66e3944fb25c3e1debf28ab5",
+		"66e393ccb25c3e1debf28729",
+		"66e393c2b25c3e1debf286dd",
+		"66e393c2b25c3e1debf286a5",
+		"66e393aeb25c3e1debf28572",
+		"66e3934ab25c3e1debf28423",
+		"66e39322b25c3e1debf282c6",
+		"66e392d1b25c3e1debf2809d",
+		"66e39212b25c3e1debf279d1",
+		"66e39209b25c3e1debf279c7",
+		"66e391f5b25c3e1debf2779e",
+		"66e391eab25c3e1debf2773f",
+		"66e391e0b25c3e1debf276a8",
+		"66e39168b25c3e1debf27347",
+		"66e3912cb25c3e1debf2714a",
+		"66e390e6b25c3e1debf26ee7",
+		"66e390e4b25c3e1debf26e7c",
+		"66e390b3b25c3e1debf26cce",
+		"66e3906cb25c3e1debf26a8f",
+		"66e3901bb25c3e1debf26822",
+		"66e38ff5b25c3e1debf26714",
+		"66e38fd5b25c3e1debf26694",
+		"66e38fb8b25c3e1debf265a4",
+		"66e38f90b25c3e1debf264c5",
+		"66e38f7bb25c3e1debf263de",
+		"66e38f68b25c3e1debf263b8",
+		"66e38f5eb25c3e1debf2638c",
+		"66e38f4ab25c3e1debf2633b",
+		"66e38f40b25c3e1debf2631a",
+		"66e38f18b25c3e1debf261c4",
+		"66e38ef8b25c3e1debf260e7",
+		"66e38ec8b25c3e1debf26063",
+		"66e38eb3b25c3e1debf26017",
+		"66e38e95b25c3e1debf25f78",
+		"66e38e6db25c3e1debf25ef0",
+		"66e38e61b25c3e1debf25eb4",
+		"66e38a57b25c3e1debf24a45",
+		"66e38a47b25c3e1debf24a09",
+		"66e38a47b25c3e1debf249dd",
+		"66e38a47b25c3e1debf249a1",
+		"66e38a47b25c3e1debf24998",
+		"66e38a47b25c3e1debf24995",
+		"66e38a1fb25c3e1debf2494a",
+		"66e389f3b25c3e1debf2482e",
+		"66e389f3b25c3e1debf24824",
+		"66e389f3b25c3e1debf2481d",
+		"66e389f3b25c3e1debf24819",
+		"66e389f3b25c3e1debf24816",
+		"66e389f3b25c3e1debf24808",
+		"66e389d5b25c3e1debf247ec",
+		"66e389adb25c3e1debf24792",
+		"66e389a6b25c3e1debf24733",
+		"66e389a6b25c3e1debf24727",
+		"66e389a3b25c3e1debf246f3",
+		"66e3899cb25c3e1debf246ee",
+		"66e3874db25c3e1debf23d91",
+		"66e38739b25c3e1debf23d28",
+		"66e38720b25c3e1debf23cf3",
+		"66e386f3b25c3e1debf23cac",
+		"66e386dfb25c3e1debf23c5b",
+		"66e386dfb25c3e1debf23c5a",
+		"66e386d5b25c3e1debf23c1b",
+		"66e38699b25c3e1debf23b3b",
+		"66e38694b25c3e1debf23b35",
+		"66e3867bb25c3e1debf23af4",
+		"66e38671b25c3e1debf23ad6",
+		"66e38671b25c3e1debf23aa3",
+		"66e38671b25c3e1debf23a97",
+		"66e38661b25c3e1debf23a6a",
+		"66e38657b25c3e1debf23a27",
+		"66e3864db25c3e1debf23a10",
+		"66e385f3b25c3e1debf23954",
+		"66e385dfb25c3e1debf2393a",
+		"66e385d0b25c3e1debf238de",
+		"66e385c6b25c3e1debf23896",
+		"66e385adb25c3e1debf2381b",
+		"66e385a3b25c3e1debf237eb",
+		"66e385a3b25c3e1debf237e0",
+		"66e38570b25c3e1debf23778",
+		"66e38566b25c3e1debf23765",
+		"66e3855cb25c3e1debf23755",
+		"66e38534b25c3e1debf2369e",
+		"66e38526b25c3e1debf2367c",
+		"66e3851cb25c3e1debf23646",
+		"66e3851cb25c3e1debf2363d",
+		"66e3851cb25c3e1debf2363a",
+		"66e38512b25c3e1debf23621",
+		"66e38512b25c3e1debf2360e",
+		"66e384f8b25c3e1debf23590",
+		"66e384eeb25c3e1debf23572",
+		"66e384c2b25c3e1debf2350b",
+		"66e3847fb25c3e1debf23474",
+	}
+
+	// save stores the outcome for one id; pkg is persisted only when packages were found.
+	save := func(id string, ispkg int, pkg map[string]interface{}) {
+		doc := map[string]interface{}{
+			"ispkg": ispkg,
+			"s_id":  id,
+		}
+		if ispkg > 0 {
+			doc["pkg"] = pkg
+		}
+		ul.BidMgo.Save("zkzkzkzk", doc)
+	}
+
+	pool_mgo := make(chan bool, 200) // caps how many ids are processed at once
+	wg_mgo := &sync.WaitGroup{}
+	log.Debug("预计处理条数···", len(arr))
+	for k, v := range arr {
+		if k%10 == 0 {
+			log.Debug("cur index ", k)
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(v string) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			data := ul.BidMgo.FindById("bidding", v)
+			// An empty result means the record was not found. The previous
+			// check `len(data) >= 0` was always true, so empty lookups were
+			// never reported as missing; the record now also keeps its id.
+			if len(data) == 0 {
+				log.Debug("未查询到数据···")
+				save(v, -1, nil)
+				return
+			}
+			detail := qu.ObjToString(data["detail"])
+			filetext := qu.ObjToString(data["filetext"]) // attachment text
+			title := qu.ObjToString(data["title"])
+			if strings.Contains(title, "开标记录") { // bid-opening records are skipped
+				save(v, 0, nil)
+				log.Debug("开标记录舍弃···")
+				return
+			}
+			if data["jyfb_data"] != nil { // records carrying jyfb_data are skipped
+				save(v, 0, nil)
+				log.Debug("剑鱼发布舍弃···")
+				return
+			}
+			if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
+				detail = filetext // fall back to the attachment text for short bodies
+			}
+			if utf8.RuneCountInString(detail) < 100 {
+				save(v, 0, nil)
+				log.Debug("长度不符舍弃···")
+				return
+			}
+			// Extract package information and classify by the number of packages.
+			// A missing or differently typed "s_pkg" leaves s_pkg nil (length 0).
+			pkg := prompt.AcquireMultiplePackageInfo(detail)
+			s_pkg, _ := pkg["s_pkg"].(map[string]map[string]interface{})
+			switch {
+			case len(s_pkg) == 0:
+				save(v, 0, nil)
+			case len(s_pkg) == 1:
+				save(v, 1, pkg)
+			default:
+				save(v, 2, pkg)
+			}
+		}(v)
+	}
+	wg_mgo.Wait()
+	log.Debug("is over ···")
+}
 
 // 导出需要修复的
@@ -420,116 +657,8 @@ func TestExportAiBuyer() {
 }
 
 func TestIsPackage() {
-	tmpArr := []string{
-		"669e83fe66cf0db42a6520b3",
-		"669e892066cf0db42a652c9b",
-		"669e904966cf0db42a653b5d",
-		"669f16f466cf0db42a669069",
-		"669f186c66cf0db42a669bf0",
-		"669efb6766cf0db42a65e0b4",
-		"669f004266cf0db42a65f201",
-		"669f02a666cf0db42a65fff3",
-		"669f172766cf0db42a669193",
-		"669ec89566cf0db42a659020",
-		"669e86b266cf0db42a6526ac",
-		"669e86e466cf0db42a6527b7",
-		"669e87b766cf0db42a652a3e",
-		"669f082d66cf0db42a662323",
-		"669e95e966cf0db42a654dd1",
-		"669ea39466cf0db42a656311",
-		"669f140366cf0db42a66772f",
-		"669ee59466cf0db42a65b8aa",
-		"669f05a166cf0db42a66117b",
-		"669e90d666cf0db42a653e0a",
-		"669f08c466cf0db42a66273c",
-		"669f155166cf0db42a6682c7",
-		"669ef0ff66cf0db42a65c83a",
-		"669efdc166cf0db42a65e8f3",
-		"669f090066cf0db42a6629d0",
-		"669f111366cf0db42a665ce7",
-		"669f15fb66cf0db42a668901",
-		"669f0baa66cf0db42a663a72",
-		"669f039766cf0db42a66044e",
-		"669eff3e66cf0db42a65ee73",
-		"669f12c366cf0db42a666b9d",
-		"669e913b66cf0db42a653ffc",
-		"669e833466cf0db42a651e3a",
-		"669f071e66cf0db42a661b03",
-		"669f1a1266cf0db42a66a892",
-		"669f0aec66cf0db42a6635e8",
-		"669f169c66cf0db42a668e1d",
-		"669ed6c966cf0db42a65a75d",
-		"669f072866cf0db42a661b26",
-		"669f185866cf0db42a669af0",
-		"669f15d366cf0db42a6687aa",
-		"669f182466cf0db42a669960",
-		"669f0ed066cf0db42a664e5c",
-		"669f076466cf0db42a661cd4",
-		"669f172966cf0db42a6691c0",
-		"669f198466cf0db42a66a385",
-		"669f1ad366cf0db42a66afb9",
-		"669f156666cf0db42a668403",
-		"669f093c66cf0db42a662c08",
-		"669f0d8266cf0db42a6646cb",
-		"669f06e866cf0db42a661a1d",
-		"669f1bd766cf0db42a66b86e",
-		"669efcd066cf0db42a65e4f4",
-	}
-	pkgArr := []int{
-		1,
-		1,
-		1,
-		1,
-		0,
-		1,
-		1,
-		1,
-		1,
-		1,
-		1,
-		1,
-		1,
-		0,
-		0,
-		1,
-		1,
-		1,
-		1,
-		1,
-		0,
-		0,
-		0,
-		1,
-		0,
-		0,
-		0,
-		1,
-		0,
-		1,
-		0,
-		0,
-		1,
-		0,
-		1,
-		0,
-		1,
-		1,
-		0,
-		1,
-		0,
-		0,
-		1,
-		1,
-		1,
-		1,
-		1,
-		1,
-		0,
-		1,
-		1,
-		0,
-		1,
-	}
+	tmpArr := []string{}
+	pkgArr := []int{}
 	ok := 0
 	for k, v := range tmpArr {
 		data := ul.SourceMgo.FindById("ai_41411", v)
@@ -548,74 +677,69 @@ func TestIsPackage() {
 }
 
 func TestPackageInfo() {
-	query := map[string]interface{}{
-		"new_pkg": map[string]interface{}{
-			"$exists": 1,
-		},
-	}
-	dataArr, _ := ul.SourceMgo.Find("ai_41411_zhipu", query, nil, map[string]interface{}{})
-	log.Debug("查询数量...", len(dataArr))
-
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	q, total := map[string]interface{}{"ai_zhipu.ispkg": true}, 0
+	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("-_id").Iter()
+	isok := 0
 	os.Remove("test.xlsx")
 	f := new_xlsx.NewFile()
 	sheet, _ := f.AddSheet("数据信息")
 	row := sheet.AddRow()
-	writeRow(row, []string{"唯一标识", "站点", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
-	for _, v := range dataArr {
-		tmpid := ul.BsonTOStringId(v["_id"])
-		ttt := ul.SourceMgo.FindById("ai_41411", tmpid)
-		site := qu.ObjToString(ttt["site"])
+	writeRow(row, []string{"序号", "唯一标识", "站点", "项目名称", "一级分类", "二级分类", "原文链接", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total, "~", isok)
+		}
+		tmpid := ul.BsonTOStringId(tmp["_id"])
+		site := qu.ObjToString(tmp["site"])
+		projectname := qu.ObjToString(tmp["projectname"])
+		toptype := qu.ObjToString(tmp["toptype"])
+		subtype := qu.ObjToString(tmp["subtype"])
+		href := qu.ObjToString(tmp["href"])
 		jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
-		p_info := *qu.ObjToMap(v["new_pkg"])
-		p_arr := ul.IsMarkInterfaceMap(p_info["分包信息"])
-		for _, v1 := range p_arr {
-			row = sheet.AddRow()
-			arr := []string{}
-			arr = append(arr, tmpid)
-			arr = append(arr, site)
-			arr = append(arr, jyhref)
-			arr = append(arr, qu.ObjToString(v1["包项目名称"]))
-			arr = append(arr, qu.ObjToString(v1["中标单位"]))
-			arr = append(arr, qu.ObjToString(v1["中标金额"]))
-			writeRow(row, arr)
+		ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
+		if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
+			if s_info := qu.ObjToMap((*s_pkg)["s_pkg"]); s_info != nil && len(*s_info) > 1 {
+				isok++
+				for _, v := range *s_info {
+					if v1 := qu.ObjToMap(v); v1 != nil {
+						row = sheet.AddRow()
+						arr := []string{}
+						arr = append(arr, fmt.Sprintf("%d", isok))
+						arr = append(arr, tmpid)
+						arr = append(arr, site)
+						arr = append(arr, projectname)
+						arr = append(arr, toptype)
+						arr = append(arr, subtype)
+						arr = append(arr, href)
+						arr = append(arr, jyhref)
+						arr = append(arr, qu.ObjToString((*v1)["name"]))
+						arr = append(arr, qu.ObjToString((*v1)["winner"]))
+						bidamount := qu.Float64All((*v1)["bidamount"])
+						if bidamount > 0.0 {
+							arr = append(arr, fmt.Sprintf("%.2f", bidamount))
+						} else {
+							arr = append(arr, "")
+						}
+						writeRow(row, arr)
+					}
+				}
+				if isok > 1000 {
+					break
+				}
+			}
 		}
+		tmp = make(map[string]interface{})
 	}
+	log.Debug("is over ", total, isok)
+
 	if err := f.Save("test.xlsx"); err != nil {
 		fmt.Println("保存xlsx失败:", err)
 	} else {
 		fmt.Println("保存xlsx成功:", err)
 	}
 	log.Debug("is over ...")
-
-	return
-	//分包判断,获取信息
-	pool_mgo := make(chan bool, 80)
-	wg_mgo := &sync.WaitGroup{}
-	for k, v := range dataArr {
-		if k%10 == 0 {
-			log.Debug(k, "~", v["_id"])
-		}
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(v map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(v["_id"])
-			data := ul.SourceMgo.FindById("ai_41411", tmpid)
-			if detail := qu.ObjToString(data["detail"]); utf8.RuneCountInString(detail) > 100 {
-				pkg := prompt.AcquireMultiplePackageInfo(detail)
-				//最终结果...
-				ul.SourceMgo.UpdateById("ai_41411_zhipu", tmpid, map[string]interface{}{
-					"$set": map[string]interface{}{
-						"new_pkg": pkg,
-					},
-				})
-			}
-		}(v)
-	}
-	wg_mgo.Wait()
 }
 
 // 更新链接

+ 1 - 7
main.go

@@ -15,21 +15,15 @@ func init() {
 	} else {
 		log.Debug("正常版本···")
 		ul.InitGlobalVar()
-		return
 		udp.InitProcessVar()
 	}
 }
 
 func main() {
 	if ul.IsTool {
-		log.Debug("main tool ...")
 		tool.StartToolInfo()
 	} else {
-		log.Debug("main ...")
-		//extract.TestSingleFieldInfo("bidding", "66c2fb9066cf0db42adf7c21")
-		//extract.TestExportJinErInfo()
-		//count, err := ul.SourceMgo.Count("zktest_repeat_new", map[string]interface{}{"repeat": 1})
-		//log.Debug(count, err)
+		//extract.TestSingleFieldInfo("bidding", "66e39b71b25c3e1debf2ca47")
 	}
 	lock := make(chan bool)
 	<-lock

+ 1 - 6
prompt/prompt_buyer.go

@@ -2,8 +2,6 @@ package prompt
 
 import (
 	"data_ai/ai"
-	"data_ai/ul"
-	"unicode/utf8"
 )
 
 // 获取外围抽取字段
@@ -13,11 +11,8 @@ func AcquireBuyerInfo(detail string) map[string]interface{} {
 	return zp
 }
 
-// 提示词优选
+// 提示词优选 - 提问词补偿不需要限制
 func PromptBuyerText(detail string) string {
-	if utf8.RuneCountInString(detail) > ul.MaxLen {
-		detail = string([]rune(detail)[:ul.MaxLen])
-	}
 	content := `请根据我提供的正文进行"实体单位"的抽取;
 你在识别"实体单位"的时候,只能返回一个实体单位,不要返回多个实体单位,如果识别不出来,请填写"无";
 请将上述的识别结果、信息分类结果,按照JSON格式输出,

+ 33 - 28
prompt/prompt_class.go

@@ -5,9 +5,30 @@ import (
 	"data_ai/ul"
 	"fmt"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"unicode/utf8"
 )
 
+var pmt_class = `
+你是一个文本处理模型,根据下述信息做"招投标信息分类":
+一、信息分类选项:[合同公告;验收公告;违规公告;结果公告;预告公告;招标公告;采购意向公告]
+二、信息分类权重(权重由高到低排序):[合同公告;验收公告;违规公告;结果公告;预告公告;招标公告;采购意向公告]
+三、信息分类参考资料:
+	1、合同公告:合同公告是指在招投标结束后,招标人与中标人正式签订合同后发布的公告。它通常包含合同的主要内容、双方的基本信息、合同金额、签订日期等关键信息,旨在公示合同的签订情况,确保招投标活动的透明度和合法性。正文内容中提到了合同编号或者合同的具体内容,表明这是一篇合同公告。请结合列举的“关键词”进行推导,关键词:[合同公示,合同公告,合同备案,合同签订];
+	2、验收公告:宣布招标人对项目或服务完成情况进行评估和确认。请结合列举的“关键词”进行推导,关键词:[验收公告,验收报告,验收公示,电子验收单公示,验收结果];
+	3、违规公告:宣布参与招投标的任何一方违反相关法律法规或原则。请结合列举的“关键词”进行推导,关键词:[不良记录,行政处罚,不良行为,虚假合同,虚假材料];
+	4、结果公告:发布的是招投标公告评选过程或评标结果的信息,还涉及开标记录、结果变更的情况。通常会列出候选人、供应商、中标人、中标金额等信息,包括"候选公告","结果变更公告","成交公告","流标公告","废标公告"这几种类型。请结合列举的"关键词"进行推导,关键词:[评标,得分,工期,项目负责人,开标记录,中标候选,候选人,成交候选,候选入围,成交,成交金额,成交公告,供应商名称,采购结果,中选公告,中选结果,中选金额,流标,取消公告,取消采购,二次流标,比选失败,招标结束,采购程序终止,废标,作废,终止,废止公告,项目终止,招标公告作废,未成交公告,未成交公示,取消成交候选供应商,取消中标候选供应商,结果变更,中标结果变更,废标结果变更,变更中标人,重新公示,重新确认中标结果,重新确定中标人,重新确认中标公告,结果顺延];
+	5、预告公告:在招标公告正式开始前,发布资格预审信息、预审评审结果、招投标文件过程中问题的分析和评估意见、招投标项目的具体需求、技术要求等信息。请结合列举的"关键词"进行推导,关键词:[预告,预公告,预公示,资审,资审及业绩公示,参加资格预审,预审邀请,需求调研,调研公告,采购计划,审前公示,预审公告,资格预审,预审公告,招标预审,预审,预审结果,资格预审公示,资审结果,审查结果,论证意见,方案征集公告,审核前公示,专家组论证意见,需求论证公示,征求意见,采购需求,项目需求公示,论证公示,需求评审公告,需求公示,论证意见公示,征求意见,征集意见];
+	6、招标公告:招标单位或招标人公布项目的基本信息、标准、招标条件、价格和要求等,邀请符合资格的投标人参与项目投标。根据不同的招标方式和流程,可包括"单一来源公告"、"询价公告"、"竞价公告","邀标公告","竞争谈判公告","招标变更公告"。请结合列举的"关键词"进行推导,关键词:[招标公告,二次招标,招标报名,招标采购,单一来源,单一采购,询价,询价公告,议价采购,询价邀请,竞价,网上竞价,竞价公告,竞价开始(结束),邀请,邀请招标,投标邀请,邀标公告,邀请书,公开邀请,报价邀请,磋商,竞争性谈判,竞争性磋商,磋商公告,招标变更];
+	7、采购意向公告:请结合列举的"关键词"进行推导,关键词:[采购意向,意向公告,招标计划,采办计划,预计采购];
+我希望你根据正文内容,仅从"信息分类选项"中选取一个合适的分类。在识别过程中出现多个结果时,按照"信息分类权重"优先选举一个权重大的,并将分类识别结果严格按照下述JSON格式输出。
+JSON格式:
+{
+"信息分类":"招投标信息分类",
+"理由":"请给出信息分类结果的理由",
+}
+按照以上要求输出,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
+正文内容:` + `%s` + "\n" + `%s` + `
+结果JSON:`
+
 // 获取分类信息...
 func AcquireClassInfo(detail string, title string) (string, string) {
 	top_content := PromptToptypeFieldText(detail, title)
@@ -101,38 +122,22 @@ func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]inte
 }
 
 func PromptToptypeFieldText(detail, title string) string {
-	if utf8.RuneCountInString(detail) > ul.MaxLen {
-		detail = string([]rune(detail)[:ul.MaxLen])
-	}
 	//1、合同验收违规公告:包含招标人与中标人签订的合同公告,或者宣布招标人对项目、服务完成情况的评估、确认的公告,亦或者宣布参与招投标的任何一方违反法律法规、原则的公告。包括。请结合列举的"关键词"进行推导,关键词:[验收公告,验收报告,验收公示,电子验收单公示,验收结果,合同公示,合同公告,合同备案,合同签订,不良记录,行政处罚,不良行为,虚假合同,虚假材料],
-	content := `
-你是一个文本处理模型,根据下述信息做"招投标信息分类":
-一、信息分类选项:[合同公告;验收公告;违规公告;结果公告;预告公告;招标公告;采购意向公告]
-二、信息分类权重(权重由高到低排序):[合同公告;验收公告;违规公告;结果公告;预告公告;招标公告;采购意向公告]
-三、信息分类参考资料:
-	1、合同公告:合同公告是指在招投标结束后,招标人与中标人正式签订合同后发布的公告。它通常包含合同的主要内容、双方的基本信息、合同金额、签订日期等关键信息,旨在公示合同的签订情况,确保招投标活动的透明度和合法性。正文内容中提到了合同编号或者合同的具体内容,表明这是一篇合同公告。请结合列举的“关键词”进行推导,关键词:[合同公示,合同公告,合同备案,合同签订];
-	2、验收公告:宣布招标人对项目或服务完成情况进行评估和确认。请结合列举的“关键词”进行推导,关键词:[验收公告,验收报告,验收公示,电子验收单公示,验收结果];
-	3、违规公告:宣布参与招投标的任何一方违反相关法律法规或原则。请结合列举的“关键词”进行推导,关键词:[不良记录,行政处罚,不良行为,虚假合同,虚假材料];
-	4、结果公告:发布的是招投标公告评选过程或评标结果的信息,还涉及开标记录、结果变更的情况。通常会列出候选人、供应商、中标人、中标金额等信息,包括"候选公告","结果变更公告","成交公告","流标公告","废标公告"这几种类型。请结合列举的"关键词"进行推导,关键词:[评标,得分,工期,项目负责人,开标记录,中标候选,候选人,成交候选,候选入围,成交,成交金额,成交公告,供应商名称,采购结果,中选公告,中选结果,中选金额,流标,取消公告,取消采购,二次流标,比选失败,招标结束,采购程序终止,废标,作废,终止,废止公告,项目终止,招标公告作废,未成交公告,未成交公示,取消成交候选供应商,取消中标候选供应商,结果变更,中标结果变更,废标结果变更,变更中标人,重新公示,重新确认中标结果,重新确定中标人,重新确认中标公告,结果顺延];
-	5、预告公告:在招标公告正式开始前,发布资格预审信息、预审评审结果、招投标文件过程中问题的分析和评估意见、招投标项目的具体需求、技术要求等信息。请结合列举的"关键词"进行推导,关键词:[预告,预公告,预公示,资审,资审及业绩公示,参加资格预审,预审邀请,需求调研,调研公告,采购计划,审前公示,预审公告,资格预审,预审公告,招标预审,预审,预审结果,资格预审公示,资审结果,审查结果,论证意见,方案征集公告,审核前公示,专家组论证意见,需求论证公示,征求意见,采购需求,项目需求公示,论证公示,需求评审公告,需求公示,论证意见公示,征求意见,征集意见];
-	6、招标公告:招标单位或招标人公布项目的基本信息、标准、招标条件、价格和要求等,邀请符合资格的投标人参与项目投标。根据不同的招标方式和流程,可包括"单一来源公告"、"询价公告"、"竞价公告","邀标公告","竞争谈判公告","招标变更公告"。请结合列举的"关键词"进行推导,关键词:[招标公告,二次招标,招标报名,招标采购,单一来源,单一采购,询价,询价公告,议价采购,询价邀请,竞价,网上竞价,竞价公告,竞价开始(结束),邀请,邀请招标,投标邀请,邀标公告,邀请书,公开邀请,报价邀请,磋商,竞争性谈判,竞争性磋商,磋商公告,招标变更];
-	7、采购意向公告:请结合列举的"关键词"进行推导,关键词:[采购意向,意向公告,招标计划,采办计划,预计采购];
-我希望你根据正文内容,仅从"信息分类选项"中选取一个合适的分类。在识别过程中出现多个结果时,按照"信息分类权重"优先选举一个权重大的,并将分类识别结果严格按照下述JSON格式输出。
-JSON格式:
-{
-"信息分类":"招投标信息分类",
-"理由":"请给出信息分类结果的理由",
-}
-按照以上要求输出,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
-正文内容:` + title + "\n" + detail + `
-结果JSON:`
+	content := fmt.Sprintf(pmt_class, title, detail)
+	l := len(content)
+	if l > ul.MaxByte {
+		detail = truncatedText(detail, len(detail)-(l-ul.MaxByte))
+		content = fmt.Sprintf(pmt_class, title, detail)
+	}
 	return content
 }
 
 func PromptSubtypeFieldText(detail, title, toptype string, tpInfo *ul.TypeInfo) string {
-	if utf8.RuneCountInString(detail) > ul.MaxLen {
-		detail = string([]rune(detail)[:ul.MaxLen])
-	}
 	content := fmt.Sprintf(tpInfo.SubtypeAiText, title, detail)
+	l := len(content)
+	if l > ul.MaxByte {
+		detail = truncatedText(detail, len(detail)-(l-ul.MaxByte))
+		content = fmt.Sprintf(tpInfo.SubtypeAiText, title, detail)
+	}
 	return content
 }

+ 31 - 14
prompt/prompt_field.go

@@ -3,22 +3,11 @@ package prompt
 import (
 	"data_ai/ai"
 	"data_ai/ul"
+	"fmt"
 	"unicode/utf8"
 )
 
-// 获取外围抽取字段
-func AcquireExtractFieldInfo(detail string) map[string]interface{} {
-	content := PromptFieldText(detail)
-	zp := ai.PostZhiPuInfo(content)
-	return zp
-}
-
-// 提示词优选
-func PromptFieldText(detail string) string {
-	if utf8.RuneCountInString(detail) > ul.MaxLen {
-		detail = string([]rune(detail)[:ul.MaxLen])
-	}
-	content := `
+var pmt_field = `
 你是一个文本处理模型,专门用于分析文本提取信息,你具备以下能力:
 1、实体识别抽取
 2、信息归属地域
@@ -48,6 +37,34 @@ func PromptFieldText(detail string) string {
 }
 
 请回答我的问题,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
-正文内容:` + "\n" + detail + "\n结果JSON:"
+正文内容:` + "\n" + `%s` + "\n结果JSON:"
+
+// AcquireExtractFieldInfo builds the peripheral-field extraction prompt for
+// detail via PromptFieldText, submits it through ai.PostZhiPuInfo, and
+// returns that call's result map unchanged.
+func AcquireExtractFieldInfo(detail string) map[string]interface{} {
+	return ai.PostZhiPuInfo(PromptFieldText(detail))
+}
+
+// 提示词优选
+func PromptFieldText(detail string) string {
+	content := fmt.Sprintf(pmt_field, detail)
+	l := len(content)
+	if l > ul.MaxByte {
+		detail = truncatedText(detail, len(detail)-(l-ul.MaxByte))
+		content = fmt.Sprintf(pmt_field, detail)
+	}
 	return content
 }
+
+// truncatedText shortens detail so the assembled prompt fits the caller's
+// byte budget.
+//
+// n is the number of leading bytes of detail the caller wants to keep; the
+// Prompt*Text helpers pass len(detail) minus the prompt's overflow beyond
+// ul.MaxByte.
+//   - n <= 0: the byte budget leaves no room for detail, so fall back to
+//     keeping at most ul.MaxLen runes.
+//   - n >= len(detail): nothing needs to be cut; detail is returned as is,
+//     which also guards the slice below against an out-of-range index.
+//   - otherwise: keep at most n bytes, moving the cut back to the previous
+//     rune boundary so a multi-byte UTF-8 character is never split in half.
+func truncatedText(detail string, n int) string {
+	if n <= 0 {
+		if utf8.RuneCountInString(detail) > ul.MaxLen {
+			return string([]rune(detail)[:ul.MaxLen])
+		}
+		return detail
+	}
+	if n >= len(detail) {
+		return detail
+	}
+	// detail[n] is the first byte that would be dropped. If it is a UTF-8
+	// continuation byte the cut lies inside a rune, so back up to the rune's
+	// first byte; the result can only get shorter, never exceed the budget.
+	for n > 0 && !utf8.RuneStart(detail[n]) {
+		n--
+	}
+	return detail[:n]
+}

+ 139 - 61
prompt/prompt_package.go

@@ -7,13 +7,108 @@ import (
 	"fmt"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"strings"
-	"unicode/utf8"
 )
 
+var pmt_pkg_0 = `
+你是一名"招标工程师",拥有写标书及阅读理解公告的能力。请帮我判断下方公告中出现了几个项目:
+	注意:多个项目通常会以多个分包、多个标段的形式出现。业绩关联的项目要排除在外;
+	json形式回答,急着要结果,避免解释要干脆利落:
+	{
+	"分包":"(是/否)",
+	}
+	请回答我的问题,不要联想,不要无中生有,不要生成多余解释;
+	公告:` + `%s` + "\n结果JSON:"
+
+var pmt_pkg_1 = `
+你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。
+公告start:
+{` + `%s` + `}
+公告end
+
+任务如下:
+第一步、公告预处理:
+将上述公告中信息拆分为两部分
+标的信息:标的名称、标的编号、规格型号、数量等相关信息
+其他信息:去除上述"标的信息"剩余文本内容,执行第二步、第三步、第四步
+不输出内容,仅做思考,完成后输出:"已拆分完成"即可
+
+第二步、请根据第一步拆解分后的"其他信息",精确提取并整理以下信息,形成一个便于统计的JSON:
+注意:入围供应商、入围中标人、中标候选人存在多个时,选择第一名为中标单位,忽略其他排名。请务必确保每一条记录都来源于"公告内容", 对于尚未确定或未在文中明确提供的信息,请在相应value位置填写“无”
+{
+"分包信息":[
+{
+"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"),
+"标段/包号":(标包编号,可以来自标段名称,如果不存在写"无",比如:一标段、二标段、包一、I包、I标段、标包一、标包编号等,不可以填写"项目编号"或"标的编号"),
+"中标单位":(中标单位名称,要求:中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
+"中标金额":(中标金额数值及单位,要求:不能使用预算金额。多个金额时请进行计算,非单价,如果是单价,则等于单价*数量。),
+"预算金额":(预算金额数值及单位,要求:不可以用中标金额来填充),
+"投标折扣系数":""
+},
+....
+],
+}
+
+第三步、审查第一步输出的结果,
+0.检查"标段名称"及"标段/包号"是否真的在原文中出现,
+1.检查"标段名称"及"标段/包号"是否有明确提及,如果只有标的名称明确提及,请删除
+2.检查中标金额是否在原文中明确提及,否则删除第一步的中标金额结果
+3.检查中标金额与预算金额中的数值与单位是否正确
+4.修正答案
+
+第四步、根据第一、二、三步结果总结得到最终正确的结果
+
+答:
+第一步结果:
+
+第二步结果:
+
+第三步验证:
+
+最终正确的结果:`
+
+var pmt_pkg_2 = `
+你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。
+公告start:
+{` + `%s` + `}
+公告end
+
+任务如下:
+第一步、请根据公告内容,精确提取并整理以下信息,形成一个便于统计的JSON:
+注意:入围供应商、入围中标人、中标候选人存在多个时,选择第一名为中标单位,忽略其他排名。请务必确保每一条记录都来源于"公告内容", 对于尚未确定或未在文中明确提供的信息,请在相应value位置填写“无”
+{
+"分包信息":[
+{
+"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"),
+"标段/包号":(标包编号,可以来自标段名称,如果不存在写"无",比如:一标段、二标段、包一、I包、I标段、标包一、标包编号等,不可以填写"项目编号"或"标的编号"),
+"中标单位":(中标单位名称,要求:中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
+"中标金额":(中标金额数值及单位,如果公告中没有明确说明,输出"无",不能使用预算金额。),
+"投标折扣系数":"",
+"包预算金额":(预算金额数值及单位,要求:不可以用中标金额来填充)
+},
+....
+],
+}
+
+第二步、审查第一步输出的结果,
+0.检查"标段名称"及"标段/包号"是否真的在原文中出现,
+1.检查"标段名称"及"标段/包号"是否有明确提及,如果只有标的名称明确提及,请删除
+2.检查"中标金额"是否在原文中明确提及,否则删除第一步的中标金额结果
+3.检查中标金额与预算金额中的数值与单位是否正确
+4.修正答案
+
+第三步、根据第一、二步结果总结得到最终正确的结果
+
+答:
+第一步结果:
+
+第二步结果:
+
+最终正确的结果:`
+
 // 获取是否为分包信息
 func AcquireIsPackageInfo(detail string) bool {
 	content := PromptIsPackageText(detail)
-	zp := ai.PostZhiPuInfo(content)
+	zp := ai.PostZhiPuPackageInfo(content)
 	if qu.ObjToString(zp["分包"]) == "是" {
 		return true
 	}
@@ -23,80 +118,63 @@ func AcquireIsPackageInfo(detail string) bool {
 // 获取标讯多包信息
 func AcquireMultiplePackageInfo(detail string) map[string]interface{} {
 	content := PromptMultiplePackageText(detail)
-	zp := ai.PostZhiPuInfo(content)
-	//后续在转格式...
+	zp := ai.PostZhiPuPackageInfo(content)
+	//转格式...
 	ai_pkg := map[string]interface{}{}
 	s_winner, s_bidamount, s_pkg := "", 0.0, map[string]map[string]interface{}{}
 	win_arr, win_temp := []string{}, map[string]string{}
-	if score := qu.Float64All(zp["分包信息score"]); score >= 90.0 {
-		pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
-		for k, v := range pkginfo { //
-			key := fmt.Sprintf("%d", k+1)
-			name := qu.ObjToString(v["标段/包号"])
-			winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
-			bidamount := clean.CleanMoney((v["中标金额"]))
-			//分包信息结构
-			s_pkg[key] = map[string]interface{}{
-				"name":      name,
-				"winner":    winner,
-				"bidamount": bidamount,
-			}
-			//去重计算单位与总金额
-			s_bidamount += bidamount
-			if win_temp[winner] == "" && winner != "" {
-				win_arr = append(win_arr, winner)
-				win_temp[winner] = winner
-			}
+	pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
+	for k, v := range pkginfo { //
+		key := fmt.Sprintf("%d", k+1)
+		name := qu.ObjToString(v["标段名称"])
+		code := qu.ObjToString(v["标段/包号"])
+		winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
+		bidamount := clean.CleanMoney([]interface{}{v["中标金额"], ""})
+		//分包信息结构
+		s_pkg[key] = map[string]interface{}{
+			"name":      name,
+			"code":      code,
+			"winner":    winner,
+			"bidamount": bidamount,
+		}
+		//去重计算单位与总金额
+		s_bidamount += bidamount
+		if win_temp[winner] == "" && winner != "" {
+			win_arr = append(win_arr, winner)
+			win_temp[winner] = winner
 		}
-		s_winner = strings.Join(win_arr, ",")
-		ai_pkg["s_winner"] = s_winner
-		ai_pkg["s_bidamount"] = s_bidamount
-		ai_pkg["s_pkg"] = s_pkg
 	}
+	s_winner = strings.Join(win_arr, ",")
+	ai_pkg["s_winner"] = s_winner
+	ai_pkg["s_bidamount"] = s_bidamount
+	ai_pkg["s_pkg"] = s_pkg
 	return ai_pkg
 }
 
 // 分包判断-提问词
 func PromptIsPackageText(detail string) string {
-	if utf8.RuneCountInString(detail) > ul.MaxLen {
-		detail = string([]rune(detail)[:ul.MaxLen])
-	}
-	content := `
-	你是一名"招标工程师",拥有写标书及阅读理解公告的能力。请帮我判断下方公告中出现了几个项目:
-	注意:多个项目通常会以多个分包、多个标段的形式出现。业绩关联的项目要排除在外;
-	json形式回答,急着要结果,避免解释要干脆利落:
-	{
-	"分包":"(是/否)",
+	content := fmt.Sprintf(pmt_pkg_0, detail)
+	l := len(content)
+	if l > ul.MaxByte {
+		detail = truncatedText(detail, len(detail)-(l-ul.MaxByte))
+		content = fmt.Sprintf(pmt_pkg_0, detail)
 	}
-	请回答我的问题,不要联想,不要无中生有,不要生成多余解释;
-	公告:` + detail + "\n结果JSON:"
-
 	return content
 }
 
+// 分包信息提取
 func PromptMultiplePackageText(detail string) string {
-	if utf8.RuneCountInString(detail) > ul.MaxLen {
-		detail = string([]rune(detail)[:ul.MaxLen])
+	pmt_pkg := ""
+	if strings.Contains(detail, "标的名称") || strings.Contains(detail, "标的编号") {
+		pmt_pkg = pmt_pkg_1
+	} else {
+		pmt_pkg = pmt_pkg_2
+	}
+	content := fmt.Sprintf(pmt_pkg, detail)
+	l := len(content)
+	if l > ul.MaxByte {
+		detail = truncatedText(detail, len(detail)-(l-ul.MaxByte))
+		content = fmt.Sprintf(pmt_pkg, detail)
 	}
-	content := `
-请根据提供的招投标文件内容,精确提取并整理以下信息,形成一个便于统计的JSON:
-{
-"分包信息":[{
-"标段/包号":(形式如下但不限于:一标段、包一、I包、I标段,不可以填写项目编号),
-"中标单位":(中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
-"中标金额":(中标金额数值及单位,多个金额时请进行计算。非单价,如果是单价,则等于单价*数量),
-"投标折扣系数":"",
-]
-"包预算金额":(预算金额数值及单位),
-},
-....
-],
-# 下方打分要求:根据抽取内容与要求匹配程度打分0-100,默认100分,1、当原文中存在,未抽取到扣分。2、原文中存在多个,选择有歧义扣分。3、原文中不存在,未抽取到不扣分
-"分包信息score":,
-}
-请务必确保每一条记录都地囊括了各分包项目的详细信息,并按照指定的JSON格式排列数据, 对于尚未确定或未在文件中明确提供的信息,请在相应value位置填写“无”,输出结果JSON即可
-请回答我的问题,不要联想,不要无中生有;
-招投标文件内容:` + detail + "\n结果JSON:"
-
 	return content
 }

+ 17 - 17
tool.json

@@ -1,28 +1,28 @@
 {
-  "bid_name": "bidding",
-  "ext_name": "20240828Gd_23",
+  "reading": 500,
+  "ext_name": "20240909Hs_dz",
   "s_mgo": {
     "local": false,
-    "l_addr": "192.168.3.166:27082",
-    "addr": "192.168.3.166:27082",
-    "dbname" : "zhaoxiuzhen",
+    "l_addr": "192.168.3.167:27080",
+    "addr": "192.168.3.167:27080",
+    "dbname" : "jyqyfw_historyData2024_1",
     "username": "",
     "password": ""
   },
   "b_mgo": {
-    "local": true,
-    "l_addr": "127.0.0.1:12005",
-    "addr": "172.17.189.140:27080,172.17.189.141:27081",
-    "dbname" : "qfw",
-    "username": "zhengkun",
-    "password": "zk@123123"
+    "local": false,
+    "l_addr": "192.168.3.167:27080",
+    "addr": "192.168.3.167:27080",
+    "dbname" : "zhengkun",
+    "username": "",
+    "password": ""
   },
   "qy_mgo": {
-    "local": true,
-    "l_addr": "127.0.0.1:12005",
-    "addr": "172.17.189.140:27080,172.17.189.141:27081",
-    "dbname" : "mixdata",
-    "username": "zhengkun",
-    "password": "zk@123123"
+    "local": false,
+    "l_addr": "192.168.3.167:27080",
+    "addr": "192.168.3.167:27080",
+    "dbname" : "zhengkun",
+    "username": "",
+    "password": ""
   }
 }

+ 51 - 6
tool/tool.go

@@ -6,22 +6,24 @@ import (
 	log "github.com/donnie4w/go-logger/logger"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"strings"
 	"sync"
 )
 
 var unset_check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
 
+// 工具修正程序
 func StartToolInfo() {
-	log.Debug("开始大模型修正数据···")
+	log.Debug("工具开始大模型修正数据······")
 	q := map[string]interface{}{}
-	pool_mgo := make(chan bool, 50)
+	pool_mgo := make(chan bool, ul.Reading)
 	wg_mgo := &sync.WaitGroup{}
 	sess := ul.SourceMgo.GetMgoConn()
 	defer ul.SourceMgo.DestoryMongoConn(sess)
 	total, isok := 0, 0
 	it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
+		if total%100 == 0 {
 			log.Debug("cur index ", total)
 		}
 		isok++
@@ -57,7 +59,6 @@ func StartToolInfo() {
 	}
 	wg_mgo.Wait()
 	log.Debug("ai is over ...")
-
 }
 
 // 大模型与抽取数据合并计算
@@ -66,6 +67,7 @@ func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interfa
 		return false
 	}
 	//记录抽取原值
+	//记录抽取原值
 	ext_ai_record := map[string]interface{}{}
 	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
 	//分类字段···
@@ -113,7 +115,6 @@ func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interfa
 		(*update_check)["budget"] = s_budget
 		ext_ai_record["budget"] = tmp["budget"]
 	}
-
 	//地域字段···
 	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
 	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
@@ -155,9 +156,17 @@ func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interfa
 		}
 		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
 			(*update_check)["s_winner"] = s_winner
-			(*update_check)["winner"] = s_winner
 			ext_ai_record["s_winner"] = tmp["s_winner"]
+
+			(*update_check)["winner"] = s_winner
 			ext_ai_record["winner"] = tmp["winner"]
+			//对于winner来说...规则值有包含关系,采用规则值
+			if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+				if strings.Contains(s_winner, winner) {
+					delete((*update_check), "winner")
+					delete(ext_ai_record, "winner")
+				}
+			}
 		}
 		isRulePkg := false
 		if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
@@ -206,9 +215,45 @@ func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interfa
 	}
 	(*update_check)["ext_ai_record"] = ext_ai_record
 
+	//根据识别金额的进行选取与修正
+	if r_budget := qu.Float64All((*update_check)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
+		if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
+			if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
+				(*update_check)["budget"] = filterAmount(r_budget, o_budget)
+			}
+		}
+	}
+	if r_bidamount := qu.Float64All((*update_check)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+		if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
+			if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
+				(*update_check)["bidamount"] = filterAmount(r_bidamount, o_bidamount)
+			}
+		}
+	}
+
 	return false
 }
 
+// filterAmount picks one of two candidate amounts.
+//
+// The larger candidate is returned unless it exceeds 100,000,000, in which
+// case the smaller one is returned instead. When neither value compares
+// greater than the other (for example, they are equal), f1 is returned.
+func filterAmount(f1 float64, f2 float64) float64 {
+	var larger, smaller float64
+	switch {
+	case f1 > f2:
+		larger, smaller = f1, f2
+	case f1 < f2:
+		larger, smaller = f2, f1
+	default:
+		return f1
+	}
+	// An over-threshold larger value is rejected in favour of the smaller one.
+	if larger > 100000000.0 {
+		return smaller
+	}
+	return larger
+}
+
 // 核算分包信息
 func staffInfo(pkg map[string]interface{}) bool {
 	//鉴定中标单位

+ 2 - 2
udp/udprocess.go

@@ -32,7 +32,7 @@ func InitProcessVar() {
 	//执行监控
 	go lastUdpMonitoring()
 	go nextUdpMonitoring()
-	go getRepeatTask()
+	go getDataAiTask()
 
 	//监听···
 	updport := ul.SysConfig["udpport"].(string)
@@ -88,7 +88,7 @@ func ProcessUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 						"sid": sid,
 						"eid": eid,
 					})
-					log.Debug("udp收到任务...数量:", len(taskList), "具体任务:", taskList)
+					log.Debug("udp收到任务...数量:", len(taskList), "具体任务:", sid, "~", eid)
 				}
 				udplock.Unlock()
 			}

+ 4 - 4
udp/udptask.go

@@ -9,7 +9,7 @@ import (
 )
 
 // 监听-获取-分发任务
-func getRepeatTask() {
+func getDataAiTask() {
 	for {
 		if len(taskList) > 0 && !isGetask {
 			getasklock.Lock()
@@ -20,7 +20,7 @@ func getRepeatTask() {
 				end_id := qu.ObjToString(taskList[len_list-1]["eid"])
 				if first_id != "" && end_id != "" {
 					taskList = taskList[len_list:]
-					log.Debug("合并段落~正常~", first_id, "~", end_id, "~剩余任务池~", len(taskList), taskList)
+					log.Debug("合并段落~正常~", first_id, "~", end_id, "~剩余任务池~", len(taskList))
 					extract.ExtractFieldInfo(first_id, end_id)
 					log.Debug("AI识别数据完成...发送下节点udp...")
 					sendNextNode(first_id, end_id)
@@ -29,7 +29,7 @@ func getRepeatTask() {
 					mapInfo := taskList[0]
 					if mapInfo != nil {
 						taskList = taskList[1:]
-						log.Debug("获取任务段处理中~~~剩余任务池~~~", len(taskList), taskList)
+						log.Debug("获取任务段处理中~~~剩余任务池~~~", len(taskList))
 						sid := qu.ObjToString(mapInfo["sid"])
 						eid := qu.ObjToString(mapInfo["eid"])
 						extract.ExtractFieldInfo(sid, eid)
@@ -44,7 +44,7 @@ func getRepeatTask() {
 				mapInfo := taskList[0]
 				if mapInfo != nil {
 					taskList = taskList[1:]
-					log.Debug("获取任务段处理中~~~剩余任务池~~~", len(taskList), taskList)
+					log.Debug("获取任务段处理中~~~剩余任务池~~~", len(taskList))
 					sid := qu.ObjToString(mapInfo["sid"])
 					eid := qu.ObjToString(mapInfo["eid"])
 					extract.ExtractFieldInfo(sid, eid)

+ 3 - 0
ul/attr.go

@@ -12,9 +12,12 @@ var (
 	CleanResultReg     = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
 	SaveResultReg      = regexp.MustCompile("([{].*[}])")
 	MaxLen             = 3000
+	MaxByte            = 8000
+	MaxUdp             = 10000
 	RulesPname         = []*ExtReg{}
 	IsTool             bool
 	Reading            int
+	FlashModel         string
 )
 
 type ExtReg struct {

+ 15 - 2
ul/init.go

@@ -11,11 +11,22 @@ func InitGlobalVar() {
 	qu.ReadConfig(&SysConfig) //加载配置文件
 	initMgo()
 	initPCD()
+	initOther()
 }
 func InitToolVar() {
 	qu.ReadConfig("./tool.json", &ToolConfig) //加载配置文件
+	if len(ToolConfig) == 0 || ToolConfig == nil {
+		log.Fatal("缺少tool.json文件")
+	}
 	initToolMgo()
 	initPCD()
+	initOther()
+}
+
+// initOther sets the remaining package-level settings: the model name in
+// FlashModel and the MaxUdp limit, read from SysConfig["udp_max"] with a
+// fallback of 10000.
+// NOTE(review): InitToolVar also calls this after loading only ToolConfig in
+// the visible code, so MaxUdp presumably takes the fallback there — confirm.
+func initOther() {
+	const defaultUdpMax = 10000
+	udpMax := qu.IntAllDef(SysConfig["udp_max"], defaultUdpMax)
+	FlashModel, MaxUdp = "glm-4-flash", udpMax
+}
 
 // 初始化mgo
@@ -88,8 +99,10 @@ func initMgo() {
 
 // 初始化mgo
 func initToolMgo() {
-
-	Bid_Name, Ext_Name = qu.ObjToString(ToolConfig["bid_name"]), qu.ObjToString(ToolConfig["ext_name"])
+	Reading, Ext_Name = qu.IntAll(ToolConfig["reading"]), qu.ObjToString(ToolConfig["ext_name"])
+	if Reading == 0 {
+		Reading = 500
+	}
 	//源数据
 	b_cfg := *qu.ObjToMap(ToolConfig["b_mgo"])
 	b_local := b_cfg["local"].(bool)