Bladeren bron

大模型识别-

zhengkun 1 week geleden
bovenliggende
commit
8491d83b9b
28 gewijzigde bestanden met toevoegingen van 1458 en 477 verwijderingen
  1. 84 0
      ai/ai_baidu.go
  2. 5 4
      ai/ai_deekseek.go
  3. 91 0
      ai/ai_doubao.go
  4. 17 0
      clean/c_all.go
  5. 2 0
      clean/c_deepseek.go
  6. 19 5
      clean/c_money.go
  7. 15 0
      clean/c_unit.go
  8. 3 3
      config.json
  9. 43 0
      extract/clean.go
  10. 48 4
      extract/extension.go
  11. 91 50
      extract/extract.go
  12. 25 16
      extract/full.go
  13. 3 3
      extract/test.go
  14. 354 0
      extract/test1.go
  15. 65 192
      main.go
  16. 48 0
      mark
  17. 9 1
      prompt/prompt_buyer.go
  18. 37 24
      prompt/prompt_deepseek.go
  19. 7 2
      prompt/prompt_field.go
  20. 7 2
      prompt/prompt_package.go
  21. 7 2
      tool.json
  22. 284 0
      tool/replenish.go
  23. 30 0
      tool/service.go
  24. 54 131
      tool/tool.go
  25. 0 2
      udp/udprocess.go
  26. 18 16
      ul/attr.go
  27. 48 0
      ul/global.go
  28. 44 20
      ul/init.go

+ 84 - 0
ai/ai_baidu.go

@@ -1,14 +1,98 @@
 package ai
 
 import (
+	"bytes"
 	"context"
 	"data_ai/ul"
 	"encoding/json"
 	"github.com/baidubce/bce-qianfan-sdk/go/qianfan"
 	log "github.com/donnie4w/go-logger/logger"
+	"io/ioutil"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"net/http"
 	"os"
+	"time"
 )
 
+// PostBaiDuDSAI sends content as a single "user" message to the Baidu Qianfan chat-completions endpoint and returns the first choice's message content decoded as a JSON object; request-build and transport failures return an empty, non-nil map.
+func PostBaiDuDSAI(content string) map[string]interface{} {
+	// Endpoint URL of the API.
+	apiURL := "https://qianfan.baidubce.com/v2/chat/completions"
+	// Build the request payload.
+	messages := []map[string]interface{}{}
+	messages = append(messages, map[string]interface{}{
+		"role":    "user",
+		"content": content,
+	})
+	// Stale note listing GLM model names (glm-4-air, glm-4-0520, glm-4-flash); the model requested below is "deepseek-v3".
+	requestData := map[string]interface{}{
+		"model":       "deepseek-v3",
+		"temperature": 0.1,
+		"top_p":       0.7,
+		"messages":    messages,
+	}
+	jsonData, _ := json.Marshal(requestData) // NOTE(review): the marshal error is discarded.
+	// Create the HTTP request.
+	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
+	if err != nil {
+		log.Debug("Error: %s", err)
+		return map[string]interface{}{}
+	}
+	// Set the request headers.
+	req.Header.Add("Content-Type", "application/json")
+	req.Header.Add("appid", "app-myQb9VYP")
+	req.Header.Add("Authorization", "Bearer bce-v3/ALTAK-tiBiZmOp65qBQpU9Ipv8z/b3fe9ad37d7e6b006418d85758b107ad99a72f46") // NOTE(review): bearer credential is hard-coded in source; consider loading it from configuration and rotating this key.
+
+	client := &http.Client{}
+	client.Timeout = 300 * time.Second // a new client with a 300-second timeout is created on every call
+	resp, err := client.Do(req)
+	if err != nil {
+		return map[string]interface{}{} // the transport error is not logged
+	}
+	defer resp.Body.Close()
+
+	// Parse the response.
+	body, _ := ioutil.ReadAll(resp.Body) // NOTE(review): the read error is discarded and resp.StatusCode is never checked.
+	res := make(map[string]interface{})
+	json.Unmarshal(body, &res) // the decode error is ignored
+	if res != nil {
+		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
+			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
+				result := qu.ObjToString((*message)["content"])
+				result = ul.Escape.ReplaceAllString(result, "") // remove whatever ul.Escape matches (pattern is defined outside this view)
+				if new_result := ul.SaveResultReg.FindString(result); new_result != "" { // keep only the ul.SaveResultReg match when there is one; presumably the JSON object, verify against ul
+					result = new_result
+				}
+				dict := make(map[string]interface{})
+				json.Unmarshal([]byte(result), &dict) // the decode error is ignored, so unparsable model output yields whatever dict holds (normally empty)
+				return dict
+			}
+		}
+	}
+	return map[string]interface{}{}
+}
+
+/*****************************
+******************************
+******************************
+******************************
+******************************
+******************************/
+// PostBaiDuDSInfo calls PostBaiDuDSAI until it returns a non-empty map or req_retry_deepseek attempts have been made (the original note says "retry once"; the constant is defined outside this view, TODO confirm its value).
+func PostBaiDuDSInfo(content string) map[string]interface{} {
+	zp, ok := map[string]interface{}{}, 0 // ok counts the attempts made so far
+	for {
+		ok++
+		if zp = PostBaiDuDSAI(content); len(zp) > 0 {
+			break
+		}
+		if ok >= req_retry_deepseek {
+			break
+		}
+	}
+	return zp // may still be empty when every attempt failed
+}
+
 // 百度千帆
 func PostBaiDuAI(content string) map[string]interface{} {
 	// 使用安全认证AK/SK鉴权,通过环境变量初始化;替换下列示例中参数,安全认证Access Key替换your_iam_ak,Secret Key替换your_iam_sk

+ 5 - 4
ai/ai_deekseek.go

@@ -26,7 +26,8 @@ func PostDeepSeekAI(content string) map[string]interface{} {
 	//glm-4-air	glm-4-0520  glm-4-flash
 	requestData := map[string]interface{}{
 		"model":       "deepseek-chat",
-		"temperature": 1,
+		"temperature": 0.1,
+		"top_p":       0.7,
 		"messages":    messages,
 	}
 	jsonData, _ := json.Marshal(requestData)
@@ -39,10 +40,10 @@ func PostDeepSeekAI(content string) map[string]interface{} {
 	// 设置请求头
 	req.Header.Add("Content-Type", "application/json")
 	req.Header.Add("Accept", "application/json")
-	req.Header.Add("Authorization", "Bearer sk-7e40165d06a04f3c87e2fc15e69df625")
+	req.Header.Add("Authorization", "Bearer sk-5ac10ddcaa754e1cb06f2c33f7d28767")
 
 	client := &http.Client{}
-	client.Timeout = 120 * time.Second
+	client.Timeout = 300 * time.Second
 	resp, err := client.Do(req)
 	if err != nil {
 		return map[string]interface{}{}
@@ -205,7 +206,7 @@ func PostDeepSeekInfo(content string) map[string]interface{} {
 		if zp = PostDeepSeekAI(content); len(zp) > 0 {
 			break
 		}
-		if ok >= req_retry {
+		if ok >= req_retry_deepseek {
 			break
 		}
 	}

+ 91 - 0
ai/ai_doubao.go

@@ -0,0 +1,91 @@
+package ai
+
+import (
+	"bytes"
+	"data_ai/ul"
+	"encoding/json"
+	log "github.com/donnie4w/go-logger/logger"
+	"io/ioutil"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"net/http"
+	"time"
+)
+
+// PostDouBaoDSAI sends content as a single "user" message to the Volcengine Ark chat-completions endpoint and returns the first choice's message content decoded as a JSON object; request-build and transport failures return an empty, non-nil map.
+func PostDouBaoDSAI(content string) map[string]interface{} {
+	// Endpoint URL of the API.
+	apiURL := "https://ark.cn-beijing.volces.com/api/v3/chat/completions"
+	// Build the request payload.
+	messages := []map[string]interface{}{}
+	messages = append(messages, map[string]interface{}{
+		"role":    "user",
+		"content": content,
+	})
+	// ep-20250225173301-w557p: production endpoint id (per the original note).
+	// ep-20250313104433-ptcxr: test endpoint id (per the original note).
+	requestData := map[string]interface{}{
+		"model":       "ep-20250225173301-w557p",
+		"temperature": 0.1,
+		"top_p":       0.7,
+		"messages":    messages,
+	}
+	jsonData, _ := json.Marshal(requestData) // NOTE(review): the marshal error is discarded.
+	// Create the HTTP request.
+	req, err := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData))
+	if err != nil {
+		log.Debug("Error: %s", err)
+		return map[string]interface{}{}
+	}
+	// Set the request headers.
+	req.Header.Add("Content-Type", "application/json")
+	req.Header.Add("Authorization", "Bearer df50c86b-24f5-4475-b09b-f1f85e5e6564") // NOTE(review): bearer credential is hard-coded in source; consider loading it from configuration and rotating this key.
+
+	client := &http.Client{}
+	client.Timeout = 300 * time.Second // a new client with a 300-second timeout is created on every call
+	resp, err := client.Do(req)
+	if err != nil {
+		return map[string]interface{}{} // the transport error is not logged
+	}
+	defer resp.Body.Close()
+
+	// Parse the response.
+	body, _ := ioutil.ReadAll(resp.Body) // NOTE(review): the read error is discarded and resp.StatusCode is never checked.
+	res := make(map[string]interface{})
+	json.Unmarshal(body, &res) // the decode error is ignored
+	if res != nil {
+		if choices := ul.IsMarkInterfaceMap(res["choices"]); len(choices) > 0 {
+			if message := qu.ObjToMap(choices[0]["message"]); message != nil {
+				result := qu.ObjToString((*message)["content"])
+				result = ul.Escape.ReplaceAllString(result, "") // remove whatever ul.Escape matches (pattern is defined outside this view)
+				if new_result := ul.SaveResultReg.FindString(result); new_result != "" { // keep only the ul.SaveResultReg match when there is one; presumably the JSON object, verify against ul
+					result = new_result
+				}
+				dict := make(map[string]interface{})
+				json.Unmarshal([]byte(result), &dict) // the decode error is ignored, so unparsable model output yields whatever dict holds (normally empty)
+				return dict
+			}
+		}
+	}
+	return map[string]interface{}{}
+}
+
+/*****************************
+******************************
+******************************
+******************************
+******************************
+******************************/
+// PostDouBaoDSInfo calls PostDouBaoDSAI until it returns a non-empty map or req_retry_deepseek attempts have been made (the original note says "retry once"; the constant is defined outside this view, TODO confirm its value).
+func PostDouBaoDSInfo(content string) map[string]interface{} {
+	zp, ok := map[string]interface{}{}, 0 // ok counts the attempts made so far
+	for {
+		ok++
+		if zp = PostDouBaoDSAI(content); len(zp) > 0 {
+			break
+		}
+		if ok >= req_retry_deepseek {
+			break
+		}
+	}
+	return zp // may still be empty when every attempt failed
+}

+ 17 - 0
clean/c_all.go

@@ -1,6 +1,7 @@
 package clean
 
 import (
+	"github.com/shopspring/decimal"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"regexp"
 )
@@ -87,6 +88,12 @@ func CleanFieldInfo(zhipu map[string]interface{}, fns []string, isTable bool) ma
 	if s_docendtime := CleanTime(qu.ObjToString(zhipu["招标文件获取结束时间"])); s_docendtime > 0 {
 		data["s_docendtime"] = s_docendtime
 	}
+	if s_signstarttime := CleanTime(qu.ObjToString(zhipu["投标的报名开始时间"])); s_signstarttime > 0 {
+		data["s_signstarttime"] = s_signstarttime
+	}
+	if s_signendtime := CleanTime(qu.ObjToString(zhipu["投标的报名截止时间"])); s_signendtime > 0 {
+		data["s_signendtime"] = s_signendtime
+	}
 
 	//联系方式方式
 	if s_buyerperson := CleanContactPerson(qu.ObjToString(zhipu["采购单位联系人"])); s_buyerperson != "" {
@@ -137,5 +144,15 @@ func CleanFieldInfo(zhipu map[string]interface{}, fns []string, isTable bool) ma
 		delete(data, "s_bidamount")
 	}
 
+	//校验金额
+	if data["s_bidamount"] != nil {
+		ns_bidamount, _ := decimal.NewFromFloat(qu.Float64All(data["s_bidamount"])).Round(2).Float64()
+		data["s_bidamount"] = ns_bidamount
+	}
+	if data["s_budget"] != nil {
+		ns_budget, _ := decimal.NewFromFloat(qu.Float64All(data["s_budget"])).Round(2).Float64()
+		data["s_budget"] = ns_budget
+	}
+
 	return data
 }

+ 2 - 0
clean/c_deepseek.go

@@ -155,6 +155,8 @@ func CleanDeepSeekInfo(ai_info map[string]interface{}, tmp map[string]interface{
 		data["s_pkg"] = pkg
 	}
 
+	//最终根据分类-删除
+
 	return data
 }
 

+ 19 - 5
clean/c_money.go

@@ -222,7 +222,12 @@ func CleanMoney(data []interface{}) (float64, bool) {
 
 // 数字金额转换
 func numMoney(data []interface{}) ([]interface{}, bool) {
-	tmp := fmt.Sprintf("%f", data[0])
+	tmp := ""
+	if _, ok := data[0].(float64); ok {
+		tmp = fmt.Sprintf("%f", data[0])
+	} else {
+		tmp = util.ObjToString(data[0])
+	}
 	tmp = strings.ReplaceAll(tmp, "(不含税)", "")
 	//费率转换% ‰
 	flv := float64(1)
@@ -308,9 +313,18 @@ func numMoney(data []interface{}) ([]interface{}, bool) {
 	}
 	fnum = fnum * repUnit
 	if unit == float64(0) {
-		data[0] = fnum * flv
+		num1 := decimal.NewFromFloat(fnum)
+		num2 := decimal.NewFromFloat(flv)
+		decimalValue := num1.Mul(num2)
+		decimal_res, _ := decimalValue.Float64()
+		data[0] = decimal_res
 	} else {
-		data[0] = fnum * unit * flv
+		num1 := decimal.NewFromFloat(fnum)
+		num2 := decimal.NewFromFloat(unit)
+		num3 := decimal.NewFromFloat(flv)
+		decimalValue := num1.Mul(num2).Mul(num3)
+		decimal_res, _ := decimalValue.Float64()
+		data[0] = decimal_res
 	}
 	if unit == 10000 {
 		return data, false
@@ -374,7 +388,7 @@ func capitalMoney(data []interface{}) []interface{} {
 					node += tmp
 					tmp = float64(0)
 				}
-				nodes = append(nodes, node*float64(v))
+				nodes = append(nodes, node*util.Float64All(v))
 				if v == 100000000 {
 					yy = true
 				}
@@ -383,7 +397,7 @@ func capitalMoney(data []interface{}) []interface{} {
 				if v == 10 && tmp == 0 {
 					tmp = 1
 				}
-				tmp = tmp * float64(v)
+				tmp = tmp * util.Float64All(v)
 				node += tmp
 				tmp = float64(0)
 			}

+ 15 - 0
clean/c_unit.go

@@ -1,10 +1,14 @@
 package clean
 
 import (
+	"regexp"
 	"strings"
 	"unicode/utf8"
 )
 
+var c_unit_1 = regexp.MustCompile("([((]本级[))])$") // trailing parenthesised "本级" suffix; CleanBuyer strips it
+var c_unit_2 = regexp.MustCompile("(局)$") // name ending in "局"; CleanBuyer returns a 3-rune buyer name with this ending instead of blanking it
+
 // 清洗采购单位
 func CleanBuyer(buyer string) string {
 	if buyer == "无" || buyer == "有限公司" {
@@ -14,6 +18,14 @@ func CleanBuyer(buyer string) string {
 	//中文括弧
 	buyer = strings.ReplaceAll(buyer, "(", "(")
 	buyer = strings.ReplaceAll(buyer, ")", ")")
+	//清除后缀-本级
+	buyer = c_unit_1.ReplaceAllString(buyer, "")
+
+	if utf8.RuneCountInString(buyer) == 3 {
+		if c_unit_2.MatchString(buyer) {
+			return buyer
+		}
+	}
 
 	if utf8.RuneCountInString(buyer) < 4 {
 		buyer = ""
@@ -30,6 +42,9 @@ func CleanWinner(s_winner string) string {
 	if utf8.RuneCountInString(s_winner) < 3 {
 		s_winner = ""
 	}
+	s_winner = strings.ReplaceAll(s_winner, "(", "(")
+	s_winner = strings.ReplaceAll(s_winner, ")", ")")
+
 	return s_winner
 }
 

+ 3 - 3
config.json

@@ -1,7 +1,7 @@
 {
   "udpport": ":1791",
   "bid_name": "bidding",
-  "ext_name": "zktest_package_1011",
+  "ext_name": "bidding",
   "reading": 100,
   "udp_max": 10000,
   "len_max": 20000,
@@ -39,8 +39,8 @@
   "py_mgo": {
     "local": true,
     "l_addr": "127.0.0.1:12002",
-    "addr": "172.17.4.86:27080",
-    "dbname" : "jyqyfw",
+    "addr": "172.20.45.130:27082",
+    "dbname" : "jyqyfw_historyData2025",
     "username": "",
     "password": ""
   },

+ 43 - 0
extract/clean.go

@@ -0,0 +1,43 @@
+package extract
+
+import (
+	"regexp"
+	"strings"
+)
+
+// Example line targeted by clean1 (company, timestamp, submitted quote, auction closed): 中国电信集团有限公司驻马店分公司 2025-3-08 17:10:30 提交报价¥266000 竞价成交
+var clean1 = regexp.MustCompile("(([\u4E00-\u9FA5]{5,30}公司)[\\s-0-9::]+提交报价[¥]?([\\s0-9]+)竞价成交\n)") // group 2 = company name (5-30 CJK characters followed by 公司), group 3 = quoted amount digits/whitespace
+var clean2 = regexp.MustCompile("([\\s ]+([0-9]+)[\\s ]+([0-9]+)[\\s ]+)") // two digit runs separated and surrounded by whitespace; groups 2 and 3 are the digit runs
+var blTextReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|业绩奖项|主要人员相关资料|唱标记录|否决投标的?情况说明") // section markers that make ExcludeYeJi consider truncating the text
+var unblTextReg *regexp.Regexp = regexp.MustCompile("(项目业绩案例|类似项目业绩)") // phrases that disable the truncation in ExcludeYeJi
+var beforeTextReg *regexp.Regexp = regexp.MustCompile("(招标代理机构|招标单位|招标人)[::].{4,25}\n") // labelled lines (label, colon, 4-25 characters, newline) that ExcludeYeJi prepends to the truncated text
+
+// CleanText normalises detail before it is sent to the model: it applies ExcludeYeJi, then the clean1 rewrite, then the clean2 rewrite, in that order.
+func CleanText(detail string) string {
+
+	// Exclude the track-record ("业绩") section.
+	detail = ExcludeYeJi(detail)
+	// Rewrite the special quote-line structure matched by clean1 into two labelled lines built from its groups 2 and 3.
+	detail = clean1.ReplaceAllString(detail, "\n中标单位:${2}\n中标金额:${3}")
+	// Join digit runs separated by whitespace: each clean2 match is replaced by its two digit groups concatenated.
+	detail = clean2.ReplaceAllString(detail, "${2}${3}")
+
+	return detail
+}
+
+// ExcludeYeJi truncates detail at the first occurrence of "业绩" when blTextReg matches and unblTextReg does not, prepending any beforeTextReg lines found in the full text; otherwise detail is returned unchanged.
+func ExcludeYeJi(detail string) string {
+	if blTextReg.MatchString(detail) && !unblTextReg.MatchString(detail) {
+		if strings.Index(detail, "业绩") > 1 { // NOTE(review): the cut point is the first "业绩" in the text, which need not be where blTextReg matched (some of its alternatives contain no "业绩").
+			before_arr := []string{} // if purchaser/tenderer lines exist, they are placed in front of the text
+			if beforeTextReg.MatchString(detail) {
+				before_arr = beforeTextReg.FindAllString(detail, -1) // collected from the full, untruncated text
+			}
+			detail = detail[:strings.Index(detail, "业绩")]
+			if len(before_arr) > 0 {
+				detail = strings.Join(before_arr, "\n") + detail // NOTE(review): matches located before the cut are still in detail, so they appear twice.
+			}
+		}
+	}
+	return detail
+}

+ 48 - 4
extract/extension.go

@@ -5,6 +5,7 @@ import (
 	"data_ai/prompt"
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
+	"github.com/shopspring/decimal"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"regexp"
 	"strings"
@@ -19,6 +20,10 @@ var Reg = regexp.MustCompile("[^0-9A-Za-z\u4e00-\u9fa5]+")
 var Filter = regexp.MustCompile("<[^>]*?>|[\\s\u3000\u2003\u00a0]")
 var SpecialTextReg = regexp.MustCompile("(原网页|见附件|下载附件|(查看|访问)(源网|原网)|详情请下载附件!|详情请访问原网页!)")
 
+var CleanReg0 = regexp.MustCompile("([eE][\\+])") // any "e+" / "E+" marker; used by ScientificUnit as a quick pre-check
+var CleanReg1 = regexp.MustCompile("([::](([1-9][.][0-9]+)([eE][\\+])([0]+[6-9])))") // colon, mantissa d.ddd, "e+", then one or more zeros followed by a digit 6-9; group 3 = mantissa, group 5 = exponent
+var CleanReg2 = regexp.MustCompile("((([1-9][.][0-9]+)([eE][\\+])([0]*[6-9])))") // same shape without the leading colon, and the zero padding of the exponent is optional; group 3 = mantissa, group 5 = exponent
+
 // 确认抽取范围
 func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
 	dict := map[string]interface{}{}
@@ -58,6 +63,11 @@ func GetFnsInfo(tmp map[string]interface{}) []string {
 
 // 获取正文数据
 func getDetailText(v map[string]interface{}, tmpid string) string {
+	//按照最新文本请求的数据···
+	//detail := ul.PostDetailContentHtmlText("detail", tmpid)
+	//if detail != "" {
+	//	return detail
+	//}
 	detail := qu.ObjToString(v["detail"])
 	if ul.IsTool {
 		if details := qu.ObjToString(v["details"]); details != "" {
@@ -67,10 +77,6 @@ func getDetailText(v map[string]interface{}, tmpid string) string {
 		if utf8.RuneCountInString(detail) < 100 && filetext != "" {
 			detail = filetext
 		}
-	} else {
-		//if bs := ul.OssGetObject(tmpid); bs != "" {
-		//	detail = bs
-		//}
 	}
 	return detail
 }
@@ -178,3 +184,41 @@ func ForcedLogicDecideInfo(f_data map[string]interface{}) {
 		}
 	}
 }
+
+// ScientificUnit finds an amount written in scientific notation in detail and returns it expanded as (decimal string, float64); it returns ("", 0.0) when no usable amount is found.
+func ScientificUnit(detail string) (string, float64) {
+	if !CleanReg0.MatchString(detail) {
+		return "", 0.0
+	}
+	x, u := "", "" // x = mantissa text, u = exponent text
+	// Condition 1 (CleanReg1, colon-prefixed): take the last matching amount.
+	if arr := CleanReg1.FindAllString(detail, -1); len(arr) > 0 {
+		str1 := arr[len(arr)-1]
+		x = CleanReg1.ReplaceAllString(str1, "${3}")
+		u = CleanReg1.ReplaceAllString(str1, "${5}")
+	} else {
+		// Condition 2 (CleanReg2, no colon): take the first matching amount.
+		if str2 := CleanReg2.FindString(detail); str2 != "" {
+			x = CleanReg2.ReplaceAllString(str2, "${3}")
+			u = CleanReg2.ReplaceAllString(str2, "${5}")
+		} else {
+			return "", 0.0
+		}
+	}
+	ut := qu.IntAll(u)
+	if ut >= 10 { // exponents of 10 or more are rejected
+		return "", 0.0
+	}
+	if xf := qu.Float64All(x); xf > 0.0 {
+		a := decimal.NewFromFloat(xf)
+		b := decimal.New(1, int32(ut)) // 1 * 10^ut
+		v := a.Mul(b)
+		fv, _ := v.Float64() // the exactness flag is ignored
+		sv := v.String()
+		if fv > 10000000000 { // values above 1e10 are discarded
+			return "", 0.0
+		}
+		return sv, fv
+	}
+	return "", 0.0
+}

+ 91 - 50
extract/extract.go

@@ -37,7 +37,7 @@ func ExtractFieldInfo(sid string, eid string) {
 	total, isok := 0, 0
 	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%200 == 0 {
+		if total%1000 == 0 {
 			log.Debug("cur ai index ", total)
 		}
 		tmpid := ul.BsonTOStringId(tmp["_id"])
@@ -54,11 +54,23 @@ func ExtractFieldInfo(sid string, eid string) {
 				<-pool_mgo
 				wg_mgo.Done()
 			}()
+			info := map[string]interface{}{}
 			u_id := ul.BsonTOStringId(tmp["_id"])
-			data := ResolveInfo(tmp)
-			if len(data) > 0 || u_id == "" {
+			//大模型数据···
+			ai_zhipu := ResolveInfo(tmp, u_id)
+			if len(ai_zhipu) > 0 {
+				info["ai_zhipu"] = ai_zhipu
+			}
+			//科学计数法标记···数据标记···会冗余
+			s, f := ScientificUnit(qu.ObjToString(tmp["detail"]))
+			if s != "" && f > 0.0 {
+				info["e_bidamount"] = f
+			}
+
+			//更新方法
+			if len(info) > 0 && u_id != "" {
 				ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-					"$set": map[string]interface{}{"ai_zhipu": data},
+					"$set": info,
 				})
 			}
 		}(tmp)
@@ -70,8 +82,7 @@ func ExtractFieldInfo(sid string, eid string) {
 }
 
 // 获取处理数据...
-func ResolveInfo(v map[string]interface{}) map[string]interface{} {
-	tmpid := ul.BsonTOStringId(v["_id"])
+func ResolveInfo(v map[string]interface{}, tmpid string) map[string]interface{} {
 	title := qu.ObjToString(v["title"])
 	old_detail := getDetailText(v, tmpid) //获取正文文本
 	//是否表格
@@ -90,18 +101,20 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	}
 	//文本格式转换
 	new_detail := ul.HttpConvertToMarkdown(old_detail)
+	//特殊文本转换
+	new_detail = CleanText(new_detail)
 	//短文本判断是否有效性
 	if shorText {
 		if info := prompt.AcquireJudgeShortInfo(new_detail); info["结果"] != "是" {
 			return map[string]interface{}{}
 		}
 	}
+
 	//获取外围字段数据-拆分合并字段
 	f_info_1 := prompt.AcquireExtractFieldInfoFirst(new_detail)
 	f_info_2 := prompt.AcquireExtractFieldInfoSecond(new_detail)
 	f_info_3 := prompt.AcquireExtractFieldInfoThird(new_detail)
 	f_info := MergeInfo([]map[string]interface{}{f_info_1, f_info_2, f_info_3})
-
 	//非短文本以下识别-纯测试
 	if !shorText {
 		//获取分包信息
@@ -127,6 +140,14 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 
 	//采购单位二级校验
 	CheckOutBuyerInfo(f_data)
+	//标题提取采购单位
+	if qu.ObjToString(f_data["s_buyer"]) == "" {
+		if zp_buyer := prompt.AcquireBuyerInfo(title); zp_buyer["实体单位"] != nil {
+			if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
+				f_data["s_buyer"] = ns_buyer
+			}
+		}
+	}
 
 	//强制逻辑判断-
 	ForcedLogicDecideInfo(f_data)
@@ -135,38 +156,6 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	return f_data
 }
 
-// 暂时不启用...无限重试
-func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
-	//log.Debug("开始重置更新...", len(arr))
-	//reset := []string{}
-	//for k, v := range arr {
-	//	log.Debug("...", k, "...", v)
-	//	data := ul.SourceMgo.FindById(name, v)
-	//	content := PromptFieldText(qu.ObjToString(data["detail"]))
-	//	zp, ok := map[string]interface{}{}, 0
-	//	for {
-	//		ok++
-	//		if zp = ai.PostZhiPuAI(content); len(zp) > 0 {
-	//			break
-	//		}
-	//		if ok >= 5 {
-	//			log.Debug("请求数据失败...", v)
-	//			reset = append(reset, v)
-	//			break
-	//		}
-	//	}
-	//	ul.SourceMgo.UpdateById(s_name, v, map[string]interface{}{
-	//		"$set": map[string]interface{}{
-	//			"zhipu": zp,
-	//		},
-	//	})
-	//}
-	//if len(reset) > 0 { //无限尝试
-	//	RunResetUpdateFieldInfo(reset, name, s_name)
-	//}
-	//log.Debug("本轮重置更新结束......")
-}
-
 /*
 ************************************************************
 ************************************************************
@@ -175,22 +164,28 @@ func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
 */
 
 func RunDeepSeek() {
-	log.Debug("执行轮询定时···deepseek···")
-	tmp := ul.BidMgo.FindById("bidding", "65ab0ff666cf0db42a81ecb5")
-	ExtractDeepSeekInfo(tmp)
+	log.Debug("执行轮询定时···deepseek···", ul.Ext_Name)
+	tmp_data := ul.PyMgo.FindById(ul.Ext_Name, "67c50d2088dabe81a67a2468")
+	ttt := ExtractDeepSeekInfo(tmp_data)
+	log.Debug(ttt)
 	return
 	for {
+		log.Debug("开始处理线程数···", ul.Reading)
 		pool_mgo := make(chan bool, ul.Reading)
 		wg_mgo := &sync.WaitGroup{}
-		sess := ul.BidMgo.GetMgoConn()
-		defer ul.BidMgo.DestoryMongoConn(sess)
+		sess := ul.PyMgo.GetMgoConn()
+		defer ul.PyMgo.DestoryMongoConn(sess)
 		q, total, isok := map[string]interface{}{}, 0, 0
-		it := sess.DB(ul.BidMgo.DbName).C("zktest_sample_data_source_0").Find(&q).Iter()
+		it := sess.DB(ul.PyMgo.DbName).C(ul.Ext_Name).Find(&q).Iter()
 		for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-			if total%1000 == 0 {
+			if total%100 == 0 {
 				log.Debug("cur ai index ", total)
 			}
 			isok++
+			if tmp["ai_updatetime"] != nil {
+				tmp = make(map[string]interface{})
+				continue
+			}
 			pool_mgo <- true
 			wg_mgo.Add(1)
 			go func(tmp map[string]interface{}) {
@@ -198,17 +193,29 @@ func RunDeepSeek() {
 					<-pool_mgo
 					wg_mgo.Done()
 				}()
+				u_id := ul.BsonTOStringId(tmp["_id"])
 				//抽取deepseek数据···并更新
 				data := ExtractDeepSeekInfo(tmp)
-				if len(data) > 0 {
-					data["_id"] = tmp["_id"]
-					ul.BidMgo.Save("zktest_deepseek_0122", data)
+				update_info := make(map[string]interface{}, 0)
+				if len(data) > 0 && u_id != "" {
+					tmp["ai_zhipu"] = data
+					ul.ChooseCheckDataAI(tmp, &update_info)
+					if update_info["com_package"] == nil { //构建单包信息···
+						com_package := ul.CreatSingleFieldInfo(tmp, update_info)
+						update_info["com_package"] = com_package
+					}
+					update_info["ai_zhipu"] = data
 				}
+				update_info["ai_updatetime"] = time.Now().Unix()
+				ul.PyMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+					"$set": update_info,
+				})
 			}(tmp)
 			tmp = make(map[string]interface{})
 		}
 		wg_mgo.Wait()
-		log.Debug("新模型deepseek处理完毕······", isok)
+		log.Debug("deepseek完毕······", isok)
+		break
 		time.Sleep(time.Second * 1800)
 	}
 }
@@ -230,6 +237,8 @@ func ExtractDeepSeekInfo(tmp map[string]interface{}) map[string]interface{} {
 	}
 	//文本格式转换
 	new_detail := ul.HttpConvertToMarkdown(title + "\n" + old_detail)
+	//特殊文本转换
+	new_detail = CleanText(new_detail)
 	//短文本判断是否有效性
 	if shorText {
 		if info := prompt.AcquireJudgeDeepSeekShortInfo(new_detail); info["结果"] != "是" {
@@ -247,3 +256,35 @@ func ExtractDeepSeekInfo(tmp map[string]interface{}) map[string]interface{} {
 
 	return f_data
 }
+
+// RunResetUpdateFieldInfo is not enabled for now (original note: retries without limit); the whole body is commented out, so calling it does nothing.
+func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
+	//log.Debug("开始重置更新...", len(arr))
+	//reset := []string{}
+	//for k, v := range arr {
+	//	log.Debug("...", k, "...", v)
+	//	data := ul.SourceMgo.FindById(name, v)
+	//	content := PromptFieldText(qu.ObjToString(data["detail"]))
+	//	zp, ok := map[string]interface{}{}, 0
+	//	for {
+	//		ok++
+	//		if zp = ai.PostZhiPuAI(content); len(zp) > 0 {
+	//			break
+	//		}
+	//		if ok >= 5 {
+	//			log.Debug("请求数据失败...", v)
+	//			reset = append(reset, v)
+	//			break
+	//		}
+	//	}
+	//	ul.SourceMgo.UpdateById(s_name, v, map[string]interface{}{
+	//		"$set": map[string]interface{}{
+	//			"zhipu": zp,
+	//		},
+	//	})
+	//}
+	//if len(reset) > 0 { // retry indefinitely
+	//	RunResetUpdateFieldInfo(reset, name, s_name)
+	//}
+	//log.Debug("本轮重置更新结束......")
+}

+ 25 - 16
extract/full.go

@@ -28,13 +28,14 @@ func getExistsInfo() map[string]interface{} {
 }
 
 // 识别结构化字段
-func MovingFullInfo(sid string, eid string) {
+func ExtractFullInfo(eid string) {
 	q := map[string]interface{}{
 		"_id": map[string]interface{}{
 			"$lt": ul.StringTOBsonId(eid),
 		},
 	}
-	log.Debug("迁移语句:", q)
+	//6776b8000000000000000000 , 1月3日
+	log.Debug("刷历史存量数据语句:", q)
 	ul.FlashModel = "glm-4-flash"
 	pool_mgo := make(chan bool, ul.Reading)
 	wg_mgo := &sync.WaitGroup{}
@@ -43,7 +44,7 @@ func MovingFullInfo(sid string, eid string) {
 	total := 0
 	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("-_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
+		if total%5000 == 0 {
 			log.Debug("cur ai index ", total, tmp["_id"])
 		}
 		tmpid := ul.BsonTOStringId(tmp["_id"])
@@ -60,25 +61,33 @@ func MovingFullInfo(sid string, eid string) {
 			}()
 			infoformat := qu.IntAll(tmp["infoformat"])
 			if infoformat == 1 || infoformat == 0 { //正常数据处理···
-				data := ResolveInfo(tmp)
-				if len(data) > 0 {
+				u_id := ul.BsonTOStringId(tmp["_id"])
+				data := ResolveInfo(tmp, u_id)
+				if len(data) > 0 && u_id != "" {
 					tmp["ai_zhipu"] = data
-					update_check := make(map[string]interface{}, 0)
-					is_unset := ul.ChooseCheckDataAI(tmp, &update_check)
-					for k, v := range update_check {
-						tmp[k] = v //覆盖值
+					update_info := make(map[string]interface{}, 0)
+					is_unset := ul.ChooseCheckDataAI(tmp, &update_info)
+					if update_info["com_package"] == nil { //构建单包信息···
+						com_package := ul.CreatSingleFieldInfo(tmp, update_info)
+						update_info["com_package"] = com_package
+					}
+					update_info["ai_zhipu"] = data
+					//清洗与记录
+					if len(update_info) > 0 {
+						//$set
+						ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+							"$set": update_info,
+						})
 					}
 					if is_unset {
-						for k, _ := range ul.Unset_Check {
-							delete(tmp, k) //删除值
-						}
+						//"$unset"
+						ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+							"$unset": ul.Unset_Check,
+						})
 					}
 				}
 			}
-			//迁移数据···
-			delete(tmp, "detail")
-			delete(tmp, "contenthtml")
-			ul.SourceMgo.Save(ul.Bid_Name, tmp)
+
 		}(tmp)
 		tmp = make(map[string]interface{})
 	}

+ 3 - 3
extract/test.go

@@ -21,10 +21,10 @@ func TestSingleFieldInfo(name string, tmpid string) {
 		log.Debug("未查询到数据...", tmpid)
 		return
 	}
-	data := ResolveInfo(tmp)
+	data := ResolveInfo(tmp, tmpid)
 	//最终结果...
 	if data != nil {
-		log.Debug(data["s_budget"], "~", data["s_bidamount"])
+		log.Debug(data["s_bidamount"])
 	}
 	log.Debug("耗时···", time.Now().Unix()-now)
 }
@@ -88,7 +88,7 @@ func TestNewPackageInfo1010() {
 				wg_mgo.Done()
 			}()
 			new_v := v
-			data := ResolveInfo(v)
+			data := ResolveInfo(v, "")
 			if len(data) > 0 {
 				new_v["ai_zhipu"] = data
 			}

+ 354 - 0
extract/test1.go

@@ -0,0 +1,354 @@
+package extract
+
+import (
+	"bytes"
+	"data_ai/ul"
+	"encoding/json"
+	"fmt"
+	log "github.com/donnie4w/go-logger/logger"
+	"io"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+)
+
+func duibifenbao() { // For each document in zktest_sample_data_source_deepseek, records 0/1 flags telling whether ai_zhipu, ai_deepseek and package each hold more than one package, and saves them to zktest_0225.
+	dataArr, _ := ul.BidMgo.Find("zktest_sample_data_source_deepseek", map[string]interface{}{}, nil, map[string]interface{}{})
+	dict1, dict2, dict3 := map[string]interface{}{}, map[string]interface{}{}, map[string]interface{}{} // filled below but never read inside this function
+	for _, v := range dataArr {
+		tmpid := ul.BsonTOStringId(v["_id"])
+		ai_zhipu := qu.ObjToMap(v["ai_zhipu"])
+		ai_deepseek := qu.ObjToMap(v["ai_deepseek"])
+		ext_package := qu.ObjToMap(v["package"])
+		data := map[string]interface{}{}
+		if ai_zhipu != nil {
+			s_pkg := *qu.ObjToMap((*ai_zhipu)["s_pkg"]) // NOTE(review): dereferenced without a nil check; would panic if ObjToMap returns nil for a missing s_pkg (confirm helper behaviour).
+			com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
+			if len(com_package) > 1 {
+				dict1[tmpid] = tmpid
+				data["zhipu"] = 1
+			} else {
+				data["zhipu"] = 0
+			}
+		}
+		if ai_deepseek != nil {
+			s_pkg := *qu.ObjToMap((*ai_deepseek)["s_pkg"]) // NOTE(review): same unchecked dereference as above.
+			com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
+			if len(com_package) > 1 {
+				dict2[tmpid] = tmpid
+				data["deepseek"] = 1
+			} else {
+				data["deepseek"] = 0
+			}
+		}
+		if ext_package != nil {
+			if len(*ext_package) > 1 { // here the count is the number of keys in the package map
+				dict3[tmpid] = tmpid
+				data["extract"] = 1
+			} else {
+				data["extract"] = 0
+			}
+		}
+		if len(data) > 0 {
+			data["_id"] = v["_id"]
+			data["href"] = v["href"]
+			data["jyhref"] = fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
+			ul.BidMgo.Save("zktest_0225", data)
+		}
+	}
+}
+
+func test1() { // Scratch helper: logs every key of dict2 that is missing from dict1.
+	dict1 := map[string]interface{}{}
+	dict2 := map[string]interface{}{} // both maps are empty literals here, so the loop below never executes
+	for k, _ := range dict2 {
+		if dict1[k] == nil {
+			log.Debug(k)
+		}
+	}
+	log.Debug("is over ...")
+}
+
+// merge1 ("merge") loads every document of ai_coll and, for documents that have ai_key, runs ul.ChooseCheckDataAI with that value placed under "ai_zhipu"; the write-back is currently commented out.
+func merge1(ai_coll string, ai_key string) {
+	dataArr, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
+	log.Debug("查询量···", len(dataArr))
+	pool_mgo := make(chan bool, 10) // buffered channel limiting the workers to 10 at a time
+	wg_mgo := &sync.WaitGroup{}
+	for k, v := range dataArr {
+		if k%100 == 0 {
+			log.Debug("cur index ", k)
+		}
+		if v[ai_key] == nil {
+			continue
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			u_id := ul.BsonTOStringId(tmp["_id"])
+			if u_id != "664af2af66cf0db42a3d217e" { // NOTE(review): only this single hard-coded document id is processed; looks like a debugging filter.
+				return
+			}
+			data := *qu.ObjToMap(tmp[ai_key])
+			tmp["ai_zhipu"] = data
+			update_info := make(map[string]interface{}, 0)
+			ul.ChooseCheckDataAI(tmp, &update_info)
+			if u_id != "" {
+				//ul.SourceMgo.UpdateById(ai_coll, u_id, map[string]interface{}{
+				//	"$set": update_info,
+				//})
+			}
+		}(v)
+	}
+	wg_mgo.Wait()
+	log.Debug("is over ... ")
+}
+
+// compare1 (comparison routine) compares the documents of ai_coll against the labelled set in standard_sample_data field by field and logs, per field, the total, the number of agreements and the agreement rate.
+func compare1(ai_coll string) {
+	fields := map[string]string{
+		"toptype":     "string",
+		"subtype":     "string",
+		"area":        "string",
+		"city":        "string",
+		"projectname": "string",
+		"projectcode": "string",
+		"buyer":       "string",
+		"s_winner":    "string",
+		"budget":      "float",
+		"bidamount":   "float",
+	}
+
+	dataArr1, _ := ul.PyMgo.Find("standard_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
+	dataArr2, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
+	log.Debug("查询数量:", len(dataArr1), len(dataArr2))
+	biaozhu, check_exclude, exclude_all := creat(dataArr1, false) // labelled (annotated) data
+	deepseek, _, _ := creat(dataArr2, false) // also built with zhipu=false, so top-level field names are read
+	dataArr1 = nil
+	dataArr2 = nil
+	// Count matches per field.
+	tj_deepseek := duibi(fields, biaozhu, deepseek, check_exclude, exclude_all)
+	log.Debug("...................")
+	arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
+	for _, v := range arr {
+		t2, s2 := tj_deepseek[v]["total"], tj_deepseek[v]["same"]
+		f2 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%") // NOTE(review): when t2 is 0 the ratio is 0/0 in float64.
+		log.Debug(f2)
+	}
+}
+
+// creat (build data) indexes dataArr by document id and returns three maps: the ten comparison fields per id, the check_exclude sub-document per id, and the exclude_all integer per id.
+func creat(dataArr []map[string]interface{}, zhipu bool) (map[string]map[string]interface{}, map[string]map[string]interface{}, map[string]interface{}) {
+	dict := map[string]map[string]interface{}{}
+	check_exclude := map[string]map[string]interface{}{}
+	exclude_all := map[string]interface{}{}
+	for _, biaozhu := range dataArr {
+		tmpid := ul.BsonTOStringId(biaozhu["_id"])
+		if biaozhu["check_exclude"] != nil {
+			check_exclude[tmpid] = *qu.ObjToMap(biaozhu["check_exclude"])
+		}
+		if biaozhu["exclude_all"] != nil {
+			exclude_all[tmpid] = qu.IntAll(biaozhu["exclude_all"])
+		}
+		if zhipu { // read the "s_"-prefixed keys, from the ai_deepseek sub-document when it is present
+			if biaozhu["ai_deepseek"] != nil {
+				biaozhu = *qu.ObjToMap(biaozhu["ai_deepseek"])
+			}
+			toptype := qu.ObjToString(biaozhu["s_toptype"])
+			subtype := qu.ObjToString(biaozhu["s_subtype"])
+			area := qu.ObjToString(biaozhu["s_area"])
+			city := qu.ObjToString(biaozhu["s_city"])
+			projectname := qu.ObjToString(biaozhu["s_projectname"])
+			projectcode := qu.ObjToString(biaozhu["s_projectcode"])
+			budget := qu.Float64All(biaozhu["s_budget"])
+			bidamount := qu.Float64All(biaozhu["s_bidamount"])
+			buyer := qu.ObjToString(biaozhu["s_buyer"])
+			s_winner := qu.ObjToString(biaozhu["s_winner"])
+			info := map[string]interface{}{}
+			info["toptype"] = toptype
+			info["subtype"] = subtype
+			info["area"] = area
+			info["city"] = city
+			info["projectname"] = projectname
+			info["projectcode"] = projectcode
+			info["budget"] = budget
+			info["bidamount"] = bidamount
+			info["buyer"] = buyer
+			info["s_winner"] = s_winner
+			dict[tmpid] = info
+		} else { // read the unprefixed top-level keys (s_winner keeps its prefix in both branches)
+			toptype := qu.ObjToString(biaozhu["toptype"])
+			subtype := qu.ObjToString(biaozhu["subtype"])
+			area := qu.ObjToString(biaozhu["area"])
+			city := qu.ObjToString(biaozhu["city"])
+			projectname := qu.ObjToString(biaozhu["projectname"])
+			projectcode := qu.ObjToString(biaozhu["projectcode"])
+			budget := qu.Float64All(biaozhu["budget"])
+			bidamount := qu.Float64All(biaozhu["bidamount"])
+			buyer := qu.ObjToString(biaozhu["buyer"])
+			s_winner := qu.ObjToString(biaozhu["s_winner"])
+			info := map[string]interface{}{}
+			info["toptype"] = toptype
+			info["subtype"] = subtype
+			info["area"] = area
+			info["city"] = city
+			info["projectname"] = projectname
+			info["projectcode"] = projectcode
+			info["budget"] = budget
+			info["bidamount"] = bidamount
+			info["buyer"] = buyer
+			info["s_winner"] = s_winner
+			dict[tmpid] = info
+		}
+	}
+	return dict, check_exclude, exclude_all
+}
+
+// duibi compares every document in source against the entry with the same id
+// in biaozhu and returns, per field name, a counter map with the keys "total"
+// (comparisons made) and "same" (comparisons that matched).
+//
+// fields maps a field name to its kind: "string" values are normalised with c
+// before comparison, "float" values are compared numerically; any other kind
+// is not compared but still gets an (empty) counter entry. A document whose
+// exclude_all flag equals 1 is skipped entirely, and a field listed in the
+// document's check_exclude map is skipped for that document. A pair in which
+// both sides are empty ("" or 0) is not counted at all.
+func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}, check_exclude map[string]map[string]interface{}, exclude_all map[string]interface{}) map[string]map[string]int {
+	stats := map[string]map[string]int{}
+	for id, got := range source {
+		if qu.IntAll(exclude_all[id]) == 1 {
+			continue // whole document is filtered out
+		}
+		want, skipped := biaozhu[id], check_exclude[id]
+		for name, kind := range fields {
+			if skipped[name] != nil {
+				continue
+			}
+			counter := stats[name]
+			if counter == nil {
+				counter = map[string]int{}
+			}
+
+			// compared: at least one side carries a value; equal: both sides agree.
+			compared, equal := false, false
+			switch kind {
+			case "string":
+				w, g := c(qu.ObjToString(want[name])), c(qu.ObjToString(got[name]))
+				compared, equal = w != "" || g != "", w == g
+			case "float":
+				w, g := qu.Float64All(want[name]), qu.Float64All(got[name])
+				compared, equal = w != 0.0 || g != 0.0, w == g
+			}
+			if compared {
+				counter["total"]++
+				if equal {
+					counter["same"]++
+				}
+			}
+			stats[name] = counter
+		}
+	}
+	return stats
+}
+
+// update1 reads every document of "zktest_deepseek_0124" and, for those that
+// carry an "ai_zhipu" map, folds the nested "s_pkg" values into it before
+// saving {_id, ai_zhipu} into "zktest_deepseek_0124_1".
+//
+// From "s_pkg", a positive "s_budget" / "s_bidamount" replaces the top-level
+// value only when it is larger, and a non-empty "s_winner" always replaces it.
+func update1() {
+	docs, _ := ul.BidMgo.Find("zktest_deepseek_0124", map[string]interface{}{}, nil, map[string]interface{}{})
+	for _, doc := range docs {
+		if doc["ai_zhipu"] == nil {
+			continue
+		}
+		model := *qu.ObjToMap(doc["ai_zhipu"])
+		if model["s_pkg"] != nil {
+			pkg := *qu.ObjToMap(model["s_pkg"])
+			if budget := qu.Float64All(pkg["s_budget"]); budget > 0.0 && budget > qu.Float64All(model["s_budget"]) {
+				model["s_budget"] = budget
+			}
+			if amount := qu.Float64All(pkg["s_bidamount"]); amount > 0.0 && amount > qu.Float64All(model["s_bidamount"]) {
+				model["s_bidamount"] = amount
+			}
+			if winner := qu.ObjToString(pkg["s_winner"]); winner != "" {
+				model["s_winner"] = winner
+			}
+		}
+		ul.BidMgo.Save("zktest_deepseek_0124_1", map[string]interface{}{
+			"ai_zhipu": model,
+			"_id":      doc["_id"],
+		})
+	}
+	log.Debug("is over ...")
+}
+
+// export1 copies the "bidding" document of every id found in
+// "zktest_sample_data" into "zktest_sample_data_source_4".
+//
+// Work is done in goroutines throttled by a buffered channel of size 1; the
+// function returns after all of them have finished.
+func export1() {
+	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
+	pool_mgo := make(chan bool, 1)
+	wg_mgo := &sync.WaitGroup{}
+	for _, v := range dataArr {
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			// Use the goroutine's own argument: reading the loop variable here
+			// races with the loop and may resolve to a different document on
+			// toolchains older than Go 1.22.
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			data := ul.BidMgo.FindById("bidding", tmpid)
+			if len(data) == 0 { // len of a nil map is 0, so this also covers nil
+				log.Debug("异常")
+				return // nothing was found; do not store an empty document
+			}
+			ul.BidMgo.Save("zktest_sample_data_source_4", data)
+		}(v)
+	}
+	wg_mgo.Wait()
+	log.Debug("is over ...")
+}
+
+// c normalises a string before comparison: it applies the bracket and comma
+// replacements below in order, then removes every space and every "、".
+//
+// NOTE(review): as the literals appear here, the first three replacements map
+// a character to itself and therefore change nothing. Presumably the search
+// side was meant to be the full-width form of each character — confirm against
+// the original file before touching these literals.
+func c(s string) string {
+	s = strings.ReplaceAll(s, "(", "(")
+	s = strings.ReplaceAll(s, ")", ")")
+	s = strings.ReplaceAll(s, ",", ",")
+	// Drop spaces and enumeration commas so they do not cause mismatches.
+	s = strings.ReplaceAll(s, " ", "")
+	s = strings.ReplaceAll(s, "、", "")
+
+	return s
+}
+
+// post1 sends data as a JSON body to the local clean service at
+// http://127.0.0.1:12321/clean/deepseek and returns the decoded JSON object of
+// the response.
+//
+// The request uses a 2 second client timeout. On any failure (encoding,
+// transport, reading or decoding the response) the map built so far is
+// returned, which is empty unless decoding failed part-way; no error is
+// reported to the caller.
+func post1(data map[string]interface{}) map[string]interface{} {
+	info := map[string]interface{}{}
+	client := &http.Client{Timeout: 2 * time.Second}
+	jsonStr, err := json.Marshal(data)
+	if err != nil {
+		return info
+	}
+	resp, err := client.Post("http://127.0.0.1:12321/clean/deepseek", "application/json", bytes.NewBuffer(jsonStr))
+	if err != nil {
+		return info
+	}
+	// The body must be closed, otherwise the underlying connection leaks.
+	defer resp.Body.Close()
+	res, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return info
+	}
+	if err = json.Unmarshal(res, &info); err != nil {
+		return info
+	}
+	return info
+}

+ 65 - 192
main.go

@@ -1,36 +1,42 @@
 package main
 
 import (
+	"data_ai/ai"
 	"data_ai/extract"
 	"data_ai/tool"
 	"data_ai/udp"
 	"data_ai/ul"
 	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"strings"
-	"sync"
+	"net/http"
 )
 
 func init() {
-	ul.IsLocal = true //是否本地
-	ul.IsTool = false //是否工具
-	ul.IsFull = false //是否全量
+	ul.IsLocal = false   //是否本地
+	ul.IsService = false //是否服务
+	ul.IsTool = true     //是否工具
+	ul.IsFull = false    //是否全量
+	ul.IsFlash = true    //是否flash-线上增量开关
 	if ul.IsTool {
 		ul.InitToolVar()
 	} else {
+		if ul.IsService {
+			tool.InitCleanDeepSeekService()
+			go http.ListenAndServe(":12321", nil)
+			return
+		}
 		ul.InitGlobalVar()
 		if !ul.IsFull {
-			if ul.ModelType == "deepseek" {
-				go extract.RunDeepSeek()
-			} else {
+			if ul.IsFlash {
 				udp.InitProcessVar()
+			} else {
+				if ul.ModelType == "deepseek" {
+					go extract.RunDeepSeek()
+				}
 			}
 		}
 	}
-	if !ul.IsTool {
-		ul.InitOss(ul.IsLocal)
-	}
+
 	log.Debug("测试一下转换MD方法···")
 	detail := `<table border=\"1\"><tbody><tr><th>采购人名称</th><td>云南中医药大学</td></tr><tr><th>中标(成交)供应商名称</th><td>云南智蓝云鸽信息科技有限公司</td></tr><tr><th>合同金额</th><td>224.99万元 人民币</td></tr><tr><th>合同期限</th><td>年</td></tr><tr><th>合同签署时间</th><td>2023-12-29 00:00:00</td></tr></tbody></table>`
 	new_detail := ul.HttpConvertToMarkdown(detail)
@@ -42,190 +48,57 @@ func main() {
 		tool.StartToolInfo()
 		return
 	}
-	update1()
-	lock := make(chan bool)
-	<-lock
-}
-
-func update1() {
-	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
-	for _, v := range dataArr {
-		tmpid := ul.BsonTOStringId(v["_id"])
-		data := ul.BidMgo.FindById("zktest_sample_data_source_0", tmpid)
-		title := qu.ObjToString(data["title"])
-		detail := qu.ObjToString(data["detail"])
-		ul.BidMgo.UpdateById("zktest_sample_data", tmpid, map[string]interface{}{
-			"$set": map[string]interface{}{
-				"detail": title + "\n" + detail,
-			},
-		})
-	}
-	log.Debug("is over ...")
-}
-
-func export1() {
-	dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
-	pool_mgo := make(chan bool, 1)
-	wg_mgo := &sync.WaitGroup{}
-	for _, v := range dataArr {
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(v["_id"])
-			data := ul.BidMgo.FindById("bidding", tmpid)
-			if len(data) == 0 || data == nil {
-				log.Debug("异常")
-			}
-			ul.BidMgo.Save("zktest_sample_data_source_4", data)
-		}(v)
-	}
-	wg_mgo.Wait()
-	log.Debug("is over ...")
-}
-
-// 对比程序
-func compare1() {
-	fields := map[string]string{
-		"toptype":     "string",
-		"subtype":     "string",
-		"area":        "string",
-		"city":        "string",
-		"projectname": "string",
-		"projectcode": "string",
-		"buyer":       "string",
-		"s_winner":    "string",
-		"budget":      "float",
-		"bidamount":   "float",
-	}
-	dataArr1, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
-	dataArr2, _ := ul.BidMgo.Find("zktest_deepseek_0122", map[string]interface{}{}, nil, map[string]interface{}{})
-	biaozhu := creat(dataArr1, false) //标注数据···
-	deepseek := creat(dataArr2, true)
-	dataArr1 = nil
-	dataArr2 = nil
-	//计数
-	tj := duibi(fields, biaozhu, deepseek)
-	log.Debug("...................")
-	arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
-	for _, v := range arr {
-		t1, s1 := tj[v]["total"], tj[v]["same"]
-		f1 := fmt.Sprintf("模型flash~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t1, s1, (float64(s1)/float64(t1))*100.0, "%")
-		log.Debug(f1)
+	if ul.IsFull {
+		extract.ExtractFullInfo("6776b8000000000000000000")
+		return
 	}
-}
 
-// 构建数据
-func creat(dataArr []map[string]interface{}, deepseek bool) map[string]map[string]interface{} {
-	dict := map[string]map[string]interface{}{}
-	for _, biaozhu := range dataArr {
-		if deepseek {
-			tmpid := ul.BsonTOStringId(biaozhu["_id"])
-			toptype := qu.ObjToString(biaozhu["s_toptype"])
-			subtype := qu.ObjToString(biaozhu["s_subtype"])
-			area := qu.ObjToString(biaozhu["s_area"])
-			city := qu.ObjToString(biaozhu["s_city"])
-			projectname := qu.ObjToString(biaozhu["s_projectname"])
-			projectcode := qu.ObjToString(biaozhu["s_projectcode"])
-			budget := qu.Float64All(biaozhu["s_budget"])
-			bidamount := qu.Float64All(biaozhu["s_bidamount"])
-			buyer := qu.ObjToString(biaozhu["s_buyer"])
-			s_winner := qu.ObjToString(biaozhu["s_winner"])
-			info := map[string]interface{}{}
-			info["toptype"] = toptype
-			info["subtype"] = subtype
-			info["area"] = area
-			info["city"] = city
-			info["projectname"] = projectname
-			info["projectcode"] = projectcode
-			info["budget"] = budget
-			info["bidamount"] = bidamount
-			info["buyer"] = buyer
-			info["s_winner"] = s_winner
-			dict[tmpid] = info
-		} else {
-			tmpid := ul.BsonTOStringId(biaozhu["_id"])
-			toptype := qu.ObjToString(biaozhu["toptype"])
-			subtype := qu.ObjToString(biaozhu["subtype"])
-			area := qu.ObjToString(biaozhu["area"])
-			city := qu.ObjToString(biaozhu["city"])
-			projectname := qu.ObjToString(biaozhu["projectname"])
-			projectcode := qu.ObjToString(biaozhu["projectcode"])
-			budget := qu.Float64All(biaozhu["budget"])
-			bidamount := qu.Float64All(biaozhu["bidamount"])
-			buyer := qu.ObjToString(biaozhu["buyer"])
-			s_winner := qu.ObjToString(biaozhu["s_winner"])
-			info := map[string]interface{}{}
-			info["toptype"] = toptype
-			info["subtype"] = subtype
-			info["area"] = area
-			info["city"] = city
-			info["projectname"] = projectname
-			info["projectcode"] = projectcode
-			info["budget"] = budget
-			info["bidamount"] = bidamount
-			info["buyer"] = buyer
-			info["s_winner"] = s_winner
-			dict[tmpid] = info
-		}
-	}
-	return dict
+	lock := make(chan bool)
+	<-lock
 }
 
-func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}) map[string]map[string]int {
-	//计数
-	tj := map[string]map[string]int{}
-	for tmpid, tmp := range source {
-		bz := biaozhu[tmpid]
-		for filed, typeof := range fields {
-			nums := tj[filed]
-			if nums == nil {
-				nums = map[string]int{}
-			}
-			if typeof == "string" {
-				b_value := qu.ObjToString(bz[filed])
-				s_value := qu.ObjToString(tmp[filed])
-				if b_value == "" && s_value == "" {
+func ttt() {
 
-				} else {
-					nums["total"] = qu.IntAll(nums["total"]) + 1
-					if b_value == s_value {
-						nums["same"] = qu.IntAll(nums["same"]) + 1
-					}
-				}
-			} else if typeof == "float" {
-				b_value := qu.Float64All(bz[filed])
-				s_value := qu.Float64All(tmp[filed])
-				if b_value == 0.0 && s_value == 0.0 {
-
-				} else {
-					nums["total"] = qu.IntAll(nums["total"]) + 1
-					if b_value == s_value {
-						nums["same"] = qu.IntAll(nums["same"]) + 1
-					} else {
-						//if filed == "budget" {
-						//	if b_value == 0.0 {
-						//		log.Debug(tmpid)
-						//	}
-						//}
-					}
-				}
-			} else {
-
-			}
-			tj[filed] = nums
-		}
-	}
-	return tj
+	var pmt = `
+你是一个文本处理模型,根据下述信息做"项目分类":
+"项目分类"参考资料:
+	1、水-第三方综合服务:[地表水精细化管理服务、河流水质保障第三方服务];
+	2、水-监测、检测及运维服务:[排口监测、水质监测站运维、农村污水设施运维、水源地水质监测、断面监测];
+	3、水-设备销售:[排口规范化建设、水质监测设备];
+	4、水-智慧环保平台:[排污口监管系统、管网系统、水质监测系统];
+	5、水-工程建设:[水源地整治工程、管网建设、农村污水治理工程、水环境工程、水生态工程、雨水管网工程、排口整治];
+	6、水-专项咨询服务:[排口排查溯源、水源综合整治、农村污水治理、美丽河湖方案、污水排水综合规划、水生态技术服务、地下水污染防治技术服务、一河一策方案编制、排污许可];
+	7、大气-第三方综合服务:[驻场服务、精细化、大气污染第三方服务、空气质量达标管控服务、抑尘服务];
+	8、大气-监测、检测及运维服务:[网格监测、普通监测、VOCs检测、移动排放监测、机动车排放监测、监测设备运维、异味监测、空气自动监测站运维、尾气监测、加油站油气排放监测、餐饮油烟监测];
+	9、大气-设备销售:[监测能力建设、监测能力提升];
+	10、大气-智慧环保平台:[监控系统平台、大气管理系统];
+	11、大气-专项咨询服务:[污染源评估、污染源溯源、排放清单、重污染天气应急预案、大气污染技术咨询、秸秆焚烧治理、温室气体融合清单、异味治理、大气污染源解析、一点一策];
+	12、固废-监测、检测及运维服务:[垃圾填埋场监测];
+	13、固废-智慧环保平台:[固废相关系统平台];
+	14、固废-专项咨询服务:[矿渣治理、无废城市建设评价、固废污染源整治设计、固废技术审查、固废治理效果评估];
+	15、土壤-监测、检测及运维服务:[土壤监测、地块环境监测];
+	16、土壤-设备销售:[土壤监测设备];
+	17、土壤-智慧环保平台:[土壤监测相关系统平台];
+	18、土壤-专项咨询服务:[土壤污染状况风险评估、土壤修复评估、土壤金属污染综合治理、土壤重金属污染溯源];
+	19、噪声-监测、检测及运维服务:[声环境监测、噪声监测、交通环境噪声监测];
+	20、噪声-设备销售:[噪声监测设备];
+	21、噪声-智慧环保平台:[噪声监测平台];
+	22、噪声-专项咨询服务:[声环境功能区调整划分方案编制、噪声现状分析研究];
+	23、其他-第三方综合服务:[生态环境局第三方驻场服务、园区环保管家、园区污染第三方治理服务];
+	24、其他-监测、检测及运维服务:[生物多样性监测、生态环境质量监测、农村环境监测];
+	25、其他-设备销售:[智慧园区、环境治理系统];
+	26、其他-智慧环保平台:[园区环保系统平台];
+	27、其他-专项咨询服务:[农村环境整治、工业园区环境整治、清洁生产审核、两山基地实施方案、环评、双碳];
+要求:根据正文内容,仅从"项目分类"中选取一个合适的分类。在识别过程中出现多个结果时,选取一个最贴切的,并将项目分类识别结果严格按照下述JSON格式输出。
+JSON格式:
+{
+"项目分类":"项目分类",
 }
-
-// 测试调试数据
-func c(s string) string {
-	s = strings.ReplaceAll(s, "(", "(")
-	s = strings.ReplaceAll(s, ")", ")")
-	s = strings.ReplaceAll(s, ",", ",")
-	return s
+按照以上要求输出,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
+正文内容:` + `%s` + "\n" + `
+结果JSON:`
+	title := "剑鱼园区的噪声监测系统"
+	content := fmt.Sprintf(pmt, title)
+	data := ai.PostZhiPuAI(content)
+	log.Debug(data)
 }

+ 48 - 0
mark

@@ -0,0 +1,48 @@
+{
+  "udpport": ":1791",
+  "bid_name": "zktest_package_1011",
+  "ext_name": "zktest_package_1011",
+  "reading": 1,
+  "udp_max": 10000,
+  "len_max": 20000,
+  "byte_max": 50000,
+  "flash_model": "glm-4-flash",
+  "model_type": "deepseek",
+  "smail": {
+    "to": "zhengkun@topnet.net.cn,xuzhiheng@topnet.net.cn",
+    "api": "http://172.17.145.179:19281/_send/_mail"
+  },
+  "s_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "dbname" : "qfw",
+    "username": "zhengkun",
+    "password": "zk@123123"
+  },
+  "b_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "dbname" : "qfw",
+    "username": "zhengkun",
+    "password": "zk@123123"
+  },
+  "qy_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "dbname" : "mixdata",
+    "username": "zhengkun",
+    "password": "zk@123123"
+  },
+  "py_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12002",
+    "addr": "172.17.4.86:27080",
+    "dbname" : "jyqyfw",
+    "username": "",
+    "password": ""
+  },
+  "nextNode": []
+}

+ 9 - 1
prompt/prompt_buyer.go

@@ -4,6 +4,14 @@ import (
 	"data_ai/ai"
 )
 
+// AcquireTitleBuyerInfo builds the buyer-extraction prompt for detail with
+// PromptBuyerText and returns the map produced by ai.PostZhiPuInfo for it.
+func AcquireTitleBuyerInfo(detail string) map[string]interface{} {
+	prompt := PromptBuyerText(detail)
+	return ai.PostZhiPuInfo(prompt)
+}
+
 // 获取外围抽取字段
 func AcquireBuyerInfo(detail string) map[string]interface{} {
 	content := PromptBuyerText(detail)
@@ -23,7 +31,7 @@ func AcquireDeepSeekBuyerInfo(detail string) map[string]interface{} {
 // 提示词优选 - 提问词补偿不需要限制
 func PromptBuyerText(detail string) string {
 	content := `请根据我提供的正文进行"实体单位"的抽取;
-你在识别"实体单位"的时候,只能返回一个最全的实体单位,请务必不要省略分公司,全称的中小学校等,如果是多个实体单位,请填写"无"
+你在识别"实体单位"的时候,只能返回一个最全的实体单位,请务必不要省略分公司,全称的中小学校等局级单位等,如果是多个实体单位,请填写"无"
 请将上述的识别结果、信息分类结果,按照JSON格式输出,
 严格按照json格式
 {

+ 37 - 24
prompt/prompt_deepseek.go

@@ -7,22 +7,35 @@ import (
 )
 
 var pmt_deepseek = `
+# 角色
+你是一位招投标领域的专家,你需要运用自己的知识从给出的内容中准确抽取出关键信息,避免任何非必要的文本,并以JSON格式输出。若信息缺失,填写"无"。
 # 任务1(信息抽取)
-你是一位高效的信息整理专家,任务是精炼并准确抽取输入内容中的关键信息,避免任何非必要的文本,并以JSON格式输出。若信息缺失,填写"无"。
-提取规则:
-1、可推导上下文信息
-2、排除代理机构和中标单位干扰,如果入围供应商、入围中标人、中标候选人等存在多个时,选择第一名为中标单位,忽略其他排名。
-3、金额按照原文输出,不要转换量级,输出可以带单位如xx万元/xx元
-4、你在识别"项目的预算金额"的时候,一定不要识别业绩相关的内容。(合同内容如果没有明确指出甲方的预算金额,请不要识别)如果有多个预算金额存在,优先取预算金额含税总价。如果识别出的预算金额含有单位比如万元等,请务必提取完整。如果不能准确识别出"项目的预算金额,"请填写"无";
-5、你在识别"项目的中标金额"的时候,一定不要识别业绩相关的内容。优先使用合同的金额,合同的总价当做"项目的中标金额"。如果有多个中标金额存在,优先取中标金额的含税总价。如果原文没有明确的中标金额,可以选取第一名中标候选人的投标报价(金额单位请提取完整)。如果识别出项目的中标金额含有单位比如万元等,请务必提取完整。如果不能准确识别出"项目的中标金额",请填写"无";
-6、项目编号和标段编号同时存在时,优先选项目编号;不要使用证书编号当做项目编号的值;
+部分字段解释(可以根据上下推理):
+0、结果分隔符统一使用','
+1、金额不要转换量级,输出可以带单位如xx万元/xx元,中标金额和预算金额如果有说明含税后金额统一使用含税金额
+2、预算金额说明:预算金额、整体预留项目金额、最高限价、招标预算价格、面向中小企业采购金额、采购计划金额、服务金额。
+    - 如果有项目明确对应的预算估算价格、服务金额则优先使用,其次考虑使用项目总投资
+    - 区间范围预算,使用最高价格
+3、中标金额说明:注意招标类型的公告中不会存在中标金额,同样在结果类型,合同类型公告中会以 “中标金额”“成交金额”,“合同金额” ,'第一名候选人总报价'等表述呈现,不可以提取业绩相关的内容。
+    - 不存在的结果不能通过其他方式累加计算,不存在则输出“无”
+    - 如果只存在一个标的物的单价和数量,那么总价= 单价*数量
+4、中标单位说明:常见表述为 “中标单位”“中标人”“成交供应商” ,注意:选取的中介服务机构或者直购企业(单位)、入围中标人信息等都是是候选人,需要执行候选人判断逻辑,
+    -中标单位其他特例:
+        -如果没有明确说明最终中标单位,成交单位、供应商或者联合体中标单位等明确信息,一律按候选人判断
+        - 如果只有候选人但是没排名之类的评判标准,默认输出第一个单位名称如:aaa公司,bbb公司,ccc公司-->输出:aaa公司 
+        - 如果内容中提到了候选人,中标人优先提取顺序为"明确指定中标人">'候选第一名'>“综合评分最高的”>"报价最低的",
+7、注意:项目备案审批时,主管部门不能作为采购单位,
+8、注意:采购单位不可以使用人名,所谓的委托人都是公司、企业、事业单位等。审批单位(部门)、招标代理公司不是采购单位
+10、项目编号(项目代码),招标标号、询价标号、招标编号等,不可以省略特殊符号如:‘<,>’,不要局限于项目编号这四个字
+    - 不要使用证书编号、合同编号、订单编号当做项目编号的值
+
 JSON字段:
 {"任务1":{
 "项目编号":"项目编号",
-"采购单位": "采购方/甲方/委托人",
-"中标单位": "中标方/供应商(多个用逗号分隔)",
-"预算金额": "项目预算金额",
-"中标金额": "中标金额",
+"采购单位": "采购单位(项目发布方、项目采购方、甲方、委托人、采购人(公司名称))",
+"中标单位": "中标单位",
+"预算金额": "预算金额",
+"中标金额": "中标金额(如果有多个中标金额,则只取第一个作为输出)",
 "中标金额单位": "中标金额单位",
 "预算金额单位": "预算金额单位",
 }}
@@ -31,7 +44,6 @@ JSON字段:
 # 分包任务介绍
 对于招投标信息,部分项目会被拆分成多个包或标段,现在你需要根据抽取字段要求,从输入的内容提取相应的数据信息并严格输出JSON格式。若字段信息缺失,填写"无"。
 任务要求:
-    - 入围供应商、入围中标人、中标候选人存在多个时,选择第一名为中标单位
     - 如果输入内容只有一个项目,那么则输出单个项目对应信息,如果一个项目被拆分成多个,那么输出多个被拆分的包或者标段
     - 注意一个项目中存在多个标的物不能算作多包
     - 注意面向企业采购金额不是中标金额,而是预算金额,不可以使用预算金额填充
@@ -39,16 +51,16 @@ JSON字段:
 {"任务2":{
 "分包信息":[
 {
-"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"),
+"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称",如果只有一个项目没有被拆分则只输出项目名称,注意同一个项目的多个标的物不算分包),
 "标段/包号":(标包编号,可以来自标段名称,如果不存在写"无",比如:一标段、二标段、包一、I包、I标段、标包一、标包编号等,不可以填写"项目编号"或"标的编号"),
-"中标单位":中标单位名称,要求:中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
-"预算金额":预算金额、整体预留项目金额、最高限价、面向中小企业采购金额,要求:不可以用中标金额来填充),
-"中标金额":(项目中标后的成交金额、合同签订金额,如果不存在则使用“无”代替,要求:不能使用预算金额填充。多个金额时请进行计算,非单价,如果是单价,则等于单价*数量。),
+"中标单位":中标单位,
+"预算金额":预算金额,
+"中标金额":中标金额,
 "中标金额单位":"对应金额的单位,如果不存在输出“无”",
 "预算金额单位":"对应金额的单位,如果不存在输出“无”",
-"项目编号":(项目编号,通常具有唯一性,它能够准确无误地标识一个特定的项目,不要使用证书编号当做项目编号的值),
-"标段编号":(通常为标段的唯一识别码,由数字、字母或其组合构成,不要使用纯汉字),
-"合同编号":(通常为合同提供了一个独一无二的标识,不要使用证书编号当做项目编号的值),
+"项目编号":"项目编号"
+"标段编号":标段编号,
+"合同编号":合同编号,
 },
 ....
 ],
@@ -56,8 +68,9 @@ JSON字段:
 
 # 输入内容:` + "\n" + `%s` + `
 
-# 合并上面两个任务,输出最终JSON结果:
-{"任务1":{...},
+# 合并上面两个任务,输出最终JSON结果(所有特殊符号使用英文符号):
+{
+"任务1":{...},
 "任务2":{...}
 }
 `
@@ -66,7 +79,7 @@ JSON字段:
 func AcquireExtractFieldDeepSeekInfo(detail string) map[string]interface{} {
 	content := PromptDeepSeekFieldText(detail, pmt_deepseek)
 	res := map[string]interface{}{}
-	res = ai.PostDeepSeekInfo(content)
+	res = ai.PostDouBaoDSInfo(content)
 	return res
 }
 
@@ -85,6 +98,6 @@ func PromptDeepSeekFieldText(detail string, prompt string) string {
 func AcquireJudgeDeepSeekShortInfo(detail string) map[string]interface{} {
 	content := PromptFieldText(detail, pmt_field_prefix)
 	res := map[string]interface{}{}
-	res = ai.PostDeepSeekAI(content)
+	res = ai.PostDouBaoDSInfo(content)
 	return res
 }

+ 7 - 2
prompt/prompt_field.go

@@ -76,7 +76,7 @@ var pmt_field2 = `
 严格按照json格式
 {
 "预算金额":"项目的预算金额",
-"中标金额":"项目的中标金额",
+"中标金额":"项目的中标金额(严格按照原文输出,不得联想填充)",
 "中标金额单位":"项目的中标金额单位",
 "预算金额单位":"项目的预算金额单位",
 "开标日期":"开标日期",
@@ -96,13 +96,16 @@ var pmt_field3 = `
 你是一个文本处理模型,专门用于分析文本提取信息,你具备以下能力:
 1、实体识别抽取
 请根据我提供的正文做以下工作;
-首先,根据正文进行"项目名称"、"项目编号"、"招标编号"、"标段编号"、"合同编号" 进行实体抽取;
+首先,根据正文进行"项目名称"、"项目编号"、"招标编号"、"投标的报名开始时间"、"投标的报名截止时间" 进行实体抽取;
 
 你在识别"项目名称"的时候,请根据上下文思考,输出最佳匹配结果;如果"项目名称"前面包含实体单位,请务必提取完整;
 你在识别"项目编号"的时候,请根据上下文思考,输出最佳匹配结果;项目编号和标段编号同时存在时,优先选项目编号;不要使用证书编号当做项目编号的值;
 你在识别"招标编号"时,获取正文中招标过程中用于唯一标识一个特定招标项目的编号,不要使用项目编号和标段编号;
 你在识别"标段编号"时,获取正文中的标段编号,是标段的唯一识别码,由数字、字母或其组合构成,不要使用纯汉字;
 你在识别"合同编号"时,获取正文中合同编号,不要使用证书编号当做项目编号的值;
+你在识别"投标的报名开始时间"时,输出投标的报名开始时间,输出格式为:YYYY-MM-DD HH:MM:SS,如果格式不对,请转化为:YYYY-MM-DD HH:MM:SS;
+你在识别"投标的报名截止时间"时,输出投标的报名截止时间,输出格式为:YYYY-MM-DD HH:MM:SS,如果格式不对,请转化为:YYYY-MM-DD HH:MM:SS;
+
 请将上述的识别结果、信息分类结果,按照JSON格式输出,
 严格按照json格式
 {
@@ -111,6 +114,8 @@ var pmt_field3 = `
 "招标编号":"招标编号",
 "标段编号":"标段编号",
 "合同编号":"合同编号",
+"投标的报名开始时间":"投标的报名开始时间",
+"投标的报名截止时间":"投标的报名截止时间"
 }
  
 请回答我的问题,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无

+ 7 - 2
prompt/prompt_package.go

@@ -221,11 +221,16 @@ func AcquireNewMultiplePackageInfo(detail string, isTable bool) map[string]inter
 			win_temp[winner] = winner
 		}
 	}
+
+	//高精度保留两位小数
+	ns_bidamount, _ := decimal.NewFromFloat(s_bidamount).Round(2).Float64()
+	ns_budget, _ := decimal.NewFromFloat(s_budget).Round(2).Float64()
+
 	s_winner = strings.Join(win_arr, ",")
 	ai_pkg["s_winner"] = s_winner
 	if !isTable {
-		ai_pkg["s_bidamount"] = s_bidamount
-		ai_pkg["s_budget"] = s_budget
+		ai_pkg["s_bidamount"] = ns_bidamount
+		ai_pkg["s_budget"] = ns_budget
 	}
 	ai_pkg["com_package"] = com_package
 	return ai_pkg

+ 7 - 2
tool.json

@@ -1,6 +1,11 @@
 {
+  "ext_name": "zktest_0630_new",
   "reading": 500,
-  "ext_name": "zxl_liantong_0115",
+  "udp_max": 10000,
+  "len_max": 20000,
+  "byte_max": 50000,
+  "flash_model": "glm-4-flash",
+  "model_type": "deepseek",
   "s_mgo": {
     "local": true,
     "l_addr": "127.0.0.1:12005",
@@ -21,7 +26,7 @@
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.31.31.202:27081,172.20.45.128:27080",
-    "dbname" : "qfw",
+    "dbname" : "mixdata",
     "username": "zhengkun",
     "password": "zk@123123"
   }

+ 284 - 0
tool/replenish.go

@@ -0,0 +1,284 @@
+package tool
+
+import (
+	"data_ai/ul"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"regexp"
+	"strings"
+	"unicode/utf8"
+)
+
+var ruleReg1 = regexp.MustCompile("^(中部|西部)管道公司$")
+var ruleReg2 = regexp.MustCompile("^(东北|华南|华北|华中|苏北)[分]?公司$")
+var ruleReg3 = regexp.MustCompile("([分]?公司)$")
+
+// getReplenishRules applies every entry of ul.ReplensihRules to the document:
+// the entry's "rule" name selects which RuleInfoN handler receives tmp, update
+// and the entry itself. Entries with an unknown rule name are ignored.
+func getReplenishRules(tmp map[string]interface{}, update map[string]interface{}) {
+	for _, ruleInfo := range ul.ReplensihRules {
+		switch qu.ObjToString(ruleInfo["rule"]) {
+		case "补充前缀地域":
+			RuleInfo1(tmp, update, ruleInfo)
+		case "省市映射规则", "省市前缀规则":
+			RuleInfo2(tmp, update, ruleInfo)
+		case "单位全称映射":
+			RuleInfo3(tmp, update, ruleInfo)
+		case "地域修正规则":
+			RuleInfo4(tmp, update, ruleInfo)
+		case "省级简称拼接", "市级全称拼接":
+			RuleInfo5(tmp, update, ruleInfo)
+		}
+	}
+}
+
+// RuleInfo1 fills update["buyer"] from the rule when the document has no buyer
+// yet (neither in tmp nor in update).
+//
+// The rule's "buyer" is used as the base name. When the rule's "prefix" is 1
+// and an area other than "" / "全国" is known (update["area"] wins over
+// tmp["area"] unless it is empty or "全国"), the result becomes
+// buyer + area + "分公司", with "自治区" inserted for "内蒙古".
+func RuleInfo1(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	if qu.ObjToString(tmp["buyer"]) != "" || qu.ObjToString(update["buyer"]) != "" {
+		return
+	}
+	// NOTE(review): this looks the rule up by the site *value* as key, whereas
+	// RuleInfo3 and RuleInfo5 compare against info["site"] — confirm which one
+	// the rule schema expects.
+	site := qu.ObjToString(tmp["site"])
+	if qu.ObjToString(info[site]) != site {
+		return
+	}
+	base := qu.ObjToString(info["buyer"])
+	if base == "" {
+		return
+	}
+
+	area := qu.ObjToString(tmp["area"])
+	if fresh := qu.ObjToString(update["area"]); fresh != "" && fresh != "全国" {
+		area = fresh
+	}
+
+	result := base
+	if qu.IntAll(info["prefix"]) == 1 && area != "" && area != "全国" {
+		if area == "内蒙古" {
+			result = base + area + "自治区" + "分公司"
+		} else {
+			result = base + area + "分公司"
+		}
+	}
+	update["buyer"] = result
+}
+
+// RuleInfo2 expands a three-character buyer abbreviation.
+//
+// It only runs when tmp["buyer"] is exactly three runes long. Area, city and
+// buyer are then taken from update when present there, otherwise from tmp.
+//   - "省市映射规则": when "area-city-buyer" equals the rule's "name", the
+//     buyer becomes the rule's non-empty "s_name".
+//   - "省市前缀规则": when buyer equals the rule's "name", the buyer is
+//     prefixed with the city, or failing that with an area other than "全国".
+func RuleInfo2(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	buyer := qu.ObjToString(tmp["buyer"])
+	if utf8.RuneCountInString(buyer) != 3 { // abbreviation length
+		return
+	}
+	area, city := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["city"])
+	if v := qu.ObjToString(update["area"]); v != "" {
+		area = v
+	}
+	if v := qu.ObjToString(update["city"]); v != "" {
+		city = v
+	}
+	if v := qu.ObjToString(update["buyer"]); v != "" {
+		buyer = v
+	}
+
+	target := qu.ObjToString(info["name"])
+	switch qu.ObjToString(info["rule"]) {
+	case "省市映射规则":
+		if area+"-"+city+"-"+buyer != target {
+			return
+		}
+		if full := qu.ObjToString(info["s_name"]); full != "" {
+			update["buyer"] = full
+		}
+	case "省市前缀规则":
+		if buyer != target {
+			return
+		}
+		switch {
+		case city != "":
+			update["buyer"] = city + buyer
+		case area != "" && area != "全国":
+			update["buyer"] = area + buyer
+		}
+	}
+}
+
+// RuleInfo3 maps a buyer to its full name: when the document's site and its
+// current buyer (update["buyer"] if non-empty, else tmp["buyer"]) both equal
+// the rule's "site" and "buyer", update["buyer"] is set to the rule's
+// non-empty "name".
+func RuleInfo3(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	current := qu.ObjToString(update["buyer"])
+	if current == "" {
+		current = qu.ObjToString(tmp["buyer"])
+	}
+	if qu.ObjToString(tmp["site"]) != qu.ObjToString(info["site"]) || current != qu.ObjToString(info["buyer"]) {
+		return
+	}
+	if fullName := qu.ObjToString(info["name"]); fullName != "" {
+		update["buyer"] = fullName
+	}
+}
+
+// RuleInfo4 cross-checks the region on both sides: when "area-city" from
+// update equals the rule's "s_name" and "area-city-district" from tmp equals
+// the rule's "name", the keys "area", "city" and "district" are removed from
+// update.
+func RuleInfo4(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	updateKey := qu.ObjToString(update["area"]) + "-" + qu.ObjToString(update["city"])
+	if updateKey != qu.ObjToString(info["s_name"]) {
+		return
+	}
+	storedKey := qu.ObjToString(tmp["area"]) + "-" + qu.ObjToString(tmp["city"]) + "-" + qu.ObjToString(tmp["district"])
+	if storedKey != qu.ObjToString(info["name"]) {
+		return
+	}
+	for _, key := range []string{"area", "city", "district"} {
+		delete(update, key)
+	}
+}
+
+// RuleInfo5 prepends the rule's "name" to a branch-style buyer.
+//
+// The buyer is update["buyer"] when non-empty, else tmp["buyer"]; the rule is
+// skipped when the buyer is empty or tmp["site"] differs from the rule's
+// "site".
+//   - "省级简称拼接": applies when the buyer matches ruleReg1 or ruleReg2, or
+//     when the part before the suffix matched by ruleReg3 is a key of
+//     ul.S_ProvinceDict. Result: name + buyer.
+//   - "市级全称拼接": the part before the ruleReg3 suffix is looked up in
+//     ul.S_CityDict, retrying with "市" appended only when the first lookup
+//     yields nil. It applies only when exactly one city is found. Result:
+//     name + buyer, or name + P_Name + "有限公司" + buyer when "prefix" is 1.
+func RuleInfo5(tmp map[string]interface{}, update map[string]interface{}, info map[string]interface{}) {
+	buyer := qu.ObjToString(update["buyer"])
+	if buyer == "" {
+		buyer = qu.ObjToString(tmp["buyer"])
+	}
+	if buyer == "" || qu.ObjToString(tmp["site"]) != qu.ObjToString(info["site"]) {
+		return
+	}
+	name := qu.ObjToString(info["name"])
+
+	switch qu.ObjToString(info["rule"]) {
+	case "省级简称拼接":
+		matched := ruleReg1.MatchString(buyer) || ruleReg2.MatchString(buyer)
+		if !matched {
+			if suffix := ruleReg3.FindString(buyer); suffix != "" {
+				matched = ul.S_ProvinceDict[strings.TrimSuffix(buyer, suffix)] != nil
+			}
+		}
+		if matched {
+			update["buyer"] = name + buyer
+		}
+	case "市级全称拼接":
+		suffix := ruleReg3.FindString(buyer)
+		if suffix == "" {
+			return
+		}
+		stem := strings.TrimSuffix(buyer, suffix)
+		citys := ul.S_CityDict[stem]
+		if citys == nil {
+			// Retry with the "市" suffix only when the bare name is unknown.
+			citys = ul.S_CityDict[stem+"市"]
+		}
+		if len(citys) != 1 {
+			return // unknown or ambiguous city name
+		}
+		if qu.IntAll(info["prefix"]) == 1 {
+			update["buyer"] = name + citys[0].P_Name + "有限公司" + buyer
+		} else {
+			update["buyer"] = name + buyer
+		}
+	}
+}
+
+/*******************************************
+*******************************************
+*******************************************
+*******************************************
+*******************************************/
+// getVerifyBidDiscount checks the bid amount against the bid discount rate.
+//
+// Both values come from tmp and are overridden by positive values in update.
+// When both are positive and equal, "bidamount" is deleted from update and the
+// function reports whether tmp itself holds a positive "bidamount" (the caller
+// uses this to decide whether the stored field must be unset as well). In
+// every other case it returns false and changes nothing.
+func getVerifyBidDiscount(tmp map[string]interface{}, update map[string]interface{}) bool {
+	amount := qu.Float64All(tmp["bidamount"])
+	if v := qu.Float64All(update["bidamount"]); v > 0 {
+		amount = v
+	}
+	discount := qu.Float64All(tmp["biddiscount"])
+	if v := qu.Float64All(update["biddiscount"]); v > 0 {
+		discount = v
+	}
+
+	suspicious := discount > 0 && amount > 0 && amount == discount
+	if !suspicious {
+		return false
+	}
+	// Drop the amount from the pending update.
+	delete(update, "bidamount")
+	return qu.Float64All(tmp["bidamount"]) > 0
+}
+
+// getTheBestUnit resolves clashes between buyer, winner and agency.
+//
+// Each of "buyer", "s_winner" and "agency" is read from update_info, falling
+// back to tmp when empty there.
+//   - If buyer and winner are the same non-empty name, the name is kept only
+//     on one side: a name containing "公司" clears update_info["buyer"],
+//     otherwise "s_winner" and "winner" are cleared.
+//   - If a non-empty agency equals the buyer or the winner (as read before
+//     any clearing above), update_info["agency"] is cleared.
+func getTheBestUnit(tmp map[string]interface{}, update_info map[string]interface{}) {
+	// resolve prefers the pending update and falls back to the stored document.
+	resolve := func(key string) string {
+		if v := qu.ObjToString(update_info[key]); v != "" {
+			return v
+		}
+		return qu.ObjToString(tmp[key])
+	}
+	buyer, winner, agency := resolve("buyer"), resolve("s_winner"), resolve("agency")
+
+	if buyer != "" && buyer == winner {
+		if strings.Contains(buyer, "公司") {
+			update_info["buyer"] = ""
+		} else {
+			update_info["s_winner"] = ""
+			update_info["winner"] = ""
+		}
+	}
+	if agency != "" && (agency == buyer || agency == winner) {
+		update_info["agency"] = ""
+	}
+}
+
+// getScientificNotation corrects amounts using tmp["e_bidamount"]
+// (presumably an amount parsed from scientific notation — TODO confirm).
+//
+// Nothing happens unless tmp["e_bidamount"] is positive. Subtype, bid amount
+// and budget are read from tmp and overridden by non-empty / positive values
+// in update. For the subtypes "合同", "中标", "成交" and "单一" a bid amount
+// strictly between 0 and 50 is replaced in update by e_bidamount; for every
+// other subtype the same is done for the budget.
+func getScientificNotation(tmp map[string]interface{}, update map[string]interface{}) {
+	replacement := qu.Float64All(tmp["e_bidamount"])
+	if replacement <= 0.0 {
+		return
+	}
+
+	subtype := qu.ObjToString(tmp["subtype"])
+	if v := qu.ObjToString(update["subtype"]); v != "" {
+		subtype = v
+	}
+	bidamount := qu.Float64All(tmp["bidamount"])
+	if v := qu.Float64All(update["bidamount"]); v > 0.0 {
+		bidamount = v
+	}
+	budget := qu.Float64All(tmp["budget"])
+	if v := qu.Float64All(update["budget"]); v > 0.0 {
+		budget = v
+	}
+
+	switch subtype {
+	case "合同", "中标", "成交", "单一":
+		// Result-type notices: fix the bid amount.
+		if bidamount > 0.0 && bidamount < 50.0 {
+			update["bidamount"] = replacement
+		}
+	default:
+		if budget > 0.0 && budget < 50.0 {
+			update["budget"] = replacement
+		}
+	}
+}

+ 30 - 0
tool/service.go

@@ -0,0 +1,30 @@
+package tool
+
+import (
+	"data_ai/clean"
+	"encoding/json"
+	log "github.com/donnie4w/go-logger/logger"
+	"io/ioutil"
+	"net/http"
+	"sync"
+)
+
+var datalock sync.Mutex
+
+// InitCleanDeepSeekService registers the "/clean/deepseek" handler on the
+// default HTTP mux. The handler decodes the request body as a JSON object,
+// passes it to clean.CleanDeepSeekInfo and writes the result back as JSON.
+//
+// Requests are serialised through datalock. A body that cannot be read or
+// decoded is logged and processing continues with whatever was decoded (an
+// empty map at worst); a result that cannot be encoded yields a 500 response.
+func InitCleanDeepSeekService() {
+	http.HandleFunc("/clean/deepseek", func(w http.ResponseWriter, r *http.Request) {
+		datalock.Lock()
+		// Deferred so that a panic inside the handler (recovered by net/http)
+		// cannot leave the mutex locked and block every later request.
+		defer datalock.Unlock()
+
+		data, err := ioutil.ReadAll(r.Body)
+		if err != nil {
+			log.Debug(err)
+		}
+		tmp := map[string]interface{}{}
+		if err := json.Unmarshal(data, &tmp); err != nil {
+			log.Debug(err)
+		}
+		info := clean.CleanDeepSeekInfo(tmp, map[string]interface{}{})
+		res, err := json.Marshal(info)
+		if err != nil {
+			log.Debug(err)
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Write(res)
+	})
+	log.Debug("初始化接口完毕...")
+}

+ 54 - 131
tool/tool.go

@@ -4,9 +4,6 @@ import (
 	"data_ai/extract"
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
-	"github.com/google/uuid"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"strings"
 	"sync"
 	"time"
 )
@@ -33,29 +30,67 @@ func StartToolInfo() {
 				<-pool_mgo
 				wg_mgo.Done()
 			}()
-			u_id := ul.BsonTOStringId(tmp["_id"])
-			data := extract.ResolveInfo(tmp)
-			if len(data) > 0 || u_id == "" {
-				tmp["ai_zhipu"] = data
+			u_id, s_id := "", false
+			if sid, bid := tmp["_id"].(string); bid {
+				s_id = true
+				u_id = sid
+			} else {
+				u_id = ul.BsonTOStringId(tmp["_id"])
+			}
+			data := extract.ResolveInfo(tmp, u_id)
+			if len(data) > 0 && u_id != "" {
+				//更新-
 				update_info := make(map[string]interface{}, 0)
+				update_info["ai_zhipu"] = data
+				tmp["ai_zhipu"] = data
+				//是否删除
 				is_unset := ul.ChooseCheckDataAI(tmp, &update_info)
 				if update_info["com_package"] == nil { //构建单包信息···
-					com_package := CreatSingleFieldInfo(tmp, update_info)
+					com_package := ul.CreatSingleFieldInfo(tmp, update_info)
 					update_info["com_package"] = com_package
 				}
-				update_info["ai_zhipu"] = data
-				//清洗与记录
+				//补充规则大全····
+				getReplenishRules(tmp, update_info)
+				//最终单位修正与选取
+				getTheBestUnit(tmp, update_info)
+				//校验金额与折扣率(删除金额)
+				un_bid := getVerifyBidDiscount(tmp, update_info)
+				//校验是否科学计数法影响金额
+				getScientificNotation(tmp, update_info)
+
+				//最终更新
 				if len(update_info) > 0 {
-					//$set
-					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-						"$set": update_info,
-					})
+					if s_id {
+						ul.SourceMgo.UpdateStrId(ul.Ext_Name, u_id, map[string]interface{}{
+							"$set": update_info,
+						})
+					} else {
+						ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+							"$set": update_info,
+						})
+					}
 				}
 				if is_unset {
-					//"$unset"
-					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-						"$unset": ul.Unset_Check,
-					})
+					if s_id {
+						ul.SourceMgo.UpdateStrId(ul.Ext_Name, u_id, map[string]interface{}{
+							"$unset": ul.Unset_Check,
+						})
+					} else {
+						ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+							"$unset": ul.Unset_Check,
+						})
+					}
+				}
+				if un_bid && !is_unset {
+					if s_id {
+						ul.SourceMgo.UpdateStrId(ul.Ext_Name, u_id, map[string]interface{}{
+							"$unset": map[string]interface{}{"bidamount": 1},
+						})
+					} else {
+						ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+							"$unset": map[string]interface{}{"bidamount": 1},
+						})
+					}
 				}
 			}
 		}(tmp)
@@ -75,121 +110,9 @@ func TestToolSingleInfo(name string, tmpid string) {
 		return
 	}
 	u_id := ul.BsonTOStringId(tmp["_id"])
-	data := extract.ResolveInfo(tmp)
+	data := extract.ResolveInfo(tmp, u_id)
 	if len(data) > 0 || u_id == "" {
 		tmp["ai_zhipu"] = data
-		update_info := make(map[string]interface{}, 0)
-		is_unset := ul.ChooseCheckDataAI(tmp, &update_info)
-		if update_info["com_package"] == nil { //构建单包信息···
-			com_package := CreatSingleFieldInfo(tmp, update_info)
-			update_info["com_package"] = com_package
-		}
-		update_info["ai_zhipu"] = data
-		//清洗与记录
-		if len(update_info) > 0 {
-
-		}
-		if is_unset {
-
-		}
-		log.Debug(update_info)
 	}
 	log.Debug("耗时···", time.Now().Unix()-now)
 }
-
-// 工具更新程序
-//func StartToolUpdateInfo() {
-//	log.Debug("工具开始大模型修正数据······")
-//	q := map[string]interface{}{}
-//	pool_mgo := make(chan bool, ul.Reading)
-//	wg_mgo := &sync.WaitGroup{}
-//	sess := ul.SourceMgo.GetMgoConn()
-//	defer ul.SourceMgo.DestoryMongoConn(sess)
-//	total, isok := 0, 0
-//	it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
-//	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-//		if total%100 == 0 {
-//			log.Debug("cur index ", total)
-//		}
-//		isok++
-//		pool_mgo <- true
-//		wg_mgo.Add(1)
-//		go func(tmp map[string]interface{}) {
-//			defer func() {
-//				<-pool_mgo
-//				wg_mgo.Done()
-//			}()
-//			u_id := ul.BsonTOStringId(tmp["_id"])
-//			b_id := qu.ObjToString(tmp["id"])
-//			data := ul.BidMgo.FindById("bidding", b_id)
-//			if len(data) > 0 && u_id != "" && b_id != "" && data["ai_zhipu"] != nil {
-//				ai_zhipu := qu.ObjToMap(data["ai_zhipu"])
-//				tmp["ai_zhipu"] = ai_zhipu
-//				update_info := make(map[string]interface{}, 0)
-//				is_unset := ul.ChooseCheckDataAI(tmp, &update_info)
-//				update_info["ai_zhipu"] = ai_zhipu
-//				//清洗与记录
-//				if len(update_info) > 0 { //$set
-//					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-//						"$set": update_info,
-//					})
-//				}
-//				if is_unset { //"$unset"
-//					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-//						"$unset": ul.Unset_Check,
-//					})
-//				}
-//			}
-//		}(tmp)
-//		tmp = make(map[string]interface{})
-//	}
-//	wg_mgo.Wait()
-//	log.Debug("ai update is over ...")
-//}
-
-func CreatSingleFieldInfo(tmp map[string]interface{}, update_info map[string]interface{}) []map[string]interface{} {
-	pkgArr := []map[string]interface{}{}
-	com_package := map[string]interface{}{}
-	package_id := uuid.New().String()
-	package_id = strings.ReplaceAll(package_id, "-", "")
-	com_package["package_id"] = package_id
-	com_package["name"] = qu.ObjToString(tmp["projectname"])
-	com_package["projectcode"] = qu.ObjToString(tmp["projectcode"])
-	com_package["packagecode"] = qu.ObjToString(tmp["packagecode"])
-	com_package["contractcode"] = qu.ObjToString(tmp["contractcode"])
-	if update_info["budget"] != nil {
-		com_package["budget"] = update_info["budget"]
-	} else {
-		if tmp["budget"] != nil {
-			com_package["budget"] = tmp["budget"]
-		}
-	}
-	toptype := qu.ObjToString(update_info["toptype"])
-	subtype := qu.ObjToString(update_info["subtype"])
-	if toptype == "" {
-		toptype = qu.ObjToString(tmp["toptype"])
-	}
-	if subtype == "" {
-		subtype = qu.ObjToString(tmp["subtype"])
-	}
-	//中标类
-	if toptype == "结果" || toptype == "其它" || subtype == "单一" {
-		if update_info["bidamount"] != nil {
-			com_package["bidamount"] = update_info["bidamount"]
-		} else {
-			if tmp["bidamount"] != nil {
-				com_package["bidamount"] = tmp["bidamount"]
-			}
-		}
-
-		if update_info["winner"] != nil {
-			com_package["winner"] = update_info["winner"]
-		} else {
-			if tmp["winner"] != nil {
-				com_package["winner"] = tmp["winner"]
-			}
-		}
-	}
-	pkgArr = append(pkgArr, com_package)
-	return pkgArr
-}

+ 0 - 2
udp/udprocess.go

@@ -94,11 +94,9 @@ func ProcessUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 			}
 		}
 	case mu.OP_NOOP: //下个节点回应
-		nextlock.Lock()
 		str := string(data)
 		udptaskmap.Delete(str)
 		log.Debug("其他节点回应:", str)
-		nextlock.Unlock()
 	}
 }
 

+ 18 - 16
ul/attr.go

@@ -3,22 +3,24 @@ package ul
 import "regexp"
 
 var (
-	SourceMgo, QyxyMgo               *MongodbSim
-	BidMgo, PyMgo                    *MongodbSim
-	SysConfig                        map[string]interface{}
-	ToolConfig                       map[string]interface{}
-	Bid_Name, Ext_Name               string
-	TimeLayout                       = "2006-01-02 15:04:05"
-	Url                              = "https://www.jianyu360.cn/article/content/%s.html"
-	CleanResultReg                   = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
-	SaveResultReg                    = regexp.MustCompile("([{].*[}])")
-	RulesPname                       = []*ExtReg{}
-	IsTool, IsFull, IsLocal          bool
-	FlashModel                       string
-	SpecialTextReg                   = regexp.MustCompile("(原网页|见附件|下载附件|(查看|访问)(源网|原网)|详情请下载附件!|详情请访问原网页!)")
-	Escape                           = regexp.MustCompile("(json|`|\n|\\\\)")
-	Reading, MaxLen, MaxByte, MaxUdp int
-	ModelType                        string
+	SourceMgo, QyxyMgo                          *MongodbSim
+	BidMgo, PyMgo                               *MongodbSim
+	SysConfig                                   map[string]interface{}
+	ToolConfig                                  map[string]interface{}
+	Bid_Name, Ext_Name                          string
+	TimeLayout                                  = "2006-01-02 15:04:05"
+	Url                                         = "https://www.jianyu360.cn/article/content/%s.html"
+	CleanResultReg                              = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
+	SaveResultReg                               = regexp.MustCompile("([{].*[}])")
+	RulesPname                                  = []*ExtReg{}
+	IsTool, IsFull, IsLocal, IsService, IsFlash bool
+	FlashModel                                  string
+	SpecialTextReg                              = regexp.MustCompile("(原网页|见附件|下载附件|(查看|访问)(源网|原网)|详情请下载附件!|详情请访问原网页!)")
+	Escape                                      = regexp.MustCompile("(json|`|\n|\\\\)")
+	Reading, MaxLen, MaxByte, MaxUdp            int
+	ModelType                                   string
+	// ReplensihRules is the full set of supplementary rules; InitToolRules fills it with the "bidding_bc" documents whose isopen is 1.
+	ReplensihRules []map[string]interface{}
 )
 
 type ExtReg struct {

+ 48 - 0
ul/global.go

@@ -1,6 +1,7 @@
 package ul
 
 import (
+	"github.com/google/uuid"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"strings"
 )
@@ -472,3 +473,50 @@ func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]inte
 	}
 	return toptype_ai, subtype_ai
 }
+
+// CreatSingleFieldInfo builds the package list for a record that has no
+// package information yet: a one-element slice holding a single package map.
+//
+// The package gets a fresh dash-less UUID as "package_id", and its name and
+// codes are copied from tmp. "budget" is taken from update_info when present
+// there, otherwise from tmp. "bidamount" and "winner" are filled the same way,
+// but only when the effective toptype is "结果" or "其它", or the effective
+// subtype is "单一" (the effective type is update_info's value, falling back
+// to tmp's when that is empty).
+func CreatSingleFieldInfo(tmp map[string]interface{}, update_info map[string]interface{}) []map[string]interface{} {
+	pkg := map[string]interface{}{
+		"package_id":   strings.ReplaceAll(uuid.New().String(), "-", ""),
+		"name":         qu.ObjToString(tmp["projectname"]),
+		"projectcode":  qu.ObjToString(tmp["projectcode"]),
+		"packagecode":  qu.ObjToString(tmp["packagecode"]),
+		"contractcode": qu.ObjToString(tmp["contractcode"]),
+	}
+	// prefer copies key into pkg from update_info, falling back to tmp; when
+	// neither map has a non-nil value the key is left out of pkg.
+	prefer := func(key string) {
+		if v := update_info[key]; v != nil {
+			pkg[key] = v
+			return
+		}
+		if v := tmp[key]; v != nil {
+			pkg[key] = v
+		}
+	}
+	// effective returns the string form of update_info[key], or of tmp[key]
+	// when the former is empty.
+	effective := func(key string) string {
+		if s := qu.ObjToString(update_info[key]); s != "" {
+			return s
+		}
+		return qu.ObjToString(tmp[key])
+	}
+
+	prefer("budget")
+	toptype, subtype := effective("toptype"), effective("subtype")
+	// Award-type notices also carry the bid amount and the winner.
+	switch {
+	case toptype == "结果", toptype == "其它", subtype == "单一":
+		prefer("bidamount")
+		prefer("winner")
+	}
+	return []map[string]interface{}{pkg}
+}

+ 44 - 20
ul/init.go

@@ -9,14 +9,16 @@ import (
 	"io/ioutil"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
+	"net/url"
+	"strings"
 	"time"
 )
 
 // InitGlobalVar reads the configuration into SysConfig and then runs the
 // package initialisers. In this hunk the initPCD call is moved so that it
 // runs after initOther instead of before it.
 func InitGlobalVar() {
 	qu.ReadConfig(&SysConfig) // load the configuration file
 	initMgo()
-	initPCD()
 	initOther()
+	initPCD()
 }
 func InitToolVar() {
 	qu.ReadConfig("./tool.json", &ToolConfig) //加载配置文件
@@ -24,8 +26,9 @@ func InitToolVar() {
 		log.Fatal("缺少tool.json文件")
 	}
 	initToolMgo()
+	initToolOther()
 	initPCD()
-	initOther()
+	InitToolRules()
 }
 
 // 其它属性
@@ -38,6 +41,16 @@ func initOther() {
 	MaxByte = qu.IntAllDef(SysConfig["byte_max"], 50000)
 }
 
+// initToolOther sets the remaining package-level settings (model names and
+// size limits) from ToolConfig, the tool-mode counterpart of initOther.
+func initToolOther() {
+	cfg := ToolConfig
+	// Model selection, e.g. glm-4-air or glm-4-flash.
+	FlashModel, ModelType = qu.ObjToString(cfg["flash_model"]), qu.ObjToString(cfg["model_type"])
+	// Limits; the second argument of IntAllDef is presumably the fallback used
+	// when the key is missing -- confirm against common_utils.
+	MaxUdp = qu.IntAllDef(cfg["udp_max"], 10000)
+	MaxLen = qu.IntAllDef(cfg["len_max"], 20000)
+	MaxByte = qu.IntAllDef(cfg["byte_max"], 50000)
+}
+
 // 初始化mgo
 func initMgo() {
 	Reading = qu.IntAll(SysConfig["reading"])
@@ -248,22 +261,15 @@ func initPCD() {
 }
 
 // 加载规则...
-func InitPnameRules() {
-	//q := map[string]interface{}{"s_field": "projectname", "pid": "5cdd1c70e138234848c1d776", "delete": false}
-	//rs, _ := ExtMgo.Find("rule_logicback", q, bson.M{"_id": 1}, nil)
-	//for _, v := range rs {
-	//	s_rule := qu.ObjToString(v["s_rule"])
-	//	tmp := strings.Split(s_rule, "__")
-	//	pattern := tmp[0]
-	//	if len(tmp) == 2 {
-	//		reg := &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
-	//		RulesPname = append(RulesPname, reg)
-	//	} else {
-	//		reg := &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
-	//		RulesPname = append(RulesPname, reg)
-	//	}
-	//}
-	//log.Debug("加载项目名称规则...", len(RulesPname))
+// InitToolRules resets ReplensihRules and refills it from the "bidding_bc"
+// collection read through BidMgo, keeping only the documents whose "isopen"
+// field converts to 1. It then logs how many rules were kept. The second
+// value returned by Find is ignored.
+func InitToolRules() {
+	ReplensihRules = []map[string]interface{}{}
+	docs, _ := BidMgo.Find("bidding_bc", map[string]interface{}{}, nil, nil)
+	for _, doc := range docs {
+		if qu.IntAll(doc["isopen"]) != 1 {
+			continue // only documents with isopen == 1 are kept
+		}
+		ReplensihRules = append(ReplensihRules, doc)
+	}
+	log.Debug("补充校验规则加载完毕···", len(ReplensihRules))
+}
 
 func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
@@ -286,8 +292,7 @@ func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 func PostMarkDownText(html string) string {
 	url := "http://172.17.162.35:18811/md"
 	if IsLocal {
-		url = "http://172.17.0.11:8888/md"
-		//url = "http://192.168.3.13:8888/md"
+		url = "http://172.17.0.19:8888/md"
 	}
 	// 创建请求数据
 	jsonData, err := json.Marshal(map[string]interface{}{"html": html})
@@ -357,3 +362,22 @@ func PostPurchasingList(data map[string]interface{}) map[string]interface{} {
 	}
 	return info
 }
+
+// PostDetailContentHtmlText posts bucket_id and object_name as a URL-encoded
+// form to the ossservice/biddetail endpoint and returns the raw response body
+// as a string.
+//
+// It returns "" when the request cannot be built or sent (including the
+// 5-second client timeout), when the endpoint answers with a status other
+// than 200, or when the body cannot be read.
+func PostDetailContentHtmlText(bucket_id string, object_name string) string {
+	//api_url := "http://172.17.162.27:18011/ossservice/biddetail"
+	api_url := "https://www.jianyu360.cn/ossservice/biddetail"
+	data := url.Values{"bucket_id": {bucket_id}, "object_name": {object_name}}
+	req, err := http.NewRequest("POST", api_url, strings.NewReader(data.Encode()))
+	if err != nil {
+		log.Debug("biddetail request build failed:", err)
+		return ""
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	client := &http.Client{Timeout: 5 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		// resp is nil when Do fails; touching resp.Body here would panic.
+		log.Debug("biddetail request failed:", err)
+		return ""
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return ""
+	}
+	// Read the response body.
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		log.Debug("biddetail response read failed:", err)
+		return ""
+	}
+	return string(body)
+}