ソースを参照

调整ai识别,便捷

zhengkun 11 ヶ月 前
コミット
92c3dea816
16 ファイル変更921 行追加27 行削除
  1. 2 0
      ai/ai_zhipu.go
  2. 2 2
      clean/c_all.go
  3. 9 2
      clean/c_buyer.go
  4. 14 2
      clean/c_pcode.go
  5. 1 0
      clean/c_pname.go
  6. 5 10
      config.json
  7. 32 3
      extract/extract.go
  8. 395 2
      extract/test.go
  9. 21 4
      main.go
  10. 31 0
      prompt/prompt_buyer.go
  11. 28 0
      tool.json
  12. 296 0
      tool/tool.go
  13. 0 1
      udp/udprocess.go
  14. 3 0
      ul/attr.go
  15. 74 1
      ul/init.go
  16. 8 0
      ul/mgo.go

+ 2 - 0
ai/ai_zhipu.go

@@ -41,6 +41,7 @@ func PostZhiPuAI(content string) map[string]interface{} {
 	req.Header.Set("Authorization", "Bearer 3d84d30b7ab4c94dbf71853cb7e44719.hLLS4CA2MqVQs6kR")
 	// 发起请求 14543f0d69d6987c8782fd846e164f26.DXaoS1axLaMP892a
 	client := &http.Client{}
+	//client.Timeout = 10 * time.Second
 	resp, err := client.Do(req)
 	if err != nil {
 		log.Debug("Error: %s", err)
@@ -102,6 +103,7 @@ func PostClassZhiPuAI(content string) map[string]interface{} {
 	req.Header.Set("Authorization", "Bearer 3d84d30b7ab4c94dbf71853cb7e44719.hLLS4CA2MqVQs6kR")
 	// 发起请求 14543f0d69d6987c8782fd846e164f26.DXaoS1axLaMP892a
 	client := &http.Client{}
+	//client.Timeout = 10 * time.Second
 	resp, err := client.Do(req)
 	if err != nil {
 		log.Debug("Error: %s", err)

+ 2 - 2
clean/c_all.go

@@ -12,7 +12,7 @@ var (
 	pcodeReg2 = regexp.MustCompile("([\\*]+)")
 )
 
-func CleanFieldInfo(zhipu map[string]interface{}) map[string]interface{} {
+func CleanFieldInfo(zhipu map[string]interface{}, fns []string) map[string]interface{} {
 	data := map[string]interface{}{}
 	if s_area, s_city := CleanRegion(qu.ObjToString(zhipu["省份"]), qu.ObjToString(zhipu["城市"])); s_area != "" || s_city != "" {
 		data["s_area"] = s_area
@@ -24,7 +24,7 @@ func CleanFieldInfo(zhipu map[string]interface{}) map[string]interface{} {
 	if s_pname := CleanPname(qu.ObjToString(zhipu["项目名称"])); s_pname != "" {
 		data["s_projectname"] = s_pname
 	}
-	if s_pcode := CleanPcode(qu.ObjToString(zhipu["项目编号"])); s_pcode != "" {
+	if s_pcode := CleanPcode(qu.ObjToString(zhipu["项目编号"]), fns); s_pcode != "" {
 		data["s_projectcode"] = s_pcode
 	}
 	if s_budget := CleanMoney(zhipu["预算金额"]); s_budget > 0.0 && s_budget < 1000000000.0 {

+ 9 - 2
clean/c_buyer.go

@@ -1,6 +1,9 @@
 package clean
 
-import "unicode/utf8"
+import (
+	"strings"
+	"unicode/utf8"
+)
 
 // 清洗采购单位
 func CleanBuyer(buyer string) string {
@@ -8,7 +11,11 @@ func CleanBuyer(buyer string) string {
 		return ""
 	}
 	buyer = fieldReg1.ReplaceAllString(buyer, "")
-	if utf8.RuneCountInString(buyer) < 3 {
+	//中文括弧
+	buyer = strings.ReplaceAll(buyer, "(", "(")
+	buyer = strings.ReplaceAll(buyer, ")", ")")
+
+	if utf8.RuneCountInString(buyer) < 4 {
 		buyer = ""
 	}
 	return buyer

+ 14 - 2
clean/c_pcode.go

@@ -1,9 +1,12 @@
 package clean
 
-import "unicode/utf8"
+import (
+	"strings"
+	"unicode/utf8"
+)
 
 // 清洗项目编号
-func CleanPcode(pcode string) string {
+func CleanPcode(pcode string, fns []string) string {
 	if pcode == "无" {
 		return ""
 	}
@@ -13,5 +16,14 @@ func CleanPcode(pcode string) string {
 	if utf8.RuneCountInString(pcode) < 5 {
 		pcode = ""
 	}
+
+	//校验与附件名字否是一致-舍弃
+	for _, v := range fns {
+		if utf8.RuneCountInString(v) >= utf8.RuneCountInString(pcode) {
+			if strings.Contains(v, pcode) {
+				return ""
+			}
+		}
+	}
 	return pcode
 }

+ 1 - 0
clean/c_pname.go

@@ -29,5 +29,6 @@ func CleanPname(pname string) string {
 	if utf8.RuneCountInString(pname) < 5 {
 		pname = ""
 	}
+
 	return pname
 }

+ 5 - 10
config.json

@@ -2,12 +2,13 @@
   "udpport": ":1791",
   "bid_name": "bidding",
   "ext_name": "result_20220218",
+  "reading": 500,
   "smail": {
     "to": "zhengkun@topnet.net.cn,xuzhiheng@topnet.net.cn",
     "api": "http://172.17.145.179:19281/_send/_mail"
   },
   "s_mgo": {
-    "local": false,
+    "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
     "dbname" : "qfw_ai",
@@ -15,7 +16,7 @@
     "password": "zk@123123"
   },
   "b_mgo": {
-    "local": false,
+    "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
     "dbname" : "qfw",
@@ -23,18 +24,12 @@
     "password": "zk@123123"
   },
   "qy_mgo": {
-    "local": false,
+    "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
     "dbname" : "mixdata",
     "username": "zhengkun",
     "password": "zk@123123"
   },
-  "nextNode": [
-    {
-      "addr": "127.0.0.1",
-      "port": 1792,
-      "stype": ""
-    }
-  ]
+  "nextNode": []
 }

+ 32 - 3
extract/extract.go

@@ -22,7 +22,7 @@ func ExtractFieldInfo(sid string, eid string) {
 	dict := ConfrimExtractInfo(q)
 	log.Debug("查询语句...", q, "~", len(dict))
 
-	pool_mgo := make(chan bool, 90)
+	pool_mgo := make(chan bool, ul.Reading)
 	wg_mgo := &sync.WaitGroup{}
 
 	sess := ul.SourceMgo.GetMgoConn()
@@ -64,12 +64,16 @@ func ExtractFieldInfo(sid string, eid string) {
 // 获取处理数据...
 func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	detail := qu.ObjToString(v["detail"])
+	filetext := qu.ObjToString(v["filetext"]) //此处为附件信息···
 	title := qu.ObjToString(v["title"])
+	fns := getpnsinfo(v) //获取附件名字
 	f_data := map[string]interface{}{}
+	if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
+		detail = filetext
+	}
 	if utf8.RuneCountInString(detail) < 100 {
 		return f_data
 	}
-
 	//获取外围字段数据
 	f_info := prompt.AcquireExtractFieldInfo(detail)
 	//分包判断-获取信息
@@ -93,8 +97,16 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	f_info["s_subtype"] = s_subtype
 
 	//字段清洗
-	f_data = clean.CleanFieldInfo(f_info)
+	f_data = clean.CleanFieldInfo(f_info, fns)
 
+	//对于某些字段进行二级校验
+	if s_buyer := qu.ObjToString(f_data["s_buyer"]); s_buyer != "" {
+		if zp_buyer := prompt.AcquireBuyerInfo(s_buyer); zp_buyer["实体单位"] != nil {
+			if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
+				f_data["s_buyer"] = ns_buyer
+			}
+		}
+	}
 	return f_data
 }
 
@@ -117,6 +129,23 @@ func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
 	return dict
 }
 
+// getpnsinfo collects every non-empty "filename" found in the entries of
+// tmp["projectinfo"]["attachments"]. The result is always a non-nil slice;
+// its order follows Go map iteration and is therefore not stable.
+func getpnsinfo(tmp map[string]interface{}) []string {
+	names := []string{}
+	projectinfo := qu.ObjToMap(tmp["projectinfo"])
+	if projectinfo == nil {
+		return names
+	}
+	attachments := qu.ObjToMap((*projectinfo)["attachments"])
+	if attachments == nil {
+		return names
+	}
+	for _, item := range *attachments {
+		info := qu.ObjToMap(item)
+		if info == nil {
+			continue
+		}
+		if filename := qu.ObjToString((*info)["filename"]); filename != "" {
+			names = append(names, filename)
+		}
+	}
+	return names
+}
+
 // 暂时不启用...无限重试
 func RunResetUpdateFieldInfo(arr []string, name string, s_name string) {
 	//log.Debug("开始重置更新...", len(arr))

+ 395 - 2
extract/test.go

@@ -1,6 +1,7 @@
 package extract
 
 import (
+	"data_ai/clean"
 	"data_ai/prompt"
 	"data_ai/ul"
 	"fmt"
@@ -8,15 +9,15 @@ import (
 	new_xlsx "github.com/tealeg/xlsx/v3"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"os"
+	"strings"
 	"sync"
 	"unicode/utf8"
 )
 
 func TestSingleFieldInfo(name string, tmpid string) {
-	tmp := ul.SourceMgo.FindById(name, tmpid)
+	tmp := ul.BidMgo.FindById(name, tmpid)
 	if len(tmp) == 0 || tmp == nil {
 		log.Debug("未查询到数据...", tmpid)
-
 		return
 	}
 	data := ResolveInfo(tmp)
@@ -26,6 +27,398 @@ func TestSingleFieldInfo(name string, tmpid string) {
 	}
 }
 
+// TestFullJinOrCodeInfo scans result_20220218, repairs budget / bidamount / projectcode in place, and saves every applied change set into zzzzz_kkk_uc_0907.
+func TestFullJinOrCodeInfo() {
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 20) // bounds the number of records processed concurrently to 20
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total := 0
+	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			isPcode, update := false, map[string]interface{}{}
+			ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
+			o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0 // values held in ext_ai_record, zero when it is absent
+			if ext_ai_record != nil {
+				o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
+				o_budget = qu.Float64All((*ext_ai_record)["budget"])
+				o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
+			}
+
+			if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
+				if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { // the two amounts differ by exactly a factor of 10000
+					update["budget"] = filterAmount(r_budget, o_budget)
+				}
+			}
+			if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+				if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 { // same factor-of-10000 test for the bid amount
+					update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
+				}
+			}
+			// project code: fall back to the recorded value when the current one is just part of an attachment file name
+			if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
+				if o_projectcode != projectcode {
+					if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
+						fns := getpnsinfo(data) // attachment file names
+						for _, v := range fns {
+							if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
+								if strings.Contains(v, projectcode) {
+									isPcode = true
+									break
+								}
+							}
+						}
+						if isPcode {
+							update["projectcode"] = o_projectcode
+						}
+					}
+				}
+			}
+			if len(update) > 0 {
+				// update the extraction collection
+				ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
+					"$set": update,
+				})
+				// save the change set into the pending-repair collection
+				update["_id"] = tmp["_id"]
+				ul.SourceMgo.Save("zzzzz_kkk_uc_0907", update)
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // fresh map: the previous one is now owned by the goroutine
+	}
+	wg_mgo.Wait()
+	log.Debug("repair ai is over ...")
+}
+
+// TestRepairJinOrCodeInfo repairs budget, bidamount and projectcode of every document in zktest_repeat_new, using the values stored under ext_ai_record.
+func TestRepairJinOrCodeInfo() {
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 20) // bounds the number of records processed concurrently to 20
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total := 0
+	it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			isPcode, update := false, map[string]interface{}{}
+			ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
+			o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0 // values held in ext_ai_record, zero when it is absent
+			if ext_ai_record != nil {
+				o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
+				o_budget = qu.Float64All((*ext_ai_record)["budget"])
+				o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
+			}
+
+			if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
+				if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { // the two amounts differ by exactly a factor of 10000
+					update["budget"] = filterAmount(r_budget, o_budget)
+				}
+			}
+			if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+				if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 { // same factor-of-10000 test for the bid amount
+					update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
+				}
+			}
+			// project code: fall back to the recorded value when the current one is just part of an attachment file name
+			if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
+				if o_projectcode != projectcode {
+					if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
+						fns := getpnsinfo(data) // attachment file names
+						for _, v := range fns {
+							if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
+								if strings.Contains(v, projectcode) {
+									isPcode = true
+									break
+								}
+							}
+						}
+						if isPcode {
+							update["projectcode"] = o_projectcode
+						}
+					}
+				}
+			}
+			if len(update) > 0 {
+				ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
+					"$set": update,
+				})
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // fresh map: the previous one is now owned by the goroutine
+	}
+	wg_mgo.Wait()
+	log.Debug("repair ai is over ...")
+}
+
+// filterAmount chooses between two candidate amounts: it returns the larger
+// one unless that value exceeds 100,000,000, in which case the smaller one is
+// returned. When neither value is greater than the other, f1 is returned.
+func filterAmount(f1 float64, f2 float64) float64 {
+	larger, smaller := f1, f2
+	switch {
+	case f1 > f2:
+		// already ordered
+	case f1 < f2:
+		larger, smaller = f2, f1
+	default:
+		return f1
+	}
+	if larger > 100000000.0 {
+		return smaller
+	}
+	return larger
+}
+
+func TestExportJinErInfo() { // exports documents of result_20220218 whose amounts differ from ext_ai_record by a factor of 10000 into zktest_zzzzzkkk_0903
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 20) // bounds the number of records processed concurrently to 20
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total, isok := 0, 0
+	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		isok++ // counts every scanned document, not only the exported ones
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			budget := qu.Float64All(tmp["budget"])
+			bidamount := qu.Float64All(tmp["bidamount"])
+			saveinfo := map[string]interface{}{}
+			if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
+				ext_budget := qu.Float64All((*ext_ai_record)["budget"])
+				ext_bidamount := qu.Float64All((*ext_ai_record)["bidamount"])
+				if budget > 0.0 && ext_budget > 0.0 {
+					if budget/ext_budget == 10000.0 || ext_budget/budget == 10000.0 { // exact factor-of-10000 difference
+						saveinfo["budget"] = budget
+						saveinfo["ext_budget"] = ext_budget
+					}
+				}
+				if bidamount > 0.0 && ext_bidamount > 0.0 {
+					if bidamount/ext_bidamount == 10000.0 || ext_bidamount/bidamount == 10000.0 { // exact factor-of-10000 difference
+						saveinfo["bidamount"] = bidamount
+						saveinfo["ext_bidamount"] = ext_bidamount
+					}
+				}
+			}
+			if len(saveinfo) > 0 && tmpid != "" {
+				saveinfo["toptype"] = tmp["toptype"]
+				saveinfo["subtype"] = tmp["subtype"]
+				saveinfo["href"] = tmp["href"]
+				saveinfo["jyhref"] = tmp["jytest_href"]
+				ul.SourceMgo.Save("zktest_zzzzzkkk_0903", saveinfo)
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // fresh map: the previous one is now owned by the goroutine
+	}
+	wg_mgo.Wait()
+	log.Debug("repair ai is over ...", isok)
+}
+
+// TestRepairBuyerInfo walks every document of zktest_repeat_new and restores
+// buyer / winner from the values stored under ext_ai_record:
+//   - buyer is restored when the current buyer equals the agency and a
+//     recorded buyer exists;
+//   - winner is restored when the current winner strictly contains the
+//     recorded winner (i.e. contains it but is not equal to it).
+//
+// The name parameter is currently unused; the collection name is fixed.
+func TestRepairBuyerInfo(name string) {
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 20) // at most 20 documents are processed concurrently
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total, isok := 0, 0
+	it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		isok++
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			buyer := qu.ObjToString(tmp["buyer"])
+			agency := qu.ObjToString(tmp["agency"])
+			winner := qu.ObjToString(tmp["winner"])
+			update := map[string]interface{}{}
+			if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
+				o_buyer := qu.ObjToString((*ext_ai_record)["buyer"])
+				if buyer == agency && o_buyer != "" {
+					update["buyer"] = o_buyer
+				}
+				o_winner := qu.ObjToString((*ext_ai_record)["winner"])
+				// Compare the current winner with the recorded one. The original
+				// compared o_winner with itself, which is always false and
+				// silently disabled this repair.
+				if o_winner != "" && strings.Contains(winner, o_winner) && winner != o_winner {
+					update["winner"] = o_winner
+				}
+			}
+			if len(update) > 0 && tmpid != "" {
+				ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
+					"$set": update,
+				})
+			}
+		}(tmp)
+		// Hand the goroutine its own map; decode the next document into a fresh one.
+		tmp = make(map[string]interface{})
+	}
+	wg_mgo.Wait()
+	log.Debug("repair ai is over ...", isok)
+}
+
+func TestDelUpBuyerAi() { // copies buyer from each document of zktest_buyer_0828_new onto the same _id in result_20220218 and result_20220219, where that document exists
+	dataArr, _ := ul.SourceMgo.Find("zktest_buyer_0828_new", map[string]interface{}{}, nil, nil)
+	pool_mgo := make(chan bool, 50) // bounds the number of records processed concurrently to 50
+	wg_mgo := &sync.WaitGroup{}
+	for k, v := range dataArr {
+		if k%1000 == 0 {
+			log.Debug(k, "~", v["_id"])
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(v map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+
+			buyer := qu.ObjToString(v["buyer"])
+			tmpid := ul.BsonTOStringId(v["_id"])
+			data1 := ul.SourceMgo.FindById("result_20220218", tmpid)
+			if len(data1) > 0 { // only update when the document exists in this collection
+				ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
+					"$set": map[string]interface{}{"buyer": buyer},
+				})
+			}
+			data2 := ul.SourceMgo.FindById("result_20220219", tmpid)
+			if len(data2) > 0 { // only update when the document exists in this collection
+				ul.SourceMgo.UpdateById("result_20220219", tmpid, map[string]interface{}{
+					"$set": map[string]interface{}{"buyer": buyer},
+				})
+			}
+
+		}(v)
+	}
+
+	wg_mgo.Wait()
+	log.Debug("del ai is over ...")
+}
+
+func TestAiBuyerInfo() { // re-derives buyer for each document of zktest_repeat_new via prompt.AcquireBuyerInfo and writes back the cleaned "实体单位" value
+	//dataArr, _ := ul.SourceMgo.Find("zktest_buyer_info", map[string]interface{}{}, nil, nil)
+
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 50) // bounds the number of records processed concurrently to 50
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total, isok := 0, 0
+	it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		isok++ // counts every scanned document, not only the updated ones
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			tmpid := ul.BsonTOStringId(tmp["_id"])
+			if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" {
+				if zp_buyer := prompt.AcquireBuyerInfo(buyer); zp_buyer["实体单位"] != nil {
+					if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" { // keep the old buyer when cleaning rejects the model answer
+						ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
+							"$set": map[string]interface{}{"buyer": ns_buyer},
+						})
+					}
+				}
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // fresh map: the previous one is now owned by the goroutine
+	}
+	wg_mgo.Wait()
+	log.Debug("repair ai is over ...", isok)
+}
+
+func TestExportAiBuyer() { // exports ai_zhipu.s_buyer next to ext_ai_record.buyer into zktest_buyer_0827 for documents of result_20220218 up to a fixed _id
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	pool_mgo := make(chan bool, 10) // bounds the number of records processed concurrently to 10
+	wg_mgo := &sync.WaitGroup{}
+	q, total := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$lte": ul.StringTOBsonId("66cd8299b25c3e1deb9488dd"),
+		},
+	}, 0
+	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Select(map[string]interface{}{
+		"ai_zhipu":      1,
+		"ext_ai_record": 1,
+	}).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total, "~", tmp["_id"])
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			ai_buyer, ext_buyer := "", ""
+			if ai_zhipu := qu.ObjToMap(tmp["ai_zhipu"]); ai_zhipu != nil {
+				ai_buyer = qu.ObjToString((*ai_zhipu)["s_buyer"])
+			}
+			if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
+				ext_buyer = qu.ObjToString((*ext_ai_record)["buyer"])
+			}
+			if ai_buyer != "" { // documents without a model buyer are not exported
+				ul.SourceMgo.Save("zktest_buyer_0827", map[string]interface{}{
+					"_id":       tmp["_id"],
+					"ai_buyer":  ai_buyer,
+					"ext_buyer": ext_buyer,
+				})
+			}
+		}(tmp)
+		tmp = make(map[string]interface{}) // fresh map: the previous one is now owned by the goroutine
+	}
+	wg_mgo.Wait()
+	log.Debug("export is over ", total)
+}
+
 func TestIsPackage() {
 	tmpArr := []string{
 		"669e83fe66cf0db42a6520b3",

+ 21 - 4
main.go

@@ -1,19 +1,36 @@
 package main
 
 import (
+	"data_ai/tool"
 	"data_ai/udp"
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
 )
 
 func init() {
-	ul.InitGlobalVar()
-	udp.InitProcessVar()
-	
+	ul.IsTool = false
+	if ul.IsTool {
+		log.Debug("工具版本···")
+		ul.InitToolVar()
+	} else {
+		log.Debug("正常版本···")
+		ul.InitGlobalVar()
+		return
+		udp.InitProcessVar()
+	}
 }
 
 func main() { // runs the tool pipeline when ul.IsTool is set, then blocks forever
-	log.Debug("main ...")
+	if ul.IsTool {
+		log.Debug("main tool ...")
+		tool.StartToolInfo()
+	} else {
+		log.Debug("main ...")
+		//extract.TestSingleFieldInfo("bidding", "66c2fb9066cf0db42adf7c21")
+		//extract.TestExportJinErInfo()
+		//count, err := ul.SourceMgo.Count("zktest_repeat_new", map[string]interface{}{"repeat": 1})
+		//log.Debug(count, err)
+	}
 	lock := make(chan bool) // nothing in this function ever sends on or closes lock
 	<-lock // parks the main goroutine so the process stays alive
 }

+ 31 - 0
prompt/prompt_buyer.go

@@ -0,0 +1,31 @@
+package prompt
+
+import (
+	"data_ai/ai"
+	"data_ai/ul"
+	"unicode/utf8"
+)
+
+// AcquireBuyerInfo wraps detail in the entity-extraction prompt built by
+// PromptBuyerText and returns whatever map ai.PostZhiPuInfo produces for it.
+func AcquireBuyerInfo(detail string) map[string]interface{} {
+	return ai.PostZhiPuInfo(PromptBuyerText(detail))
+}
+
+// 提示词优选
+func PromptBuyerText(detail string) string {
+	if utf8.RuneCountInString(detail) > ul.MaxLen {
+		detail = string([]rune(detail)[:ul.MaxLen])
+	}
+	content := `请根据我提供的正文进行"实体单位"的抽取;
+你在识别"实体单位"的时候,只能返回一个实体单位,不要返回多个实体单位,如果识别不出来,请填写"无";
+请将上述的识别结果、信息分类结果,按照JSON格式输出,
+严格按照json格式
+{
+"实体单位":"实体单位",
+}
+请回答我的问题,不要联想,不要无中生有,不要生成解释,对于尚未确定或未明确的信息请在JSON对应的值填写:无
+正文内容:` + "\n" + detail + "\n结果JSON:"
+	return content
+}

+ 28 - 0
tool.json

@@ -0,0 +1,28 @@
+{
+  "bid_name": "bidding",
+  "ext_name": "20240828Gd_23",
+  "s_mgo": {
+    "local": false,
+    "l_addr": "192.168.3.166:27082",
+    "addr": "192.168.3.166:27082",
+    "dbname" : "zhaoxiuzhen",
+    "username": "",
+    "password": ""
+  },
+  "b_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "172.17.189.140:27080,172.17.189.141:27081",
+    "dbname" : "qfw",
+    "username": "zhengkun",
+    "password": "zk@123123"
+  },
+  "qy_mgo": {
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "172.17.189.140:27080,172.17.189.141:27081",
+    "dbname" : "mixdata",
+    "username": "zhengkun",
+    "password": "zk@123123"
+  }
+}

+ 296 - 0
tool/tool.go

@@ -0,0 +1,296 @@
+package tool
+
+import (
+	"data_ai/extract"
+	"data_ai/ul"
+	log "github.com/donnie4w/go-logger/logger"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"sync"
+)
+
+var unset_check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
+
+// StartToolInfo runs the model-based correction over every document of the
+// ul.Ext_Name collection. Each document is passed to extract.ResolveInfo; the
+// result is merged with the existing fields by getCheckDataAI and written
+// back with a $set update, followed by a $unset of unset_check when
+// getCheckDataAI asks for it. At most 50 documents are processed at a time.
+func StartToolInfo() {
+	log.Debug("开始大模型修正数据···")
+	q := map[string]interface{}{}
+	pool_mgo := make(chan bool, 50)
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess)
+	total := 0
+	it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			u_id := ul.BsonTOStringId(tmp["_id"])
+			data := extract.ResolveInfo(tmp)
+			// Both conditions are required: there must be model output to merge
+			// and a usable id to update. The original used `|| u_id == ""`,
+			// which let empty-id records through to UpdateById.
+			if len(data) == 0 || u_id == "" {
+				return
+			}
+			tmp["ai_zhipu"] = data
+			update_check := make(map[string]interface{})
+			is_unset := getCheckDataAI(tmp, &update_check)
+			if len(update_check) > 0 {
+				ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+					"$set": update_check,
+				})
+			}
+			if is_unset {
+				ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
+					"$unset": unset_check,
+				})
+			}
+		}(tmp)
+		// Hand the goroutine its own map; decode the next document into a fresh one.
+		tmp = make(map[string]interface{})
+	}
+	wg_mgo.Wait()
+	log.Debug("ai is over ...")
+}
+
+// getCheckDataAI merges the model output stored in tmp["ai_zhipu"] with the
+// rule-extracted fields already present in tmp.
+//
+// Fields that should be written back are placed into *update_check; the
+// values they replace are collected in a record stored under
+// update_check["ext_ai_record"]. The function returns true when the final
+// subtype is none of 中标/成交/合同/单一 while tmp still carries one of the
+// unset_check fields, i.e. the caller should $unset them. When ai_zhipu is
+// missing or not a map it returns false and leaves *update_check untouched.
+func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) bool {
+	if tmp["ai_zhipu"] == nil {
+		return false
+	}
+	// qu.ObjToMap can return nil (other callers nil-check its result), so do
+	// not dereference it blindly: a panic here would kill the worker goroutine.
+	p_ai := qu.ObjToMap(tmp["ai_zhipu"])
+	if p_ai == nil {
+		return false
+	}
+	ai_zhipu := *p_ai
+	// Record of the values that get overwritten.
+	ext_ai_record := map[string]interface{}{}
+	// Classification fields.
+	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
+	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
+	if ns_toptype != s_toptype || ns_subtype != s_subtype {
+		ext_ai_record["s_toptype"] = ns_toptype
+		ext_ai_record["s_subtype"] = ns_subtype
+	}
+	// Continue with the corrected classification.
+	s_toptype, s_subtype = ns_toptype, ns_subtype
+	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
+		// These two top types always keep the rule-extracted classification.
+		s_toptype = qu.ObjToString(tmp["toptype"])
+		s_subtype = qu.ObjToString(tmp["subtype"])
+	} else {
+		if s_toptype != "" && s_subtype != "" {
+			(*update_check)["toptype"] = s_toptype
+			(*update_check)["subtype"] = s_subtype
+			ext_ai_record["toptype"] = tmp["toptype"]
+			ext_ai_record["subtype"] = tmp["subtype"]
+		} else {
+			s_toptype = qu.ObjToString(tmp["toptype"])
+			s_subtype = qu.ObjToString(tmp["subtype"])
+		}
+	}
+
+	// Basic fields.
+	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
+		(*update_check)["buyer"] = s_buyer
+		ext_ai_record["buyer"] = tmp["buyer"]
+		// Never let the model's buyer overwrite when it equals the agency.
+		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
+			delete((*update_check), "buyer")
+			delete(ext_ai_record, "buyer")
+		}
+	}
+	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
+		(*update_check)["projectname"] = s_projectname
+		ext_ai_record["projectname"] = tmp["projectname"]
+	}
+	if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
+		(*update_check)["projectcode"] = s_projectcode
+		ext_ai_record["projectcode"] = tmp["projectcode"]
+	}
+	if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
+		(*update_check)["budget"] = s_budget
+		ext_ai_record["budget"] = tmp["budget"]
+	}
+
+	// Region fields.
+	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
+	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
+	if s_area != "" && s_area != "全国" {
+		(*update_check)["area"] = s_area
+		if s_city != "" {
+			(*update_check)["city"] = s_city
+			if o_district != "" {
+				// Keep the extracted district only if it belongs to the model's city and province.
+				isT := false
+				if ds := ul.S_DistrictDict[o_district]; ds != nil {
+					for _, v := range ds {
+						if v.C_Name == s_city && v.P_Name == s_area {
+							isT = true
+							break
+						}
+					}
+				}
+				if !isT {
+					(*update_check)["district"] = ""
+				}
+			}
+		} else {
+			if o_area != s_area {
+				(*update_check)["city"] = ""
+				(*update_check)["district"] = ""
+			}
+		}
+		ext_ai_record["area"] = tmp["area"]
+		ext_ai_record["city"] = tmp["city"]
+		ext_ai_record["district"] = tmp["district"]
+	}
+
+	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+		// First replace with the top-level model fields.
+		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
+			(*update_check)["bidamount"] = s_bidamount
+			ext_ai_record["bidamount"] = tmp["bidamount"]
+		}
+		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
+			(*update_check)["s_winner"] = s_winner
+			(*update_check)["winner"] = s_winner
+			ext_ai_record["s_winner"] = tmp["s_winner"]
+			ext_ai_record["winner"] = tmp["winner"]
+		}
+		// A rule-extracted multi-package result that staffInfo rejects is
+		// replaced by the model's package data. A missing or non-map "package"
+		// field is treated as "no packages" instead of being dereferenced.
+		isRulePkg := false
+		if p_pkg := qu.ObjToMap(tmp["package"]); p_pkg != nil && len(*p_pkg) > 1 {
+			isRulePkg = !staffInfo(*p_pkg)
+		}
+		if isRulePkg { // prefer the model's package split and overwrite values with it
+			if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
+				if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
+					if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
+						(*update_check)["s_winner"] = p_winner
+						(*update_check)["winner"] = p_winner
+						ext_ai_record["s_winner"] = tmp["s_winner"]
+						ext_ai_record["winner"] = tmp["winner"]
+					}
+					if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
+						(*update_check)["bidamount"] = p_bidamount
+						ext_ai_record["bidamount"] = tmp["bidamount"]
+					}
+					if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
+						(*update_check)["package"] = s_package
+						ext_ai_record["package"] = tmp["package"]
+					}
+				}
+			}
+		}
+	} else if s_subtype == "单一" {
+		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
+			(*update_check)["bidamount"] = s_bidamount
+			ext_ai_record["bidamount"] = tmp["bidamount"]
+		}
+		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
+			(*update_check)["s_winner"] = s_winner
+			(*update_check)["winner"] = s_winner
+			ext_ai_record["s_winner"] = tmp["s_winner"]
+			ext_ai_record["winner"] = tmp["winner"]
+		}
+	} else {
+		// Any other subtype: ask the caller to $unset the winner-related
+		// fields if the document still carries one of them.
+		(*update_check)["ext_ai_record"] = ext_ai_record
+		for k := range unset_check {
+			if tmp[k] != nil {
+				return true
+			}
+		}
+	}
+	(*update_check)["ext_ai_record"] = ext_ai_record
+
+	return false
+}
+
+// staffInfo reports whether the package map is complete: it is non-empty and
+// every entry has both a non-empty "winner" and a positive "bidamount".
+// An entry that is not a map counts as incomplete (the original dereferenced
+// the nil result of qu.ObjToMap and panicked).
+func staffInfo(pkg map[string]interface{}) bool {
+	for _, v := range pkg {
+		info := qu.ObjToMap(v)
+		if info == nil {
+			return false
+		}
+		if qu.ObjToString((*info)["winner"]) == "" {
+			return false
+		}
+		if qu.Float64All((*info)["bidamount"]) <= 0.0 {
+			return false
+		}
+	}
+	// An empty map has no winner and no amount at all, which the original
+	// also rejected.
+	return len(pkg) > 0
+}
+
+func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) { // reconciles the model's (toptype, subtype) with the rule-extracted ones in data and returns the pair to use
+	toptype_rule := qu.ObjToString(data["toptype"])
+	subtype_rule := qu.ObjToString(data["subtype"])
+	// 1. result class: correct 中标 vs 成交 mistakes
+	s_winner := qu.ObjToString(data["s_winner"])
+	winnerorder := IsMarkInterfaceMap(data["winnerorder"])
+	if toptype_ai == "结果" && toptype_rule == "结果" {
+		if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { // both the rule and the model are wrong
+			return "结果", "中标"
+		}
+		if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
+			if len(winnerorder) > 0 { // winner candidates present -> 中标
+				return toptype_ai, "中标"
+			}
+			if s_winner != "" || data["bidamount"] != nil {
+				return toptype_ai, "成交"
+			}
+		}
+	}
+	// 2. correct tender-vs-result mismatches
+	if toptype_ai != "结果" && toptype_rule == "结果" {
+		// return toptype_rule, subtype_rule // (disabled) take the rule result by default
+		if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
+			return toptype_rule, subtype_rule
+		} else {
+			return toptype_ai, subtype_ai
+		}
+	} else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { // result / change
+		// return toptype_rule, subtype_rule // (disabled) take the rule result by default
+		if len(winnerorder) > 0 { // winner candidates present -> 中标
+			return toptype_ai, "中标" // force subtype "中标" here to guard against a model misclassification
+		} else if s_winner != "" || data["bidamount"] != nil {
+			return toptype_ai, "成交" // force subtype "成交" here to guard against a model misclassification
+		} else {
+			return toptype_ai, subtype_ai
+		}
+	}
+	return toptype_ai, subtype_ai
+}
+
+func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
+	p_list := []map[string]interface{}{}
+	if list_3, ok_3 := t.([]map[string]interface{}); ok_3 {
+		p_list = list_3
+		return p_list
+	}
+	if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
+		p_list = qu.ObjArrToMapArr(yl_list_1)
+	} else {
+		if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
+			p_list = qu.ObjArrToMapArr(yl_list_2)
+		}
+	}
+	return p_list
+}

+ 0 - 1
udp/udprocess.go

@@ -106,7 +106,6 @@ func ProcessUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 func sendNextNode(sid string, eid string) {
 	//更新记录状态
 	updateProcessUdpIdsInfo(sid, eid)
-
 	for _, node := range nextNode {
 		key := sid + "-" + eid + "-" + qu.ObjToString(node["stype"])
 		by, _ := json.Marshal(map[string]interface{}{

+ 3 - 0
ul/attr.go

@@ -6,12 +6,15 @@ var (
 	SourceMgo, QyxyMgo *MongodbSim
 	BidMgo             *MongodbSim
 	SysConfig          map[string]interface{}
+	ToolConfig         map[string]interface{}
 	Bid_Name, Ext_Name string
 	Url                = "https://www.jianyu360.cn/article/content/%s.html"
 	CleanResultReg     = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
 	SaveResultReg      = regexp.MustCompile("([{].*[}])")
 	MaxLen             = 3000
 	RulesPname         = []*ExtReg{}
+	IsTool             bool
+	Reading            int
 )
 
 type ExtReg struct {

+ 74 - 1
ul/init.go

@@ -12,10 +12,18 @@ func InitGlobalVar() {
 	initMgo()
 	initPCD()
 }
+func InitToolVar() { // tool-mode counterpart of InitGlobalVar: reads tool.json instead of the regular config
+	qu.ReadConfig("./tool.json", &ToolConfig) // load the tool-mode configuration file
+	initToolMgo()
+	initPCD()
+}
 
 // 初始化mgo
 func initMgo() {
-
+	Reading = qu.IntAll(SysConfig["reading"])
+	if Reading == 0 {
+		Reading = 500
+	}
 	Bid_Name, Ext_Name = qu.ObjToString(SysConfig["bid_name"]), qu.ObjToString(SysConfig["ext_name"])
 	//源数据
 	b_cfg := *qu.ObjToMap(SysConfig["b_mgo"])
@@ -78,6 +86,71 @@ func initMgo() {
 	}
 }
 
+// initToolMgo reads the collection names from ToolConfig into Bid_Name and
+// Ext_Name, then creates and initialises the three package-level Mongo
+// handles from the "b_mgo", "s_mgo" and "qy_mgo" sections, in that order.
+func initToolMgo() {
+	Bid_Name = qu.ObjToString(ToolConfig["bid_name"])
+	Ext_Name = qu.ObjToString(ToolConfig["ext_name"])
+
+	BidMgo = openToolMgo(ToolConfig["b_mgo"])    // bidding source data
+	SourceMgo = openToolMgo(ToolConfig["s_mgo"]) // data being corrected
+	QyxyMgo = openToolMgo(ToolConfig["qy_mgo"])  // enterprise data
+}
+
+// openToolMgo builds a MongodbSim from one config section and initialises its
+// pool. When "local" is true the "l_addr" address and InitPoolDirect are used,
+// otherwise "addr" and InitPool. Like the inline code it replaces, it panics
+// if the section is missing or "local" is not a bool.
+func openToolMgo(section interface{}) *MongodbSim {
+	cfg := *qu.ObjToMap(section)
+	local := cfg["local"].(bool)
+	addr := qu.ObjToString(cfg["addr"])
+	if local {
+		addr = qu.ObjToString(cfg["l_addr"])
+	}
+	sim := &MongodbSim{
+		MongodbAddr: addr,
+		DbName:      qu.ObjToString(cfg["dbname"]),
+		Size:        10,
+		UserName:    qu.ObjToString(cfg["username"]),
+		Password:    qu.ObjToString(cfg["password"]),
+	}
+	if local {
+		sim.InitPoolDirect()
+	} else {
+		sim.InitPool()
+	}
+	return sim
+}
+
 // 标准化省市区···
 func initPCD() {
 	S_ProvinceDict = make(map[string][]S_Province, 0)

+ 8 - 0
ul/mgo.go

@@ -221,6 +221,14 @@ func (m *MongodbSim) CreateIndex(c string, models []mongo.IndexModel) bool {
 	}
 }
 
+// Count returns the number of documents in collection coll of m.DbName that
+// match query, together with any error reported by CountDocuments.
+func (m *MongodbSim) Count(coll string, query map[string]interface{}) (int64, error) {
+	m.Open()
+	defer m.Close()
+	return m.C.Database(m.DbName).Collection(coll).CountDocuments(m.Ctx, query)
+}
+
 // 批量插入
 func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
 	m.Open()