Procházet zdrojové kódy

新分包,以及工具调整

zhengkun před 9 měsíci
rodič
revize
e2dfc0453c
15 změnil soubory, kde provedl 694 přidání a 1070 odebrání
  1. 4 0
      ai/ai_zhipu.go
  2. 0 1
      clean/c_all.go
  3. 2 2
      config.json
  4. 14 7
      extract/extract.go
  5. 85 0
      extract/full.go
  6. 28 691
      extract/test.go
  7. 6 0
      go.mod
  8. 12 0
      go.sum
  9. 12 4
      main.go
  10. 122 58
      prompt/prompt_package.go
  11. 19 19
      tool.json
  12. 4 287
      tool/tool.go
  13. 1 1
      ul/attr.go
  14. 363 0
      ul/global.go
  15. 22 0
      ul/md.go

+ 4 - 0
ai/ai_zhipu.go

@@ -139,6 +139,10 @@ func PostPackageZhiPuAI(content string) map[string]interface{} {
 	apiURL := "https://open.bigmodel.cn/api/paas/v4/chat/completions"
 	// 构造请求数据
 	messages := []map[string]interface{}{}
+	messages = append(messages, map[string]interface{}{
+		"role":    "system",
+		"content": "你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。",
+	})
 	messages = append(messages, map[string]interface{}{
 		"role":    "user",
 		"content": content,

+ 0 - 1
clean/c_all.go

@@ -43,7 +43,6 @@ func CleanFieldInfo(zhipu map[string]interface{}, fns []string) map[string]inter
 	if zhipu["ispkg"] != nil {
 		data["ispkg"] = zhipu["ispkg"]
 	}
-
 	//分类字段
 	s_toptype := qu.ObjToString(zhipu["s_toptype"])
 	s_subtype := qu.ObjToString(zhipu["s_subtype"])

+ 2 - 2
config.json

@@ -1,7 +1,7 @@
 {
   "udpport": ":1791",
   "bid_name": "bidding",
-  "ext_name": "result_20220218",
+  "ext_name": "zzzzzkkk",
   "reading": 500,
   "udp_max": 10000,
   "smail": {
@@ -12,7 +12,7 @@
     "local": true,
     "l_addr": "127.0.0.1:12005",
     "addr": "172.17.189.140:27080,172.17.189.141:27081",
-    "dbname" : "qfw",
+    "dbname" : "qfw_ai",
     "username": "zhengkun",
     "password": "zk@123123"
   },

+ 14 - 7
extract/extract.go

@@ -85,16 +85,23 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	if utf8.RuneCountInString(detail) < 100 {
 		return f_data
 	}
+	detail = ul.ConvertToMarkdown(detail)
 	//获取外围字段数据
 	f_info := prompt.AcquireExtractFieldInfo(detail)
-	//分包判断-获取信息
-	ispkg, pkg := false, map[string]interface{}{}
-	if ispkg = prompt.AcquireIsPackageInfo(detail); ispkg {
-		f_info["ispkg"] = ispkg
-		if pkg = prompt.AcquireMultiplePackageInfo(detail); len(pkg) > 0 {
-			f_info["s_pkg"] = pkg
-		}
+	//获取分包信息
+	pkg := prompt.AcquireNewMultiplePackageInfo(detail)
+	if len(pkg) > 0 {
+		f_info["s_pkg"] = pkg
 	}
+	//分包判断-获取分包方法舍弃
+	//ispkg, pkg := false, map[string]interface{}{}
+	//if ispkg = prompt.AcquireIsPackageInfo(detail); ispkg {
+	//	f_info["ispkg"] = ispkg
+	//	if pkg = prompt.AcquireMultiplePackageInfo(detail); len(pkg) > 0 {
+	//		f_info["s_pkg"] = pkg
+	//	}
+	//}
+
 	//获取分类字段数据
 	s_toptype, s_subtype := "", ""
 	if qu.ObjToString(v["toptype"]) == "拟建" {

+ 85 - 0
extract/full.go

@@ -0,0 +1,85 @@
+package extract
+
+import (
+	"data_ai/ul"
+	log "github.com/donnie4w/go-logger/logger"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"sync"
+)
+
+// getExistsInfo walks the ul.Bid_Name collection of ul.SourceMgo in _id order and returns a map whose keys are the string form of every _id seen; every value is the empty string.
+func getExistsInfo() map[string]interface{} {
+	log.Debug("开始构建已存在数据···")
+	sess := ul.SourceMgo.GetMgoConn()
+	defer ul.SourceMgo.DestoryMongoConn(sess) // deferred cleanup call for sess
+	dict := map[string]interface{}{}
+	q, total := map[string]interface{}{}, 0 // q is an empty filter; total counts iterated documents
+	it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%100000 == 0 { // progress log once every 100000 documents
+			log.Debug("cur ai index ", total, tmp["_id"])
+		}
+		tmpid := ul.BsonTOStringId(tmp["_id"])
+		dict[tmpid] = "" // non-nil placeholder value; MovingFullInfo tests membership with dict[id] != nil
+		tmp = make(map[string]interface{}) // hand it.Next a fresh map on the next iteration
+	}
+	log.Debug("is exists ...", total, "~", len(dict))
+	return dict
+}
+
+// MovingFullInfo saves documents from ul.BidMgo's ul.Bid_Name collection with _id < eid into ul.SourceMgo, skipping ids returned by getExistsInfo; documents with infoformat == 1 first go through ResolveInfo. NOTE(review): sid is never read in this body — confirm whether a lower _id bound was intended.
+func MovingFullInfo(sid string, eid string) {
+	dict := getExistsInfo() // ids already present in the target collection
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$lt": ul.StringTOBsonId(eid),
+		},
+	}
+	ul.FlashModel = "glm-4-flash" // overrides the package-level ul.FlashModel; presumably selects the model used by the AI calls — confirm
+	pool_mgo := make(chan bool, ul.Reading) // buffered channel used as a semaphore: at most ul.Reading workers run at once
+	wg_mgo := &sync.WaitGroup{}
+	sess := ul.BidMgo.GetMgoConn()
+	defer ul.BidMgo.DestoryMongoConn(sess) // deferred cleanup call for sess
+	total := 0
+	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("-_id").Iter() // presumably descending _id order ("-" prefix) — confirm against the driver
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 { // progress log once every 1000 documents
+			log.Debug("cur ai index ", total, tmp["_id"])
+		}
+		tmpid := ul.BsonTOStringId(tmp["_id"])
+		if tmpid == "" || dict[tmpid] != nil { // empty id, or already present in the target: do not migrate
+			tmp = make(map[string]interface{})
+			continue
+		}
+		pool_mgo <- true // blocks while ul.Reading workers are already in flight
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo // release the semaphore slot
+				wg_mgo.Done()
+			}()
+			infoformat := qu.IntAll(tmp["infoformat"])
+			if infoformat == 1 { // regular data: run the extraction
+				data := ResolveInfo(tmp)
+				if len(data) > 0 {
+					tmp["ai_zhipu"] = data
+					update_check := make(map[string]interface{}, 0) // passed empty; presumably filled in by ChooseCheckDataAI — confirm
+					is_unset := ul.ChooseCheckDataAI(tmp, update_check)
+					for k, v := range update_check {
+						tmp[k] = v // overwrite the value on the document
+					}
+					if is_unset {
+						for k, _ := range ul.Unset_Check {
+							delete(tmp, k) // remove the field from the document
+						}
+					}
+				}
+			}
+			// Migrate the document; this runs for every non-skipped document, whatever its infoformat.
+			ul.SourceMgo.Save(ul.Bid_Name, tmp)
+		}(tmp)
+		tmp = make(map[string]interface{}) // the goroutine now owns the previous map; start the next document with a fresh one
+	}
+	wg_mgo.Wait() // wait for all in-flight workers before logging completion
+	log.Debug("ai is over ...", total)
+}

+ 28 - 691
extract/test.go

@@ -1,15 +1,12 @@
 package extract
 
 import (
-	"data_ai/clean"
 	"data_ai/prompt"
 	"data_ai/ul"
 	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
 	new_xlsx "github.com/tealeg/xlsx/v3"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"os"
-	"strings"
 	"sync"
 	"time"
 	"unicode/utf8"
@@ -31,515 +28,37 @@ func TestSingleFieldInfo(name string, tmpid string) {
 	log.Debug("耗时···", time.Now().Unix()-now)
 }
 
-// 调试错误数据···
-func TestErrInfo0923() {
-	arr := []string{
-		"66e39b7bb25c3e1debf2cb66",
-		"66e39b7bb25c3e1debf2cb52",
-		"66e39b76b25c3e1debf2cb1c",
-		"66e39b71b25c3e1debf2ca58",
-		"66e39b71b25c3e1debf2ca47",
-		"66e39b71b25c3e1debf2ca3e",
-		"66e39b71b25c3e1debf2ca1d",
-		"66e39b62b25c3e1debf2c9a0",
-		"66e39b0db25c3e1debf2c788",
-		"66e39af3b25c3e1debf2c711",
-		"66e39ae5b25c3e1debf2c6ac",
-		"66e39ad5b25c3e1debf2c607",
-		"66e39ad5b25c3e1debf2c5fc",
-		"66e39ad1b25c3e1debf2c5e1",
-		"66e39acbb25c3e1debf2c56c",
-		"66e39ac6b25c3e1debf2c533",
-		"66e39ac1b25c3e1debf2c526",
-		"66e39a9eb25c3e1debf2c4e7",
-		"66e39a80b25c3e1debf2c441",
-		"66e39a5cb25c3e1debf2c357",
-		"66e39a30b25c3e1debf2c18f",
-		"66e39a12b25c3e1debf2c0cc",
-		"66e39a08b25c3e1debf2c050",
-		"66e39a08b25c3e1debf2bfce",
-		"66e3993fb25c3e1debf2b875",
-		"66e3990db25c3e1debf2b55b",
-		"66e398f1b25c3e1debf2b4bc",
-		"66e397ccb25c3e1debf2abed",
-		"66e397b9b25c3e1debf2ab81",
-		"66e3977db25c3e1debf2a7ae",
-		"66e396b3b25c3e1debf2a049",
-		"66e396b3b25c3e1debf29f97",
-		"66e3969eb25c3e1debf29e62",
-		"66e395feb25c3e1debf29abb",
-		"66e395e0b25c3e1debf298d2",
-		"66e395d6b25c3e1debf297e6",
-		"66e395ccb25c3e1debf296d1",
-		"66e39554b25c3e1debf29331",
-		"66e39517b25c3e1debf29160",
-		"66e394c7b25c3e1debf28f42",
-		"66e394bdb25c3e1debf28ef6",
-		"66e394b3b25c3e1debf28e48",
-		"66e3944fb25c3e1debf28ab5",
-		"66e393ccb25c3e1debf28729",
-		"66e393c2b25c3e1debf286dd",
-		"66e393c2b25c3e1debf286a5",
-		"66e393aeb25c3e1debf28572",
-		"66e3934ab25c3e1debf28423",
-		"66e39322b25c3e1debf282c6",
-		"66e392d1b25c3e1debf2809d",
-		"66e39212b25c3e1debf279d1",
-		"66e39209b25c3e1debf279c7",
-		"66e391f5b25c3e1debf2779e",
-		"66e391eab25c3e1debf2773f",
-		"66e391e0b25c3e1debf276a8",
-		"66e39168b25c3e1debf27347",
-		"66e3912cb25c3e1debf2714a",
-		"66e390e6b25c3e1debf26ee7",
-		"66e390e4b25c3e1debf26e7c",
-		"66e390b3b25c3e1debf26cce",
-		"66e3906cb25c3e1debf26a8f",
-		"66e3901bb25c3e1debf26822",
-		"66e38ff5b25c3e1debf26714",
-		"66e38fd5b25c3e1debf26694",
-		"66e38fb8b25c3e1debf265a4",
-		"66e38f90b25c3e1debf264c5",
-		"66e38f7bb25c3e1debf263de",
-		"66e38f68b25c3e1debf263b8",
-		"66e38f5eb25c3e1debf2638c",
-		"66e38f4ab25c3e1debf2633b",
-		"66e38f40b25c3e1debf2631a",
-		"66e38f18b25c3e1debf261c4",
-		"66e38ef8b25c3e1debf260e7",
-		"66e38ec8b25c3e1debf26063",
-		"66e38eb3b25c3e1debf26017",
-		"66e38e95b25c3e1debf25f78",
-		"66e38e6db25c3e1debf25ef0",
-		"66e38e61b25c3e1debf25eb4",
-		"66e38a57b25c3e1debf24a45",
-		"66e38a47b25c3e1debf24a09",
-		"66e38a47b25c3e1debf249dd",
-		"66e38a47b25c3e1debf249a1",
-		"66e38a47b25c3e1debf24998",
-		"66e38a47b25c3e1debf24995",
-		"66e38a1fb25c3e1debf2494a",
-		"66e389f3b25c3e1debf2482e",
-		"66e389f3b25c3e1debf24824",
-		"66e389f3b25c3e1debf2481d",
-		"66e389f3b25c3e1debf24819",
-		"66e389f3b25c3e1debf24816",
-		"66e389f3b25c3e1debf24808",
-		"66e389d5b25c3e1debf247ec",
-		"66e389adb25c3e1debf24792",
-		"66e389a6b25c3e1debf24733",
-		"66e389a6b25c3e1debf24727",
-		"66e389a3b25c3e1debf246f3",
-		"66e3899cb25c3e1debf246ee",
-		"66e3874db25c3e1debf23d91",
-		"66e38739b25c3e1debf23d28",
-		"66e38720b25c3e1debf23cf3",
-		"66e386f3b25c3e1debf23cac",
-		"66e386dfb25c3e1debf23c5b",
-		"66e386dfb25c3e1debf23c5a",
-		"66e386d5b25c3e1debf23c1b",
-		"66e38699b25c3e1debf23b3b",
-		"66e38694b25c3e1debf23b35",
-		"66e3867bb25c3e1debf23af4",
-		"66e38671b25c3e1debf23ad6",
-		"66e38671b25c3e1debf23aa3",
-		"66e38671b25c3e1debf23a97",
-		"66e38661b25c3e1debf23a6a",
-		"66e38657b25c3e1debf23a27",
-		"66e3864db25c3e1debf23a10",
-		"66e385f3b25c3e1debf23954",
-		"66e385dfb25c3e1debf2393a",
-		"66e385d0b25c3e1debf238de",
-		"66e385c6b25c3e1debf23896",
-		"66e385adb25c3e1debf2381b",
-		"66e385a3b25c3e1debf237eb",
-		"66e385a3b25c3e1debf237e0",
-		"66e38570b25c3e1debf23778",
-		"66e38566b25c3e1debf23765",
-		"66e3855cb25c3e1debf23755",
-		"66e38534b25c3e1debf2369e",
-		"66e38526b25c3e1debf2367c",
-		"66e3851cb25c3e1debf23646",
-		"66e3851cb25c3e1debf2363d",
-		"66e3851cb25c3e1debf2363a",
-		"66e38512b25c3e1debf23621",
-		"66e38512b25c3e1debf2360e",
-		"66e384f8b25c3e1debf23590",
-		"66e384eeb25c3e1debf23572",
-		"66e384c2b25c3e1debf2350b",
-		"66e3847fb25c3e1debf23474",
-	}
-
-	pool_mgo := make(chan bool, 200)
-	wg_mgo := &sync.WaitGroup{}
-	log.Debug("预计处理条数···", len(arr))
-	for k, v := range arr {
-		if k%10 == 0 {
-			log.Debug("cur index ", k)
-		}
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(v string) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			data := ul.BidMgo.FindById("bidding", v)
-			if len(data) >= 0 && data != nil {
-				detail := qu.ObjToString(data["detail"])
-				filetext := qu.ObjToString(data["filetext"]) //此处为附件信息···
-				title := qu.ObjToString(data["title"])
-				if strings.Contains(title, "开标记录") { //开标记录舍弃
-					ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-						"ispkg": 0,
-						"s_id":  v,
-					})
-					log.Debug("开标记录舍弃···")
-					return
-				}
-				if data["jyfb_data"] != nil { //剑鱼发布舍弃qi
-					ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-						"ispkg": 0,
-						"s_id":  v,
-					})
-					log.Debug("剑鱼发布舍弃···")
-					return
-				}
-				if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
-					detail = filetext
-				}
-				if utf8.RuneCountInString(detail) < 100 {
-					ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-						"ispkg": 0,
-						"s_id":  v,
-					})
-					log.Debug("长度不符舍弃···")
-					return
-				}
-				//获取外围字段数据
-				//分包判断-获取信息
-				//ispkg, pkg := false, map[string]interface{}{}
-				//if ispkg = prompt.AcquireIsPackageInfo(detail); ispkg {
-				//	if pkg = prompt.AcquireMultiplePackageInfo(detail); len(pkg) > 0 {
-				//
-				//	}
-				//}
-				pkg := prompt.AcquireMultiplePackageInfo(detail)
-				if s_pkg, ok := pkg["s_pkg"].(map[string]map[string]interface{}); ok {
-					if len(s_pkg) <= 0 {
-						ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-							"ispkg": 0,
-							"s_id":  v,
-						})
-					} else if len(s_pkg) == 1 {
-						ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-							"ispkg": 1,
-							"pkg":   pkg,
-							"s_id":  v,
-						})
-					} else {
-						ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-							"ispkg": 2,
-							"s_id":  v,
-							"pkg":   pkg,
-						})
-					}
-				} else {
-					ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-						"ispkg": 0,
-						"s_id":  v,
-					})
-				}
-
-			} else {
-				log.Debug("未查询到数据···")
-				ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
-					"ispkg": -1,
-				})
-			}
-
-		}(v)
-	}
-	wg_mgo.Wait()
-	log.Debug("is over ···")
-}
-
-// 导出需要修复的
-func TestFullJinOrCodeInfo() {
-	q := map[string]interface{}{}
-	pool_mgo := make(chan bool, 20)
-	wg_mgo := &sync.WaitGroup{}
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	total := 0
-	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total)
-		}
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(tmp["_id"])
-			isPcode, update := false, map[string]interface{}{}
-			ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
-			o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
-			if ext_ai_record != nil {
-				o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
-				o_budget = qu.Float64All((*ext_ai_record)["budget"])
-				o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
-			}
-
-			if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
-				if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
-					update["budget"] = filterAmount(r_budget, o_budget)
-				}
-			}
-			if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
-				if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
-					update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
-				}
-			}
-			//对于编号
-			if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
-				if o_projectcode != projectcode {
-					if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
-						fns := getpnsinfo(data) //获取附件名字
-						for _, v := range fns {
-							if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
-								if strings.Contains(v, projectcode) {
-									isPcode = true
-									break
-								}
-							}
-						}
-						if isPcode {
-							update["projectcode"] = o_projectcode
-						}
-					}
-				}
-			}
-			if len(update) > 0 {
-				//更新抽取表
-				ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
-					"$set": update,
-				})
-				//保存待修复表
-				update["_id"] = tmp["_id"]
-				ul.SourceMgo.Save("zzzzz_kkk_uc_0907", update)
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
-	}
-	wg_mgo.Wait()
-	log.Debug("repair ai is over ...")
-}
-
-// 修复金额和编号
-func TestRepairJinOrCodeInfo() {
-	q := map[string]interface{}{}
-	pool_mgo := make(chan bool, 20)
-	wg_mgo := &sync.WaitGroup{}
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	total := 0
-	it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total)
-		}
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(tmp["_id"])
-			isPcode, update := false, map[string]interface{}{}
-			ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
-			o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
-			if ext_ai_record != nil {
-				o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
-				o_budget = qu.Float64All((*ext_ai_record)["budget"])
-				o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
-			}
-
-			if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
-				if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
-					update["budget"] = filterAmount(r_budget, o_budget)
-				}
-			}
-			if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
-				if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
-					update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
-				}
-			}
-			//对于编号
-			if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
-				if o_projectcode != projectcode {
-					if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
-						fns := getpnsinfo(data) //获取附件名字
-						for _, v := range fns {
-							if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
-								if strings.Contains(v, projectcode) {
-									isPcode = true
-									break
-								}
-							}
-						}
-						if isPcode {
-							update["projectcode"] = o_projectcode
-						}
-					}
-				}
-			}
-			if len(update) > 0 {
-				ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
-					"$set": update,
-				})
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
-	}
-	wg_mgo.Wait()
-	log.Debug("repair ai is over ...")
-}
-
-// 筛选金额
-func filterAmount(f1 float64, f2 float64) float64 {
-	//选取一个合适的金额 ...
-	if f1 > f2 {
-		if f1 > 100000000.0 {
-			return f2
-		} else {
-			return f1
-		}
-	} else if f1 < f2 {
-		if f2 > 100000000.0 {
-			return f1
-		} else {
-			return f2
-		}
-	} else {
-		return f1
+// 验证单条数据···
+func TestSinglePackageInfo(name string, tmpid string) {
+	now := time.Now().Unix()
+	tmp := ul.BidMgo.FindById(name, tmpid)
+	if len(tmp) == 0 || tmp == nil {
+		log.Debug("未查询到数据...", tmpid)
+		return
 	}
-}
-
-func TestExportJinErInfo() {
-	q := map[string]interface{}{}
-	pool_mgo := make(chan bool, 20)
-	wg_mgo := &sync.WaitGroup{}
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	total, isok := 0, 0
-	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total)
-		}
-		isok++
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(tmp["_id"])
-			budget := qu.Float64All(tmp["budget"])
-			bidamount := qu.Float64All(tmp["bidamount"])
-			saveinfo := map[string]interface{}{}
-			if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
-				ext_budget := qu.Float64All((*ext_ai_record)["budget"])
-				ext_bidamount := qu.Float64All((*ext_ai_record)["bidamount"])
-				if budget > 0.0 && ext_budget > 0.0 {
-					if budget/ext_budget == 10000.0 || ext_budget/budget == 10000.0 {
-						saveinfo["budget"] = budget
-						saveinfo["ext_budget"] = ext_budget
-					}
-				}
-				if bidamount > 0.0 && ext_bidamount > 0.0 {
-					if bidamount/ext_bidamount == 10000.0 || ext_bidamount/bidamount == 10000.0 {
-						saveinfo["bidamount"] = bidamount
-						saveinfo["ext_bidamount"] = ext_bidamount
-					}
-				}
-			}
-			if len(saveinfo) > 0 && tmpid != "" {
-				saveinfo["toptype"] = tmp["toptype"]
-				saveinfo["subtype"] = tmp["subtype"]
-				saveinfo["href"] = tmp["href"]
-				saveinfo["jyhref"] = tmp["jytest_href"]
-				ul.SourceMgo.Save("zktest_zzzzzkkk_0903", saveinfo)
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
+	detail := qu.ObjToString(tmp["detail"])
+	filetext := qu.ObjToString(tmp["filetext"]) //此处为附件信息···
+	if utf8.RuneCountInString(detail) < 100 {
+		detail = filetext
 	}
-	wg_mgo.Wait()
-	log.Debug("repair ai is over ...", isok)
-}
-
-// 修正buyer等字段
-func TestRepairBuyerInfo(name string) {
-	q := map[string]interface{}{}
-	pool_mgo := make(chan bool, 20)
-	wg_mgo := &sync.WaitGroup{}
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	total, isok := 0, 0
-	it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total)
-		}
-		isok++
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(tmp["_id"])
-			buyer := qu.ObjToString(tmp["buyer"])
-			agency := qu.ObjToString(tmp["agency"])
-			winner := qu.ObjToString(tmp["winner"])
-			update := map[string]interface{}{}
-			if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
-				o_buyer := qu.ObjToString((*ext_ai_record)["buyer"])
-				if buyer == agency && o_buyer != "" {
-					update["buyer"] = o_buyer
-				}
-				o_winner := qu.ObjToString((*ext_ai_record)["winner"])
-				if o_winner != "" && strings.Contains(winner, o_winner) && o_winner != o_winner {
-					update["winner"] = o_winner
-				}
-			}
-			if len(update) > 0 && tmpid != "" {
-				ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
-					"$set": update,
-				})
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
+	detail = ul.ConvertToMarkdown(detail)
+	pkg := prompt.AcquireNewMultiplePackageInfo(detail)
+	//最终结果...
+	for k, v := range pkg {
+		log.Debug(k, "~", v)
 	}
-	wg_mgo.Wait()
-	log.Debug("repair ai is over ...", isok)
-
+	log.Debug("耗时···", time.Now().Unix()-now)
 }
 
-func TestDelUpBuyerAi() {
-	dataArr, _ := ul.SourceMgo.Find("zktest_buyer_0828_new", map[string]interface{}{}, nil, nil)
+// 新分包数据···
+func TestNewPackageInfo1010() {
+	dataArr, _ := ul.SourceMgo.Find("zktest_info_0930", map[string]interface{}{}, nil, nil)
+	log.Debug("数量···", len(dataArr))
 	pool_mgo := make(chan bool, 50)
 	wg_mgo := &sync.WaitGroup{}
 	for k, v := range dataArr {
-		if k%1000 == 0 {
-			log.Debug(k, "~", v["_id"])
+		if k%50 == 0 {
+			log.Debug("cur index ", k)
 		}
 		pool_mgo <- true
 		wg_mgo.Add(1)
@@ -548,197 +67,15 @@ func TestDelUpBuyerAi() {
 				<-pool_mgo
 				wg_mgo.Done()
 			}()
-
-			buyer := qu.ObjToString(v["buyer"])
-			tmpid := ul.BsonTOStringId(v["_id"])
-			data1 := ul.SourceMgo.FindById("result_20220218", tmpid)
-			if len(data1) > 0 {
-				ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
-					"$set": map[string]interface{}{"buyer": buyer},
-				})
-			}
-			data2 := ul.SourceMgo.FindById("result_20220219", tmpid)
-			if len(data2) > 0 {
-				ul.SourceMgo.UpdateById("result_20220219", tmpid, map[string]interface{}{
-					"$set": map[string]interface{}{"buyer": buyer},
-				})
+			new_v := v
+			data := ResolveInfo(v)
+			if len(data) > 0 {
+				new_v["ai_zhipu"] = data
 			}
-
+			ul.SourceMgo.Save("zktest_info_0930_new", new_v)
 		}(v)
 	}
-
 	wg_mgo.Wait()
-	log.Debug("del ai is over ...")
-}
-
-func TestAiBuyerInfo() {
-	//dataArr, _ := ul.SourceMgo.Find("zktest_buyer_info", map[string]interface{}{}, nil, nil)
-
-	q := map[string]interface{}{}
-	pool_mgo := make(chan bool, 50)
-	wg_mgo := &sync.WaitGroup{}
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	total, isok := 0, 0
-	it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
-			log.Debug("cur index ", total)
-		}
-		isok++
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			tmpid := ul.BsonTOStringId(tmp["_id"])
-			if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" {
-				if zp_buyer := prompt.AcquireBuyerInfo(buyer); zp_buyer["实体单位"] != nil {
-					if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
-						ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
-							"$set": map[string]interface{}{"buyer": ns_buyer},
-						})
-					}
-				}
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
-	}
-	wg_mgo.Wait()
-	log.Debug("repair ai is over ...", isok)
-}
-
-func TestExportAiBuyer() {
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	pool_mgo := make(chan bool, 10)
-	wg_mgo := &sync.WaitGroup{}
-	q, total := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$lte": ul.StringTOBsonId("66cd8299b25c3e1deb9488dd"),
-		},
-	}, 0
-	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Select(map[string]interface{}{
-		"ai_zhipu":      1,
-		"ext_ai_record": 1,
-	}).Iter()
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total, "~", tmp["_id"])
-		}
-		pool_mgo <- true
-		wg_mgo.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool_mgo
-				wg_mgo.Done()
-			}()
-			ai_buyer, ext_buyer := "", ""
-			if ai_zhipu := qu.ObjToMap(tmp["ai_zhipu"]); ai_zhipu != nil {
-				ai_buyer = qu.ObjToString((*ai_zhipu)["s_buyer"])
-			}
-			if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
-				ext_buyer = qu.ObjToString((*ext_ai_record)["buyer"])
-			}
-			if ai_buyer != "" {
-				ul.SourceMgo.Save("zktest_buyer_0827", map[string]interface{}{
-					"_id":       tmp["_id"],
-					"ai_buyer":  ai_buyer,
-					"ext_buyer": ext_buyer,
-				})
-			}
-		}(tmp)
-		tmp = make(map[string]interface{})
-	}
-	wg_mgo.Wait()
-	log.Debug("export is over ", total)
-}
-
-func TestIsPackage() {
-	tmpArr := []string{}
-	pkgArr := []int{}
-	ok := 0
-	for k, v := range tmpArr {
-		data := ul.SourceMgo.FindById("ai_41411", v)
-		if len(data) == 0 {
-			data = ul.SourceMgo.FindById("ai_294", v)
-		}
-		detail := qu.ObjToString(data["detail"])
-		ispkg := prompt.AcquireIsPackageInfo(detail)
-		if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) {
-			ok++
-		} else {
-			log.Debug("错误~", v)
-		}
-	}
-	log.Debug("is over ~ ", len(tmpArr)-ok)
-}
-
-func TestPackageInfo() {
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
-	q, total := map[string]interface{}{"ai_zhipu.ispkg": true}, 0
-	it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("-_id").Iter()
-	isok := 0
-	os.Remove("test.xlsx")
-	f := new_xlsx.NewFile()
-	sheet, _ := f.AddSheet("数据信息")
-	row := sheet.AddRow()
-	writeRow(row, []string{"序号", "唯一标识", "站点", "项目名称", "一级分类", "二级分类", "原文链接", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Debug("cur index ", total, "~", isok)
-		}
-		tmpid := ul.BsonTOStringId(tmp["_id"])
-		site := qu.ObjToString(tmp["site"])
-		projectname := qu.ObjToString(tmp["projectname"])
-		toptype := qu.ObjToString(tmp["toptype"])
-		subtype := qu.ObjToString(tmp["subtype"])
-		href := qu.ObjToString(tmp["href"])
-		jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
-		ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
-		if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
-			if s_info := qu.ObjToMap((*s_pkg)["s_pkg"]); s_info != nil && len(*s_info) > 1 {
-				isok++
-				for _, v := range *s_info {
-					if v1 := qu.ObjToMap(v); v1 != nil {
-						row = sheet.AddRow()
-						arr := []string{}
-						arr = append(arr, fmt.Sprintf("%d", isok))
-						arr = append(arr, tmpid)
-						arr = append(arr, site)
-						arr = append(arr, projectname)
-						arr = append(arr, toptype)
-						arr = append(arr, subtype)
-						arr = append(arr, href)
-						arr = append(arr, jyhref)
-						arr = append(arr, qu.ObjToString((*v1)["name"]))
-						arr = append(arr, qu.ObjToString((*v1)["winner"]))
-						bidamount := qu.Float64All((*v1)["bidamount"])
-						if bidamount > 0.0 {
-							arr = append(arr, fmt.Sprintf("%.2f", bidamount))
-						} else {
-							arr = append(arr, "")
-						}
-						writeRow(row, arr)
-					}
-				}
-				if isok > 1000 {
-					break
-				}
-			}
-		}
-		tmp = make(map[string]interface{})
-	}
-	log.Debug("is over ", total, isok)
-
-	if err := f.Save("test.xlsx"); err != nil {
-		fmt.Println("保存xlsx失败:", err)
-	} else {
-		fmt.Println("保存xlsx成功:", err)
-	}
 	log.Debug("is over ...")
 }
 

+ 6 - 0
go.mod

@@ -21,15 +21,19 @@ require (
 	github.com/golang/snappy v0.0.4 // indirect
 	github.com/google/btree v1.0.0 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
+	github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/klauspost/compress v1.17.0 // indirect
 	github.com/kr/pretty v0.3.1 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/magiconair/properties v1.8.7 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/mattn/go-runewidth v0.0.9 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/montanaflynn/stats v0.7.1 // indirect
+	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/olivere/elastic/v7 v7.0.32 // indirect
 	github.com/pelletier/go-toml/v2 v2.1.0 // indirect
 	github.com/peterbourgon/diskv/v3 v3.0.1 // indirect
@@ -37,6 +41,7 @@ require (
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/rogpeppe/fastuuid v1.2.0 // indirect
 	github.com/rogpeppe/go-internal v1.9.0 // indirect
+	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/sagikazarmark/locafero v0.4.0 // indirect
 	github.com/sagikazarmark/slog-shim v0.1.0 // indirect
 	github.com/shabbyrobe/xmlwriter v0.0.0-20200208144257-9fca06d00ffa // indirect
@@ -46,6 +51,7 @@ require (
 	github.com/spf13/cast v1.6.0 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/spf13/viper v1.18.2 // indirect
+	github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
 	github.com/stretchr/testify v1.8.4 // indirect
 	github.com/subosito/gotenv v1.6.0 // indirect
 	github.com/tealeg/xlsx/v3 v3.3.7 // indirect

+ 12 - 0
go.sum

@@ -73,8 +73,12 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
 github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
+github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
+github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
 github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
 github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@@ -93,6 +97,8 @@ github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0V
 github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
+github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
 github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
 github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
@@ -101,6 +107,8 @@ github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
 github.com/nsqio/go-nsq v1.1.0/go.mod h1:vKq36oyeVXgsS5Q8YEO7WghqidAVXQlcFxzQbQTuDEY=
+github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
+github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
 github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
 github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
 github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
@@ -122,6 +130,8 @@ github.com/rogpeppe/fastuuid v1.2.0 h1:Ppwyp6VYCF1nvBTXL3trRso7mXMlRrw9ooo375wvi
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
 github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
+github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ=
 github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
 github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
@@ -143,6 +153,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ=
 github.com/spf13/viper v1.18.2/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk=
+github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
+github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=

+ 12 - 4
main.go

@@ -8,22 +8,30 @@ import (
 )
 
 func init() {
-	ul.IsTool = false
+	ul.IsTool = true
+	ul.IsFull = false
 	if ul.IsTool {
 		log.Debug("工具版本···")
 		ul.InitToolVar()
 	} else {
-		log.Debug("正常版本···")
 		ul.InitGlobalVar()
-		udp.InitProcessVar()
+		if !ul.IsFull {
+			log.Debug("正常版本···")
+			udp.InitProcessVar()
+		} else {
+			log.Debug("全量版本···")
+		}
 	}
 }
 
 func main() {
 	if ul.IsTool {
 		tool.StartToolInfo()
+		return
 	} else {
-		//extract.TestSingleFieldInfo("bidding", "66e39b71b25c3e1debf2ca47")
+		if !ul.IsFull {
+			//extract.TestSinglePackageInfo("bidding", "66e3874db25c3e1debf23dbb")
+		}
 	}
 	lock := make(chan bool)
 	<-lock

+ 122 - 58
prompt/prompt_package.go

@@ -5,6 +5,7 @@ import (
 	"data_ai/clean"
 	"data_ai/ul"
 	"fmt"
+	"github.com/google/uuid"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"strings"
 )
@@ -20,9 +21,10 @@ var pmt_pkg_0 = `
 	公告:` + `%s` + "\n结果JSON:"
 
 var pmt_pkg_1 = `
-你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。
-公告start:
-{` + `%s` + `}
+公告start:
+
+%s
+
 公告end
 
 任务如下:
@@ -34,26 +36,29 @@ var pmt_pkg_1 = `
 
 第二步、请根据第一步拆解分后的"其他信息",精确提取并整理以下信息,形成一个便于统计的JSON:
 注意:入围供应商、入围中标人、中标候选人存在多个时,选择第一名为中标单位,忽略其他排名。请务必确保每一条记录都来源于"公告内容", 对于尚未确定或未在文中明确提供的信息,请在相应value位置填写“无”
+
 {
+"项目名称":"",
 "分包信息":[
 {
 "标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"),
 "标段/包号":(标包编号,可以来自标段名称,如果不存在写"无",比如:一标段、二标段、包一、I包、I标段、标包一、标包编号等,不可以填写"项目编号"或"标的编号"),
 "中标单位":(中标单位名称,要求:中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
 "中标金额":(中标金额数值及单位,要求:不能使用预算金额。多个金额时请进行计算,非单价,如果是单价,则等于单价*数量。),
-"预算金额":(预算金额数值及单位,要求:不可以用中标金额来填充),
-"投标折扣系数":""
+"投标折扣系数":"",
+"包预算金额":(预算金额数值及单位,要求:不可以用中标金额来填充)
 },
 ....
 ],
 }
 
-第三步、审查第步输出的结果,
+第三步、审查第步输出的结果,
 0.检查"标段名称"及"标段/包号"是否真的在原文中出现,
 1.检查"标段名称"及"标段/包号"是否有明确提及,如果只有标的名称明确提及,请删除
-2.检查中标金额是否在原文中明确提及,否则删除第一步的中标金额结果
+2.检查"中标金额"是否在原文中明确提及,否则删除第一步的中标金额结果
 3.检查中标金额与预算金额中的数值与单位是否正确
-4.修正答案
+4.中标单位如果是候选人、评标结果,检查中标单位是否为第一名,否则删除当前信息
+5.修正答案
 
 第四步、根据第一、二、三步结果总结得到最终正确的结果
 
@@ -64,21 +69,26 @@ var pmt_pkg_1 = `
 
 第三步验证:
 
-最终正确的结果:`
+最终正确的结果:
+`
 
 var pmt_pkg_2 = `
-你是一名’招标工程师’,拥有写标书及阅读理解公告的能力,根据要求抽取所需的内容,抽取内容要实事求是,不会无中生有。
-公告start:
-{` + `%s` + `}
+公告start:
+
+%s
+
 公告end
 
 任务如下:
+
 第一步、请根据公告内容,精确提取并整理以下信息,形成一个便于统计的JSON:
-注意:入围供应商、入围中标人、中标候选人存在多个时,选择第一名为中标单位,忽略其他排名。请务必确保每一条记录都来源于"公告内容", 对于尚未确定或未在文中明确提供的信息,请在相应value位置填写“无”
+注意:入围供应商、入围中标人、中标候选人、评标结果存在多个时,选择第一名为中标单位,忽略其他排名。请务必确保每一条记录都来源于"公告内容", 对于尚未确定或未在文中明确提供的信息,请在相应value位置填写“无”
+
 {
+"项目名称":"",
 "分包信息":[
 {
-"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"),
+"标段名称":(标包名称,通常包含地理区域、专业类别、标包内容描述等内容,不可以填写"标的名称"与"项目名称"。类似"涪陵区便捷超充站建设项目一标段和二标段预算服务"含有整体招标含义,应是个整体,不能拆分),
 "标段/包号":(标包编号,可以来自标段名称,如果不存在写"无",比如:一标段、二标段、包一、I包、I标段、标包一、标包编号等,不可以填写"项目编号"或"标的编号"),
 "中标单位":(中标单位名称,要求:中标角色包括但不限于成交供应商(注:当入围供应商/中标人存在多个,选择第一位为中标单位)、中标人、中标方、承包方、中选单位、服务商、第一|1名中标候选人(忽略其他中标候选人)。当流标显示流标,废标时显示废标。联合体投标时,请列出所有单位名称使用","分割),
 "中标金额":(中标金额数值及单位,如果公告中没有明确说明,输出"无",不能使用预算金额。),
@@ -94,7 +104,8 @@ var pmt_pkg_2 = `
 1.检查"标段名称"及"标段/包号"是否有明确提及,如果只有标的名称明确提及,请删除
 2.检查"中标金额"是否在原文中明确提及,否则删除第一步的中标金额结果
 3.检查中标金额与预算金额中的数值与单位是否正确
-4.修正答案
+4.中标单位如果是候选人、评标结果,检查中标单位是否为第一名,否则删除当前信息
+5.修正答案
 
 第三步、根据第一、二步结果总结得到最终正确的结果
 
@@ -103,53 +114,58 @@ var pmt_pkg_2 = `
 
 第二步结果:
 
-最终正确的结果:`
+
+最终正确的结果:
+ `
 
 // 获取是否为分包信息
-func AcquireIsPackageInfo(detail string) bool {
-	content := PromptIsPackageText(detail)
-	zp := ai.PostZhiPuPackageInfo(content)
-	if qu.ObjToString(zp["分包"]) == "是" {
-		return true
-	}
-	return false
-}
+//func AcquireIsPackageInfo(detail string) bool {
+//	content := PromptIsPackageText(detail)
+//	zp := ai.PostZhiPuPackageInfo(content)
+//	if qu.ObjToString(zp["分包"]) == "是" {
+//		return true
+//	}
+//	return false
+//}
 
 // 获取标讯多包信息
-func AcquireMultiplePackageInfo(detail string) map[string]interface{} {
-	content := PromptMultiplePackageText(detail)
-	zp := ai.PostZhiPuPackageInfo(content)
-	//转格式...
-	ai_pkg := map[string]interface{}{}
-	s_winner, s_bidamount, s_pkg := "", 0.0, map[string]map[string]interface{}{}
-	win_arr, win_temp := []string{}, map[string]string{}
-	pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
-	for k, v := range pkginfo { //
-		key := fmt.Sprintf("%d", k+1)
-		name := qu.ObjToString(v["标段名称"])
-		code := qu.ObjToString(v["标段/包号"])
-		winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
-		bidamount := clean.CleanMoney([]interface{}{v["中标金额"], ""})
-		//分包信息结构
-		s_pkg[key] = map[string]interface{}{
-			"name":      name,
-			"code":      code,
-			"winner":    winner,
-			"bidamount": bidamount,
-		}
-		//去重计算单位与总金额
-		s_bidamount += bidamount
-		if win_temp[winner] == "" && winner != "" {
-			win_arr = append(win_arr, winner)
-			win_temp[winner] = winner
-		}
-	}
-	s_winner = strings.Join(win_arr, ",")
-	ai_pkg["s_winner"] = s_winner
-	ai_pkg["s_bidamount"] = s_bidamount
-	ai_pkg["s_pkg"] = s_pkg
-	return ai_pkg
-}
+//func AcquireMultiplePackageInfo(detail string) map[string]interface{} {
+//	content := PromptMultiplePackageText(detail)
+//	zp := ai.PostZhiPuPackageInfo(content)
+//	//转格式...
+//	ai_pkg := map[string]interface{}{}
+//	s_winner, s_bidamount, s_pkg := "", 0.0, map[string]map[string]interface{}{}
+//	win_arr, win_temp := []string{}, map[string]string{}
+//	pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
+//	for k, v := range pkginfo { //
+//		key := fmt.Sprintf("%d", k+1)
+//		name := qu.ObjToString(v["标段名称"])
+//		code := qu.ObjToString(v["标段/包号"])
+//		winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
+//		bidamount := clean.CleanMoney([]interface{}{v["中标金额"], ""})
+//		if bidamount > 1000000000.0 {
+//			bidamount = 0.0
+//		}
+//		//分包信息结构
+//		s_pkg[key] = map[string]interface{}{
+//			"name":      name,
+//			"code":      code,
+//			"winner":    winner,
+//			"bidamount": bidamount,
+//		}
+//		//去重计算单位与总金额
+//		s_bidamount += bidamount
+//		if win_temp[winner] == "" && winner != "" {
+//			win_arr = append(win_arr, winner)
+//			win_temp[winner] = winner
+//		}
+//	}
+//	s_winner = strings.Join(win_arr, ",")
+//	ai_pkg["s_winner"] = s_winner
+//	ai_pkg["s_bidamount"] = s_bidamount
+//	ai_pkg["s_pkg"] = s_pkg
+//	return ai_pkg
+//}
 
 // 分包判断-提问词
 func PromptIsPackageText(detail string) string {
@@ -178,3 +194,51 @@ func PromptMultiplePackageText(detail string) string {
 	}
 	return content
 }
+
+// AcquireNewMultiplePackageInfo asks the model to extract package data from detail and reshapes the reply into the new "com_package" list plus summed totals.
+func AcquireNewMultiplePackageInfo(detail string) map[string]interface{} {
+	content := PromptMultiplePackageText(detail)
+	zp := ai.PostZhiPuPackageInfo(content)
+	// Convert the model reply into the storage format.
+	ai_pkg := map[string]interface{}{}
+	s_winner, s_bidamount, s_budget, com_package := "", 0.0, 0.0, []map[string]interface{}{}
+	win_arr, win_temp := []string{}, map[string]string{}
+	pkginfo := ul.IsMarkInterfaceMap(zp["分包信息"])
+	for _, v := range pkginfo { // one iteration per package entry in the reply
+		name := qu.ObjToString(v["标段名称"])
+		code := qu.ObjToString(v["标段/包号"])
+		winner := clean.CleanWinner(qu.ObjToString(v["中标单位"]))
+		bidamount := clean.CleanMoney([]interface{}{v["中标金额"], ""})
+		budget := clean.CleanMoney([]interface{}{v["包预算金额"], ""})
+		if bidamount > 1000000000.0 { // amounts above 1e9 are reset to 0 (presumably treated as extraction errors — confirm)
+			bidamount = 0.0
+		}
+		if budget > 1000000000.0 {
+			budget = 0.0
+		}
+		// Package record; package_id is a freshly generated UUID with the hyphens removed.
+		package_id := uuid.New().String()
+		package_id = strings.ReplaceAll(package_id, "-", "")
+		com_package = append(com_package, map[string]interface{}{
+			"package_id": package_id,
+			"name":       name,
+			"code":       code,
+			"budget":     budget,
+			"winner":     winner,
+			"bidamount":  bidamount,
+		})
+		// Accumulate the totals and collect each non-empty winner once, in first-seen order.
+		s_bidamount += bidamount
+		s_budget += budget
+		if win_temp[winner] == "" && winner != "" {
+			win_arr = append(win_arr, winner)
+			win_temp[winner] = winner
+		}
+	}
+	s_winner = strings.Join(win_arr, ",")
+	ai_pkg["s_winner"] = s_winner
+	ai_pkg["s_bidamount"] = s_bidamount
+	ai_pkg["s_budget"] = s_budget
+	ai_pkg["com_package"] = com_package
+	return ai_pkg
+}

+ 19 - 19
tool.json

@@ -1,28 +1,28 @@
 {
   "reading": 500,
-  "ext_name": "20240909Hs_dz",
+  "ext_name": "zxl_20240926",
   "s_mgo": {
-    "local": false,
-    "l_addr": "192.168.3.167:27080",
-    "addr": "192.168.3.167:27080",
-    "dbname" : "jyqyfw_historyData2024_1",
-    "username": "",
-    "password": ""
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "127.0.0.1:12005",
+    "dbname" : "qfw_ai",
+    "username": "zhengkun",
+    "password": "zk@123123"
   },
   "b_mgo": {
-    "local": false,
-    "l_addr": "192.168.3.167:27080",
-    "addr": "192.168.3.167:27080",
-    "dbname" : "zhengkun",
-    "username": "",
-    "password": ""
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "127.0.0.1:12005",
+    "dbname" : "qfw_ai",
+    "username": "zhengkun",
+    "password": "zk@123123"
   },
   "qy_mgo": {
-    "local": false,
-    "l_addr": "192.168.3.167:27080",
-    "addr": "192.168.3.167:27080",
-    "dbname" : "zhengkun",
-    "username": "",
-    "password": ""
+    "local": true,
+    "l_addr": "127.0.0.1:12005",
+    "addr": "127.0.0.1:12005",
+    "dbname" : "mixdata",
+    "username": "zhengkun",
+    "password": "zk@123123"
   }
 }

+ 4 - 287
tool/tool.go

@@ -4,14 +4,9 @@ import (
 	"data_ai/extract"
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"strings"
 	"sync"
 )
 
-var unset_check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
-
 // 工具修正程序
 func StartToolInfo() {
 	log.Debug("工具开始大模型修正数据······")
@@ -39,8 +34,9 @@ func StartToolInfo() {
 			if len(data) > 0 || u_id == "" {
 				tmp["ai_zhipu"] = data
 				update_check := make(map[string]interface{}, 0)
-				is_unset := getCheckDataAI(tmp, &update_check)
-				//最终计算是否清洗
+				is_unset := ul.ChooseCheckDataAI(tmp, update_check)
+				update_check["ai_zhipu"] = data
+				//清洗与记录
 				if len(update_check) > 0 {
 					//$set
 					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
@@ -50,7 +46,7 @@ func StartToolInfo() {
 				if is_unset {
 					//"$unset"
 					ul.SourceMgo.UpdateById(ul.Ext_Name, u_id, map[string]interface{}{
-						"$unset": unset_check,
+						"$unset": ul.Unset_Check,
 					})
 				}
 			}
@@ -60,282 +56,3 @@ func StartToolInfo() {
 	wg_mgo.Wait()
 	log.Debug("ai is over ...")
 }
-
-// 大模型与抽取数据合并计算
-func getCheckDataAI(tmp map[string]interface{}, update_check *map[string]interface{}) bool {
-	if tmp["ai_zhipu"] == nil {
-		return false
-	}
-	//记录抽取原值
-	//记录抽取原值
-	ext_ai_record := map[string]interface{}{}
-	ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
-	//分类字段···
-	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
-	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
-	if ns_toptype != s_toptype || ns_subtype != s_subtype {
-		ext_ai_record["s_toptype"] = ns_toptype
-		ext_ai_record["s_subtype"] = ns_subtype
-	}
-	//赋值···
-	s_toptype, s_subtype = ns_toptype, ns_subtype
-	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
-		s_toptype = qu.ObjToString(tmp["toptype"])
-		s_subtype = qu.ObjToString(tmp["subtype"])
-	} else {
-		if s_toptype != "" && s_subtype != "" {
-			(*update_check)["toptype"] = s_toptype
-			(*update_check)["subtype"] = s_subtype
-			ext_ai_record["toptype"] = tmp["toptype"]
-			ext_ai_record["subtype"] = tmp["subtype"]
-		} else {
-			s_toptype = qu.ObjToString(tmp["toptype"])
-			s_subtype = qu.ObjToString(tmp["subtype"])
-		}
-	}
-
-	//基础字段···
-	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
-		(*update_check)["buyer"] = s_buyer
-		ext_ai_record["buyer"] = tmp["buyer"]
-		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer {
-			delete((*update_check), "buyer")
-			delete(ext_ai_record, "buyer")
-		}
-	}
-	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
-		(*update_check)["projectname"] = s_projectname
-		ext_ai_record["projectname"] = tmp["projectname"]
-	}
-	if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
-		(*update_check)["projectcode"] = s_projectcode
-		ext_ai_record["projectcode"] = tmp["projectcode"]
-	}
-	if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
-		(*update_check)["budget"] = s_budget
-		ext_ai_record["budget"] = tmp["budget"]
-	}
-	//地域字段···
-	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
-	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
-	if s_area != "" && s_area != "全国" {
-		(*update_check)["area"] = s_area
-		if s_city != "" {
-			(*update_check)["city"] = s_city
-			if o_district != "" {
-				//判断抽取的区县是否合理···
-				isT := false
-				if ds := ul.S_DistrictDict[o_district]; ds != nil {
-					for _, v := range ds {
-						if v.C_Name == s_city && v.P_Name == s_area {
-							isT = true
-							break
-						}
-					}
-				}
-				if !isT {
-					(*update_check)["district"] = ""
-				}
-			}
-		} else {
-			if o_area != s_area {
-				(*update_check)["city"] = ""
-				(*update_check)["district"] = ""
-			}
-		}
-		ext_ai_record["area"] = tmp["area"]
-		ext_ai_record["city"] = tmp["city"]
-		ext_ai_record["district"] = tmp["district"]
-	}
-
-	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
-		//先用外围字段替换
-		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
-			(*update_check)["bidamount"] = s_bidamount
-			ext_ai_record["bidamount"] = tmp["bidamount"]
-		}
-		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
-			(*update_check)["s_winner"] = s_winner
-			ext_ai_record["s_winner"] = tmp["s_winner"]
-
-			(*update_check)["winner"] = s_winner
-			ext_ai_record["winner"] = tmp["winner"]
-			//对于winner来说...规则值有包含关系,采用规则值
-			if winner := qu.ObjToString(tmp["winner"]); winner != "" {
-				if strings.Contains(s_winner, winner) {
-					delete((*update_check), "winner")
-					delete(ext_ai_record, "winner")
-				}
-			}
-		}
-		isRulePkg := false
-		if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
-			if !staffInfo(pkg) {
-				isRulePkg = true
-			}
-		}
-		if isRulePkg { //优先采用大模型分包-值替换
-			if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
-				if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
-					if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
-						(*update_check)["s_winner"] = p_winner
-						(*update_check)["winner"] = p_winner
-						ext_ai_record["s_winner"] = tmp["s_winner"]
-						ext_ai_record["winner"] = tmp["winner"]
-					}
-					if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
-						(*update_check)["bidamount"] = p_bidamount
-						ext_ai_record["bidamount"] = tmp["bidamount"]
-					}
-					if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
-						(*update_check)["package"] = s_package
-						ext_ai_record["package"] = tmp["package"]
-					}
-				}
-			}
-		}
-	} else if s_subtype == "单一" {
-		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
-			(*update_check)["bidamount"] = s_bidamount
-			ext_ai_record["bidamount"] = tmp["bidamount"]
-		}
-		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
-			(*update_check)["s_winner"] = s_winner
-			(*update_check)["winner"] = s_winner
-			ext_ai_record["s_winner"] = tmp["s_winner"]
-			ext_ai_record["winner"] = tmp["winner"]
-		}
-	} else {
-		(*update_check)["ext_ai_record"] = ext_ai_record
-		for k, _ := range unset_check {
-			if tmp[k] != nil {
-				return true
-			}
-		}
-	}
-	(*update_check)["ext_ai_record"] = ext_ai_record
-
-	//根据识别金额的进行选取与修正
-	if r_budget := qu.Float64All((*update_check)["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
-		if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
-			if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
-				(*update_check)["budget"] = filterAmount(r_budget, o_budget)
-			}
-		}
-	}
-	if r_bidamount := qu.Float64All((*update_check)["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
-		if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
-			if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
-				(*update_check)["bidamount"] = filterAmount(r_bidamount, o_bidamount)
-			}
-		}
-	}
-
-	return false
-}
-
-// 筛选金额
-func filterAmount(f1 float64, f2 float64) float64 {
-	//选取一个合适的金额 ...
-	if f1 > f2 {
-		if f1 > 100000000.0 {
-			return f2
-		} else {
-			return f1
-		}
-	} else if f1 < f2 {
-		if f2 > 100000000.0 {
-			return f1
-		} else {
-			return f2
-		}
-	} else {
-		return f1
-	}
-}
-
-// 核算分包信息
-func staffInfo(pkg map[string]interface{}) bool {
-	//鉴定中标单位
-	is_w := 0
-	for _, v := range pkg {
-		info := *qu.ObjToMap(v)
-		if winner := qu.ObjToString(info["winner"]); winner != "" {
-			is_w++
-		}
-	}
-	//鉴定中标金额
-	is_b := 0
-	for _, v := range pkg {
-		info := *qu.ObjToMap(v)
-		if bidamount := qu.Float64All(info["bidamount"]); bidamount > 0.0 {
-			is_b++
-		}
-	}
-	if is_w != len(pkg) && is_w > 0 {
-		return false
-	}
-	if is_b != len(pkg) && is_b > 0 {
-		return false
-	}
-	if is_w == 0 || is_b == 0 {
-		return false
-	}
-	return true
-}
-
-func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
-	toptype_rule := qu.ObjToString(data["toptype"])
-	subtype_rule := qu.ObjToString(data["subtype"])
-	//1、结果类 中标和成交错误校正
-	s_winner := qu.ObjToString(data["s_winner"])
-	winnerorder := IsMarkInterfaceMap(data["winnerorder"])
-	if toptype_ai == "结果" && toptype_rule == "结果" {
-		if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { //规则、大模型都错
-			return "结果", "中标"
-		}
-		if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
-			if len(winnerorder) > 0 { //有中标候选人->中标
-				return toptype_ai, "中标"
-			}
-			if s_winner != "" || data["bidamount"] != nil {
-				return toptype_ai, "成交"
-			}
-		}
-	}
-	//2、招标、结果错误校正
-	if toptype_ai != "结果" && toptype_rule == "结果" {
-		//return toptype_rule,subtype_rule//默认规则为准
-		if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
-			return toptype_rule, subtype_rule
-		} else {
-			return toptype_ai, subtype_ai
-		}
-	} else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { //结果-变更
-		//return toptype_rule,subtype_rule//默认规则为准
-		if len(winnerorder) > 0 { //有中标候选人->中标
-			return toptype_ai, "中标" //这里subtype返回"中标",避免ai识别错误
-		} else if s_winner != "" || data["bidamount"] != nil {
-			return toptype_ai, "成交" //这里subtype返回"成交",避免ai识别错误
-		} else {
-			return toptype_ai, subtype_ai
-		}
-	}
-	return toptype_ai, subtype_ai
-}
-
-func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
-	p_list := []map[string]interface{}{}
-	if list_3, ok_3 := t.([]map[string]interface{}); ok_3 {
-		p_list = list_3
-		return p_list
-	}
-	if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
-		p_list = qu.ObjArrToMapArr(yl_list_1)
-	} else {
-		if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
-			p_list = qu.ObjArrToMapArr(yl_list_2)
-		}
-	}
-	return p_list
-}

+ 1 - 1
ul/attr.go

@@ -15,7 +15,7 @@ var (
 	MaxByte            = 8000
 	MaxUdp             = 10000
 	RulesPname         = []*ExtReg{}
-	IsTool             bool
+	IsTool, IsFull     bool
 	Reading            int
 	FlashModel         string
 )

+ 363 - 0
ul/global.go

@@ -0,0 +1,363 @@
+package ul
+
+import (
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"strings"
+)
+// Unset_Check names the result-only fields; ChooseCheckDataAI tests for them and tool.StartToolInfo passes this map as the "$unset" document.
+var Unset_Check = map[string]interface{}{"winner": 1, "s_winner": 1, "bidamount": 1, "winnerorder": 1}
+
+// ChooseTheBestCoreField copies the usable core fields of ai_zhipu into update_info and stores the tmp values they replace in ext_ai_record; s_toptype is not read here.
+func ChooseTheBestCoreField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+	// Basic fields: buyer, project name, project code, budget.
+	if s_buyer := qu.ObjToString(ai_zhipu["s_buyer"]); s_buyer != "" {
+		update_info["buyer"] = s_buyer
+		ext_ai_record["buyer"] = tmp["buyer"]
+		if agency := qu.ObjToString(tmp["agency"]); agency != "" && agency == s_buyer { // a buyer equal to the agency is not applied
+			delete(update_info, "buyer")
+			delete(ext_ai_record, "buyer")
+		}
+	}
+	if s_projectname := qu.ObjToString(ai_zhipu["s_projectname"]); s_projectname != "" {
+		update_info["projectname"] = s_projectname
+		ext_ai_record["projectname"] = tmp["projectname"]
+	}
+	if s_projectcode := qu.ObjToString(ai_zhipu["s_projectcode"]); s_projectcode != "" {
+		update_info["projectcode"] = s_projectcode
+		ext_ai_record["projectcode"] = tmp["projectcode"]
+	}
+	if s_budget := qu.Float64All(ai_zhipu["s_budget"]); s_budget > 0.0 && s_budget < 1000000000.0 {
+		update_info["budget"] = s_budget
+		ext_ai_record["budget"] = tmp["budget"]
+	}
+	// Region fields: applied only when the model gives an area other than "全国".
+	o_area, o_district := qu.ObjToString(tmp["area"]), qu.ObjToString(tmp["district"])
+	s_area, s_city := qu.ObjToString(ai_zhipu["s_area"]), qu.ObjToString(ai_zhipu["s_city"])
+	if s_area != "" && s_area != "全国" {
+		update_info["area"] = s_area
+		if s_city != "" {
+			update_info["city"] = s_city
+			if o_district != "" {
+				// Keep the existing district only if S_DistrictDict lists it under the model's city and area.
+				isT := false
+				if ds := S_DistrictDict[o_district]; ds != nil {
+					for _, v := range ds {
+						if v.C_Name == s_city && v.P_Name == s_area {
+							isT = true
+							break
+						}
+					}
+				}
+				if !isT {
+					update_info["district"] = ""
+				}
+			}
+		} else {
+			if o_area != s_area { // area changed and no city given: clear the finer-grained fields
+				update_info["city"] = ""
+				update_info["district"] = ""
+			}
+		}
+		ext_ai_record["area"] = tmp["area"]
+		ext_ai_record["city"] = tmp["city"]
+		ext_ai_record["district"] = tmp["district"]
+	}
+	// Result-type subtypes: take bid amount and winner from the model's top-level fields first.
+	if s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" || s_subtype == "单一" {
+		if s_bidamount := qu.Float64All(ai_zhipu["s_bidamount"]); s_bidamount > 0.0 && s_bidamount < 1000000000.0 {
+			update_info["bidamount"] = s_bidamount
+			ext_ai_record["bidamount"] = tmp["bidamount"]
+		}
+		if s_winner := qu.ObjToString(ai_zhipu["s_winner"]); s_winner != "" {
+			update_info["s_winner"] = s_winner
+			ext_ai_record["s_winner"] = tmp["s_winner"]
+			update_info["winner"] = s_winner
+			ext_ai_record["winner"] = tmp["winner"]
+			// If the model's winner string contains the rule-extracted winner, the rule value is kept.
+			if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+				if strings.Contains(s_winner, winner) {
+					delete(update_info, "winner")
+					delete(ext_ai_record, "winner")
+				}
+			}
+		}
+		// Legacy package logic, no longer used.
+		//isRulePkg := false
+		//if pkg := *qu.ObjToMap(tmp["package"]); len(pkg) > 1 && (s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同") {
+		//	if !staffInfo(pkg) {
+		//		isRulePkg = true
+		//	}
+		//}
+		//if isRulePkg { // prefer the model's package values
+		//	if ispkg, ok := ai_zhipu["ispkg"].(bool); ispkg && ok {
+		//		if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
+		//			if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
+		//				(*update_check)["s_winner"] = p_winner
+		//				(*update_check)["winner"] = p_winner
+		//				ext_ai_record["s_winner"] = tmp["s_winner"]
+		//				ext_ai_record["winner"] = tmp["winner"]
+		//			}
+		//			if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
+		//				(*update_check)["bidamount"] = p_bidamount
+		//				ext_ai_record["bidamount"] = tmp["bidamount"]
+		//			}
+		//			if s_package := qu.ObjToMap((*s_pkg)["s_pkg"]); s_package != nil {
+		//				(*update_check)["package"] = s_package
+		//				ext_ai_record["package"] = tmp["package"]
+		//			}
+		//		}
+		//	}
+		//}
+	} else {
+		// Other subtypes: bid amount and winner are not taken from the model.
+	}
+}
+
+// ChooseTheBestClassField returns the final (toptype, subtype): the model's classification corrected by CheckClassByOtherFileds, or the rule value from tmp when that is 拟建/产权 or the model value is empty.
+func ChooseTheBestClassField(ai_zhipu map[string]interface{}, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) (string, string) {
+	// Model classification, cross-checked against the other extracted fields.
+	s_toptype, s_subtype := qu.ObjToString(ai_zhipu["s_toptype"]), qu.ObjToString(ai_zhipu["s_subtype"])
+	ns_toptype, ns_subtype := CheckClassByOtherFileds(s_toptype, s_subtype, tmp)
+	if ns_toptype != s_toptype || ns_subtype != s_subtype {
+		ext_ai_record["s_toptype"] = ns_toptype
+		ext_ai_record["s_subtype"] = ns_subtype
+	}
+	// Continue with the corrected classification.
+	s_toptype, s_subtype = ns_toptype, ns_subtype
+	if qu.ObjToString(tmp["toptype"]) == "拟建" || qu.ObjToString(tmp["toptype"]) == "产权" {
+		s_toptype = qu.ObjToString(tmp["toptype"])
+		s_subtype = qu.ObjToString(tmp["subtype"])
+	} else {
+		if s_toptype != "" && s_subtype != "" {
+			update_info["toptype"] = s_toptype
+			update_info["subtype"] = s_subtype
+			ext_ai_record["toptype"] = tmp["toptype"]
+			ext_ai_record["subtype"] = tmp["subtype"]
+		} else {
+			s_toptype = qu.ObjToString(tmp["toptype"])
+			s_subtype = qu.ObjToString(tmp["subtype"])
+		}
+	}
+	return s_toptype, s_subtype
+}
+
+// ChooseTheBestPackageField applies the package result in ai_zhipu["s_pkg"]: with several packages its totals overwrite update_info, with exactly one they only fill fields not yet set.
+func ChooseTheBestPackageField(ai_zhipu map[string]interface{}, s_toptype string, s_subtype string, tmp map[string]interface{}, update_info map[string]interface{}, ext_ai_record map[string]interface{}) {
+	// New package structure ("com_package"); the model's package split is trusted by default.
+	if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
+		com_package := IsMarkInterfaceMap((*s_pkg)["com_package"])
+		// Decide whether the package totals replace the top-level values chosen earlier.
+		if len(com_package) > 1 { // several packages: package totals overwrite
+			new_com_package := staffAiPackageInfo(com_package, s_toptype, s_subtype)
+			update_info["com_package"] = new_com_package
+
+			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 {
+				update_info["budget"] = p_budget
+				ext_ai_record["budget"] = tmp["budget"]
+			}
+			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" {
+					update_info["s_winner"] = p_winner
+					update_info["winner"] = p_winner
+					ext_ai_record["s_winner"] = tmp["s_winner"]
+					ext_ai_record["winner"] = tmp["winner"]
+
+					// If the package winner string contains the rule-extracted winner, the rule value is kept.
+					if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+						if strings.Contains(p_winner, winner) {
+							delete(update_info, "winner")
+							delete(ext_ai_record, "winner")
+						}
+					}
+				}
+				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 {
+					update_info["bidamount"] = p_bidamount
+					ext_ai_record["bidamount"] = tmp["bidamount"]
+				}
+			}
+		} else if len(com_package) == 1 { // single package: only fill what update_info lacks
+			if p_budget := qu.Float64All((*s_pkg)["s_budget"]); p_budget > 0.0 && update_info["budget"] == nil {
+				update_info["budget"] = p_budget
+				ext_ai_record["budget"] = tmp["budget"]
+			}
+			if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+				if p_winner := qu.ObjToString((*s_pkg)["s_winner"]); p_winner != "" && update_info["s_winner"] == nil {
+					update_info["s_winner"] = p_winner
+					update_info["winner"] = p_winner
+					ext_ai_record["s_winner"] = tmp["s_winner"]
+					ext_ai_record["winner"] = tmp["winner"]
+
+					// If the package winner string contains the rule-extracted winner, the rule value is kept.
+					if winner := qu.ObjToString(tmp["winner"]); winner != "" {
+						if strings.Contains(p_winner, winner) {
+							delete(update_info, "winner")
+							delete(ext_ai_record, "winner")
+						}
+					}
+				}
+				if p_bidamount := qu.Float64All((*s_pkg)["s_bidamount"]); p_bidamount > 0.0 && update_info["bidamount"] == nil {
+					update_info["bidamount"] = p_bidamount
+					ext_ai_record["bidamount"] = tmp["bidamount"]
+				}
+			}
+		} else {
+			// No packages extracted: nothing to apply.
+		}
+	}
+}
+
+// ChooseTheBestAmountField re-checks the budget and bidamount in update_info against the values in tmp; when the two differ by exactly a factor of 10000, filterAmount picks which one to keep.
+func ChooseTheBestAmountField(tmp map[string]interface{}, update_info map[string]interface{}) {
+	if r_budget := qu.Float64All(update_info["budget"]); r_budget > 0.0 && r_budget < 1000000000.0 {
+		if o_budget := qu.Float64All(tmp["budget"]); o_budget > 0.0 {
+			if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 { // presumably a 万元 vs 元 unit mismatch — confirm
+				update_info["budget"] = filterAmount(r_budget, o_budget)
+			}
+		}
+	}
+	if r_bidamount := qu.Float64All(update_info["bidamount"]); r_bidamount > 0.0 && r_bidamount < 1000000000.0 {
+		if o_bidamount := qu.Float64All(tmp["bidamount"]); o_bidamount > 0.0 {
+			if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
+				update_info["bidamount"] = filterAmount(r_bidamount, o_bidamount)
+			}
+		}
+	}
+}
+
+// ChooseCheckDataAI merges the model result tmp["ai_zhipu"] into update_info and reports whether the Unset_Check fields should be removed from the record.
+func ChooseCheckDataAI(tmp map[string]interface{}, update_info map[string]interface{}) bool {
+	if tmp["ai_zhipu"] == nil {
+		return false
+	}
+	// qu.ObjToMap can hand back a nil pointer (sibling code nil-checks it); skip the merge instead of panicking on a dereference.
+	zp := qu.ObjToMap(tmp["ai_zhipu"])
+	if zp == nil {
+		return false
+	}
+	ai_zhipu := *zp
+	ext_ai_record := map[string]interface{}{} // original values of every field that gets replaced
+	// Classification first: the chosen subtype decides which fields the later steps may touch.
+	s_toptype, s_subtype := ChooseTheBestClassField(ai_zhipu, tmp, update_info, ext_ai_record)
+	// Core fields, then package-level values, which may override or fill them.
+	ChooseTheBestCoreField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
+	ChooseTheBestPackageField(ai_zhipu, s_toptype, s_subtype, tmp, update_info, ext_ai_record)
+	// Final amount correction against the rule-extracted values.
+	ChooseTheBestAmountField(tmp, update_info)
+	update_info["ext_ai_record"] = ext_ai_record
+	// Result-type notices keep their result fields.
+	switch s_subtype {
+	case "单一", "中标", "成交", "合同":
+		return false
+	}
+	// Any other subtype: ask the caller to unset result-only fields if the record still carries one.
+	for k := range Unset_Check {
+		if tmp[k] != nil {
+			return true
+		}
+	}
+	return false
+}
+
+// filterAmount returns the larger of f1 and f2 unless that larger value exceeds 100000000.0, in which case the smaller one is returned; equal inputs return f1.
+func filterAmount(f1 float64, f2 float64) float64 {
+	// Pick the more plausible of the two amounts.
+	if f1 > f2 {
+		if f1 > 100000000.0 {
+			return f2
+		} else {
+			return f1
+		}
+	} else if f1 < f2 {
+		if f2 > 100000000.0 {
+			return f1
+		} else {
+			return f2
+		}
+	} else {
+		return f1
+	}
+}
+
+// staffInfo reports whether pkg is complete: true only when pkg is non-empty and every entry has a non-empty "winner" and a "bidamount" greater than 0.
+func staffInfo(pkg map[string]interface{}) bool {
+	// Count entries with a winner. NOTE(review): *qu.ObjToMap(v) would panic if the helper returns nil for an entry — confirm entries are always maps.
+	is_w := 0
+	for _, v := range pkg {
+		info := *qu.ObjToMap(v)
+		if winner := qu.ObjToString(info["winner"]); winner != "" {
+			is_w++
+		}
+	}
+	// Count entries with a positive bid amount.
+	is_b := 0
+	for _, v := range pkg {
+		info := *qu.ObjToMap(v)
+		if bidamount := qu.Float64All(info["bidamount"]); bidamount > 0.0 {
+			is_b++
+		}
+	}
+	if is_w != len(pkg) && is_w > 0 {
+		return false
+	}
+	if is_b != len(pkg) && is_b > 0 {
+		return false
+	}
+	if is_w == 0 || is_b == 0 {
+		return false
+	}
+	return true
+}
+
+// staffAiPackageInfo returns com_package as-is for result-type subtypes; otherwise it deletes "winner" and "bidamount" from every package map in place and returns them in a new slice. s_toptype is not read.
+func staffAiPackageInfo(com_package []map[string]interface{}, s_toptype string, s_subtype string) []map[string]interface{} {
+	if s_subtype == "单一" || s_subtype == "中标" || s_subtype == "成交" || s_subtype == "合同" {
+		return com_package
+	}
+	new_com_package := []map[string]interface{}{}
+	for _, v := range com_package {
+		delete(v, "winner")
+		delete(v, "bidamount")
+		new_com_package = append(new_com_package, v)
+	}
+	return new_com_package
+}
+// CheckClassByOtherFileds reconciles the model's (toptype_ai, subtype_ai) with the rule classification in data, using winnerorder, s_winner and bidamount as evidence, and returns the corrected pair.
+func CheckClassByOtherFileds(toptype_ai, subtype_ai string, data map[string]interface{}) (string, string) {
+	toptype_rule := qu.ObjToString(data["toptype"])
+	subtype_rule := qu.ObjToString(data["subtype"])
+	// 1. Both sides say "结果": correct 中标/成交 mix-ups.
+	s_winner := qu.ObjToString(data["s_winner"])
+	winnerorder := IsMarkInterfaceMap(data["winnerorder"])
+	if toptype_ai == "结果" && toptype_rule == "结果" {
+		if subtype_ai == "成交" && subtype_rule == "成交" && len(winnerorder) > 0 { // rule and model are both wrong
+			return "结果", "中标"
+		}
+		if ((subtype_ai == "中标" || subtype_ai == "其它") && subtype_rule == "成交") || ((subtype_ai == "成交" || subtype_ai == "其它") && subtype_rule == "中标") {
+			if len(winnerorder) > 0 { // candidate ranking present -> 中标
+				return toptype_ai, "中标"
+			}
+			if s_winner != "" || data["bidamount"] != nil {
+				return toptype_ai, "成交"
+			}
+		}
+	}
+	// 2. The two sides disagree on whether this is a "结果" notice.
+	if toptype_ai != "结果" && toptype_rule == "结果" {
+		//return toptype_rule,subtype_rule // (disabled) default to the rule result
+		if len(winnerorder) > 0 || s_winner != "" || data["bidamount"] != nil {
+			return toptype_rule, subtype_rule
+		} else {
+			return toptype_ai, subtype_ai
+		}
+	} else if toptype_ai == "结果" && toptype_rule != "结果" && (subtype_ai == "中标" || subtype_ai == "成交") { // model says 结果, rule does not
+		//return toptype_rule,subtype_rule // (disabled) default to the rule result
+		if len(winnerorder) > 0 { // candidate ranking present -> 中标
+			return toptype_ai, "中标" // force subtype "中标" to guard against model misclassification
+		} else if s_winner != "" || data["bidamount"] != nil {
+			return toptype_ai, "成交" // force subtype "成交" to guard against model misclassification
+		} else {
+			return toptype_ai, subtype_ai
+		}
+	}
+	return toptype_ai, subtype_ai
+}

+ 22 - 0
ul/md.go

@@ -0,0 +1,22 @@
+package ul
+
+import (
+	"github.com/jaytaylor/html2text"
+	"regexp"
+)
+
+var md_reg1 = regexp.MustCompile("([ *]+)")    // a run of spaces and/or asterisks
+var md_reg2 = regexp.MustCompile("([\\n]+)")   // a run of newlines
+var md_reg3 = regexp.MustCompile("([-]{3,})") // three or more consecutive hyphens
+// ConvertToMarkdown converts html with html2text.FromString (PrettyTables enabled) and collapses the runs matched by md_reg1..3; if the conversion fails, the input is returned unchanged.
+func ConvertToMarkdown(html string) string {
+	markdown, err := html2text.FromString(html, html2text.Options{PrettyTables: true})
+	if err != nil {
+		return html
+	}
+	markdown = md_reg1.ReplaceAllString(markdown, " ")
+	markdown = md_reg2.ReplaceAllString(markdown, "\n")
+	markdown = md_reg3.ReplaceAllString(markdown, "---")
+
+	return markdown
+}