Browse Source

1、取消表格类识别
2、调试新分包拆分单位

zhengkun 6 months ago
parent
commit
582d6a4484
14 changed files with 183 additions and 86 deletions
  1. 2 0
      ai/ai_tongyi.go
  2. 7 5
      ai/ai_zhipu.go
  3. 7 2
      clean/c_all.go
  4. 3 3
      extract/extension.go
  5. 13 6
      extract/extract.go
  6. 1 1
      extract/test.go
  7. 11 7
      go.mod
  8. 23 0
      go.sum
  9. 70 33
      main.go
  10. 24 8
      prompt/prompt_package.go
  11. 16 16
      tool.json
  12. 2 2
      udp/udprocess.go
  13. 2 2
      ul/attr.go
  14. 2 1
      ul/init.go

+ 2 - 0
ai/ai_tongyi.go

@@ -9,6 +9,7 @@ import (
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
 	"strings"
+	"time"
 )
 
 // 阿里通义
@@ -40,6 +41,7 @@ func PostTongYiAI(content string) map[string]interface{} {
 	req.Header.Set("Authorization", "Bearer sk-5db8cfa345754e329c492973b0ecad27")
 	// 发起请求
 	client := &http.Client{}
+	client.Timeout = 180 * time.Second
 	resp, err := client.Do(req)
 	if err != nil {
 		log.Debug("Error: %s", err)

+ 7 - 5
ai/ai_zhipu.go

@@ -12,6 +12,8 @@ import (
 	"time"
 )
 
+var req_retry = 1
+
 // 智谱清言-通用外围
 func PostZhiPuAI(content string) map[string]interface{} {
 	// API的URL
@@ -42,7 +44,7 @@ func PostZhiPuAI(content string) map[string]interface{} {
 	req.Header.Set("Authorization", "Bearer 3d84d30b7ab4c94dbf71853cb7e44719.hLLS4CA2MqVQs6kR")
 	// 发起请求 14543f0d69d6987c8782fd846e164f26.DXaoS1axLaMP892a
 	client := &http.Client{}
-	client.Timeout = 180 * time.Second
+	client.Timeout = 120 * time.Second
 	resp, err := client.Do(req)
 	if err != nil {
 		return map[string]interface{}{}
@@ -202,7 +204,7 @@ func PostPackageZhiPuAI(content string) map[string]interface{} {
 ******************************
 ******************************
 ******************************/
-// 请求质谱数据外围字段...
+// 请求智谱数据外围字段···重试1次···
 func PostZhiPuInfo(content string) map[string]interface{} {
 	zp, ok := map[string]interface{}{}, 0
 	for {
@@ -210,7 +212,7 @@ func PostZhiPuInfo(content string) map[string]interface{} {
 		if zp = PostZhiPuAI(content); len(zp) > 0 {
 			break
 		}
-		if ok >= 2 {
+		if ok >= req_retry {
 			break
 		}
 	}
@@ -225,7 +227,7 @@ func PostZhiPuPackageInfo(content string) map[string]interface{} {
 		if zp = PostPackageZhiPuAI(content); len(zp) > 0 {
 			break
 		}
-		if ok >= 2 {
+		if ok >= req_retry {
 			break
 		}
 	}
@@ -244,7 +246,7 @@ func PostZhiPuClassInfo(content string) (map[string]interface{}, bool) {
 			ok = true
 			break
 		}
-		if times >= 2 {
+		if times >= req_retry {
 			break
 		}
 	}

+ 7 - 2
clean/c_all.go

@@ -12,9 +12,8 @@ var (
 	pcodeReg2 = regexp.MustCompile("([\\*]+)")
 )
 
-func CleanFieldInfo(zhipu map[string]interface{}, fns []string) map[string]interface{} {
+func CleanFieldInfo(zhipu map[string]interface{}, fns []string, isTable bool) map[string]interface{} {
 	data := map[string]interface{}{}
-
 	//重点字段
 	if s_area, s_city := CleanRegion(qu.ObjToString(zhipu["省份"]), qu.ObjToString(zhipu["城市"])); s_area != "" || s_city != "" {
 		data["s_area"] = s_area
@@ -116,5 +115,11 @@ func CleanFieldInfo(zhipu map[string]interface{}, fns []string) map[string]inter
 		}
 	}
 
+	//表格类数据-分包金额以及外围字段不准确
+	if isTable {
+		delete(data, "s_budget")
+		delete(data, "s_bidamount")
+	}
+
 	return data
 }

+ 3 - 3
extract/extension.go

@@ -68,9 +68,9 @@ func getDetailText(v map[string]interface{}, tmpid string) string {
 			detail = filetext
 		}
 	} else {
-		if bs := ul.OssGetObject(tmpid); bs != "" {
-			detail = bs
-		}
+		//if bs := ul.OssGetObject(tmpid); bs != "" {
+		//	detail = bs
+		//}
 	}
 	return detail
 }

+ 13 - 6
extract/extract.go

@@ -6,6 +6,7 @@ import (
 	"data_ai/ul"
 	log "github.com/donnie4w/go-logger/logger"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"strings"
 	"sync"
 	"unicode/utf8"
 )
@@ -30,10 +31,10 @@ func ExtractFieldInfo(sid string, eid string) {
 	pool_mgo := make(chan bool, ul.Reading)
 	wg_mgo := &sync.WaitGroup{}
 
-	sess := ul.SourceMgo.GetMgoConn()
-	defer ul.SourceMgo.DestoryMongoConn(sess)
+	sess := ul.BidMgo.GetMgoConn()
+	defer ul.BidMgo.DestoryMongoConn(sess)
 	total, isok := 0, 0
-	it := sess.DB(ul.SourceMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
+	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%200 == 0 {
 			log.Debug("cur ai index ", total)
@@ -71,7 +72,13 @@ func ExtractFieldInfo(sid string, eid string) {
 func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	tmpid := ul.BsonTOStringId(v["_id"])
 	title := qu.ObjToString(v["title"])
-	old_detail := getDetailText(v, tmpid)        //获取正文文本
+	old_detail := getDetailText(v, tmpid) //获取正文文本
+
+	isTable := false //是否表格
+	if strings.Contains(old_detail, "<table>") {
+		isTable = true
+	}
+
 	if NotInProgressInfo(title, old_detail, v) { //过滤信息
 		return map[string]interface{}{}
 	}
@@ -97,7 +104,7 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 	//非短文本以下识别-纯测试
 	if !shorText {
 		//获取分包信息
-		if pkg := prompt.AcquireNewMultiplePackageInfo(new_detail); len(pkg) > 0 {
+		if pkg := prompt.AcquireNewMultiplePackageInfo(new_detail, isTable); len(pkg) > 0 {
 			f_info["s_pkg"] = pkg
 		}
 		//获取分类字段数据
@@ -115,7 +122,7 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
 
 	//字段清洗
 	fns := getpnsinfo(v) //获取附件名字
-	f_data = clean.CleanFieldInfo(f_info, fns)
+	f_data = clean.CleanFieldInfo(f_info, fns, isTable)
 
 	//采购单位二级校验
 	CheckOutBuyerInfo(f_data)

+ 1 - 1
extract/test.go

@@ -63,7 +63,7 @@ func TestSinglePackageInfo(name string, tmpid string) {
 		detail = filetext
 	}
 	detail = ul.HttpConvertToMarkdown(detail)
-	pkg := prompt.AcquireNewMultiplePackageInfo(detail)
+	pkg := prompt.AcquireNewMultiplePackageInfo(detail, false)
 	//最终结果...
 	for k, v := range pkg {
 		log.Debug(k, "~", v)

+ 11 - 7
go.mod

@@ -22,6 +22,7 @@ require (
 	github.com/donnie4w/gofer v0.0.0-20240219061552-aad2cd80fd6e // indirect
 	github.com/frankban/quicktest v1.14.6 // indirect
 	github.com/fsnotify/fsnotify v1.7.0 // indirect
+	github.com/gogf/gf/v2 v2.8.3 // indirect
 	github.com/golang/snappy v0.0.4 // indirect
 	github.com/gomarkdown/markdown v0.0.0-20230922112808-5421fefb8386 // indirect
 	github.com/gomarkdown/mdtohtml v0.0.0-20240124153210-d773061d1585 // indirect
@@ -38,9 +39,9 @@ require (
 	github.com/klauspost/compress v1.17.0 // indirect
 	github.com/kr/pretty v0.3.1 // indirect
 	github.com/kr/text v0.2.0 // indirect
-	github.com/magiconair/properties v1.8.7 // indirect
+	github.com/magiconair/properties v1.8.9 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
-	github.com/mattn/go-runewidth v0.0.9 // indirect
+	github.com/mattn/go-runewidth v0.0.16 // indirect
 	github.com/microcosm-cc/bluemonday v1.0.27 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -53,6 +54,7 @@ require (
 	github.com/peterbourgon/diskv/v3 v3.0.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/rogpeppe/fastuuid v1.2.0 // indirect
 	github.com/rogpeppe/go-internal v1.9.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
@@ -74,14 +76,16 @@ require (
 	github.com/xdg-go/scram v1.1.2 // indirect
 	github.com/xdg-go/stringprep v1.0.4 // indirect
 	github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
+	go.opentelemetry.io/otel v1.24.0 // indirect
+	go.opentelemetry.io/otel/trace v1.24.0 // indirect
 	go.uber.org/atomic v1.9.0 // indirect
 	go.uber.org/multierr v1.9.0 // indirect
-	golang.org/x/crypto v0.24.0 // indirect
+	golang.org/x/crypto v0.30.0 // indirect
 	golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 // indirect
-	golang.org/x/net v0.26.0 // indirect
-	golang.org/x/sync v0.7.0 // indirect
-	golang.org/x/sys v0.21.0 // indirect
-	golang.org/x/text v0.16.0 // indirect
+	golang.org/x/net v0.32.0 // indirect
+	golang.org/x/sync v0.10.0 // indirect
+	golang.org/x/sys v0.28.0 // indirect
+	golang.org/x/text v0.21.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect

+ 23 - 0
go.sum

@@ -55,6 +55,8 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
+github.com/gogf/gf/v2 v2.8.3 h1:h9Px3lqJnnH6It0AqHRz4/1hx0JmvaSf1IvUir5x1rA=
+github.com/gogf/gf/v2 v2.8.3/go.mod h1:n++xPYGUUMadw6IygLEgGZqc6y6DRLrJKg5kqCrPLWY=
 github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A=
 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
@@ -127,10 +129,14 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
 github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
+github.com/magiconair/properties v1.8.9 h1:nWcCbLq1N2v/cpNsy5WvQ37Fb+YElfq20WJ/a8RkpQM=
+github.com/magiconair/properties v1.8.9/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
 github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
@@ -167,6 +173,9 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
+github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/rogpeppe/fastuuid v1.2.0 h1:Ppwyp6VYCF1nvBTXL3trRso7mXMlRrw9ooo375wvi2s=
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
@@ -240,7 +249,11 @@ go.mongodb.org/mongo-driver v1.16.0 h1:tpRsfBJMROVHKpdGyc1BBEzzjDUWjItxbVSZ8Ls4B
 go.mongodb.org/mongo-driver v1.16.0/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw=
 go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E=
 go.opentelemetry.io/otel v1.5.0/go.mod h1:Jm/m+rNp/z0eqJc74H7LPwQ3G87qkU/AnnAydAjSAHk=
+go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo=
+go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=
 go.opentelemetry.io/otel/trace v1.5.0/go.mod h1:sq55kfhjXYr1zVSyexg0w1mpa03AYXR5eyTkB9NPPdE=
+go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI=
+go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU=
 go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
 go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE=
 go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
@@ -261,6 +274,8 @@ golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+
 golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
 golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
 golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
+golang.org/x/crypto v0.30.0 h1:RwoQn3GkWiMkzlX562cLB7OxWvjH1L8xutO2WoJcRoY=
+golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -305,6 +320,8 @@ golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
 golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
 golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
+golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
+golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -314,6 +331,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
 golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -338,6 +357,8 @@ golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
 golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -359,6 +380,8 @@ golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
 golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
 golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
 golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
 golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

+ 70 - 33
main.go

@@ -1,11 +1,14 @@
 package main
 
 import (
-	"data_ai/prompt"
 	"data_ai/tool"
 	"data_ai/udp"
 	"data_ai/ul"
-	"fmt"
+	log "github.com/donnie4w/go-logger/logger"
+	"github.com/gogf/gf/v2/util/gconv"
+	"strings"
+	"sync"
+	"unicode/utf8"
 )
 
 func init() {
@@ -23,6 +26,10 @@ func init() {
 	if !ul.IsTool {
 		ul.InitOss(ul.IsLocal)
 	}
+	log.Debug("测试一下转换MD方法···")
+	detail := `<table border=\"1\"><tbody><tr><th>采购人名称</th><td>云南中医药大学</td></tr><tr><th>中标(成交)供应商名称</th><td>云南智蓝云鸽信息科技有限公司</td></tr><tr><th>合同金额</th><td>224.99万元 人民币</td></tr><tr><th>合同期限</th><td>年</td></tr><tr><th>合同签署时间</th><td>2023-12-29 00:00:00</td></tr></tbody></table>`
+	new_detail := ul.HttpConvertToMarkdown(detail)
+	log.Debug(new_detail)
 }
 
 func main() {
@@ -30,41 +37,71 @@ func main() {
 		tool.StartToolInfo()
 		return
 	}
+	//extract.TestSingleFieldInfo("bidding", "67763c6b3309c0998ba25811")
 	lock := make(chan bool)
 	<-lock
 }
 
 // 测试调试数据
-func test() {
-	arr := []string{
-		"福建亿力集团有限公司三明优信公司",
-		"北京市顺义区疾病预防控制中心",
-		"海南博鳌乐城数字科技发展有限公司",
-		"济南市公安局章丘区分局",
-		"固镇县公安局,固镇县城市建设投资发展有限责任公司",
-		"中共耒阳市纪律检查委员会耒阳市监察委员会",
-		"苏州市吴江区公安局太湖新城片区公安",
-		"安徽公司马鞍山电厂",
-		"安徽公司合肥电厂",
-		"国能宿州热电有限公司",
-		"中山市港口镇下南小学",
-		"中山市港口镇大丰小学",
-		"中山市港口镇大南中学",
-		"中山市港口镇石特小学",
-		"中国铁路乌鲁木齐局集团有限公司阿勒泰基础设施段",
-		"常德市澧县澧州实验学校",
-		"宜章县杨梅山镇杨梅山学校",
-		"永州市东安县芦洪市镇人民政府",
-		"云南省彝医医院",
-		"台山市台城街道办事处长岭村松梅经济合作社",
-		"杭州市钱塘区听涛幼儿园",
-		"杭州市临平区启文中学",
-		"杭州市余杭区舟枕小学",
-		"宁波高新区信懋小学",
-		"杭州市钱塘区星华幼儿园",
-	}
-	for _, v := range arr {
-		zp_buyer := prompt.AcquireBuyerInfo(v)
-		fmt.Println(v, "~", zp_buyer["实体单位"])
+func test1() {
+	q, total := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gt":  ul.StringTOBsonId("677638b13309c0998ba2488f"),
+			"$lte": ul.StringTOBsonId("6777a3d23309c0998ba89797"),
+		},
+	}, 0
+	count, _ := ul.BidMgo.Count("bidding", q)
+	log.Debug("查询数量:", count)
+
+	pool_mgo := make(chan bool, 10)
+	wg_mgo := &sync.WaitGroup{}
+
+	sess := ul.BidMgo.GetMgoConn()
+	defer ul.BidMgo.DestoryMongoConn(sess)
+
+	it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur ai index ", total)
+		}
+		pool_mgo <- true
+		wg_mgo.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-pool_mgo
+				wg_mgo.Done()
+			}()
+			//tmpid := ul.BsonTOStringId(tmp["_id"])
+			s1, s2 := gconv.String(tmp["budget"]), gconv.String(tmp["bidamount"])
+			r1, r2 := strings.Split(s1, "."), strings.Split(s2, ".")
+			if len(r1) == 2 {
+				l1 := utf8.RuneCountInString(r1[1])
+				if l1 > 3 {
+					//log.Debug("问题金额:", tmpid, "~", s1)
+					ul.BidMgo.Save("zktest_err_0106", tmp)
+					return
+				}
+			}
+
+			if len(r2) == 2 {
+				l2 := utf8.RuneCountInString(r2[1])
+				if l2 > 3 {
+					//log.Debug("问题金额:", tmpid, "~", s2)
+					ul.BidMgo.Save("zktest_err_0106", tmp)
+					return
+				}
+			}
+		}(tmp)
+		tmp = make(map[string]interface{})
 	}
+	wg_mgo.Wait()
+	log.Debug("is over ...", total)
+
+}
+
+func c(s string) string {
+	s = strings.ReplaceAll(s, "(", "(")
+	s = strings.ReplaceAll(s, ")", ")")
+	s = strings.ReplaceAll(s, ",", ",")
+	return s
 }

+ 24 - 8
prompt/prompt_package.go

@@ -6,6 +6,7 @@ import (
 	"data_ai/ul"
 	"fmt"
 	"github.com/google/uuid"
+	"github.com/shopspring/decimal"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"strings"
 )
@@ -146,7 +147,7 @@ func PromptMultiplePackageText(detail string) string {
 }
 
 // 新结构分包信息提取
-func AcquireNewMultiplePackageInfo(detail string) map[string]interface{} {
+func AcquireNewMultiplePackageInfo(detail string, isTable bool) map[string]interface{} {
 	content := PromptMultiplePackageText(detail)
 	zp := ai.PostZhiPuPackageInfo(content)
 	//转格式...
@@ -174,7 +175,7 @@ func AcquireNewMultiplePackageInfo(detail string) map[string]interface{} {
 		//分包信息结构
 		package_id := uuid.New().String()
 		package_id = strings.ReplaceAll(package_id, "-", "")
-		com_package = append(com_package, map[string]interface{}{
+		p := map[string]interface{}{
 			"package_id":   package_id,
 			"name":         name,
 			"code":         code,
@@ -184,10 +185,23 @@ func AcquireNewMultiplePackageInfo(detail string) map[string]interface{} {
 			"projectcode":  projectcode,
 			"packagecode":  packagecode,
 			"contractcode": contractcode,
-		})
-		//去重计算单位与总金额
-		s_bidamount += bidamount
-		s_budget += budget
+		}
+		if isTable {
+			delete(p, "budget")
+			delete(p, "bidamount")
+		}
+		com_package = append(com_package, p)
+		//去重计算单位与总金额-精度丢失···
+		s_bid1 := decimal.NewFromFloat(s_bidamount)
+		s_bid2 := decimal.NewFromFloat(bidamount)
+		s_bid_add := s_bid1.Add(s_bid2)
+		s_bidamount, _ = s_bid_add.Float64()
+
+		s_bud1 := decimal.NewFromFloat(s_budget)
+		s_bud2 := decimal.NewFromFloat(budget)
+		s_bud_add := s_bud1.Add(s_bud2)
+		s_budget, _ = s_bud_add.Float64()
+
 		if win_temp[winner] == "" && winner != "" {
 			win_arr = append(win_arr, winner)
 			win_temp[winner] = winner
@@ -195,8 +209,10 @@ func AcquireNewMultiplePackageInfo(detail string) map[string]interface{} {
 	}
 	s_winner = strings.Join(win_arr, ",")
 	ai_pkg["s_winner"] = s_winner
-	ai_pkg["s_bidamount"] = s_bidamount
-	ai_pkg["s_budget"] = s_budget
+	if !isTable {
+		ai_pkg["s_bidamount"] = s_bidamount
+		ai_pkg["s_budget"] = s_budget
+	}
 	ai_pkg["com_package"] = com_package
 	return ai_pkg
 }

+ 16 - 16
tool.json

@@ -1,28 +1,28 @@
 {
-  "reading": 50,
-  "ext_name": "20241015Hs_5_area",
+  "reading": 500,
+  "ext_name": "zktest_liantong_bidding",
   "s_mgo": {
     "local": false,
-    "l_addr": "192.168.3.166:27082",
-    "addr": "192.168.3.166:27082",
-    "dbname" : "zhaoxiuzhen",
-    "username": "",
-    "password": ""
+    "l_addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "dbname" : "qfw",
+    "username": "zhengkun",
+    "password": "zk@123123"
   },
   "b_mgo": {
-    "local": true,
-    "l_addr": "127.0.0.1:12005",
-    "addr": "127.0.0.1:12005",
-    "dbname" : "qfw_ai",
+    "local": false,
+    "l_addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "dbname" : "qfw",
     "username": "zhengkun",
     "password": "zk@123123"
   },
   "qy_mgo": {
     "local": false,
-    "l_addr": "192.168.3.166:27082",
-    "addr": "192.168.3.166:27082",
-    "dbname" : "zhengkun",
-    "username": "",
-    "password": ""
+    "l_addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "addr": "172.31.31.202:27081,172.20.45.128:27080",
+    "dbname" : "mixdata",
+    "username": "zhengkun",
+    "password": "zk@123123"
   }
 }

+ 2 - 2
udp/udprocess.go

@@ -146,8 +146,8 @@ func updateProcessUdpIdsInfo(sid string, eid string) {
 			if up_id != "" {
 				update := map[string]interface{}{
 					"$set": map[string]interface{}{
-						"dataprocess_ai": 2,
-						"updatetime":     time.Now().Unix(),
+						"dataprocess": 4,
+						"updatetime":  time.Now().Unix(),
 					},
 				}
 				ul.BidMgo.UpdateById(task_coll, up_id, update)

+ 2 - 2
ul/attr.go

@@ -12,8 +12,8 @@ var (
 	Url                     = "https://www.jianyu360.cn/article/content/%s.html"
 	CleanResultReg          = regexp.MustCompile("((\\s|\n| |\\[|\\]|\\`|json)+)")
 	SaveResultReg           = regexp.MustCompile("([{].*[}])")
-	MaxLen                  = 3000
-	MaxByte                 = 8000
+	MaxLen                  = 20000
+	MaxByte                 = 50000
 	MaxUdp                  = 10000
 	RulesPname              = []*ExtReg{}
 	IsTool, IsFull, IsLocal bool

+ 2 - 1
ul/init.go

@@ -262,7 +262,8 @@ func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 func PostMarkDownText(html string) string {
 	url := "http://172.17.162.35:18811/md"
 	if IsLocal {
-		url = "http://172.17.0.11:8888/md"
+		//url = "http://172.17.0.11:8888/md"
+		url = "http://192.168.3.13:8888/md"
 	}
 	// 创建请求数据
 	jsonData, err := json.Marshal(map[string]interface{}{"html": html})