2 Commits 63b4b89c53 ... e4c87779dc

Autor SHA1 Mensagem Data
  wangchuanjin e4c87779dc feat:xiugai há 2 semanas atrás
  wangchuanjin ac7275f1e8 feat:xiugai há 4 semanas atrás
15 ficheiros alterados com 734 adições e 236 exclusões
  1. BIN
      .exe
  2. 37 0
      README.md
  3. 53 26
      config.yaml
  4. 5 17
      db/db.go
  5. BIN
      extract
  6. 104 0
      extract.go
  7. 11 5
      go.mod
  8. 27 12
      go.sum
  9. BIN
      go_build_dataIdentify.exe
  10. BIN
      go_build_dataIdentify_linux
  11. 1 1
      main.go
  12. 169 65
      main_test.go
  13. 14 16
      service/model.go
  14. 135 61
      service/rule.go
  15. 178 33
      service/service.go

BIN
.exe


+ 37 - 0
README.md

@@ -0,0 +1,37 @@
+2+2
+2025/07/11 17:24:17 总共 794 报价模式不一致 26 中标联合体不一致 43
+1+2
+2025/07/11 17:26:12 总共 794 报价模式不一致 26 中标联合体不一致 40
+
+2025/07/11 17:28:22 总共 794 报价模式不一致 33 中标联合体不一致 40
+
+
+2025/07/12 16:26:45 总共 794 报价模式不一致 26 中标联合体不一致 17 报价模式大模型抽取 62 中标联合体大模型抽取 12
+
+报价模式正确率:96.7
+中标联合体正确率:97.8
+
+
+2025/07/14 09:53:34 总共 794 报价模式不一致 28 中标联合体不一致 14 报价模式大模型抽取 59 中标联合体大模型抽取 11
+
+
+- "第[^一]中标候选人"
+-     - "(牵头(人|羊|供应商)|(投标|中标|成交)(联合体|成员))[::]"
+
+6866dfedd5d8e4081f826419	https://www.jianyu360.com/nologin/content/AoqY1xVcD0oJD0vRGBmcHUoMSc4CSNmcX9gKTgFLS5FdGhzYVJUCl4%3D.html
+
+https://www.jianyu360.cn/nologin/content/AqCY1xVcC4/UTkvM2hmcHUoMSc4CSNmcX9gKT8nIz0gc2BzYytUCmE=.html
+
+2025/07/14 20:41:14 总共 794 报价模式不一致 16 中标联合体不一致 9 报价模式大模型抽取 30 中标联合体大模型抽取 18
+2025/07/14 20:43:04 总共 794 报价模式不一致 18 中标联合体不一致 9 报价模式大模型抽取 30 中标联合体大模型抽取 18
+
+
+2025/07/15 11:52:04 总数 794
+报价模式,正确:780,正确率:98.24%
+报价模式-费率模式,总数:22,正确:18,正确率:81.82%
+报价模式-上浮下浮模式,总数:15,正确:13,正确率:86.67%
+报价模式-​​"勘察测量单价+设计费率下浮"的混合模式​​,总数:1,正确:0,正确率:0.00%
+报价模式-单价模式,总数:257,正确:254,正确率:98.83%
+报价模式-整标报价模式,总数:499,正确:495,正确率:99.20%
+中标联合,正确:788,正确率:99.24%
+报价模式大模型抽取 36 中标联合体大模型抽取 18

+ 53 - 26
config.yaml

@@ -8,14 +8,14 @@ logger:
   compress: true
 mongodb:
   main:
-    #mongodbAddr: "172.20.45.128:27080,172.31.31.202:27081"
-    #dbName: "qfw"
-    #userName: "wcj2025"
-    #password: "Bidingjy@0522"
+    mongodbAddr: "172.20.45.128:27080,172.31.31.202:27081"
+    dbName: "qfw"
+    userName: "wcj2025"
+    password: "Bidingjy@0522"
     size: 8
-    mongodbAddr: "172.20.45.129:27002"
-    dbName: "qfw_data"
-    collection: "bidding_hasdetail"
+    #mongodbAddr: "172.20.45.129:27002"
+    #dbName: "qfw_data"
+    collection: "bidding"
   extract:
     mongodbAddr: "172.17.4.85:27080"
     dbName: "qfw"
@@ -25,34 +25,62 @@ model:
   apiKey: "app-a2sCpnnOn6UAhWj9waToxETv"
   user: "jianyu"
 clearPatterns:
-  - "第[^一]中标候选人.+"
+  - "限.{2,6}价"
+  - "候选人是联合体的"
+  - "中联合体各方的名称均应填写"
+  - "[\u4e00-\u9fa5]+[::](/|详见附件)"
 quoteMode:
+  minBidamount: 1000
+  tableK: "报价[方形模]式"
   rules:
     - mode: "费率模式"
+      tableV: "费率"
       patterns:
-        - "((投标|中标|成交).{0,8}费率)|((投标报价系数|费率报价)[0-9.]+%)"
+        - "((投标|中标|成交)[\u4e00-\u9fa5]{0,8}费率)|((投标报价系数|费率报价)[0-9.]+%)"
+        - "报价[方形模]式[::]费率"
+        - "投标报价[((]费率[))]"
+        - "招标文件.{0,10}费率[.0-9]+%"
+      modelPatterns: "费率"
     - mode: "上浮下浮模式"
+      tableK: "中标.{0,8}金额|[上下]浮率"
+      tableV: "上浮|下浮|折扣|[.0-9]+%"
       patterns:
-        - "(?s)(投标|中标|成交).{0,6}[::]?.{0,6}[上下]浮率"
+        - "(?s)(投标|中标|成交|报价)[^((]{0,12}[上下]浮率"
         - "(?s)统一[上下]浮率为[::][0-9.]+%"
         - "(?s)存款利率[上下]浮率[::]?[0-9.]+%"
-    - mode: "单价模式"
-      patterns:
-        - "全部包含:数量+单价+规格型号"
+        - "[投中招]标.{0,8}折扣系数"
+        - "报价[方形模]式[::](上浮|下浮|折扣)"
+        - "[上下]浮率报价[::]?[0-9.]+%"
+      modelPatterns: "上浮率|下浮率"
     - mode: "整标报价模式"
+      #tableK: "((中标|成交).{0,8}金额)|投标总价"
+      tableV: "总价|([0-9]+[.,,]?[0-9]+)"
       patterns:
-        - "(中标|成交)总价"
-  modelPatterns:
-    - "上浮率|下浮率|费率"
-    - "全部包含:单价+数量"
+        - "(成交|中标)总价[::]"
+        - "报价[方形模]式[::]总价"
+        - "(成交|中标)总金额.{0,6}[::]"
+    - mode: "单价模式"
+      tableK: "(中标|成交).{0,8}单价|单价报价"
+      tableV: "单价|[.0-9]+"
+      patterns:
+        - "标的单价"
+        - "报价[方形模]式[::]单价"
+      lineUnderstand:
+        patterns:
+          - "全部包含:标的+数量+单价"
+          - "全部包含:规格型号+数量+单价"
+          - "全部包含:技术规格+数量+单价"
+        space: 1
+      modelPatterns: "全部包含:单价+数量"
 bidCommonwealth:
   subtype:
     - 中标
     - 成交
     - 合同
   showOnlyOnce:
-    - ["牵头(羊|人)","成员单位"]
-  firstWinnerOrder: "联合体|牵头(人|羊)"
+    - ["牵头(羊|人|供应商|单位)","成员单位"]
+  firstWinnerOrder: "联合体|(牵头(人|羊|供应商|单位))|(公司.{6,}公司)"
+  firstWinnerOrderClear: "(.+?的.+?为)|(.{0,4}[支分]公司)"
   # 白名单规则集合(确认是联合体中标)
   whitelistPatterns:
     - "由.*组成联合体.*中标"
@@ -68,23 +96,22 @@ bidCommonwealth:
     - "中标人信息.*包含.*联合体.*成员"
     - "评标委员会推荐.*联合体.*为中标候选人"
     - "是否联合体[::]+是"
-    - "(((中标|成交)[^候选]+?)|(第一.{0,6}候选人))[::].+(联合|主)体"
+    - "(((中标|成交)[^候选]{0,8})|(第一.{0,6}候选人))[::][\u4e00-\u9fa5]+[((]?(联合|主)体"
     - "联合体.{0,8}、.{2,20}联合体"
     - ".{2,50}与.{2,50}组成的联合体"
+    - "([((]主[))].{2,30}[,,][((]成[))])"
+    - "((牵头|成员)(人|羊|供应商|单位)|(投标|中标|成交)(联合体|成员))[::]"
+    - "(成交|中标)供应商[::]联合体"
   # 黑名单规则集合(排除非联合体中标)
   blacklistPatterns:
     - "不(允许|接受|支持)联合体"
     - "拒绝联合体投标"
     - "仅限独立投标人"
-    - "中标单位为.*公司$"
-    - "联合体.*未中标"
-    - "联合体.*未通过资格审查"
-    - "中标单位.*与.*无关"
-    - "中标单位.*但.*未组成联合体"
     - "仅限.*独立.*投标"
     - "该项目.*不允许联合体"
     - "联合体.*不符合.*资格要求"
     - "中标人.*未与其他单位合作"
     - "单一投标人.*中标"
+    - "接受联合体投标[::]\\s*不接受"
   modelPatterns:
-    - "联合体(牵头人|成员)|牵头人|成员单位"
+    - "联合体成员|牵头(供应商||羊)|成员单位"

+ 5 - 17
db/db.go

@@ -8,28 +8,16 @@ import (
 )
 
 var (
-	Mgo_Main    *MongodbSim
 	Mgo_Extract *MongodbSim
+	Mgo_Main    *MongodbSim
 )
 
 func init() {
 	ctx := gctx.New()
-	Mgo_Main = &MongodbSim{
-		MongodbAddr: g.Config().MustGet(ctx, "mongodb.main.mongodbAddr").String(),
-		Size:        g.Config().MustGet(ctx, "mongodb.main.size").Int(),
-		DbName:      g.Config().MustGet(ctx, "mongodb.main.dbName").String(),
-		UserName:    g.Config().MustGet(ctx, "mongodb.main.userName").String(),
-		Password:    g.Config().MustGet(ctx, "mongodb.main.password").String(),
-	}
+	g.Config().MustGet(ctx, "mongodb.main").Struct(&Mgo_Main)
 	Mgo_Main.InitPool()
-	log.Println("初始化 mongodb main")
-	Mgo_Extract = &MongodbSim{
-		MongodbAddr: g.Config().MustGet(ctx, "mongodb.extract.mongodbAddr").String(),
-		Size:        g.Config().MustGet(ctx, "mongodb.extract.size").Int(),
-		DbName:      g.Config().MustGet(ctx, "mongodb.extract.dbName").String(),
-		UserName:    g.Config().MustGet(ctx, "mongodb.extract.userName").String(),
-		Password:    g.Config().MustGet(ctx, "mongodb.extract.password").String(),
-	}
+	log.Println("初始化mongodb main")
+	g.Config().MustGet(ctx, "mongodb.extract").Struct(&Mgo_Extract)
 	Mgo_Extract.InitPool()
-	log.Println("初始化 mongodb extract")
+	log.Println("初始化mongodb extract")
 }

BIN
extract


+ 104 - 0
extract.go

@@ -0,0 +1,104 @@
+package main
+
+import (
+	"app.yhyue.com/moapp/jybase/encrypt"
+	. "app.yhyue.com/moapp/jybase/mongodb"
+	. "dataIdentify/db"
+	. "dataIdentify/service"
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/os/gctx"
+	"github.com/gogf/gf/v2/util/gconv"
+	"github.com/gogf/gf/v2/util/grand"
+	"log"
+	"strings"
+	"time"
+)
+
+// main scans the configured main collection (documents with extracttype 1 and
+// an _id below a fixed cut-off, newest first) and copies selected documents
+// into a dated "wcj_bidding_YYYYMMDD" collection.
+//
+// Sampling phase (while any of the subtypes 中标/成交/合同 is below maxSize):
+// documents are thinned out pseudo-randomly via their publishtime and kept only
+// when Pretreatment yields a concrete quote mode.
+// Consortium phase (all quotas full): every remaining document is checked and
+// only those with bidCommonwealth == 1 are saved, until maxSize consortium
+// documents have been counted in total.
+//
+// NOTE(review): the third Pretreatment argument presumably selects what is
+// identified (0 = both, 1 = quote mode only, 2 = consortium only) — confirm
+// against its definition.
+func main() {
+	// Per-subtype quota; also the number of consortium documents to collect.
+	const maxSize = 200
+	// Number of processed documents identified as a winning consortium.
+	zblhtSize := 0
+	log.Println("start...")
+	sess := Mgo_Main.GetMgoConn()
+	defer Mgo_Main.DestoryMongoConn(sess)
+	SelectField["publishtime"] = 1
+	SelectField["href"] = 1
+	SelectField["s_winner"] = 1
+	collection := g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()
+	it := sess.DB(Mgo_Main.DbName).C(collection).Find(map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$lt": StringTOBsonId("686ce21dd5d8e4081f8f2d98"),
+		},
+		// Debug aid: uncomment to process a single document.
+		//"_id":         StringTOBsonId("6763aa5555a3d7e571cda133"),
+		"extracttype": 1,
+	}).Select(SelectField).Sort("-_id").Iter()
+	// Counters per accepted subtype; any other subtype is skipped.
+	all := map[string]int{}
+	for _, v := range strings.Split("中标、成交、合同", "、") {
+		all[v] = 0
+	}
+	// isOver reports whether every subtype has reached its quota.
+	isOver := func() bool {
+		for _, v := range all {
+			if v < maxSize {
+				return false
+			}
+		}
+		return true
+	}
+	index := 0
+	for m := make(map[string]interface{}); it.Next(m); {
+		index++
+		if index%500 == 0 {
+			log.Println("index", index)
+		}
+		subtype, _ := m["subtype"].(string)
+		if _, ok := all[subtype]; !ok {
+			continue
+		}
+		if !isOver() {
+			// Random thinning: keep a document only when its publishtime is
+			// divisible by a freshly drawn random number.
+			publishtime := gconv.Int(m["publishtime"])
+			if publishtime%grand.N(1, 1000) != 0 {
+				continue
+			}
+			if all[subtype] >= maxSize {
+				continue
+			}
+		}
+		_id := BsonIdToSId(m["_id"])
+		href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", _id) + ".html"
+		m["jybxhref"] = href
+		var flag bool
+		var quoteMode string
+		var bidCommonwealth int
+		if isOver() {
+			// Quotas are full: look at the consortium flag first and only
+			// resolve the quote mode for consortium documents.
+			flag, _, _, bidCommonwealth, _ = Pretreatment(_id, m, 2)
+			if bidCommonwealth == 1 {
+				flag, quoteMode, _, _, _ = Pretreatment(_id, m, 1)
+				if quoteMode == "" || quoteMode == QuoteMode_Other {
+					continue
+				}
+			}
+		} else {
+			flag, quoteMode, _, bidCommonwealth, _ = Pretreatment(_id, m, 0)
+			if quoteMode == "" || quoteMode == QuoteMode_Other {
+				continue
+			}
+		}
+		if !flag {
+			// A single document that cannot be processed must not abort the
+			// whole run; skip it and keep scanning.
+			continue
+		}
+		m["quote_mode"] = quoteMode
+		if bidCommonwealth == 1 {
+			zblhtSize++
+		}
+		m["bid_commonwealth"] = bidCommonwealth
+		delete(m, "detail")
+		if !isOver() || bidCommonwealth == 1 {
+			newId := Mgo_Main.SaveByOriID("wcj_bidding_"+time.Now().Format("20060102"), m)
+			log.Println("save", newId)
+		}
+		all[subtype]++
+		// ">=" because zblhtSize also grows during the sampling phase and may
+		// already be past maxSize when the quotas fill up.
+		if isOver() && zblhtSize >= maxSize {
+			break
+		}
+	}
+	log.Println("over...", index)
+}

+ 11 - 5
go.mod

@@ -8,6 +8,7 @@ require (
 	app.yhyue.com/moapp/jybase v0.0.0-20250509080440-038d69d3ad3b
 	github.com/gogf/gf/v2 v2.7.0
 	github.com/safejob/dify-sdk-go v1.4.3-rc.1
+	github.com/xuri/excelize/v2 v2.9.1
 	gopkg.in/natefinch/lumberjack.v2 v2.2.1
 )
 
@@ -30,18 +31,23 @@ require (
 	github.com/mattn/go-runewidth v0.0.9 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/richardlehane/mscfb v1.0.4 // indirect
+	github.com/richardlehane/msoleps v1.0.4 // indirect
+	github.com/tiendc/go-deepcopy v1.6.0 // indirect
 	github.com/xdg-go/pbkdf2 v1.0.0 // indirect
 	github.com/xdg-go/scram v1.0.2 // indirect
 	github.com/xdg-go/stringprep v1.0.2 // indirect
+	github.com/xuri/efp v0.0.1 // indirect
+	github.com/xuri/nfp v0.0.1 // indirect
 	github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
 	go.mongodb.org/mongo-driver v1.9.1 // indirect
 	go.opentelemetry.io/otel v1.14.0 // indirect
 	go.opentelemetry.io/otel/sdk v1.14.0 // indirect
 	go.opentelemetry.io/otel/trace v1.14.0 // indirect
-	golang.org/x/crypto v0.37.0 // indirect
-	golang.org/x/net v0.39.0 // indirect
-	golang.org/x/sync v0.13.0 // indirect
-	golang.org/x/sys v0.32.0 // indirect
-	golang.org/x/text v0.24.0 // indirect
+	golang.org/x/crypto v0.38.0 // indirect
+	golang.org/x/net v0.40.0 // indirect
+	golang.org/x/sync v0.14.0 // indirect
+	golang.org/x/sys v0.33.0 // indirect
+	golang.org/x/text v0.25.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )

+ 27 - 12
go.sum

@@ -52,20 +52,33 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM=
+github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk=
+github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
+github.com/richardlehane/msoleps v1.0.4 h1:WuESlvhX3gH2IHcd8UqyCuFY5yiq/GR/yqaSM/9/g00=
+github.com/richardlehane/msoleps v1.0.4/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
 github.com/safejob/dify-sdk-go v1.4.3-rc.1 h1:NxVbuQZEYpoMLHNkc28wCG6B0+9DkQZzyBQ7d+n7bRo=
 github.com/safejob/dify-sdk-go v1.4.3-rc.1/go.mod h1:uOjNfuk/UUd+NI3nbkhNLzmYPR747Spqds1E2LznDPQ=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=
-github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
 github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
+github.com/tiendc/go-deepcopy v1.6.0 h1:0UtfV/imoCwlLxVsyfUd4hNHnB3drXsfle+wzSCA5Wo=
+github.com/tiendc/go-deepcopy v1.6.0/go.mod h1:toXoeQoUqXOOS/X4sKuiAoSk6elIdqc0pN7MTgOOo2I=
 github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
 github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
 github.com/xdg-go/scram v1.0.2 h1:akYIkZ28e6A96dkWNJQu3nmCzH3YfwMPQExUYDaRv7w=
 github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs=
 github.com/xdg-go/stringprep v1.0.2 h1:6iq84/ryjjeRmMJwxutI51F2GIPlP5BfTvXHeYjyhBc=
 github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM=
+github.com/xuri/efp v0.0.1 h1:fws5Rv3myXyYni8uwj2qKjVaRP30PdjeYe2Y6FDsCL8=
+github.com/xuri/efp v0.0.1/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI=
+github.com/xuri/excelize/v2 v2.9.1 h1:VdSGk+rraGmgLHGFaGG9/9IWu1nj4ufjJ7uwMDtj8Qw=
+github.com/xuri/excelize/v2 v2.9.1/go.mod h1:x7L6pKz2dvo9ejrRuD8Lnl98z4JLt0TGAwjhW+EiP8s=
+github.com/xuri/nfp v0.0.1 h1:MDamSGatIvp8uOmDP8FnmjuQpu90NzdJxo7242ANR9Q=
+github.com/xuri/nfp v0.0.1/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ=
 github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA=
 github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
 go.mongodb.org/mongo-driver v1.9.1 h1:m078y9v7sBItkt1aaoe2YlvWEXcD263e1a4E1fBrJ1c=
@@ -78,26 +91,28 @@ go.opentelemetry.io/otel/trace v1.14.0 h1:wp2Mmvj41tDsyAJXiWDWpfNsOiIyd38fy85pyK
 go.opentelemetry.io/otel/trace v1.14.0/go.mod h1:8avnQLK+CG77yNLUae4ea2JDQ6iT+gozhnZjy/rw9G8=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20201216223049-8b5274cf687f/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
-golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
-golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
+golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
+golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
+golang.org/x/image v0.25.0 h1:Y6uW6rH1y5y/LK1J8BPWZtr6yZ7hrsy6hFrXjgsc2fQ=
+golang.org/x/image v0.25.0/go.mod h1:tCAmOEGthTtkalusGp1g3xa2gke8J6c2N565dTyl9Rs=
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
-golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
+golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
+golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
-golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
+golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
-golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
+golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
-golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
+golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
+golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190531172133-b3315ee88b7d/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

BIN
go_build_dataIdentify.exe


BIN
go_build_dataIdentify_linux


+ 1 - 1
main.go

@@ -12,7 +12,7 @@ import (
 	"os"
 )
 
-func main() {
+func main111() {
 	var logger *lumberjack.Logger
 	ctx := gctx.New()
 	g.Config().MustGet(ctx, "logger").Struct(&logger)

+ 169 - 65
main_test.go

@@ -5,19 +5,17 @@ import (
 	. "app.yhyue.com/moapp/jybase/mongodb"
 	. "dataIdentify/db"
 	. "dataIdentify/service"
+	"fmt"
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
+	"github.com/gogf/gf/v2/util/gconv"
+	"github.com/xuri/excelize/v2"
 	"log"
 	"net/rpc"
-	"strings"
 	"sync"
 	"testing"
 )
 
-const (
-	maxSize = 100
-)
-
 //	{
 //		$unset: {
 //		"bid_commonwealth" : "",
@@ -30,7 +28,7 @@ func TestAddField(t *testing.T) {
 	wait := &sync.WaitGroup{}
 	sess := Mgo_Main.GetMgoConn()
 	defer Mgo_Main.DestoryMongoConn(sess)
-	it := sess.DB("qfw_data").C("wcj_bidding").Find(map[string]interface{}{}).Select(nil).Sort("-_id").Iter()
+	it := sess.DB("qfw_data").C("wcj_bidding_20250708").Find(map[string]interface{}{}).Select(nil).Sort("-_id").Iter()
 	index := 0
 	for tm := make(map[string]interface{}); it.Next(tm); {
 		index++
@@ -45,7 +43,7 @@ func TestAddField(t *testing.T) {
 				wait.Done()
 			}()
 			href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", BsonIdToSId(m["_id"])) + ".html"
-			Mgo_Main.Update("wcj_bidding", map[string]interface{}{
+			Mgo_Main.Update("wcj_bidding_20250708", map[string]interface{}{
 				"_id": m["_id"],
 			}, map[string]interface{}{"$set": map[string]interface{}{
 				"href": href,
@@ -59,16 +57,15 @@ func TestAddField(t *testing.T) {
 
 // 示例测试
 func TestRule(t *testing.T) {
-	Start("5b0e1b8da5cb26b9b7b8c9ac")
+	Start("686bc7add5d8e4081f8b8766")
 }
 func TestAll(t *testing.T) {
 	pool := make(chan bool, g.Config().MustGet(gctx.New(), "poolSize").Int())
 	wait := &sync.WaitGroup{}
 	sess := Mgo_Main.GetMgoConn()
 	defer Mgo_Main.DestoryMongoConn(sess)
-	it := sess.DB("qfw_data").C("wcj_bidding").Find(map[string]interface{}{
-		"_id": StringTOBsonId("67c123333309c0998b619793"),
-	}).Select(SelectField).Sort("-_id").Iter()
+	collection := "wcj_bidding_20250708"
+	it := sess.DB("qfw_data").C(collection).Find(nil).Select(SelectField).Sort("-_id").Iter()
 	index := 0
 	for tm := make(map[string]interface{}); it.Next(tm); {
 		index++
@@ -83,7 +80,7 @@ func TestAll(t *testing.T) {
 				wait.Done()
 			}()
 			_id := BsonIdToSId(m["_id"])
-			flag, quoteMode, bidCommonwealth, other := Pretreatment(_id, m)
+			flag, quoteMode, _, bidCommonwealth, _ := Pretreatment(_id, m, 0)
 			if !flag {
 				return
 			}
@@ -94,11 +91,8 @@ func TestAll(t *testing.T) {
 			if bidCommonwealth != -1 {
 				set["bid_commonwealth"] = bidCommonwealth
 			}
-			if set != nil {
-				set["model_dataidentify"] = other
-			}
 			if len(set) > 0 {
-				Mgo_Main.UpdateById("wcj_bidding", m["_id"], map[string]interface{}{"$set": set})
+				Mgo_Main.UpdateById(collection, m["_id"], map[string]interface{}{"$set": set})
 			}
 		}(tm)
 		tm = make(map[string]interface{})
@@ -106,56 +100,14 @@ func TestAll(t *testing.T) {
 	wait.Wait()
 	log.Println("over...", index)
 }
-func TestExtract(t *testing.T) {
-	log.Println("start...")
-	sess := Mgo_Main.GetMgoConn()
-	defer Mgo_Main.DestoryMongoConn(sess)
-	it := sess.DB("qfw_data").C("bidding_hasdetail").Find(map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$lt": StringTOBsonId("68639706408dd61d6e5d05cd"),
-		},
-	}).Select(map[string]interface{}{
-		"detail":    1,
-		"_id":       1,
-		"subtype":   1,
-		"toptype":   1,
-		"s_winner":  1,
-		"bidamount": 1,
-	}).Sort("-_id").Iter()
-	types := "邀标、竞谈、单一、招标、中标、成交、合同"
-	all := map[string]int{}
-	for _, v := range strings.Split(types, "、") {
-		all[v] = 0
-	}
-	var isOver = func() bool {
-		for _, v := range all {
-			if v < maxSize {
-				return false
-			}
-		}
-		return true
-	}
-	index := 0
-	for m := make(map[string]interface{}); it.Next(m); {
-		index++
-		if index%500 == 0 {
-			log.Println("index", index)
-		}
-		subtype, _ := m["subtype"].(string)
-		if v, ok := all[subtype]; !ok || v >= maxSize {
-			continue
-		}
-		_id := Mgo_Main.SaveByOriID("wcj_bidding", m)
-		log.Println("save", _id)
-		all[subtype]++
-		if isOver() {
-			break
-		}
-	}
-	log.Println("over...", index)
-}
+
+/* 返回结果:
+ * map[中标联合体:否 报价模式:其他]
+ * 中标联合体: 是/否
+ * 报价模式:单价/费率/折扣率/正常报价/其他
+ */
 func TestRpc(t *testing.T) {
-	conn, err := rpc.DialHTTP("tcp", "127.0.0.1:8811")
+	conn, err := rpc.DialHTTP("tcp", "172.31.31.203:8811")
 	if err != nil {
 		log.Println(err)
 		return
@@ -168,3 +120,155 @@ func TestRpc(t *testing.T) {
 	}
 	log.Println(reply)
 }
+// TestDd copies the second sheet of the verification workbook into a new
+// workbook. The header row is always kept; a data row is kept only when its
+// first column (the document _id) resolves in "bidding_hasdetail" to a
+// document whose subtype is 中标, 成交 or 合同.
+func TestDd(t *testing.T) {
+	of, err := excelize.OpenFile("./报价模式+中标联合体验证.xlsx")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer of.Close()
+	sheets := of.GetSheetList()
+	if len(sheets) < 2 {
+		t.Fatalf("workbook has %d sheet(s), need at least 2", len(sheets))
+	}
+	// Read every row of the second sheet and mirror it into a new file.
+	f := excelize.NewFile()
+	sheetName := sheets[1]
+	newSheetIndex, err := f.NewSheet(sheetName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	f.SetActiveSheet(newSheetIndex)
+	rows, err := of.GetRows(sheetName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// k is the 1-based output row; it only advances for rows that are kept.
+	k := 0
+	for ck, row := range rows {
+		if ck != 0 {
+			// GetRows may return empty rows; they carry no _id to look up.
+			if len(row) == 0 {
+				continue
+			}
+			data, _ := Mgo_Main.FindById("bidding_hasdetail", row[0], `{"subtype":1}`)
+			if data == nil || len(*data) == 0 {
+				continue
+			}
+			switch gconv.String((*data)["subtype"]) {
+			case "中标", "成交", "合同":
+			default:
+				continue
+			}
+		}
+		k++
+		for kk, vv := range row {
+			cell, _ := excelize.ColumnNumberToName(kk + 1)
+			f.SetCellValue(sheetName, fmt.Sprintf("%s%d", cell, k), vv)
+		}
+	}
+	if err := f.SaveAs("./新_报价模式+中标联合体验证.xlsx"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestCompare re-runs identification (Start) for every data row of the second
+// sheet of the second-round verification workbook and compares the result with
+// the manually labelled values: column 5 holds the expected quote mode (blank
+// means the whole-bid mode) and column 7 the expected consortium flag.
+// Mismatching rows are written to "./对比结果.xlsx" and accuracy figures are
+// logged.
+//
+// Sample result: total 300, quote-mode mismatches 86, consortium mismatches 2.
+func TestCompare(t *testing.T) {
+	// Open the existing workbook.
+	of, err := excelize.OpenFile("./报价模式+中标联合体验证第二轮.xlsx")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer of.Close()
+
+	// The labelled data lives on the second sheet.
+	sheets := of.GetSheetList()
+	if len(sheets) < 2 {
+		t.Fatalf("workbook has %d sheet(s), need at least 2", len(sheets))
+	}
+	f := excelize.NewFile()
+	sheetName := "对比结果"
+	newSheetIndex, err := f.NewSheet(sheetName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	f.SetActiveSheet(newSheetIndex)
+	// Header row of the result sheet.
+	headers := []string{"_id", "链接", "测试-报价模式", "抽取-报价模式", "测试-中标联合体", "抽取-中标联合体"}
+	for colNum, header := range headers {
+		cell, _ := excelize.ColumnNumberToName(colNum + 1)
+		f.SetCellValue(sheetName, fmt.Sprintf("%s%d", cell, 1), header)
+	}
+	rows, err := of.GetRows(sheets[1])
+	if err != nil {
+		t.Fatal(err)
+	}
+	// col returns the i-th cell of row, or "" when the row is shorter: GetRows
+	// drops trailing blank cells, so rows do not all have the same length.
+	col := func(row []string, i int) string {
+		if i < len(row) {
+			return row[i]
+		}
+		return ""
+	}
+	result := map[string]map[string]interface{}{}
+	lock := &sync.Mutex{}
+	pool := make(chan bool, 5)
+	wait := &sync.WaitGroup{}
+	var aEq, bEq float64
+	oneFieldMap := map[string]float64{}
+	oneFieldEqMap := map[string]float64{}
+	var aBidModel, bBigModel int64
+	// Identify all documents with at most 5 concurrent workers.
+	for k, r := range rows {
+		if k == 0 || len(r) == 0 {
+			continue
+		}
+		pool <- true
+		wait.Add(1)
+		go func(id string) {
+			defer func() {
+				<-pool
+				wait.Done()
+			}()
+			_, a, aa, b, bb := Start(id)
+			// "Other" is scored as the whole-bid mode.
+			if a == QuoteMode_Other {
+				a = QuoteMode_Whole
+			}
+			bv := ""
+			if b == 1 {
+				bv = "是"
+			} else if b == -1 {
+				bv = "否"
+			}
+			lock.Lock()
+			result[id] = map[string]interface{}{
+				"报价模式":      a,
+				"报价模式_大模型":  aa,
+				"中标联合体":     bv,
+				"中标联合体_大模型": bb,
+			}
+			lock.Unlock()
+		}(r[0])
+	}
+	wait.Wait()
+	rowNum := 1
+	for k, row := range rows {
+		if k == 0 {
+			continue
+		}
+		id, link := col(row, 0), col(row, 1)
+		wantMode, wantUnion := col(row, 4), col(row, 6)
+		if wantMode == "" {
+			wantMode = QuoteMode_Whole
+		}
+		obj := result[id]
+		a, b := obj["报价模式"], obj["中标联合体"]
+		if gconv.Bool(obj["报价模式_大模型"]) {
+			aBidModel++
+		}
+		if gconv.Bool(obj["中标联合体_大模型"]) {
+			bBigModel++
+		}
+		oneFieldMap[wantMode]++
+		if a == wantMode {
+			aEq++
+			oneFieldEqMap[wantMode]++
+		}
+		if b == wantUnion {
+			bEq++
+		}
+		if a == wantMode && b == wantUnion {
+			continue
+		}
+		// Only mismatching rows are written to the result sheet.
+		rowNum++
+		values := []interface{}{id, link, wantMode, a, wantUnion, b}
+		for i, v := range values {
+			cell, _ := excelize.ColumnNumberToName(i + 1)
+			f.SetCellValue(sheetName, fmt.Sprintf("%s%d", cell, rowNum), v)
+		}
+	}
+	if err := f.SaveAs("./对比结果.xlsx"); err != nil {
+		t.Fatal(err)
+	}
+	countRows := float64(len(rows) - 1)
+	oneFieldMsg := ""
+	for k, v := range oneFieldMap {
+		oneFieldMsg += fmt.Sprintf("报价模式-%s,总数:%.f,正确:%.f,正确率:%.2f%%", k, v, oneFieldEqMap[k], oneFieldEqMap[k]/v*100) + "\n"
+	}
+	log.Println("总数", countRows, "\n",
+		fmt.Sprintf("报价模式,正确:%.f,正确率:%.2f%%", aEq, aEq/countRows*100), "\n",
+		oneFieldMsg,
+		fmt.Sprintf("中标联合,正确:%.f,正确率:%.2f%%", bEq, bEq/countRows*100), "\n",
+		"报价模式大模型抽取", aBidModel, "中标联合体大模型抽取", bBigModel)
+}

+ 14 - 16
service/model.go

@@ -3,49 +3,47 @@ package service
 import (
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
+	"github.com/gogf/gf/v2/util/gconv"
 	"github.com/safejob/dify-sdk-go"
 	"github.com/safejob/dify-sdk-go/types"
 	"log"
+	"strings"
 )
 
 type Model struct {
 }
 
-func (ml *Model) Execute(b *BidInfo) (bool, string, int, map[string]interface{}) {
+func (ml *Model) Execute(b *BidInfo) (bool, string, bool, int, bool) {
 	return ml.Do(b, 2)
 }
-func (ml *Model) Do(b *BidInfo, t int) (bool, string, int, map[string]interface{}) {
-	result := ml.post(b.Id, b.Detail, t)
+func (ml *Model) Do(b *BidInfo, t int) (bool, string, bool, int, bool) {
+	result := ml.post(b.Id, b.OriginalDetail, t)
 	if result == nil {
-		return false, "", 0, nil
+		return false, "", true, 0, true
 	}
 	for k, v := range result {
 		if v == nil {
 			delete(result, k)
 		}
 	}
-	set := map[string]interface{}{}
-	flag := false
 	quoteMode, _ := result["报价模式"].(string)
+	for k, _ := range allQuoteMode {
+		if strings.HasPrefix(quoteMode, k) {
+			quoteMode = k
+		}
+	}
 	if !allQuoteMode[quoteMode] {
-		quoteMode = ""
-		flag = true
+		quoteMode = QuoteMode_Other
 	}
 	bidCommonwealth := 0
 	if result["中标联合体"] != nil {
-		if result["中标联合体"] == "是" {
+		if strings.HasPrefix(gconv.String(result["中标联合体"]), "是") {
 			bidCommonwealth = 1
 		} else {
 			bidCommonwealth = -1
 		}
-		if result["中标联合体"] != "是" && result["中标联合体"] != "否" {
-			flag = true
-		}
-	}
-	if flag && result != nil {
-		set["model_dataidentify"] = result
 	}
-	return true, quoteMode, bidCommonwealth, set
+	return true, quoteMode, true, bidCommonwealth, true
 }
 func (ml *Model) post(_id, detail string, t int) map[string]interface{} {
 	ctx := gctx.New()

+ 135 - 61
service/rule.go

@@ -11,106 +11,165 @@ import (
 )
 
 // Rule configuration, read from g.Config() when the package is initialised.
 // The two regexp.MustCompile calls panic at start-up if their pattern is
 // invalid.
 //
 // NOTE(review): if "bidCommonwealth.firstWinnerOrderClear" resolves to an empty
 // string, the compiled expression matches every input - confirm the key is
 // always set.
 var (
-	clearPatterns                     = g.Config().MustGet(gctx.New(), "clearPatterns").Strings()
-	bidCommonwealth_blacklistPatterns = g.Config().MustGet(gctx.New(), "bidCommonwealth.blacklistPatterns").Strings()
-	bidCommonwealth_whitelistPatterns = g.Config().MustGet(gctx.New(), "bidCommonwealth.whitelistPatterns").Strings()
-	bidCommonwealth_modelPatterns     = g.Config().MustGet(gctx.New(), "bidCommonwealth.modelPatterns").Strings()
-	bidCommonwealth_firstWinnerOrder  = g.Config().MustGet(gctx.New(), "bidCommonwealth.firstWinnerOrder").String()
-	quoteModeRules                    = g.Config().MustGet(gctx.New(), "quoteMode.rules").Maps()
-	quoteMode_modelPatterns           = g.Config().MustGet(gctx.New(), "quoteMode.modelPatterns").Strings()
-	allQuoteMode                      = map[string]bool{}
-	showOnlyOnce                      = [][]string{}
+	clearPatterns                            = g.Config().MustGet(gctx.New(), "clearPatterns").Strings()
+	bidCommonwealth_blacklistPatterns        = g.Config().MustGet(gctx.New(), "bidCommonwealth.blacklistPatterns").Strings()
+	bidCommonwealth_whitelistPatterns        = g.Config().MustGet(gctx.New(), "bidCommonwealth.whitelistPatterns").Strings()
+	bidCommonwealth_modelPatterns            = g.Config().MustGet(gctx.New(), "bidCommonwealth.modelPatterns").Strings()
+	bidCommonwealth_firstWinnerOrder         = g.Config().MustGet(gctx.New(), "bidCommonwealth.firstWinnerOrder").String()
+	bidCommonwealth_firstWinnerOrderClearReg = regexp.MustCompile(g.Config().MustGet(gctx.New(), "bidCommonwealth.firstWinnerOrderClear").String())
+	quoteModeRules                           = g.Config().MustGet(gctx.New(), "quoteMode.rules").Maps()
+	quoteModeTableKReg                       = g.Config().MustGet(gctx.New(), "quoteMode.tableK").String()
+	showOnlyOnce                             = [][]string{}
+	moreWinnerReg                            = regexp.MustCompile("[,,、]")
 )
 
 // init fills showOnlyOnce with one string slice per entry of the
 // "bidCommonwealth.showOnlyOnce" configuration array.
 func init() {
 	for _, v := range g.Config().MustGet(gctx.New(), "bidCommonwealth.showOnlyOnce").Array() {
 		showOnlyOnce = append(showOnlyOnce, gconv.Strings(v))
 	}
-	for _, v := range quoteModeRules {
-		vv := gvar.New(v).MapStrVar()
-		allQuoteMode[vv["mode"].String()] = true
-	}
 }
 
 // Rule is the field-less identifier that applies the configured patterns and
 // calls Model only when the patterns cannot decide.
 type Rule struct{}
 
-// 判断是否是联合体中标
-func (r *Rule) Execute(b *BidInfo) (bool, string, int, map[string]interface{}) {
-	bc := r.bidCommonwealth(b)
-	quoteMode := r.quoteMode(b)
-	if bc == -2 && quoteMode == "大模型识别" {
-		_, quoteMode, bc, _ = (&Model{}).Do(b, 2)
-	} else if bc == -2 {
-		_, _, bc, _ = (&Model{}).Do(b, 3)
-	} else if quoteMode == "大模型识别" {
-		_, quoteMode, _, _ = (&Model{}).Do(b, 1)
 // Execute identifies the quote mode and/or the consortium code of b.
 //
 // b.Type selects the work: 0 runs both identifications, 1 only the quote mode
 // and 2 only the consortium code. Any other value runs neither and returns the
 // zero values. The rules are tried first; Model.Do is called only when
 // bidCommonwealth returns -2 or quoteMode returns a non-empty candidate set.
 //
 // The results are: true (always), the quote mode, whether the quote mode came
 // from the model, the consortium code, and whether that code came from the
 // model.
+func (r *Rule) Execute(b *BidInfo) (bool, string, bool, int, bool) {
+	var bidCommonwealth int
+	var quoteMode string
+	var quoteMode_bigModel, bidCommonwealth_bigModel bool
+	var bigModel map[string]bool
+	if b.Type == 0 {
+		bidCommonwealth = r.bidCommonwealth(b)
+		quoteMode, bigModel = r.quoteMode(b)
 		// NOTE(review): in this first branch the model's quote mode is kept even
 		// when it is not in bigModel, unlike the t = 1 branches - confirm intended.
+		if bidCommonwealth == -2 && len(bigModel) > 0 {
+			_, quoteMode, quoteMode_bigModel, bidCommonwealth, bidCommonwealth_bigModel = (&Model{}).Do(b, 2)
+		} else if bidCommonwealth == -2 {
+			_, _, _, bidCommonwealth, bidCommonwealth_bigModel = (&Model{}).Do(b, 3)
+		} else if len(bigModel) > 0 {
+			_, quoteMode, quoteMode_bigModel, _, _ = (&Model{}).Do(b, 1)
 			// Accept the model's answer only if the rules listed it as a candidate.
+			if !bigModel[quoteMode] {
+				quoteMode = ""
+			}
+		}
+	} else if b.Type == 1 {
+		quoteMode, bigModel = r.quoteMode(b)
+		if len(bigModel) > 0 {
+			_, quoteMode, quoteMode_bigModel, _, _ = (&Model{}).Do(b, 1)
+			if !bigModel[quoteMode] {
+				quoteMode = ""
+			}
+		}
+	} else if b.Type == 2 {
+		bidCommonwealth = r.bidCommonwealth(b)
+		if bidCommonwealth == -2 {
+			_, _, _, bidCommonwealth, bidCommonwealth_bigModel = (&Model{}).Do(b, 3)
+		}
 	}
-	if (quoteMode == "" || quoteMode == "无法识别") && b.Bidamount > 0 {
-		quoteMode = "整体报价模式"
 	// Fallback: an undecided quote mode becomes QuoteMode_Whole when the bid
 	// amount is zero or above the configured "quoteMode.minBidamount".
+	if b.Type == 0 || b.Type == 1 {
+		if quoteMode == "" && (b.Bidamount == 0 || b.Bidamount > g.Config().MustGet(gctx.New(), "quoteMode.minBidamount").Float64()) {
+			quoteMode = QuoteMode_Whole
+		}
 	}
-	log.Println(b.Id, "规则", "报价模式", quoteMode, "中标联合体", bc)
-	return true, quoteMode, bc, nil
+	log.Println(b.Id, "规则", "报价模式", quoteMode, "中标联合体", bidCommonwealth)
+	return true, quoteMode, quoteMode_bigModel, bidCommonwealth, bidCommonwealth_bigModel
 }
 
-// 识别中标联合体
-func (r *Rule) quoteMode(b *BidInfo) string {
-	for _, line := range strings.Split(b.Detail, "\n") {
-		for _, v := range quoteModeRules {
-			vv := gvar.New(v).MapStrVar()
-			if r.matchAnyPattern(line, vv["patterns"].Strings()) {
-				return vv["mode"].String()
+// quoteMode identifies the quote mode of b.
 //
 // It returns either a decided mode name with a nil map, or an empty string
 // with the set of mode names whose modelPatterns matched the detail.
 // Rule.Execute calls the model only when that set is non-empty.
 //
 // Two passes run over quoteModeRules in configuration order. The first pass
 // looks at the table cells in b.TableKv, the second at the lines of b.Detail.
 // Errors from regexp.MatchString are discarded, so a pattern that does not
 // compile never matches.
+func (r *Rule) quoteMode(b *BidInfo) (string, map[string]bool) {
 	// Pass 1: a rule with a tableV pattern decides the mode when a cell's key
 	// matches quoteModeTableKReg (or the rule's own tableK) and its value
 	// matches tableV.
 	// NOTE(review): an empty quoteModeTableKReg would match every key - confirm
 	// "quoteMode.tableK" is always configured.
+	for _, v := range quoteModeRules {
+		vv := gvar.New(v).MapStrVar()
+		modeName := vv["mode"].String()
+		for _, row := range b.TableKv {
 			// The inner v (cell value) shadows the rule v of the outer loop.
+			for k, v := range row {
+				if tableV := vv["tableV"].String(); tableV != "" {
+					matchedK, _ := regexp.MatchString(quoteModeTableKReg, k)
+					if !matchedK {
+						if tableK := vv["tableK"].String(); tableK != "" {
+							matchedK, _ = regexp.MatchString(tableK, k)
+						}
+					}
+					matchedV, _ := regexp.MatchString(tableV, v)
+					if matchedK && matchedV {
+						log.Println(b.Id, "tableKv", k, v)
+						return modeName, nil
+					}
+				}
 			}
 		}
 	}
-	if r.matchAnyPattern(b.Detail, quoteMode_modelPatterns) {
-		return "大模型识别"
 	// Pass 2: per rule, test every line against the rule's patterns, then test
 	// a window of lines starting at that line against each lineUnderstand
 	// pattern.
+	bigModel := map[string]bool{}
+	for _, v := range quoteModeRules {
+		vv := gvar.New(v).MapStrVar()
+		modeName := vv["mode"].String()
+		var lineUnderstandPatterns []string
+		if lineUnderstand := vv["lineUnderstand"].MapStrVar(); lineUnderstand != nil {
+			lineUnderstandPatterns = lineUnderstand["patterns"].Strings()
+		}
+		lines := strings.Split(b.Detail, "\n")
+		for lineNum, lineVal := range lines {
+			if r.matchAnyPattern(b.Id, lineVal, vv["patterns"].Strings()) {
+				return modeName, nil
+			}
+			for _, vvv := range lineUnderstandPatterns {
 				// Window height is the number of "+"-separated parts of the
 				// pattern, plus the rule's "space" value when there is more
 				// than one part.
+				countLine := len(strings.Split(vvv, "+"))
+				if countLine > 1 {
+					countLine += vv["space"].Int()
+				}
+				newText := []string{}
+				for i := lineNum; i < lineNum+countLine && i < len(lines); i++ {
+					newText = append(newText, lines[i])
+				}
+				if r.matchAnyPattern(b.Id, strings.Join(newText, "\n"), []string{vvv}) {
+					return modeName, nil
+				}
+			}
+		}
+		// No line decided this rule: record the mode as a model candidate when its modelPatterns matches the whole detail.
+		if modelPatterns := vv["modelPatterns"].String(); modelPatterns != "" && r.matchAnyPattern(b.Id, b.Detail, []string{modelPatterns}) {
+			bigModel[modeName] = true
+		}
 	}
-	return ""
+	return "", bigModel
 }
 
 // bidCommonwealth identifies whether the bid was won by a consortium.
 //
 // It returns 1 for a consortium, -1 for none, and -2 when only
 // bidCommonwealth_modelPatterns matched; Rule.Execute calls the model on -2.
 // The checks run in the order written and the first one that fires decides.
 func (r *Rule) bidCommonwealth(b *BidInfo) int {
-	if len(b.WinnerOrder) > 0 {
-		if regexp.MustCompile(bidCommonwealth_firstWinnerOrder).MatchString(b.WinnerOrder[0]) {
-			return 1
-		}
+	// Step 1: exclude blacklist matches
+	if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_blacklistPatterns) {
 		return -1
 	}
-	for _, v := range strings.Split(b.KvText, "\n") {
-		if r.matchAnyPattern(v, bidCommonwealth_whitelistPatterns) {
 	// Whitelist check on each extracted key/value line.
+	for _, v := range b.KvText {
+		if r.matchAnyPattern(b.Id, v, bidCommonwealth_whitelistPatterns) {
 			return 1
 		}
 	}
-	v := b.Detail
-	// Step 1: 排除黑名单
-	if r.matchAnyPattern(v, bidCommonwealth_blacklistPatterns) {
-		return -1
-	}
-	if r.matchOnlyOnce(v) {
 	// NOTE(review): the first-winner pattern is compiled on every call and
 	// panics if the configured expression is invalid.
+	if b.FirstWinner != "" && regexp.MustCompile(bidCommonwealth_firstWinnerOrder).MatchString(b.FirstWinner) {
+		log.Println(b.Id, "第一中标候选人", b.FirstWinner, bidCommonwealth_firstWinnerOrder)
 		return 1
-	}
-	// Step 2: 精准匹配白名单
-	if r.matchAnyPattern(v, bidCommonwealth_whitelistPatterns) {
+	} else if r.matchOnlyOnce(b.Id, b.Detail) {
+		return 1
+	} else if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_whitelistPatterns) {
+		return 1
+	} else if b.Multipackage < 2 && len(moreWinnerReg.Split(b.Winner, -1)) > 1 {
 		// Fewer than two packages but several separator-delimited winner names.
+		log.Println(b.Id, "单包多个中标单位")
 		return 1
 	}
-	// Step 3: 检查“中标”附近是否有“联合体”
-	index := strings.Index(v, "中标")
 	// Look for "联合体" within 50 bytes on either side of the first "中标".
 	// strings.Index returns a byte offset, so the window is measured in bytes,
 	// not characters.
+	index := strings.Index(b.Detail, "中标")
 	if index != -1 {
 		start := max(0, index-50)
-		end := min(len(v), index+50)
-		contextAroundWin := v[start:end]
+		end := min(len(b.Detail), index+50)
+		contextAroundWin := b.Detail[start:end]
 		if strings.Contains(contextAroundWin, "联合体") {
+			log.Println(b.Id, "”中标“附件有”联合体“")
 			return 1
 		}
 	}
-	if r.matchAnyPattern(v, bidCommonwealth_modelPatterns) {
+	if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_modelPatterns) {
 		return -2
 	}
 	return -1
 }
 
 // 匹配任意模式
-func (r *Rule) matchAnyPattern(text string, patterns []string) bool {
+func (r *Rule) matchAnyPattern(_id string, text string, patterns []string) bool {
 	for _, pattern := range patterns {
 		if strings.HasPrefix(pattern, "全部包含:") {
 			vs := strings.Split(strings.TrimPrefix(pattern, "全部包含:"), "+")
@@ -121,20 +180,34 @@ func (r *Rule) matchAnyPattern(text string, patterns []string) bool {
 				}
 			}
 			if index == len(vs) {
-				log.Println(pattern)
+				log.Println(_id, "matchAnyPattern", pattern)
 				return true
 			}
 			continue
 		}
 		if matched, _ := regexp.MatchString(pattern, text); matched {
-			log.Println(pattern)
+			log.Println(_id, "matchAnyPattern", pattern)
 			return true
 		}
 	}
 	return false
 }
 
-func (r *Rule) matchOnlyOnce(text string) bool {
+// matchAllPattern reports whether text matches every regular expression in
+// patterns. An empty pattern list yields false, and a pattern that fails to
+// compile counts as a non-match because the error from regexp.MatchString is
+// discarded.
+func (r *Rule) matchAllPattern(text string, patterns []string) bool {
+	if len(patterns) == 0 {
+		return false
+	}
+	for _, expr := range patterns {
+		if ok, _ := regexp.MatchString(expr, text); !ok {
+			return false
+		}
+	}
+	return true
+}
+
+func (r *Rule) matchOnlyOnce(_id, text string) bool {
 	for _, v := range showOnlyOnce {
 		count := 0
 		for _, vv := range v {
@@ -144,6 +217,7 @@ func (r *Rule) matchOnlyOnce(text string) bool {
 			count++
 		}
 		if count > 0 && count == len(v) {
+			log.Println(_id, v)
 			return true
 		}
 	}

+ 178 - 33
service/service.go

@@ -2,9 +2,11 @@ package service
 
 import (
 	. "dataIdentify/db"
+	"fmt"
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
 	"github.com/gogf/gf/v2/util/gconv"
+	"golang.org/x/net/html"
 	"log"
 	"regexp"
 	"strings"
@@ -12,13 +14,31 @@ import (
 
 // Package-level state of the service:
 //   - SelectField is the projection passed to Mgo_Main.FindById in Start.
 //   - service is the Service implementation Pretreatment hands the bid to.
 //   - semicolonReg splits a key/value line on a full-width or ASCII colon.
 //   - allQuoteMode is the set of recognised quote modes; QuoteMode_Other is
 //     not a member.
 //
 // NOTE(review): "multipackage" is selected, but Pretreatment derives
 // BidInfo.Multipackage from len(com_package) - confirm the field is needed.
 var (
 	SelectField = map[string]interface{}{
-		"_id":         1,
-		"winnerorder": 1,
-		"detail":      1,
-		"subtype":     1,
-		"bidamount":   1,
+		"_id":          1,
+		"winnerorder":  1,
+		"detail":       1,
+		"subtype":      1,
+		"bidamount":    1,
+		"s_winner":     1,
+		"com_package":  1,
+		"multipackage": 1,
 	}
-	service Service = &Rule{}
+	service      Service = &Rule{}
+	semicolonReg         = regexp.MustCompile("[::]")
+	allQuoteMode         = map[string]bool{
+		QuoteMode_Whole:     true,
+		QuoteMode_UnitPrice: true,
+		QuoteMode_Rate:      true,
+		QuoteMode_Discount:  true,
+	}
+)
+
 // Quote-mode labels shared by Rule, Model and DataIdentify.
+const (
+	QuoteMode_Other     = "其他报价模式" // set by Model.Do when the reply names no mode in allQuoteMode
+	QuoteMode_Whole     = "整标报价模式" // also the fallback chosen by Rule.Execute
+	QuoteMode_UnitPrice = "单价模式"
+	QuoteMode_Rate      = "费率模式"
+	QuoteMode_Discount  = "上浮下浮模式"
 )
 
 type DataIdentify struct {
@@ -26,19 +46,18 @@ type DataIdentify struct {
 
 func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
 	log.Println("rpc接收到要识别的_id", _id)
-	_, a, b, _ := Start(*_id)
+	_, a, _, b, _ := Start(*_id)
 	*reply = map[string]string{}
-	if !allQuoteMode[a] {
-		a = "其他"
-	}
-	if a == "费率模式" {
+	if a == QuoteMode_Rate {
 		a = "费率"
-	} else if a == "单价模式" {
+	} else if a == QuoteMode_UnitPrice {
 		a = "单价"
-	} else if a == "上浮下浮模式" {
+	} else if a == QuoteMode_Discount {
 		a = "折扣率"
-	} else if a == "整体报价模式" {
+	} else if a == QuoteMode_Whole {
 		a = "正常报价"
+	} else {
+		a = "其他"
 	}
 	(*reply)["报价模式"] = a
 	if b == 1 {
@@ -50,48 +69,174 @@ func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
 }
 
 // Service is implemented by the identifiers (Rule and Model in this diff).
 // Execute returns, in order: a success flag, the quote mode, whether the quote
 // mode came from the model, the consortium code and whether that code came
 // from the model.
 type Service interface {
-	Execute(b *BidInfo) (bool, string, int, map[string]interface{})
+	Execute(b *BidInfo) (bool, string, bool, int, bool)
 }
 // BidInfo is the pre-processed bid that Pretreatment builds and passes to a
 // Service.
 type BidInfo struct {
 	// Detail is the notice body after clearPatterns, with "<br/>" turned into
 	// newlines and surrounding whitespace trimmed.
 	Id, Detail, Subtype string
-	WinnerOrder         []string
-	KvText              string
 	// OriginalDetail is the body after clearPatterns but before the "<br/>"
 	// replacement; Model.Do and ParseTable read it.
+	OriginalDetail      string
 	// FirstWinner is the entname of the first winnerorder entry, minus the
 	// comma-separated parts matched by the clear expression.
+	FirstWinner         string
 	// KvText holds the extracted key/value lines whose value is non-empty.
+	KvText              []string
 	Bidamount           float64
 	// Type selects the work in Rule.Execute: 0 both fields, 1 quote mode only,
 	// 2 consortium only.
+	Type                int
 	// Multipackage is the number of entries in com_package.
+	Multipackage        int
 	// Winner is the winner of the sole package, or s_winner when that is empty.
+	Winner              string
 	// TableKv has one header-to-cell map per data row of the first HTML table.
+	TableKv             []map[string]string
 }
 
 // Start loads the bid with the given _id from the configured main collection
 // and runs Pretreatment on it with t = 0. When the bid cannot be found it
 // logs the id and returns false with zero values.
-func Start(_id string) (bool, string, int, map[string]interface{}) {
+func Start(_id string) (bool, string, bool, int, bool) {
 	data, ok := Mgo_Main.FindById(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String(), _id, SelectField)
 	if !ok || data == nil || len(*data) == 0 {
 		log.Println(_id, "没有找到标讯")
-		return false, "", 0, nil
+		return false, "", false, 0, false
 	}
-	return Pretreatment(_id, *data)
+	return Pretreatment(_id, *data, 0)
 }
 
 // Pretreatment turns the raw bid document m into a BidInfo and hands it to
 // the package-level service. t is stored in BidInfo.Type. An empty m is
 // logged and reported as false with zero values.
-func Pretreatment(_id string, m map[string]interface{}) (bool, string, int, map[string]interface{}) {
+func Pretreatment(_id string, m map[string]interface{}, t int) (bool, string, bool, int, bool) {
 	if m == nil || len(m) == 0 {
 		log.Println(_id, "没有找到标讯")
-		return false, "", 0, nil
+		return false, "", false, 0, false
+	}
+	bi := &BidInfo{
+		Id:        _id,
+		Bidamount: gconv.Float64(m["bidamount"]),
+		Type:      t,
 	}
-	detail, _ := m["detail"].(string)
-	subtype, _ := m["subtype"].(string)
-	detail = strings.TrimSpace(strings.Replace(detail, "<br/>", "\n", -1))
+	bi.Detail, _ = m["detail"].(string)
 	// NOTE(review): each clear pattern is compiled on every call, and
 	// regexp.MustCompile panics on an invalid configured pattern.
 	for _, v := range clearPatterns {
-		detail = regexp.MustCompile(v).ReplaceAllString(detail, "")
+		bi.Detail = regexp.MustCompile(v).ReplaceAllString(bi.Detail, "")
 	}
-	bi := &BidInfo{
-		Id:          _id,
-		Detail:      detail,
-		Subtype:     subtype,
-		WinnerOrder: gconv.Strings(m["winnerorder"]),
-		Bidamount:   gconv.Float64(m["bidamount"]),
 	// Keep the cleaned body before "<br/>" becomes a newline; the model and the
 	// table parser read this copy.
+	bi.OriginalDetail = bi.Detail
+	bi.Subtype, _ = m["subtype"].(string)
+	bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "<br/>", "\n", -1))
+	com_package, _ := m["com_package"].([]interface{})
+	bi.Multipackage = len(com_package)
 	// With exactly one package, prefer that package's winner over s_winner.
+	if len(com_package) == 1 {
+		first, _ := com_package[0].(map[string]interface{})
+		bi.Winner, _ = first["winner"].(string)
+	}
+	if bi.Winner == "" {
+		bi.Winner = gconv.String(m["s_winner"])
+	}
+	if winnerorder := gconv.Maps(m["winnerorder"]); len(winnerorder) > 0 {
+		bi.FirstWinner = strings.TrimSpace(gconv.String(winnerorder[0]["entname"]))
 		// Drop the comma-separated parts that match the clear expression.
+		array := []string{}
+		for _, v := range strings.Split(bi.FirstWinner, ",") {
+			if bidCommonwealth_firstWinnerOrderClearReg.MatchString(v) {
+				continue
+			}
+			array = append(array, v)
+		}
+		bi.FirstWinner = strings.Join(array, ",")
+		//if strings.Contains(bi.FirstWinner, ",") && strings.Contains(bi.Detail, strings.ReplaceAll(bi.FirstWinner, ",", "")) {
+		//	bi.FirstWinner = ""
+		//}
 	}
 	// Extraction result: try result_20220219 first, then result_20220218.
 	extract, _ := Mgo_Extract.FindById("result_20220219", _id, `{"kvtext":1}`)
 	if extract == nil || len(*extract) == 0 {
 		extract, _ = Mgo_Extract.FindById("result_20220218", _id, `{"kvtext":1}`)
 	}
 	if extract != nil && len(*extract) > 0 {
-		bi.KvText, _ = (*extract)["kvtext"].(string)
+		kvText, _ := (*extract)["kvtext"].(string)
 		// Keep a line only when it has a colon and the text after the first
 		// colon is neither blank nor "/". The whole line is stored.
+		for _, v := range strings.Split(kvText, "\n") {
+			vs := semicolonReg.Split(v, -1)
+			if len(vs) < 2 || (strings.TrimSpace(vs[1]) == "" || strings.TrimSpace(vs[1]) == "/") {
+				continue
+			}
+			bi.KvText = append(bi.KvText, v)
+		}
 	}
 	// NOTE(review): the error returned by ParseTable is discarded, so a body
 	// without a table simply leaves TableKv empty.
+	bi.ParseTable(bi.OriginalDetail)
+	//for _, v := range bi.TableKv {
+	//	for k, v := range v {
+	//		log.Println(k, v)
+	//	}
+	//}
 	return service.Execute(bi)
 }
+
+// ParseTable parses htmlContent, takes the first <table> in document order and
+// appends one header-to-cell map per data row to bi.TableKv.
+//
+// The first collected row supplies the keys; cells beyond the header width are
+// ignored, and keys and values are whitespace-trimmed. An error is returned
+// when parsing fails, when there is no <table>, or when the table yields no
+// rows.
+//
+// Cells of a nested table are folded into the enclosing row, and the nested
+// rows are then collected again on their own.
+func (bi *BidInfo) ParseTable(htmlContent string) error {
+	root, err := html.Parse(strings.NewReader(htmlContent))
+	if err != nil {
+		return err
+	}
+
+	// Depth-first search that stops at the first <table> element.
+	var firstTable func(*html.Node) *html.Node
+	firstTable = func(n *html.Node) *html.Node {
+		if n.Type == html.ElementNode && n.Data == "table" {
+			return n
+		}
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			if hit := firstTable(child); hit != nil {
+				return hit
+			}
+		}
+		return nil
+	}
+	table := firstTable(root)
+	if table == nil {
+		return fmt.Errorf("未找到<table>标签")
+	}
+
+	// cellTexts appends the text of every <td>/<th> at or below n to acc.
+	var cellTexts func(*html.Node, []string) []string
+	cellTexts = func(n *html.Node, acc []string) []string {
+		if n.Type == html.ElementNode {
+			switch n.Data {
+			case "td", "th":
+				acc = append(acc, bi.extractText(n))
+			}
+		}
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			acc = cellTexts(child, acc)
+		}
+		return acc
+	}
+
+	// gatherRows records the cells of every <tr> at or below n that has any.
+	var rows [][]string
+	var gatherRows func(*html.Node)
+	gatherRows = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "tr" {
+			if cells := cellTexts(n, nil); len(cells) > 0 {
+				rows = append(rows, cells)
+			}
+		}
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			gatherRows(child)
+		}
+	}
+	gatherRows(table)
+
+	if len(rows) == 0 {
+		return fmt.Errorf("表格中没有数据")
+	}
+
+	header := rows[0]
+	for _, cells := range rows[1:] {
+		kv := make(map[string]string)
+		for idx, cell := range cells {
+			if idx >= len(header) {
+				break
+			}
+			kv[strings.TrimSpace(header[idx])] = strings.TrimSpace(cell)
+		}
+		bi.TableKv = append(bi.TableKv, kv)
+	}
+	return nil
+}
+
+// extractText returns the concatenation, in document order, of the data of
+// every text node at or below n.
+func (bi *BidInfo) extractText(n *html.Node) string {
+	var sb strings.Builder
+	if n.Type == html.TextNode {
+		sb.WriteString(n.Data)
+	}
+	for child := n.FirstChild; child != nil; child = child.NextSibling {
+		sb.WriteString(bi.extractText(child))
+	}
+	return sb.String()
+}