wangchuanjin 2 hete
szülő
commit
23ff3541bb
11 módosított fájl, 263 hozzáadás és 491 törlés
  1. 31 68
      config.yaml
  2. BIN
      dataIdentify
  3. 1 5
      db/db.go
  4. BIN
      extract
  5. 66 58
      extract.go
  6. 1 1
      main.go
  7. 38 147
      main_test.go
  8. 71 123
      service/rule.go
  9. 55 89
      service/service.go
  10. BIN
      对比结果.xlsx
  11. BIN
      报价模式+中标联合体验证第二轮.xlsx

+ 31 - 68
config.yaml

@@ -16,91 +16,56 @@ mongodb:
     #mongodbAddr: "172.20.45.129:27002"
     #dbName: "qfw_data"
     collection: "bidding"
-  extract:
-    mongodbAddr: "172.17.4.85:27080"
-    dbName: "qfw"
-    size: 8
-model:
-  apiServer: "https://dify_v2.jydev.jianyu360.com/v1"
-  apiKey: "app-a2sCpnnOn6UAhWj9waToxETv"
-  user: "jianyu"
-clearPatterns:
-  - "限.{2,6}价"
-  - "候选人是联合体的"
-  - "中联合体各方的名称均应填写"
-  - "[\u4e00-\u9fa5]+[::](/|详见附件)"
 quoteMode:
-  minBidamount: 1000
+  minBidamount: 10000
   tableK: "报价[方形模]式"
   rules:
+    - mode: "整标报价模式"
+      tableV: "总价"
+      patterns:
+        - "(成交|中标)总价[::]"
+        - "报价[方形模]式[::]总价"
+        - "(成交|中标)总金额.{0,6}[::]"
+      excludePatterns:
+        - "((?s)报价.{1,10}/)|单价|上浮|下浮|费率|折扣"
     - mode: "费率模式"
       tableV: "费率"
+      tableKv:
+        - k: "((投标|中标|成交)[\u4e00-\u9fa5]{0,8}[((]?费率[))]?)|投标报价系数|费率报价"
+          v: "[0-9.]+%"
       patterns:
-        - "((投标|中标|成交)[\u4e00-\u9fa5]{0,8}费率)|((投标报价系数|费率报价)[0-9.]+%)"
+        - "(((投标|中标|成交)[\u4e00-\u9fa5]{0,8}[((]?费率[))]?)|投标报价系数|费率报价)[::][0-9.]+%"
         - "报价[方形模]式[::]费率"
-        - "投标报价[((]费率[))]"
-        - "招标文件.{0,10}费率[.0-9]+%"
-      modelPatterns: "费率"
     - mode: "上浮下浮模式"
-      tableK: "中标.{0,8}金额|[上下]浮率"
-      tableV: "上浮|下浮|折扣|[.0-9]+%"
+      tableK: "(中标|成交)([((].{2,6}[))])?金额"
+      tableV: "上浮|下浮|折扣"
+      tableKv:
+        - k: "(磋商|投标)报价.{0,6}[上下]浮率"
+          v: "[0-9.]+%"
       patterns:
-        - "(?s)(投标|中标|成交|报价)[^((]{0,12}[上下]浮率"
-        - "(?s)统一[上下]浮率为[::][0-9.]+%"
-        - "(?s)存款利率[上下]浮率[::]?[0-9.]+%"
-        - "[投中招]标.{0,8}折扣系数"
+        - "(统一|存款利率|中标|成交)[\u4e00-\u9fa5()()]{0,6}[上下]浮率为?[::]?[0-9.]+%"
         - "报价[方形模]式[::](上浮|下浮|折扣)"
         - "[上下]浮率报价[::]?[0-9.]+%"
-      modelPatterns: "上浮率|下浮率"
-    - mode: "整标报价模式"
-      #tableK: "((中标|成交).{0,8}金额)|投标总价"
-      tableV: "总价|([0-9]+[.,,]?[0-9]+)"
-      patterns:
-        - "(成交|中标)总价[::]"
-        - "报价[方形模]式[::]总价"
-        - "(成交|中标)总金额.{0,6}[::]"
+        - "投标报价[::][上下]浮率"
+        - "采用报价[上下]浮率[\u4e00-\u9fa5()()]+最接近基准[上下]浮率[\u4e00-\u9fa5()()]+方式选取中标单位"
     - mode: "单价模式"
-      tableK: "(中标|成交).{0,8}单价|单价报价"
-      tableV: "单价|[.0-9]+"
+      tableV: "单价"
       patterns:
-        - "标的单价"
         - "报价[方形模]式[::]单价"
-      lineUnderstand:
-        patterns:
-          - "全部包含:标的+数量+单价"
-          - "全部包含:规格型号+数量+单价"
-          - "全部包含:技术规格+数量+单价"
-        space: 1
-      modelPatterns: "全部包含:单价+数量"
+      mustTableKv:
+        - k: "(标的|标项)名称"
+          v: "[\u4e00-\u9fa5]{4,}"
+        - k: "数量"
+          v: "[0-9.]+"
+        - k: "单价"
+          v: "[0-9.]+"
 bidCommonwealth:
-  subtype:
-    - 中标
-    - 成交
-    - 合同
-  showOnlyOnce:
-    - ["牵头(羊|人|供应商|单位)","成员单位"]
-  firstWinnerOrder: "联合体|(牵头(人|羊|供应商|单位))|(公司.{6,}公司)"
-  firstWinnerOrderClear: "(.+?的.+?为)|(.{0,4}[支分]公司)"
   # 白名单规则集合(确认是联合体中标)
   whitelistPatterns:
-    - "由.*组成联合体.*中标"
-    - "中标人为联合体.*包含.*?。"
-    - "联合体.*中标候选人.*包括.*?。"
-    - "联合体.*中标.*(?:成员|牵头方|牵头人|成员单位).*为.*?。"
-    - "牵头单位.*与.*组成.*联合体.*中标"
-    - "联合体.*为唯一中标候选人"
-    - "联合体.*通过评审.*被推荐为中标人"
-    - "联合体.*(?:为|成为|被推荐为|被确定为).*中标人"
-    - "中标结果:.*由.*组成.*联合体"
-    - "中标单位:.*公司.*与.*公司.*组成联合体"
-    - "中标人信息.*包含.*联合体.*成员"
-    - "评标委员会推荐.*联合体.*为中标候选人"
-    - "是否联合体[::]+是"
     - "(((中标|成交)[^候选]{0,8})|(第一.{0,6}候选人))[::][\u4e00-\u9fa5]+[((]?(联合|主)体"
     - "联合体.{0,8}、.{2,20}联合体"
-    - ".{2,50}与.{2,50}组成的联合体"
     - "([((]主[))].{2,30}[,,][((]成[))])"
-    - "((牵头|成员)(人|羊|供应商|单位)|(投标|中标|成交)(联合体|成员))[::]"
+    - "((牵头|成员)(人|羊|供应商|单位)|(投标|中标|成交)(联合体|成员))[::][\u4e00-\u9fa5]+"
     - "(成交|中标)供应商[::]联合体"
   # 黑名单规则集合(排除非联合体中标)
   blacklistPatterns:
@@ -112,6 +77,4 @@ bidCommonwealth:
     - "联合体.*不符合.*资格要求"
     - "中标人.*未与其他单位合作"
     - "单一投标人.*中标"
-    - "接受联合体投标[::]\\s*不接受"
-  modelPatterns:
-    - "联合体成员|牵头(供应商|人|羊)|成员单位"
+    - "接受联合体投标[::]\\s*不接受"

BIN
dataIdentify


+ 1 - 5
db/db.go

@@ -8,8 +8,7 @@ import (
 )
 
 var (
-	Mgo_Extract *MongodbSim
-	Mgo_Main    *MongodbSim
+	Mgo_Main *MongodbSim
 )
 
 func init() {
@@ -17,7 +16,4 @@ func init() {
 	g.Config().MustGet(ctx, "mongodb.main").Struct(&Mgo_Main)
 	Mgo_Main.InitPool()
 	log.Println("初始化mongodb main")
-	g.Config().MustGet(ctx, "mongodb.extract").Struct(&Mgo_Extract)
-	Mgo_Extract.InitPool()
-	log.Println("初始化mongodb extract")
 }

BIN
extract


+ 66 - 58
extract.go

@@ -5,100 +5,108 @@ import (
 	. "app.yhyue.com/moapp/jybase/mongodb"
 	. "dataIdentify/db"
 	. "dataIdentify/service"
+	"flag"
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
 	"github.com/gogf/gf/v2/util/gconv"
 	"github.com/gogf/gf/v2/util/grand"
 	"log"
-	"strings"
+	"sync"
 	"time"
 )
 
 func main() {
-	var maxSize = 200
-	zblhtSize := 0
+	maxSize := flag.Int("c", 0, "")
+	poolSize := flag.Int("p", 5, "")
+	lastId := flag.String("id", "", "")
+	flag.Parse()
 	log.Println("start...")
 	sess := Mgo_Main.GetMgoConn()
 	defer Mgo_Main.DestoryMongoConn(sess)
 	SelectField["publishtime"] = 1
 	SelectField["href"] = 1
 	SelectField["s_winner"] = 1
-	it := sess.DB(Mgo_Main.DbName).C(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()).Find(map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$lt": StringTOBsonId("686ce21dd5d8e4081f8f2d98"),
-		},
+	query := map[string]interface{}{
 		//"_id":         StringTOBsonId("6763aa5555a3d7e571cda133"),
 		"extracttype": 1,
-	}).Select(SelectField).Sort("-_id").Iter()
-	types := "中标、成交、合同"
-	all := map[string]int{}
-	for _, v := range strings.Split(types, "、") {
-		all[v] = 0
+	}
+	if *lastId != "" {
+		query["_id"] = map[string]interface{}{
+			"$lt": StringTOBsonId(*lastId),
+		}
+	}
+	it := sess.DB(Mgo_Main.DbName).C(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()).Find(query).Select(SelectField).Sort("-_id").Iter()
+	all := map[string]int{
+		"是中标联合体": 0,
+	}
+	for k, _ := range AllQuoteMode {
+		all[k] = 0
 	}
 	var isOver = func() bool {
 		for _, v := range all {
-			if v < maxSize {
+			if v < *maxSize {
 				return false
 			}
 		}
 		return true
 	}
 	index := 0
-	for m := make(map[string]interface{}); it.Next(m); {
+	pool := make(chan bool, *poolSize)
+	wait := &sync.WaitGroup{}
+	lock := &sync.Mutex{}
+	isAllOver := false
+	for mm := make(map[string]interface{}); it.Next(mm); {
 		index++
-		if index%500 == 0 {
-			log.Println("index", index)
-		}
-		subtype, _ := m["subtype"].(string)
-		if _, ok := all[subtype]; !ok {
-			continue
+		if index%50000 == 0 {
+			log.Println("index", index, all)
 		}
-		if !isOver() {
+		pool <- true
+		wait.Add(1)
+		go func(m map[string]interface{}) {
+			defer func() {
+				<-pool
+				wait.Done()
+			}()
+			subtype, _ := m["subtype"].(string)
+			if subtype != "中标" && subtype != "成交" && subtype != "合同" {
+				return
+			}
 			publishtime := gconv.Int(m["publishtime"])
 			if publishtime%grand.N(1, 1000) != 0 {
-				continue
+				return
 			}
-			if all[subtype] >= maxSize {
-				continue
+			_id := BsonIdToSId(m["_id"])
+			href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", _id) + ".html"
+			m["jybxhref"] = href
+			quoteMode, bidCommonwealth := Pretreatment(_id, m, 0)
+			if quoteMode == "" && bidCommonwealth != 1 {
+				return
 			}
-		}
-		_id := BsonIdToSId(m["_id"])
-		href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", _id) + ".html"
-		m["jybxhref"] = href
-		var flag bool
-		var quoteMode string
-		var bidCommonwealth int
-		if isOver() {
-			flag, _, _, bidCommonwealth, _ = Pretreatment(_id, m, 2)
-			if bidCommonwealth == 1 {
-				flag, quoteMode, _, _, _ = Pretreatment(_id, m, 1)
-				if quoteMode == "" || quoteMode == QuoteMode_Other {
-					continue
+			m["quote_mode"] = quoteMode
+			m["bid_commonwealth"] = bidCommonwealth
+			delete(m, "detail")
+			lock.Lock()
+			if all[quoteMode] < *maxSize || (all["是中标联合体"] < *maxSize && bidCommonwealth == 1) {
+				if Mgo_Main.SaveByOriID("wcj_bidding_"+time.Now().Format("20060102"), m) {
+					log.Println("save", _id)
+					if all[quoteMode] < *maxSize {
+						all[quoteMode]++
+					}
+					if all["是中标联合体"] < *maxSize && bidCommonwealth == 1 {
+						all["是中标联合体"]++
+					}
+					if isOver() {
+						isAllOver = true
+					}
 				}
 			}
-		} else {
-			flag, quoteMode, _, bidCommonwealth, _ = Pretreatment(_id, m, 0)
-			if quoteMode == "" || quoteMode == QuoteMode_Other {
-				continue
-			}
-		}
-		if !flag {
-			return
-		}
-		m["quote_mode"] = quoteMode
-		if bidCommonwealth == 1 {
-			zblhtSize++
-		}
-		m["bid_commonwealth"] = bidCommonwealth
-		delete(m, "detail")
-		if !isOver() || bidCommonwealth == 1 {
-			newId := Mgo_Main.SaveByOriID("wcj_bidding_"+time.Now().Format("20060102"), m)
-			log.Println("save", newId)
-		}
-		all[subtype]++
-		if isOver() && zblhtSize == maxSize {
+			lock.Unlock()
+		}(mm)
+		mm = make(map[string]interface{})
+		if isAllOver {
 			break
 		}
 	}
+	wait.Wait()
 	log.Println("over...", index)
 }

+ 1 - 1
main.go

@@ -12,7 +12,7 @@ import (
 	"os"
 )
 
-func main111() {
+func main11() {
 	var logger *lumberjack.Logger
 	ctx := gctx.New()
 	g.Config().MustGet(ctx, "logger").Struct(&logger)

+ 38 - 147
main_test.go

@@ -1,14 +1,8 @@
 package main
 
 import (
-	"app.yhyue.com/moapp/jybase/encrypt"
-	. "app.yhyue.com/moapp/jybase/mongodb"
-	. "dataIdentify/db"
 	. "dataIdentify/service"
 	"fmt"
-	"github.com/gogf/gf/v2/frame/g"
-	"github.com/gogf/gf/v2/os/gctx"
-	"github.com/gogf/gf/v2/util/gconv"
 	"github.com/xuri/excelize/v2"
 	"log"
 	"net/rpc"
@@ -16,89 +10,9 @@ import (
 	"testing"
 )
 
-//	{
-//		$unset: {
-//		"bid_commonwealth" : "",
-//		"quote_mode" : "",
-//		"model_dataidentify" : ""
-//		}
-//	}
-func TestAddField(t *testing.T) {
-	pool := make(chan bool, 5)
-	wait := &sync.WaitGroup{}
-	sess := Mgo_Main.GetMgoConn()
-	defer Mgo_Main.DestoryMongoConn(sess)
-	it := sess.DB("qfw_data").C("wcj_bidding_20250708").Find(map[string]interface{}{}).Select(nil).Sort("-_id").Iter()
-	index := 0
-	for tm := make(map[string]interface{}); it.Next(tm); {
-		index++
-		if index%100 == 0 {
-			log.Println("index", index)
-		}
-		pool <- true
-		wait.Add(1)
-		go func(m map[string]interface{}) {
-			defer func() {
-				<-pool
-				wait.Done()
-			}()
-			href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", BsonIdToSId(m["_id"])) + ".html"
-			Mgo_Main.Update("wcj_bidding_20250708", map[string]interface{}{
-				"_id": m["_id"],
-			}, map[string]interface{}{"$set": map[string]interface{}{
-				"href": href,
-			}}, false, false)
-		}(tm)
-		tm = make(map[string]interface{})
-	}
-	wait.Wait()
-	log.Println("over...", index)
-}
-
 // 示例测试
 func TestRule(t *testing.T) {
-	Start("686bc7add5d8e4081f8b8766")
-}
-func TestAll(t *testing.T) {
-	pool := make(chan bool, g.Config().MustGet(gctx.New(), "poolSize").Int())
-	wait := &sync.WaitGroup{}
-	sess := Mgo_Main.GetMgoConn()
-	defer Mgo_Main.DestoryMongoConn(sess)
-	collection := "wcj_bidding_20250708"
-	it := sess.DB("qfw_data").C(collection).Find(nil).Select(SelectField).Sort("-_id").Iter()
-	index := 0
-	for tm := make(map[string]interface{}); it.Next(tm); {
-		index++
-		if index%100 == 0 {
-			log.Println("index", index)
-		}
-		pool <- true
-		wait.Add(1)
-		go func(m map[string]interface{}) {
-			defer func() {
-				<-pool
-				wait.Done()
-			}()
-			_id := BsonIdToSId(m["_id"])
-			flag, quoteMode, _, bidCommonwealth, _ := Pretreatment(_id, m, 0)
-			if !flag {
-				return
-			}
-			set := map[string]interface{}{}
-			if quoteMode != "" {
-				set["quote_mode"] = quoteMode
-			}
-			if bidCommonwealth != -1 {
-				set["bid_commonwealth"] = bidCommonwealth
-			}
-			if len(set) > 0 {
-				Mgo_Main.UpdateById(collection, m["_id"], map[string]interface{}{"$set": set})
-			}
-		}(tm)
-		tm = make(map[string]interface{})
-	}
-	wait.Wait()
-	log.Println("over...", index)
+	Start("686964f0d5d8e4081f880f7c")
 }
 
 /* 返回结果:
@@ -120,36 +34,6 @@ func TestRpc(t *testing.T) {
 	}
 	log.Println(reply)
 }
-func TestDd(t *testing.T) {
-	of, err := excelize.OpenFile("./报价模式+中标联合体验证.xlsx")
-	if err != nil {
-		log.Fatalln(err)
-		return
-	}
-	// 读取指定工作表中的所有行。
-	f := excelize.NewFile()
-	sheetName := of.GetSheetList()[1]
-	newSheetIndex, _ := f.NewSheet(sheetName)
-	f.SetActiveSheet(newSheetIndex)
-	rows, err := of.GetRows(sheetName)
-	k := 0
-	for ck, row := range rows {
-		if ck != 0 {
-			data, _ := Mgo_Main.FindById("bidding_hasdetail", row[0], `{"subtype":1}`)
-			if data == nil || len(*data) == 0 {
-				continue
-			} else if subtype := gconv.String((*data)["subtype"]); subtype != "中标" && subtype != "成交" && subtype != "合同" {
-				continue
-			}
-		}
-		k++
-		for kk, vv := range row {
-			cell, _ := excelize.ColumnNumberToName(kk + 1)
-			f.SetCellValue(sheetName, fmt.Sprintf("%s%d", cell, k), vv)
-		}
-	}
-	f.SaveAs("./新_报价模式+中标联合体验证.xlsx")
-}
 
 // 总共 300 报价模式不一致 86 中标联合体不一致 2
 func TestCompare(t *testing.T) {
@@ -183,10 +67,6 @@ func TestCompare(t *testing.T) {
 	lock := &sync.Mutex{}
 	pool := make(chan bool, 5)
 	wait := &sync.WaitGroup{}
-	var aEq, bEq float64
-	oneFieldMap := map[string]float64{}
-	oneFieldEqMap := map[string]float64{}
-	var aBidModel, bBigModel int64
 	for k, r := range rows {
 		if k == 0 {
 			continue
@@ -198,10 +78,7 @@ func TestCompare(t *testing.T) {
 				<-pool
 				wait.Done()
 			}()
-			_, a, aa, b, bb := Start(row[0])
-			if a == QuoteMode_Other {
-				a = QuoteMode_Whole
-			}
+			a, b := Start(row[0])
 			bv := ""
 			if b == 1 {
 				bv = "是"
@@ -210,40 +87,54 @@ func TestCompare(t *testing.T) {
 			}
 			lock.Lock()
 			result[row[0]] = map[string]interface{}{
-				"报价模式":      a,
-				"报价模式_大模型":  aa,
-				"中标联合体":     bv,
-				"中标联合体_大模型": bb,
+				"报价模式":  a,
+				"中标联合体": bv,
 			}
 			lock.Unlock()
 		}(k, r)
 	}
 	wait.Wait()
+	var aEq, bEq float64
+	bZOneFieldMap := map[string]float64{}
+	oneFieldMap := map[string]float64{}
+	oneFieldEqMap := map[string]float64{}
+	var aCount, aaCount, bCount, bbCount float64
 	rowNum := 1
 	for k, row := range rows {
 		if k == 0 {
 			continue
 		}
-		if row[4] == "" {
-			row[4] = QuoteMode_Whole
+		bZOneFieldMap[row[4]]++
+		if row[4] != "" {
+			aaCount++
+		}
+		if row[6] == "是" {
+			bbCount++
 		}
 		obj := result[row[0]]
-		a, b := obj["报价模式"], obj["中标联合体"]
-		if gconv.Bool(obj["报价模式_大模型"]) {
-			aBidModel++
+		if obj == nil {
+			continue
 		}
-		if gconv.Bool(obj["中标联合体_大模型"]) {
-			bBigModel++
+		a, b := obj["报价模式"], obj["中标联合体"]
+		if a == "" && b == "否" {
+			continue
 		}
-		oneFieldMap[row[4]]++
-		if a == row[4] {
-			aEq++
-			oneFieldEqMap[row[4]]++
+		if a != "" {
+			if a == row[4] {
+				aEq++
+				oneFieldEqMap[row[4]]++
+
+			}
+			aCount++
+			oneFieldMap[a.(string)]++
 		}
-		if b == row[6] {
-			bEq++
+		if b == "是" {
+			if b == row[6] {
+				bEq++
+			}
+			bCount++
 		}
-		if a == row[4] && b == row[6] {
+		if (a == "" || a == row[4]) && (b == "否" || b == row[6]) {
 			continue
 		}
 		rowNum++
@@ -264,11 +155,11 @@ func TestCompare(t *testing.T) {
 	countRows := float64(len(rows) - 1)
 	oneFieldMsg := ""
 	for k, v := range oneFieldMap {
-		oneFieldMsg += fmt.Sprintf("报价模式-%s,总数:%.f,正确:%.f,正确率:%.2f%%", k, v, oneFieldEqMap[k], oneFieldEqMap[k]/v*100) + "\n"
+		oneFieldMsg += fmt.Sprintf("报价模式-%s,总数:%.f,识别出:%.f,识别率:%.f%%,正确:%.f,正确率:%.2f%%", k, bZOneFieldMap[k], v, v/bZOneFieldMap[k]*100, oneFieldEqMap[k], oneFieldEqMap[k]/v*100) + "\n"
 	}
 	log.Println("总数", countRows, "\n",
-		fmt.Sprintf("报价模式,正确:%.f,正确率:%.2f%%", aEq, aEq/countRows*100), "\n",
+		fmt.Sprintf("报价模式,总数:%.f,识别出:%.f,识别率:%.f%%,正确:%.f,正确率:%.2f%%", aaCount, aCount, aCount/aaCount*100, aEq, aEq/aCount*100), "\n",
 		oneFieldMsg,
-		fmt.Sprintf("中标联合,正确:%.f,正确率:%.2f%%", bEq, bEq/countRows*100), "\n",
-		"报价模式大模型抽取", aBidModel, "中标联合体大模型抽取", bBigModel)
+		fmt.Sprintf("中标联合体,总数:%.f,识别出:%.f,识别率:%.f%%,正确:%.f,正确率:%.2f%%", bbCount, bCount, bCount/bbCount*100, bEq, bEq/bCount*100), "\n",
+		"")
 }

+ 71 - 123
service/rule.go

@@ -4,130 +4,121 @@ import (
 	"github.com/gogf/gf/v2/container/gvar"
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
-	"github.com/gogf/gf/v2/util/gconv"
 	"log"
 	"regexp"
 	"strings"
 )
 
 var (
-	clearPatterns                            = g.Config().MustGet(gctx.New(), "clearPatterns").Strings()
-	bidCommonwealth_blacklistPatterns        = g.Config().MustGet(gctx.New(), "bidCommonwealth.blacklistPatterns").Strings()
-	bidCommonwealth_whitelistPatterns        = g.Config().MustGet(gctx.New(), "bidCommonwealth.whitelistPatterns").Strings()
-	bidCommonwealth_modelPatterns            = g.Config().MustGet(gctx.New(), "bidCommonwealth.modelPatterns").Strings()
-	bidCommonwealth_firstWinnerOrder         = g.Config().MustGet(gctx.New(), "bidCommonwealth.firstWinnerOrder").String()
-	bidCommonwealth_firstWinnerOrderClearReg = regexp.MustCompile(g.Config().MustGet(gctx.New(), "bidCommonwealth.firstWinnerOrderClear").String())
-	quoteModeRules                           = g.Config().MustGet(gctx.New(), "quoteMode.rules").Maps()
-	quoteModeTableKReg                       = g.Config().MustGet(gctx.New(), "quoteMode.tableK").String()
-	showOnlyOnce                             = [][]string{}
-	moreWinnerReg                            = regexp.MustCompile("[,,、]")
+	bidCommonwealth_blacklistPatterns = g.Config().MustGet(gctx.New(), "bidCommonwealth.blacklistPatterns").Strings()
+	bidCommonwealth_whitelistPatterns = g.Config().MustGet(gctx.New(), "bidCommonwealth.whitelistPatterns").Strings()
+	quoteModeRules                    = g.Config().MustGet(gctx.New(), "quoteMode.rules").Maps()
+	quoteModeTableKReg                = g.Config().MustGet(gctx.New(), "quoteMode.tableK").String()
 )
 
-func init() {
-	for _, v := range g.Config().MustGet(gctx.New(), "bidCommonwealth.showOnlyOnce").Array() {
-		showOnlyOnce = append(showOnlyOnce, gconv.Strings(v))
-	}
-}
-
 type Rule struct{}
 
-func (r *Rule) Execute(b *BidInfo) (bool, string, bool, int, bool) {
+func (r *Rule) Execute(b *BidInfo) (string, int) {
 	var bidCommonwealth int
 	var quoteMode string
-	var quoteMode_bigModel, bidCommonwealth_bigModel bool
-	var bigModel map[string]bool
 	if b.Type == 0 {
 		bidCommonwealth = r.bidCommonwealth(b)
-		quoteMode, bigModel = r.quoteMode(b)
-		if bidCommonwealth == -2 && len(bigModel) > 0 {
-			_, quoteMode, quoteMode_bigModel, bidCommonwealth, bidCommonwealth_bigModel = (&Model{}).Do(b, 2)
-		} else if bidCommonwealth == -2 {
-			_, _, _, bidCommonwealth, bidCommonwealth_bigModel = (&Model{}).Do(b, 3)
-		} else if len(bigModel) > 0 {
-			_, quoteMode, quoteMode_bigModel, _, _ = (&Model{}).Do(b, 1)
-			if !bigModel[quoteMode] {
-				quoteMode = ""
-			}
-		}
+		quoteMode, _ = r.quoteMode(b)
 	} else if b.Type == 1 {
-		quoteMode, bigModel = r.quoteMode(b)
-		if len(bigModel) > 0 {
-			_, quoteMode, quoteMode_bigModel, _, _ = (&Model{}).Do(b, 1)
-			if !bigModel[quoteMode] {
-				quoteMode = ""
-			}
-		}
+		quoteMode, _ = r.quoteMode(b)
 	} else if b.Type == 2 {
 		bidCommonwealth = r.bidCommonwealth(b)
-		if bidCommonwealth == -2 {
-			_, _, _, bidCommonwealth, bidCommonwealth_bigModel = (&Model{}).Do(b, 3)
-		}
 	}
-	if b.Type == 0 || b.Type == 1 {
-		if quoteMode == "" && (b.Bidamount == 0 || b.Bidamount > g.Config().MustGet(gctx.New(), "quoteMode.minBidamount").Float64()) {
-			quoteMode = QuoteMode_Whole
-		}
+	if quoteMode == QuoteMode_Whole && (b.Bidamount < g.Config().MustGet(gctx.New(), "quoteMode.minBidamount").Float64()) {
+		quoteMode = ""
 	}
 	log.Println(b.Id, "规则", "报价模式", quoteMode, "中标联合体", bidCommonwealth)
-	return true, quoteMode, quoteMode_bigModel, bidCommonwealth, bidCommonwealth_bigModel
+	return quoteMode, bidCommonwealth
 }
 
 // 识别报价模式
 func (r *Rule) quoteMode(b *BidInfo) (string, map[string]bool) {
 	for _, v := range quoteModeRules {
 		vv := gvar.New(v).MapStrVar()
+		mustTableKv := vv["mustTableKv"].Maps()
 		modeName := vv["mode"].String()
-		for _, row := range b.TableKv {
-			for k, v := range row {
-				if tableV := vv["tableV"].String(); tableV != "" {
-					matchedK, _ := regexp.MatchString(quoteModeTableKReg, k)
-					if !matchedK {
-						if tableK := vv["tableK"].String(); tableK != "" {
-							matchedK, _ = regexp.MatchString(tableK, k)
+		for _, table := range b.TableKv {
+			array := []string{}
+			for _, row := range table {
+				mustTableKvMap := map[int]bool{}
+				for k, v := range row {
+					if tableV := vv["tableV"].String(); tableV != "" {
+						matchedK, _ := regexp.MatchString(quoteModeTableKReg, k)
+						if !matchedK {
+							if tableK := vv["tableK"].String(); tableK != "" {
+								matchedK, _ = regexp.MatchString(tableK, k)
+							}
+						}
+						matchedV, _ := regexp.MatchString(tableV, v)
+						if matchedK && matchedV {
+							log.Println(b.Id, "tableKv", k, v)
+							return modeName, nil
 						}
 					}
-					matchedV, _ := regexp.MatchString(tableV, v)
-					if matchedK && matchedV {
-						log.Println(b.Id, "tableKv", k, v)
+					// tableKv rules: a single cell whose key and value both match decides the mode.
+					for _, vv := range vv["tableKv"].Maps() {
+						vvs := gvar.New(vv).MapStrVar()
+						tableK := vvs["k"].String()
+						tableV := vvs["v"].String()
+						if tableK == "" || tableV == "" {
+							continue
+						}
+						matchedK, _ := regexp.MatchString(tableK, k)
+						matchedV, _ := regexp.MatchString(tableV, v)
+						if matchedK && matchedV {
+							log.Println(b.Id, "tableKv", k, v)
+							return modeName, nil
+						}
+					}
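+					// mustTableKv rules: mark which required pair this cell satisfies; all pairs must be marked within one row (checked after the cell loop).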
+					for kk, vv := range mustTableKv {
+						vvs := gvar.New(vv).MapStrVar()
+						tableK := vvs["k"].String()
+						tableV := vvs["v"].String()
+						if tableK == "" || tableV == "" {
+							continue
+						}
+						matchedK, _ := regexp.MatchString(tableK, k)
+						matchedV, _ := regexp.MatchString(tableV, v)
+						if matchedK && matchedV {
+							array = append(array, k+":"+v)
+							mustTableKvMap[kk] = true
+							break
+						}
+					}
+				}
+				for kk, _ := range mustTableKv {
+					if !mustTableKvMap[kk] {
+						break
+					}
+					if kk == len(mustTableKv)-1 {
+						log.Println(b.Id, "mustTableKv", strings.Join(array, " "))
 						return modeName, nil
 					}
 				}
 			}
 		}
 	}
-	bigModel := map[string]bool{}
 	for _, v := range quoteModeRules {
 		vv := gvar.New(v).MapStrVar()
 		modeName := vv["mode"].String()
-		var lineUnderstandPatterns []string
-		if lineUnderstand := vv["lineUnderstand"].MapStrVar(); lineUnderstand != nil {
-			lineUnderstandPatterns = lineUnderstand["patterns"].Strings()
+		excludePatterns := vv["excludePatterns"].Strings()
+		if len(excludePatterns) > 0 && !r.matchAnyPattern(b.Id, b.Detail, excludePatterns) {
+			return modeName, nil
 		}
 		lines := strings.Split(b.Detail, "\n")
-		for lineNum, lineVal := range lines {
+		for _, lineVal := range lines {
 			if r.matchAnyPattern(b.Id, lineVal, vv["patterns"].Strings()) {
 				return modeName, nil
 			}
-			for _, vvv := range lineUnderstandPatterns {
-				countLine := len(strings.Split(vvv, "+"))
-				if countLine > 1 {
-					countLine += vv["space"].Int()
-				}
-				newText := []string{}
-				for i := lineNum; i < lineNum+countLine && i < len(lines); i++ {
-					newText = append(newText, lines[i])
-				}
-				if r.matchAnyPattern(b.Id, strings.Join(newText, "\n"), []string{vvv}) {
-					return modeName, nil
-				}
-			}
-		}
-		//
-		if modelPatterns := vv["modelPatterns"].String(); modelPatterns != "" && r.matchAnyPattern(b.Id, b.Detail, []string{modelPatterns}) {
-			bigModel[modeName] = true
 		}
 	}
-	return "", bigModel
+	return "", nil
 }
 
 // 识别中标联合体
@@ -136,34 +127,8 @@ func (r *Rule) bidCommonwealth(b *BidInfo) int {
 	if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_blacklistPatterns) {
 		return -1
 	}
-	for _, v := range b.KvText {
-		if r.matchAnyPattern(b.Id, v, bidCommonwealth_whitelistPatterns) {
-			return 1
-		}
-	}
-	if b.FirstWinner != "" && regexp.MustCompile(bidCommonwealth_firstWinnerOrder).MatchString(b.FirstWinner) {
-		log.Println(b.Id, "第一中标候选人", b.FirstWinner, bidCommonwealth_firstWinnerOrder)
+	if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_whitelistPatterns) {
 		return 1
-	} else if r.matchOnlyOnce(b.Id, b.Detail) {
-		return 1
-	} else if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_whitelistPatterns) {
-		return 1
-	} else if b.Multipackage < 2 && len(moreWinnerReg.Split(b.Winner, -1)) > 1 {
-		log.Println(b.Id, "单包多个中标单位")
-		return 1
-	}
-	index := strings.Index(b.Detail, "中标")
-	if index != -1 {
-		start := max(0, index-50)
-		end := min(len(b.Detail), index+50)
-		contextAroundWin := b.Detail[start:end]
-		if strings.Contains(contextAroundWin, "联合体") {
-			log.Println(b.Id, "”中标“附件有”联合体“")
-			return 1
-		}
-	}
-	if r.matchAnyPattern(b.Id, b.Detail, bidCommonwealth_modelPatterns) {
-		return -2
 	}
 	return -1
 }
@@ -206,20 +171,3 @@ func (r *Rule) matchAllPattern(text string, patterns []string) bool {
 	}
 	return false
 }
-
-func (r *Rule) matchOnlyOnce(_id, text string) bool {
-	for _, v := range showOnlyOnce {
-		count := 0
-		for _, vv := range v {
-			if len(regexp.MustCompile(vv).FindAllString(text, -1)) != 1 {
-				break
-			}
-			count++
-		}
-		if count > 0 && count == len(v) {
-			log.Println(_id, v)
-			return true
-		}
-	}
-	return false
-}

+ 55 - 89
service/service.go

@@ -1,8 +1,8 @@
 package service
 
 import (
+	. "app.yhyue.com/moapp/jybase/common"
 	. "dataIdentify/db"
-	"fmt"
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
 	"github.com/gogf/gf/v2/util/gconv"
@@ -14,18 +14,13 @@ import (
 
 var (
 	SelectField = map[string]interface{}{
-		"_id":          1,
-		"winnerorder":  1,
-		"detail":       1,
-		"subtype":      1,
-		"bidamount":    1,
-		"s_winner":     1,
-		"com_package":  1,
-		"multipackage": 1,
+		"_id":       1,
+		"detail":    1,
+		"subtype":   1,
+		"bidamount": 1,
 	}
 	service      Service = &Rule{}
-	semicolonReg         = regexp.MustCompile("[::]")
-	allQuoteMode         = map[string]bool{
+	AllQuoteMode         = map[string]bool{
 		QuoteMode_Whole:     true,
 		QuoteMode_UnitPrice: true,
 		QuoteMode_Rate:      true,
@@ -46,7 +41,7 @@ type DataIdentify struct {
 
 func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
 	log.Println("rpc接收到要识别的_id", _id)
-	_, a, _, b, _ := Start(*_id)
+	a, b := Start(*_id)
 	*reply = map[string]string{}
 	if a == QuoteMode_Rate {
 		a = "费率"
@@ -69,33 +64,28 @@ func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
 }
 
 type Service interface {
-	Execute(b *BidInfo) (bool, string, bool, int, bool)
+	Execute(b *BidInfo) (string, int)
 }
 type BidInfo struct {
 	Id, Detail, Subtype string
-	OriginalDetail      string
-	FirstWinner         string
-	KvText              []string
 	Bidamount           float64
 	Type                int
-	Multipackage        int
-	Winner              string
-	TableKv             []map[string]string
+	TableKv             [][]map[string]string
 }
 
-func Start(_id string) (bool, string, bool, int, bool) {
+func Start(_id string) (string, int) {
 	data, ok := Mgo_Main.FindById(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String(), _id, SelectField)
 	if !ok || data == nil || len(*data) == 0 {
 		log.Println(_id, "没有找到标讯")
-		return false, "", false, 0, false
+		return "", 0
 	}
 	return Pretreatment(_id, *data, 0)
 }
 
-func Pretreatment(_id string, m map[string]interface{}, t int) (bool, string, bool, int, bool) {
+func Pretreatment(_id string, m map[string]interface{}, t int) (string, int) {
 	if m == nil || len(m) == 0 {
 		log.Println(_id, "没有找到标讯")
-		return false, "", false, 0, false
+		return "", 0
 	}
 	bi := &BidInfo{
 		Id:        _id,
@@ -103,73 +93,35 @@ func Pretreatment(_id string, m map[string]interface{}, t int) (bool, string, bo
 		Type:      t,
 	}
 	bi.Detail, _ = m["detail"].(string)
-	for _, v := range clearPatterns {
-		bi.Detail = regexp.MustCompile(v).ReplaceAllString(bi.Detail, "")
-	}
-	bi.OriginalDetail = bi.Detail
 	bi.Subtype, _ = m["subtype"].(string)
 	bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "<br/>", "\n", -1))
-	com_package, _ := m["com_package"].([]interface{})
-	bi.Multipackage = len(com_package)
-	if len(com_package) == 1 {
-		first, _ := com_package[0].(map[string]interface{})
-		bi.Winner, _ = first["winner"].(string)
-	}
-	if bi.Winner == "" {
-		bi.Winner = gconv.String(m["s_winner"])
-	}
-	if winnerorder := gconv.Maps(m["winnerorder"]); len(winnerorder) > 0 {
-		bi.FirstWinner = strings.TrimSpace(gconv.String(winnerorder[0]["entname"]))
-		array := []string{}
-		for _, v := range strings.Split(bi.FirstWinner, ",") {
-			if bidCommonwealth_firstWinnerOrderClearReg.MatchString(v) {
-				continue
-			}
-			array = append(array, v)
-		}
-		bi.FirstWinner = strings.Join(array, ",")
-		//if strings.Contains(bi.FirstWinner, ",") && strings.Contains(bi.Detail, strings.ReplaceAll(bi.FirstWinner, ",", "")) {
-		//	bi.FirstWinner = ""
-		//}
-	}
-	extract, _ := Mgo_Extract.FindById("result_20220219", _id, `{"kvtext":1}`)
-	if extract == nil || len(*extract) == 0 {
-		extract, _ = Mgo_Extract.FindById("result_20220218", _id, `{"kvtext":1}`)
-	}
-	if extract != nil && len(*extract) > 0 {
-		kvText, _ := (*extract)["kvtext"].(string)
-		for _, v := range strings.Split(kvText, "\n") {
-			vs := semicolonReg.Split(v, -1)
-			if len(vs) < 2 || (strings.TrimSpace(vs[1]) == "" || strings.TrimSpace(vs[1]) == "/") {
-				continue
-			}
-			bi.KvText = append(bi.KvText, v)
-		}
-	}
-	bi.ParseTable(bi.OriginalDetail)
-	//for _, v := range bi.TableKv {
-	//	for k, v := range v {
-	//		log.Println(k, v)
+	bi.Detail = regexp.MustCompile("[::][ ]+").ReplaceAllString(bi.Detail, ":")
+	bi.TableToSliceOfMaps(bi.Detail)
+	//for i, table := range bi.TableKv {
+	//	log.Printf("Table %d:\n", i+1)
+	//	for _, row := range table {
+	//		for k, v := range row {
+	//			log.Printf("  %s: %s\n", k, v)
+	//		}
 	//	}
 	//}
 	return service.Execute(bi)
 }
-
-// 解析HTML中的第一个<table>并将其转为KV结构
-func (bi *BidInfo) ParseTable(htmlContent string) error {
+func (bi *BidInfo) TableToSliceOfMaps(htmlContent string) {
+	defer Catch()
 	doc, err := html.Parse(strings.NewReader(htmlContent))
 	if err != nil {
-		return err
+		return
 	}
-
-	var table *html.Node
 	var found bool
-
 	// 查找第一个<table>标签
 	var traverse func(*html.Node)
 	traverse = func(n *html.Node) {
 		if n.Type == html.ElementNode && n.Data == "table" {
-			table = n
+			result := bi.ParseTable(n)
+			if len(result) > 0 {
+				bi.TableKv = append(bi.TableKv, result)
+			}
 			found = true
 			return
 		}
@@ -178,29 +130,43 @@ func (bi *BidInfo) ParseTable(htmlContent string) error {
 		}
 	}
 	traverse(doc)
+}
 
-	if table == nil {
-		return fmt.Errorf("未找到<table>标签")
-	}
-
+// ParseTable converts the given <table> node into a KV structure, using the first collected row as the keys.
+func (bi *BidInfo) ParseTable(table *html.Node) []map[string]string {
+	var result []map[string]string
 	var rows [][]string
-
 	// 遍历表格,提取单元格文本
-	var visitNode func(*html.Node, []string) []string
-	visitNode = func(n *html.Node, row []string) []string {
+	var visitNode func(*html.Node, []string) (bool, []string)
+	visitNode = func(n *html.Node, row []string) (bool, []string) {
+		if n.Data == "table" {
+			return false, row
+		}
 		if n.Type == html.ElementNode && (n.Data == "td" || n.Data == "th") {
+			for _, v := range n.Attr {
+				if (v.Key == "rowspan" || v.Key == "colspan") && gconv.Int(v.Val) > 1 {
+					return false, row
+				}
+			}
 			row = append(row, bi.extractText(n))
 		}
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
-			row = visitNode(c, row)
+			var isOk bool
+			isOk, row = visitNode(c, row)
+			if !isOk {
+				return false, row
+			}
 		}
-		return row
+		return true, row
 	}
 
 	var collectRows func(*html.Node)
 	collectRows = func(n *html.Node) {
 		if n.Type == html.ElementNode && (n.Data == "tr") {
-			row := visitNode(n, nil)
+			isOK, row := visitNode(n, nil)
+			if !isOK {
+				return
+			}
 			if len(row) > 0 {
 				rows = append(rows, row)
 			}
@@ -212,7 +178,7 @@ func (bi *BidInfo) ParseTable(htmlContent string) error {
 	collectRows(table)
 
 	if len(rows) < 1 {
-		return fmt.Errorf("表格中没有数据")
+		return result
 	}
 
 	headers := rows[0]
@@ -224,9 +190,9 @@ func (bi *BidInfo) ParseTable(htmlContent string) error {
 				item[strings.TrimSpace(key)] = strings.TrimSpace(val)
 			}
 		}
-		bi.TableKv = append(bi.TableKv, item)
+		result = append(result, item)
 	}
-	return nil
+	return result
 }
 
 // 提取节点内所有文本

BIN
对比结果.xlsx


BIN
报价模式+中标联合体验证第二轮.xlsx