Browse Source

分包识别、联合体投标、报价模式

zhangjinkun cách đây 2 tháng
commit
e3674bacf4
7 tập tin đã thay đổi với 1015 bổ sung và 0 xóa
  1. 12 0
      bidding/go.mod
  2. 71 0
      bidding/go.sum
  3. 195 0
      bidding/main.go
  4. 247 0
      bidding/package.go
  5. 56 0
      bidding/quotemode.go
  6. 329 0
      bidding/util.go
  7. 105 0
      client/main.go

+ 12 - 0
bidding/go.mod

@@ -0,0 +1,12 @@
+module bidding
+
+go 1.23.0
+
+toolchain go1.23.9
+
+require (
+	github.com/PuerkitoBio/goquery v1.10.3
+	golang.org/x/net v0.40.0
+)
+
+require github.com/andybalholm/cascadia v1.3.3 // indirect

+ 71 - 0
bidding/go.sum

@@ -0,0 +1,71 @@
+github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
+github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
+github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
+github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
+golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

+ 195 - 0
bidding/main.go

@@ -0,0 +1,195 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+)
+
+var (
+	// classifier is the shared multi-package classifier; init assigns it.
+	classifier       *MultiPackageClassifier
+	// quote_classifier is the shared quote-mode classifier; init assigns it.
+	quote_classifier *QuoteClassifier
+)
+
+// init builds both package-level classifiers before main runs. The multi-package
+// classifier is constructed first, then the quote-mode classifier.
+func init() {
+	classifier, quote_classifier = NewClassifier(), NewQuoteClassifier()
+}
+
+func main() {
+	//packAgeDemo()
+	//quoteDemo()
+	// 注册路由
+	http.HandleFunc("/classify", classifyHandler)
+	http.HandleFunc("/quote_classify", quoteClassifyHandler)
+
+	// 启动HTTP服务
+	log.Println("Starting server on :8182")
+	log.Fatal(http.ListenAndServe(":8182", nil))
+}
+
+// quoteDemo is a manual smoke test for quote-mode detection: it classifies a
+// few sample strings and logs the detected mode followed by the sample index.
+func quoteDemo() {
+	samples := [...]string{
+		"下浮 20.22%",
+		"上浮动:百分之三十",
+	}
+	for idx := range samples {
+		// Only Content is populated; QuoteMode reads nothing else.
+		mode, _ := quote_classifier.QuoteMode(BidDocument{Content: samples[idx]})
+		log.Println(mode, idx)
+	}
+}
+
+// packAgeDemo is a manual smoke test for multi-package and consortium
+// detection: it classifies one sample award sentence and logs both results.
+func packAgeDemo() {
+	sample := BidDocument{
+		Content: `预中标单位:宁波公路市政设计有限公司和浙江土力勘测设计院有限公司联合体`,
+	}
+	// Multi-package detection first, then consortium (joint bid) detection.
+	pkgType, _ := classifier.IsMultiPackage(sample)
+	joint := isConsortiumKeysReg(sample.Content)
+	log.Println(pkgType, joint)
+}
+
+// quoteClassifyHandler serves the quote-mode classification endpoint.
+//
+// It accepts a POST whose JSON body is an object with "title" and "detail"
+// fields, runs the quote-mode classifier on the document built by getDoc, and
+// answers with {"result": <mode>, "success": true}. Any other method gets a
+// 405 with an Allow header; a body that is not valid JSON gets a 400.
+func quoteClassifyHandler(w http.ResponseWriter, r *http.Request) {
+	// Only POST is allowed; a 405 response should name the permitted method.
+	if r.Method != http.MethodPost {
+		w.Header().Set("Allow", http.MethodPost)
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	// Registered before decoding so the bad-payload path closes the body too.
+	defer r.Body.Close()
+
+	// Parse the request body.
+	var requestData map[string]interface{}
+	if err := json.NewDecoder(r.Body).Decode(&requestData); err != nil {
+		http.Error(w, "Invalid request payload", http.StatusBadRequest)
+		return
+	}
+
+	// Classify. The evidence map returned next to the mode is not exposed.
+	modenum, _ := quote_classifier.QuoteMode(getDoc(requestData))
+
+	// Marshal before touching the ResponseWriter: an encoding failure can then
+	// still be reported as a clean 500 instead of being appended to a response
+	// that has already started.
+	payload, err := json.Marshal(map[string]interface{}{
+		"result":  modenum,
+		"success": true,
+	})
+	if err != nil {
+		log.Printf("Error encoding response: %v", err)
+		http.Error(w, "Internal server error", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	// Keep the trailing newline that json.Encoder used to emit. A write error
+	// means the client is gone, so it is only logged.
+	if _, err := w.Write(append(payload, '\n')); err != nil {
+		log.Printf("Error writing response: %v", err)
+	}
+}
+
+// classifyHandler serves the multi-package / consortium classification endpoint.
+//
+// It accepts a POST whose JSON body is an object with "title" and "detail"
+// fields and answers with the package type (1, 0 or -1), its textual
+// description, and whether the text looks like a consortium (joint) bid.
+// Any other method gets a 405 with an Allow header; a body that is not valid
+// JSON gets a 400.
+func classifyHandler(w http.ResponseWriter, r *http.Request) {
+	// Only POST is allowed; a 405 response should name the permitted method.
+	if r.Method != http.MethodPost {
+		w.Header().Set("Allow", http.MethodPost)
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	// Registered before decoding so the bad-payload path closes the body too.
+	defer r.Body.Close()
+
+	// Parse the request body.
+	var requestData map[string]interface{}
+	if err := json.NewDecoder(r.Body).Decode(&requestData); err != nil {
+		http.Error(w, "Invalid request payload", http.StatusBadRequest)
+		return
+	}
+
+	// Classify. The evidence map from IsMultiPackage is not exposed.
+	doc := getDoc(requestData)
+	packageType, _ := classifier.IsMultiPackage(doc) // multi-package detection
+	isConsortium := isConsortiumKeysReg(doc.Content) // consortium (joint bid) detection
+
+	// Marshal before touching the ResponseWriter: an encoding failure can then
+	// still be reported as a clean 500 instead of being appended to a response
+	// that has already started.
+	payload, err := json.Marshal(map[string]interface{}{
+		"packageType":    packageType,
+		"description":    getPackageTypeDescription(packageType),
+		"isJointVenture": isConsortium,
+		"success":        true,
+	})
+	if err != nil {
+		log.Printf("Error encoding response: %v", err)
+		http.Error(w, "Internal server error", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	// Keep the trailing newline that json.Encoder used to emit. A write error
+	// means the client is gone, so it is only logged.
+	if _, err := w.Write(append(payload, '\n')); err != nil {
+		log.Printf("Error writing response: %v", err)
+	}
+}
+
+// classifyBid classifies a decoded request in one call.
+//
+// It builds the document with getDoc (the same title/detail extraction and
+// clean-up the HTTP handlers use) and returns the multi-package verdict
+// (1 multi-package, 0 undetermined, -1 single package) together with whether
+// the text looks like a consortium bid.
+func classifyBid(data map[string]interface{}) (int, bool) {
+	// Delegate document construction to getDoc rather than repeating it here,
+	// so both code paths always clean the text identically.
+	doc := getDoc(data)
+
+	ispack, _ := classifier.IsMultiPackage(doc)
+	iscon := isConsortiumKeysReg(doc.Content)
+	return ispack, iscon
+}
+
+// 分类逻辑
+func getDoc(data map[string]interface{}) BidDocument {
+	content := fmt.Sprint(data["title"]) + "\n" + fmt.Sprint(data["detail"])
+	// 文本清理
+	content = cleanWebText(content, clearKeys, clearKeysBack)
+	content_rmtable := removeTables(content)
+
+	// 执行分类判断
+	doc := BidDocument{
+		Content:         content,
+		Content_NoTable: content_rmtable,
+		Budget:          content,
+		AwardNotice:     content,
+		BidderOptions:   content,
+	}
+
+	return doc
+}
+
+// getPackageTypeDescription maps a package-type code to its display label:
+// 1 is "多包" (multi-package), -1 is "单包" (single package), and every other
+// value, including 0, is "不确定" (undetermined).
+func getPackageTypeDescription(packageType int) string {
+	if packageType == 1 {
+		return "多包"
+	}
+	if packageType == -1 {
+		return "单包"
+	}
+	return "不确定"
+}

+ 247 - 0
bidding/package.go

@@ -0,0 +1,247 @@
+package main
+
+import (
+	"log"
+	"regexp"
+	"strings"
+)
+
+// BidDocument holds the parts of a bidding document that the classifiers analyse.
+type BidDocument struct {
+	Content         string // full text of the bidding document
+	Content_NoTable string // document text with <table> blocks removed (getDoc fills it via removeTables)
+	Section         string // content of a specific section (e.g. instructions to bidders)
+	AwardNotice     string // award notice text
+	Budget          string // budget information text
+	BidderOptions   string // description of the options open to bidders
+}
+
+// MultiPackageClassifier decides whether a bidding document is split into
+// several packages / lots. All fields are filled in by NewClassifier.
+type MultiPackageClassifier struct {
+	coreKeywords    []string         // core keyword list
+	packagePatterns []*regexp.Regexp // patterns matching package / lot identifiers
+	excludePattern  *regexp.Regexp   // pattern for wording that rules out multi-package
+	budgetPattern   *regexp.Regexp   // pattern for per-package budget lines
+	awardPattern    *regexp.Regexp   // pattern for per-package award results
+
+	packNumCode *regexp.Regexp // pattern for a single identifier character (digit, letter, numeral)
+}
+
+// MatchResult stores one regular-expression match found in a document.
+type MatchResult struct {
+	FullMatch string   // text of the complete match
+	Groups    []string // text of the capture groups that participated in the match
+	Start     int      // byte offset where the match starts
+	End       int      // byte offset just past the end of the match
+}
+
+// NewClassifier builds a MultiPackageClassifier with its keyword list and
+// pre-compiled regular expressions. It logs "MultiPackageClassifier" once
+// when called.
+func NewClassifier() *MultiPackageClassifier {
+	// Core keywords, one literal per keyword. The previous comma-separated
+	// string was assembled from three concatenated segments with no comma at
+	// the joins, which fused "标段划分表"+"包件" and "分项招标"+"分包方案"
+	// into two keywords that never matched on their own.
+	coreKeys := []string{
+		"包", "分标", "标段", "子标段", "标段(包)", "分段招标", "多标段", "分标段", "标段划分表",
+		"包件", "分包", "包号", "包划分", "标包", "多包", "分项招标",
+		"分包方案", "包别", "分段实施", "独立投标", "兼投", "兼中", "№.1标包", "№.2标包", "№.3标包",
+	}
+	packKeys := `(标|标段|子标段|标段\(包\)|包|包号|采购包|分标|包件|包组编号|标项|个标段)`
+	packMatch := `(\s)?[\(\)一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+(\s)?`
+	packTeMatch := `(:|:|\s)[0-9A-Za-z]{1,}`
+	packNumMatch := `[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]`
+	log.Println("MultiPackageClassifier")
+	return &MultiPackageClassifier{
+		// 1. Core keyword list (extend as needed).
+		coreKeywords: coreKeys,
+
+		// 2. Package identifier patterns: keyword followed by an identifier,
+		// keyword followed by a separator and an identifier, and an
+		// identifier followed by a keyword.
+		packagePatterns: []*regexp.Regexp{
+			regexp.MustCompile(packKeys + packMatch),
+			regexp.MustCompile(packKeys + packTeMatch),
+			regexp.MustCompile(packMatch + packKeys),
+		},
+
+		// 3. Exclusion pattern: wording that describes a single, undivided tender.
+		excludePattern: regexp.MustCompile(
+			`本项目不划分标段|本次招标划分为(1|一)个标段|本招标项目划分为标段(1|一)个标段|不可拆分|必须全部响应|统一评标|组合招标|打包采购`,
+		),
+
+		// 4. Single identifier character, used to extract package codes from a match.
+		packNumCode: regexp.MustCompile(packNumMatch),
+
+		// 5. Per-package budget line, e.g. "标段A:500万元".
+		budgetPattern: regexp.MustCompile(
+			`(标段|包)[::]\s*.*?(\d+万元)`,
+		),
+
+		// 6. Per-package award line, e.g. "标段A中标单位:XX公司".
+		awardPattern: regexp.MustCompile(
+			`(标段|包)[::]\s*([^,。]+).*?中标单位`,
+		),
+	}
+}
+
+// IsMultiPackage is the main decision method. It returns the verdict
+// (1 multi-package, 0 undetermined, -1 single package) together with a map of
+// the evidence collected on the way ("exclusion", "isPackage", "core_count",
+// "aux_count", plus whatever the feature checks record).
+func (c *MultiPackageClassifier) IsMultiPackage(doc BidDocument) (int, map[string]interface{}) {
+	result := make(map[string]interface{})
+
+	// Step 1: exclusion wording vetoes everything else.
+	if c.hasExclusion(doc.Content) {
+		result["exclusion"] = "存在排除关键词"
+		return -1, result
+	}
+
+	// Step 2: core signals.
+	// 2a. Table-based detection; the row count it also returns is not used.
+	isTablePack, _ := tableIsPackage(doc.Content)
+	// 2b. Number of core features present.
+	coreFeaturesNum := c.checkCoreFeatures(doc, result)
+	// 2c. Number of distinct package codes; the code set itself is not used.
+	packCodeNum, _ := c.getPackNumCode(c.regMatchPackagePatterns(doc, result))
+
+	// Step 3: auxiliary signals.
+	auxFeatures := c.checkAuxFeatures(doc, result)
+
+	isPackage := 0 // 1 multi-package, 0 undetermined, -1 not multi-package
+	switch {
+	case packCodeNum > 1 || isTablePack || (coreFeaturesNum >= 1 && auxFeatures >= 1):
+		// More than one package code, or a package table, or at least one
+		// core feature backed by at least one auxiliary feature.
+		isPackage = 1
+	case coreFeaturesNum <= 2 && packCodeNum <= 1 && auxFeatures < 1:
+		// NOTE(review): the original comment described this as "core
+		// features < 2", but the condition accepts up to 2 — confirm which
+		// threshold is intended.
+		isPackage = -1
+	}
+	result["isPackage"] = isPackage
+	result["core_count"] = coreFeaturesNum
+	result["aux_count"] = auxFeatures
+	return isPackage, result
+}
+
+// checkCoreFeatures counts how many of the three core features are present
+// and records each hit in result under "core_keywords", "package_numbers" and
+// "bidder_options".
+func (c *MultiPackageClassifier) checkCoreFeatures(doc BidDocument, result map[string]interface{}) int {
+	// Keyword and numbering checks normally run on the table-free text; when
+	// the document contains nested tables the full text is used instead.
+	body := doc.Content_NoTable
+	if hasNestedTables(doc.Content) {
+		body = doc.Content
+	}
+
+	matched := 0
+
+	// Feature 1: package-related keywords occur in the text.
+	if found := c.checkKeywords(body); len(found) > 0 {
+		result["core_keywords"] = found
+		matched++
+	}
+
+	// Feature 2: a package numbering pattern occurs in the text.
+	if c.checkPackageNumbers(body) {
+		result["package_numbers"] = true
+		matched++
+	}
+
+	// Feature 3: bidders are offered flexible bidding options.
+	if c.checkBidderOptions(doc.BidderOptions) {
+		result["bidder_options"] = true
+		matched++
+	}
+
+	return matched
+}
+
+// checkAuxFeatures 检查辅助特征并返回符合数量
+func (c *MultiPackageClassifier) checkAuxFeatures(doc BidDocument, result map[string]interface{}) int {
+	count := 0
+
+	// 辅助特征1:存在分项预算
+	if matches := c.budgetPattern.FindAllString(doc.Budget, -1); len(matches) > 1 {
+		result["split_budget"] = matches
+		count++
+	}
+
+	// 辅助特征2:存在多个中标结果
+	if matches := c.awardPattern.FindAllStringSubmatch(doc.AwardNotice, -1); len(matches) > 1 {
+		result["multiple_awards"] = matches
+		count++
+	}
+
+	return count
+}
+
+// checkKeywords returns the set of core keywords that occur in text. The text
+// is lower-cased before the substring search; the keywords are used as stored.
+func (c *MultiPackageClassifier) checkKeywords(text string) map[string]bool {
+	haystack := strings.ToLower(text)
+	found := make(map[string]bool)
+	for i := range c.coreKeywords {
+		if kw := c.coreKeywords[i]; strings.Contains(haystack, kw) {
+			found[kw] = true
+		}
+	}
+	return found
+}
+
+// checkPackageNumbers reports whether any package numbering pattern matches text.
+func (c *MultiPackageClassifier) checkPackageNumbers(text string) bool {
+	for i := 0; i < len(c.packagePatterns); i++ {
+		if c.packagePatterns[i].MatchString(text) {
+			return true
+		}
+	}
+	return false
+}
+
+// checkBidderOptions reports whether the options text contains any phrase
+// that lets a bidder pick or combine packages.
+func (c *MultiPackageClassifier) checkBidderOptions(options string) bool {
+	for _, phrase := range [...]string{"可选投", "兼投", "可投多个"} {
+		if strings.Contains(options, phrase) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasExclusion reports whether text contains wording matched by the
+// classifier's exclusion pattern.
+func (c *MultiPackageClassifier) hasExclusion(text string) bool {
+	loc := c.excludePattern.FindStringIndex(text)
+	return loc != nil
+}
+
+// regMatchPackagePatterns runs every package pattern over doc.Content and
+// returns the matches keyed by their full matched text, so identical matches
+// collapse into one entry (the last one found wins). The result parameter is
+// accepted but not written to.
+func (c *MultiPackageClassifier) regMatchPackagePatterns(doc BidDocument, result map[string]interface{}) map[string]MatchResult {
+	text := doc.Content
+	found := make(map[string]MatchResult)
+	for _, re := range c.packagePatterns {
+		for _, loc := range re.FindAllStringSubmatchIndex(text, -1) {
+			whole := text[loc[0]:loc[1]]
+
+			// loc holds index pairs: pair 0 is the whole match, pair g is
+			// capture group g. A group that did not participate has -1.
+			groups := make([]string, 0)
+			for g := 1; 2*g < len(loc); g++ {
+				if from := loc[2*g]; from >= 0 {
+					groups = append(groups, text[from:loc[2*g+1]])
+				}
+			}
+
+			found[whole] = MatchResult{
+				FullMatch: whole,
+				Groups:    groups,
+				Start:     loc[0],
+				End:       loc[1],
+			}
+		}
+	}
+	return found
+}
+
+// getPackNumCode extracts the package code from every matched text and
+// returns the number of distinct codes together with the code set. A code is
+// the concatenation of all identifier characters found in the match,
+// normalised through convertNumerals; matches without any such character are
+// skipped.
+func (c *MultiPackageClassifier) getPackNumCode(match map[string]MatchResult) (int, map[string]bool) {
+	codes := make(map[string]bool)
+	for matched := range match {
+		raw := strings.Join(c.packNumCode.FindAllString(matched, -1), "")
+		if raw == "" {
+			continue
+		}
+		codes[convertNumerals(raw)] = true
+	}
+	return len(codes), codes
+}

+ 56 - 0
bidding/quotemode.go

@@ -0,0 +1,56 @@
+package main
+
+import (
+	"log"
+	"regexp"
+)
+
+// QuoteClassifier classifies the quotation mode used in a bidding document.
+type QuoteClassifier struct {
+	unitPriceWords    []string // keywords for unit-price / per-calendar-day quotes
+	rateWords         []string // keywords for fee-rate quotes
+	floatingRateWords []string // keywords for upward / downward floating-rate quotes
+	lumpSumWords      []string // keywords for lump-sum quotes
+
+	unitPricePattern    []*regexp.Regexp // unit-price patterns
+	ratePattern         []*regexp.Regexp // fee-rate patterns
+	floatingRatePattern []*regexp.Regexp // upward / downward floating-rate patterns
+	lumpSumPattern      []*regexp.Regexp // lump-sum patterns
+
+}
+
+// NewQuoteClassifier builds a QuoteClassifier. Only the floating-rate patterns
+// are populated; the other pattern lists start empty and the keyword lists
+// are left unset. It logs "QuoteClassifier" once when called.
+func NewQuoteClassifier() *QuoteClassifier {
+	log.Println("QuoteClassifier")
+
+	// Floating-rate patterns: a rate spelled out in words ("百分之…") and a
+	// numeric percentage.
+	floating := []*regexp.Regexp{
+		regexp.MustCompile(`([上下](浮|浮率|浮动)[::\s]?.{0,30}百分之)`),
+		regexp.MustCompile(`([上下](浮|浮率|浮动)[::\s]?([\d.]+%))`),
+	}
+
+	qc := new(QuoteClassifier)
+	qc.unitPricePattern = []*regexp.Regexp{} // unit-price patterns: none yet
+	qc.ratePattern = []*regexp.Regexp{}      // fee-rate patterns: none yet
+	qc.floatingRatePattern = floating
+	qc.lumpSumPattern = []*regexp.Regexp{} // lump-sum patterns: none yet
+	return qc
+}
+
+// QuoteMode is the main decision method. It returns 2 when any floating-rate
+// pattern matches doc.Content and 0 otherwise, together with an evidence map
+// that is currently always empty.
+func (c *QuoteClassifier) QuoteMode(doc BidDocument) (int, map[string]interface{}) {
+	evidence := make(map[string]interface{})
+	for i := range c.floatingRatePattern {
+		if c.floatingRatePattern[i].MatchString(doc.Content) {
+			return 2, evidence
+		}
+	}
+	return 0, evidence
+}

+ 329 - 0
bidding/util.go

@@ -0,0 +1,329 @@
+// util
+package main
+
+import (
+	"fmt"
+	"log"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"golang.org/x/net/html"
+)
+
+var (
+	// theadWords is the alternation of header-cell keywords that mark a table
+	// as a candidate package table; theadWords_order is a second, shorter
+	// list tried when the first does not match (see isHeaderRow).
+	theadWords = "(标段|标包|标包号|包号|包段|子包号|子标段名称|子项|包件号|包件代码|包件编号|分包编码|分包名称|分标编号|分标编码|合同段|包件名称|标包名称|" +
+		"中标单位|中标人|中商人|成交人|成交人名称|供应商|供应商名称|项目名称|项目地址|标的|标的名称|标项名称|采购合同|" +
+		"成交价格|中标报价|简要规格描述)"
+	theadWords_order    = "(包件号|标的|标段|候选人|供应商)"
+	theadWordsReg       = regexp.MustCompile(theadWords)
+	theadWordsReg_order = regexp.MustCompile(theadWords_order)
+
+	// delRowKeys marks header cells and data rows to discard (not concluded /
+	// reason for not winning).
+	delRowKeys    = "未成交|未中标原因"
+	delRowKeysReg = regexp.MustCompile(delRowKeys)
+
+	// Negative header keywords, used to reject distracting tables.
+	reverseTheadKeys = map[string][]string{
+		"bidlist": []string{"品牌", "规格型号", "数量", "单价", "报价得分", "总分"},
+		//"spotcheck": []string{"项目名称", "抽取家数"},
+	}
+
+	// Consortium (joint bid) detection pattern.
+	consortium        = "(联合体牵头人|联合体成员[:: ].{5,30}(公司|院|大学|研究所))|(中标单位[:: ].{5,60}(联合体))"
+	consortiumKeysReg = regexp.MustCompile(consortium)
+
+	// clearKeys are regular expressions whose matches cleanWebText removes;
+	// clearKeysBack are markers from which cleanWebText cuts the rest of the text.
+	// NOTE(review): `\d+\.\d+` removes every decimal number, so a value such as
+	// "20.22%" loses its digits before the quote-mode patterns run on the text
+	// produced by getDoc — confirm this is intended.
+	clearKeys     = []string{"承包(一|二|三|四)级", "开标(\\d)(室|厅)", "\\d+\\.\\d+", "\\d+(.{1,10}).(pdf|doc|zip|rar)"}
+	clearKeysBack = []string{"上一篇", "下一篇", "历史业绩", "候选人企业业绩"}
+)
+
+func getIdFromDate(startStr, endStr string) (string, string) {
+	start, _ := time.Parse("2006-01-02", startStr)
+	end, _ := time.Parse("2006-01-02", endStr)
+	// 昨天凌晨0点时间戳
+	hexTimestamp1 := fmt.Sprintf("%X", start.Unix()) + "0000000000000000"
+	// 今天凌晨0点时间戳
+	hexTimestamp2 := fmt.Sprintf("%X", end.Unix()) + "0000000000000000"
+	return hexTimestamp1, hexTimestamp2
+}
+
+// 判断是否有嵌套表格
+func tableIsPackage(htmlContent string) (bool, int) {
+	//判断是否有多层表格嵌套
+	if hasNestedTables(htmlContent) {
+		//log.Println("表格嵌套")
+		return false, 0
+	}
+	ispack := false
+	tablesMixRows := 0
+	tablesData := getPackAgeByTable(htmlContent)
+	for _, dataRows := range tablesData {
+		// for k, v := range dataRows {
+		// 	log.Println(i, k, v)
+		// }
+		if len(dataRows) > 2 {
+			ispack = true
+		}
+		if tablesMixRows < len(dataRows) {
+			tablesMixRows = len(dataRows)
+		}
+	}
+	//log.Println(ispack, tablesMixRows)
+	return ispack, tablesMixRows
+}
+
+// getPackAgeByTable extracts candidate package data from every <table> in
+// htmlContent. The result maps the table's index (as a decimal string) to its
+// rows, each row being a header-text -> cell-text map. A table contributes
+// rows only from the first accepted header row onwards; rows containing a
+// delRowKeys term are dropped. If the HTML cannot be parsed the error is
+// logged and an empty map is returned.
+func getPackAgeByTable(htmlContent string) map[string][]map[string]string {
+	tableDataRows := map[string][]map[string]string{}
+
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
+	if err != nil {
+		// doc is nil on error; continuing would dereference it.
+		log.Println(err)
+		return tableDataRows
+	}
+
+	doc.Find("table").Each(func(i int, table *goquery.Selection) {
+		var headers []string
+		var rows []map[string]string
+		table.Find("tr").Each(func(rowIdx int, row *goquery.Selection) {
+			// Header detection is content based and only the first accepted
+			// header row of a table is used.
+			if isHeaderRow(row) && len(headers) < 1 {
+				if cells, rejected := scanHeaderCells(row); !rejected {
+					headers = append(headers, cells...)
+				}
+			}
+			// Data rows. This also runs for the header row itself once it
+			// has been accepted, exactly as before.
+			if len(headers) > 0 {
+				isDelRows := false
+				rowData := make(map[string]string)
+				row.Find("td").Each(func(cellIdx int, cell *goquery.Selection) {
+					// Cells beyond the header width are ignored.
+					if cellIdx < len(headers) {
+						text := strings.TrimSpace(cell.Text())
+						rowData[headers[cellIdx]] = text
+						if delRowKeysReg.MatchString(text) {
+							isDelRows = true
+						}
+					}
+				})
+				if !isDelRows {
+					rows = append(rows, rowData)
+				}
+			}
+		})
+		tableDataRows[fmt.Sprint(i)] = rows
+	})
+	return tableDataRows
+}
+
+// scanHeaderCells collects the trimmed texts of a header row's <th> cells,
+// falling back to its <td> cells when there are none, and reports whether the
+// header must be rejected: a cell matches delRowKeys, or more than one
+// negative keyword of a reverseTheadKeys group was seen across the row.
+func scanHeaderCells(row *goquery.Selection) ([]string, bool) {
+	var cells []string
+	rejected := false
+	negativeHits := map[string]int{} // negative-keyword hits per group
+
+	collect := func(tag string) {
+		row.Find(tag).Each(func(_ int, cell *goquery.Selection) {
+			text := strings.TrimSpace(cell.Text())
+			cells = append(cells, text)
+			if delRowKeysReg.MatchString(text) {
+				rejected = true
+			}
+			// Item, scoring and spot-check tables are not package tables.
+			for group, keys := range reverseTheadKeys {
+				for _, key := range keys {
+					if strings.Contains(text, key) {
+						negativeHits[group]++
+					}
+				}
+				if negativeHits[group] > 1 {
+					rejected = true
+				}
+			}
+		})
+	}
+
+	collect("th")
+	if len(cells) < 1 {
+		collect("td")
+	}
+	return cells, rejected
+}
+
+// isHeaderRow reports whether a table row looks like a header row: more than
+// one of its <td>/<th> cells must be shorter than 8 characters and match
+// either header keyword pattern.
+func isHeaderRow(row *goquery.Selection) bool {
+	hits := 0
+	tally := func(_ int, cell *goquery.Selection) {
+		label := strings.TrimSpace(cell.Text())
+		if len([]rune(label)) >= 8 {
+			return // too long to be a header label
+		}
+		if theadWordsReg.MatchString(label) || theadWordsReg_order.MatchString(label) {
+			hits++
+		}
+	}
+	row.Find("td").Each(tally)
+	row.Find("th").Each(tally)
+	return hits > 1
+}
+func removeTables(html string) string {
+	// 匹配<table>标签及其内容的正则表达式
+	re := regexp.MustCompile(`(?i)<table[^>]*>[\s\S]*?</table>`)
+	return re.ReplaceAllString(html, "")
+}
+
+// cleanWebText strips noise from scraped page text in two passes.
+//
+// First, everything from the earliest occurrence of any keywordsback entry to
+// the end of input is cut. Then every match of any keywords entry is removed.
+// Entries of both lists are used as regular expressions joined with "|".
+// An empty list skips its pass. If a pattern fails to compile, the text as it
+// stands at that point is returned.
+func cleanWebText(input string, keywords, keywordsback []string) string {
+	// Guard the empty list: joining nothing yields `(?s)().*`, which matches
+	// at offset 0 and would delete the entire input.
+	if len(keywordsback) > 0 {
+		keywordPattern := strings.Join(keywordsback, "|")
+		re, err := regexp.Compile(fmt.Sprintf(`(?s)(%s).*`, keywordPattern))
+		if err != nil {
+			return input // keep the original text when the pattern is invalid
+		}
+		input = re.ReplaceAllString(input, "")
+	}
+
+	if len(keywords) == 0 {
+		return input
+	}
+	re, err := regexp.Compile(strings.Join(keywords, "|"))
+	if err != nil {
+		return input
+	}
+	return re.ReplaceAllString(input, "")
+}
+
+// convertNumerals normalises a package code so that the same number written
+// in different scripts compares equal. Chinese numerals (零一二三四五六七八九十)
+// and Roman numeral characters (Ⅰ-Ⅻ) are rewritten as Arabic digits; Arabic
+// digits and every other character pass through unchanged.
+//
+// 十 is handled positionally so that values up to 99 come out right:
+// 十 -> "10", 十二 -> "12", 二十 -> "20", 二十一 -> "21". Ⅹ, Ⅺ and Ⅻ become
+// "10", "11" and "12". Previously all of 十, Ⅹ, Ⅺ and Ⅻ collapsed to "1"
+// and so collided with package 1.
+func convertNumerals(input string) string {
+	chineseDigits := map[rune]rune{
+		'零': '0', '一': '1', '二': '2', '三': '3', '四': '4',
+		'五': '5', '六': '6', '七': '7', '八': '8', '九': '9',
+	}
+	romanNumerals := map[rune]string{
+		'Ⅰ': "1", 'Ⅱ': "2", 'Ⅲ': "3", 'Ⅳ': "4", 'Ⅴ': "5", 'Ⅵ': "6",
+		'Ⅶ': "7", 'Ⅷ': "8", 'Ⅸ': "9", 'Ⅹ': "10", 'Ⅺ': "11", 'Ⅻ': "12",
+	}
+	// isUnit reports whether r is one of 一..九.
+	isUnit := func(r rune) bool {
+		d, ok := chineseDigits[r]
+		return ok && d != '0'
+	}
+
+	runes := []rune(input)
+	var result strings.Builder
+	for i, char := range runes {
+		if char == '十' {
+			tensBefore := i > 0 && isUnit(runes[i-1])
+			unitsAfter := i+1 < len(runes) && isUnit(runes[i+1])
+			switch {
+			case tensBefore && unitsAfter:
+				// 二十一: the neighbouring digits already spell "21".
+			case tensBefore:
+				result.WriteByte('0') // 二十 -> "2" + "0"
+			case unitsAfter:
+				result.WriteByte('1') // 十二 -> "1" + "2"
+			default:
+				result.WriteString("10")
+			}
+			continue
+		}
+		if num, exists := chineseDigits[char]; exists {
+			result.WriteRune(num)
+			continue
+		}
+		if num, exists := romanNumerals[char]; exists {
+			result.WriteString(num)
+			continue
+		}
+		// Arabic digits and non-numeral characters are kept as they are.
+		result.WriteRune(char)
+	}
+
+	return result.String()
+}
+
+// hasNestedTables reports whether the parsed HTML contains a <table> element
+// somewhere inside another <table> element. It returns false when the
+// content cannot be parsed.
+func hasNestedTables(htmlContent string) bool {
+	root, err := html.Parse(strings.NewReader(htmlContent))
+	if err != nil {
+		return false
+	}
+
+	// nestedBelow walks the subtree depth-first and stops at the first table
+	// that is encountered while already inside a table.
+	var nestedBelow func(n *html.Node, insideTable bool) bool
+	nestedBelow = func(n *html.Node, insideTable bool) bool {
+		if n.Type == html.ElementNode && n.Data == "table" {
+			if insideTable {
+				return true
+			}
+			insideTable = true
+		}
+		for child := n.FirstChild; child != nil; child = child.NextSibling {
+			if nestedBelow(child, insideTable) {
+				return true
+			}
+		}
+		return false
+	}
+
+	return nestedBelow(root, false)
+}
+
+// isChineseRune reports whether r counts as Chinese text: a CJK unified
+// ideograph (U+4E00-U+9FFF), a CJK symbol or punctuation mark
+// (U+3000-U+303F), a full-width form (U+FF00-U+FFEF) other than full-width
+// Latin letters and digits, or one of a few general punctuation marks
+// (curly quotes, em dash, ellipsis).
+func isChineseRune(r rune) bool {
+	switch {
+	case r >= 0x4E00 && r <= 0x9FFF: // basic ideographs
+		return true
+	case r >= 0x3000 && r <= 0x303F: // CJK symbols and punctuation, incl. 【】《》〈〉
+		return true
+	case r >= 0xFF00 && r <= 0xFFEF: // full-width forms
+		upper := r >= 0xFF21 && r <= 0xFF3A
+		lower := r >= 0xFF41 && r <= 0xFF5A
+		digit := r >= 0xFF10 && r <= 0xFF19
+		return !(upper || lower || digit)
+	}
+
+	// General punctuation used in Chinese text.
+	switch r {
+	case 0x2018, 0x2019, 0x201C, 0x201D, // curly quotes
+		0x2014, 0x2026: // em dash, ellipsis
+		return true
+	}
+	return false
+}
+
+// isConsortiumKeysReg reports whether content matches the consortium
+// (joint bid) pattern held in consortiumKeysReg.
+func isConsortiumKeysReg(content string) bool {
+	loc := consortiumKeysReg.FindStringIndex(content)
+	return loc != nil
+}

+ 105 - 0
client/main.go

@@ -0,0 +1,105 @@
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"os"
+)
+
+// ClassificationRequest is the JSON body sent to the classification service.
+type ClassificationRequest struct {
+	Title  string `json:"title"`  // announcement title
+	Detail string `json:"detail"` // announcement body text
+}
+
+// ClassificationResponse is the JSON body returned by the classification
+// service. One struct covers both endpoints: /classify fills PackageType,
+// Description and IsJointVenture, /quote_classify fills Result.
+type ClassificationResponse struct {
+	PackageType    int    `json:"packageType"`            // package-type code
+	Description    string `json:"description"`            // text label for PackageType
+	IsJointVenture bool   `json:"isJointVenture"`         // whether a consortium bid was detected
+	Success        bool   `json:"success"`                // whether classification succeeded
+	ErrorMessage   string `json:"errorMessage,omitempty"` // failure description, if any
+	// Result is the quote-mode code. The server sends it as a JSON number, so
+	// it must be an int: with a string field json.Unmarshal rejects the
+	// /quote_classify response outright.
+	Result int `json:"result"`
+}
+
+// main sends one sample document to the classification service and prints
+// the response. The service URL defaults to the local /quote_classify
+// endpoint and can be overridden with the first command-line argument.
+func main() {
+	// Default service address.
+	serverURL := "http://localhost:8182/quote_classify"
+
+	// A custom URL may be given as the first argument.
+	if len(os.Args) > 1 {
+		serverURL = os.Args[1]
+	}
+
+	// Sample request data.
+	requestData := ClassificationRequest{
+		Title:  "XX市公共设施建设项目招标公告",
+		Detail: `下浮:20%`,
+	}
+
+	// Send the request.
+	response, err := sendClassificationRequest(serverURL, requestData)
+	if err != nil {
+		fmt.Printf("请求失败: %v\n", err)
+		return
+	}
+
+	// Handle the response.
+	if !response.Success {
+		fmt.Printf("分类失败: %s\n", response.ErrorMessage)
+		return
+	}
+
+	// Print the result. The package fields come from /classify; the quote
+	// mode comes from /quote_classify, which is the default endpoint and
+	// whose result was previously never shown.
+	fmt.Printf("类型代码: %d\n", response.PackageType)
+	fmt.Printf("类型描述: %s\n", response.Description)
+	fmt.Printf("是否联合体投标: %v\n", response.IsJointVenture)
+	fmt.Printf("报价模式: %v\n", response.Result)
+}
+
+// sendClassificationRequest POSTs requestData as JSON to url and decodes the
+// JSON response.
+//
+// It returns an error when the request cannot be built or sent, when the
+// body cannot be read, when the status is not 200 OK (the status and body are
+// included in the message), or when the body is not valid JSON for
+// ClassificationResponse. Underlying errors are wrapped with %w so callers
+// can inspect them with errors.Is / errors.As.
+// NOTE(review): the HTTP client has no timeout, so a stalled server blocks
+// this call indefinitely.
+func sendClassificationRequest(url string, requestData ClassificationRequest) (*ClassificationResponse, error) {
+	// Serialise the request.
+	jsonData, err := json.Marshal(requestData)
+	if err != nil {
+		return nil, fmt.Errorf("无法序列化请求数据: %w", err)
+	}
+
+	// Build the HTTP request.
+	req, err := http.NewRequest(http.MethodPost, url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("无法创建HTTP请求: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	// Send it.
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("发送请求失败: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Read the body before checking the status so that an error response can
+	// be quoted in the returned message.
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("读取响应失败: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("服务器返回错误: %s, 响应: %s", resp.Status, string(body))
+	}
+
+	// Decode the response.
+	var response ClassificationResponse
+	if err := json.Unmarshal(body, &response); err != nil {
+		return nil, fmt.Errorf("解析响应失败: %w, 响应内容: %s", err, string(body))
+	}
+
+	return &response, nil
+}