|
- package service
import (
	"fmt"
	"log"
	"regexp"
	"strings"
	"sync"

	. "dataIdentify/db"

	"github.com/gogf/gf/v2/frame/g"
	"github.com/gogf/gf/v2/os/gctx"
	"github.com/gogf/gf/v2/util/gconv"
	"golang.org/x/net/html"
)
- var (
- SelectField = map[string]interface{}{
- "_id": 1,
- "winnerorder": 1,
- "detail": 1,
- "subtype": 1,
- "bidamount": 1,
- "s_winner": 1,
- "com_package": 1,
- "multipackage": 1,
- }
- service Service = &Rule{}
- semicolonReg = regexp.MustCompile("[::]")
- allQuoteMode = map[string]bool{
- QuoteMode_Whole: true,
- QuoteMode_UnitPrice: true,
- QuoteMode_Rate: true,
- QuoteMode_Discount: true,
- }
- )
// Quote-mode labels. DataIdentify.Execute compares the mode returned by Start
// against these values to pick the label it reports.
const (
	// QuoteMode_Whole is the whole-bid quote mode.
	QuoteMode_Whole = "整标报价模式"
	// QuoteMode_UnitPrice is the unit-price quote mode.
	QuoteMode_UnitPrice = "单价模式"
	// QuoteMode_Rate is the rate quote mode.
	QuoteMode_Rate = "费率模式"
	// QuoteMode_Discount is the float-up / float-down (discount) quote mode.
	QuoteMode_Discount = "上浮下浮模式"
	// QuoteMode_Other covers every other quote mode.
	QuoteMode_Other = "其他报价模式"
)
- type DataIdentify struct {
- }
- func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
- log.Println("rpc接收到要识别的_id", _id)
- _, a, _, b, _ := Start(*_id)
- *reply = map[string]string{}
- if a == QuoteMode_Rate {
- a = "费率"
- } else if a == QuoteMode_UnitPrice {
- a = "单价"
- } else if a == QuoteMode_Discount {
- a = "折扣率"
- } else if a == QuoteMode_Whole {
- a = "正常报价"
- } else {
- a = "其他"
- }
- (*reply)["报价模式"] = a
- if b == 1 {
- (*reply)["中标联合体"] = "是"
- } else {
- (*reply)["中标联合体"] = "否"
- }
- return nil
- }
- type Service interface {
- Execute(b *BidInfo) (bool, string, bool, int, bool)
- }
// BidInfo is the working record that Pretreatment builds for one announcement
// and hands to the Service.
type BidInfo struct {
	// Id is the announcement id the record was built for.
	Id string
	// Detail is the "detail" field after the clear patterns were removed,
	// "<br/>" was turned into a newline and surrounding space was trimmed.
	Detail string
	// Subtype is the "subtype" field of the announcement.
	Subtype string
	// OriginalDetail is the "detail" field after the clear patterns were
	// removed but before the "<br/>" replacement; ParseTable is run on it.
	OriginalDetail string
	// FirstWinner is the "entname" of the first "winnerorder" entry with the
	// comma-separated parts matching the clear regexp dropped.
	FirstWinner string
	// KvText holds the extracted "kvtext" lines that have a usable value
	// after the colon.
	KvText []string
	// Bidamount is the "bidamount" field converted to float64.
	Bidamount float64
	// Type is the caller-supplied t argument of Pretreatment.
	Type int
	// Multipackage is the number of entries in "com_package".
	Multipackage int
	// Winner is the "winner" of the only "com_package" entry, falling back to
	// "s_winner" when that is empty.
	Winner string
	// TableKv has one header->cell map per data row found by ParseTable.
	TableKv []map[string]string
}
- func Start(_id string) (bool, string, bool, int, bool) {
- data, ok := Mgo_Main.FindById(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String(), _id, SelectField)
- if !ok || data == nil || len(*data) == 0 {
- log.Println(_id, "没有找到标讯")
- return false, "", false, 0, false
- }
- return Pretreatment(_id, *data, 0)
- }
- func Pretreatment(_id string, m map[string]interface{}, t int) (bool, string, bool, int, bool) {
- if m == nil || len(m) == 0 {
- log.Println(_id, "没有找到标讯")
- return false, "", false, 0, false
- }
- bi := &BidInfo{
- Id: _id,
- Bidamount: gconv.Float64(m["bidamount"]),
- Type: t,
- }
- bi.Detail, _ = m["detail"].(string)
- for _, v := range clearPatterns {
- bi.Detail = regexp.MustCompile(v).ReplaceAllString(bi.Detail, "")
- }
- bi.OriginalDetail = bi.Detail
- bi.Subtype, _ = m["subtype"].(string)
- bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "<br/>", "\n", -1))
- com_package, _ := m["com_package"].([]interface{})
- bi.Multipackage = len(com_package)
- if len(com_package) == 1 {
- first, _ := com_package[0].(map[string]interface{})
- bi.Winner, _ = first["winner"].(string)
- }
- if bi.Winner == "" {
- bi.Winner = gconv.String(m["s_winner"])
- }
- if winnerorder := gconv.Maps(m["winnerorder"]); len(winnerorder) > 0 {
- bi.FirstWinner = strings.TrimSpace(gconv.String(winnerorder[0]["entname"]))
- array := []string{}
- for _, v := range strings.Split(bi.FirstWinner, ",") {
- if bidCommonwealth_firstWinnerOrderClearReg.MatchString(v) {
- continue
- }
- array = append(array, v)
- }
- bi.FirstWinner = strings.Join(array, ",")
- //if strings.Contains(bi.FirstWinner, ",") && strings.Contains(bi.Detail, strings.ReplaceAll(bi.FirstWinner, ",", "")) {
- // bi.FirstWinner = ""
- //}
- }
- extract, _ := Mgo_Extract.FindById("result_20220219", _id, `{"kvtext":1}`)
- if extract == nil || len(*extract) == 0 {
- extract, _ = Mgo_Extract.FindById("result_20220218", _id, `{"kvtext":1}`)
- }
- if extract != nil && len(*extract) > 0 {
- kvText, _ := (*extract)["kvtext"].(string)
- for _, v := range strings.Split(kvText, "\n") {
- vs := semicolonReg.Split(v, -1)
- if len(vs) < 2 || (strings.TrimSpace(vs[1]) == "" || strings.TrimSpace(vs[1]) == "/") {
- continue
- }
- bi.KvText = append(bi.KvText, v)
- }
- }
- bi.ParseTable(bi.OriginalDetail)
- //for _, v := range bi.TableKv {
- // for k, v := range v {
- // log.Println(k, v)
- // }
- //}
- return service.Execute(bi)
- }
- // 解析HTML中的第一个<table>并将其转为KV结构
- func (bi *BidInfo) ParseTable(htmlContent string) error {
- doc, err := html.Parse(strings.NewReader(htmlContent))
- if err != nil {
- return err
- }
- var table *html.Node
- var found bool
- // 查找第一个<table>标签
- var traverse func(*html.Node)
- traverse = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "table" {
- table = n
- found = true
- return
- }
- for c := n.FirstChild; c != nil && !found; c = c.NextSibling {
- traverse(c)
- }
- }
- traverse(doc)
- if table == nil {
- return fmt.Errorf("未找到<table>标签")
- }
- var rows [][]string
- // 遍历表格,提取单元格文本
- var visitNode func(*html.Node, []string) []string
- visitNode = func(n *html.Node, row []string) []string {
- if n.Type == html.ElementNode && (n.Data == "td" || n.Data == "th") {
- row = append(row, bi.extractText(n))
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- row = visitNode(c, row)
- }
- return row
- }
- var collectRows func(*html.Node)
- collectRows = func(n *html.Node) {
- if n.Type == html.ElementNode && (n.Data == "tr") {
- row := visitNode(n, nil)
- if len(row) > 0 {
- rows = append(rows, row)
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- collectRows(c)
- }
- }
- collectRows(table)
- if len(rows) < 1 {
- return fmt.Errorf("表格中没有数据")
- }
- headers := rows[0]
- for _, r := range rows[1:] {
- item := make(map[string]string)
- for i, val := range r {
- if i < len(headers) {
- key := headers[i]
- item[strings.TrimSpace(key)] = strings.TrimSpace(val)
- }
- }
- bi.TableKv = append(bi.TableKv, item)
- }
- return nil
- }
- // 提取节点内所有文本
- func (bi *BidInfo) extractText(n *html.Node) string {
- var text string
- if n.Type == html.TextNode {
- text = n.Data
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- text += bi.extractText(c)
- }
- return text
- }
|