// Package service loads a bid document by id, normalises its detail text,
// extracts the first HTML table into key/value rows and hands the result to
// the configured Service implementation, which returns a quote mode and an
// integer flag.
package service

import (
	"errors"
	"log"
	"regexp"
	"strings"

	. "app.yhyue.com/moapp/jybase/common"
	. "dataIdentify/db"
	"github.com/gogf/gf/v2/frame/g"
	"github.com/gogf/gf/v2/os/gctx"
	"github.com/gogf/gf/v2/util/gconv"
	"golang.org/x/net/html"
)

var (
	// SelectField is the field selection passed to FindById when a bid
	// document is loaded in Start.
	SelectField = map[string]interface{}{
		"_id":       1,
		"detail":    1,
		"subtype":   1,
		"bidamount": 1,
	}

	// service is the implementation Pretreatment delegates to.
	// NOTE(review): Rule is not defined in the reviewed source; presumably it
	// lives in another file of this package.
	service Service = &Rule{}

	// AllQuoteMode is the set of quote modes other than QuoteMode_Other.
	AllQuoteMode = map[string]bool{
		QuoteMode_Whole:     true,
		QuoteMode_UnitPrice: true,
		QuoteMode_Rate:      true,
		QuoteMode_Discount:  true,
	}

	// colonSpacesRe matches a colon followed by one or more spaces. It is
	// compiled once here instead of on every Pretreatment call.
	colonSpacesRe = regexp.MustCompile("[::][ ]+")
)

// Quote modes returned by Start and Pretreatment.
const (
	QuoteMode_Other     = "其他报价模式"
	QuoteMode_Whole     = "整标报价模式"
	QuoteMode_UnitPrice = "单价模式"
	QuoteMode_Rate      = "费率模式"
	QuoteMode_Discount  = "上浮下浮模式"
)

// DataIdentify is the receiver that exposes Execute.
type DataIdentify struct {
}

// Execute identifies the document with the given id and stores two entries in
// *reply: "报价模式" (a short label for the quote mode) and "中标联合体"
// ("是" when the flag returned by Start equals 1, otherwise "否").
//
// It returns an error when _id or reply is nil; otherwise it returns nil, even
// when the document could not be found (the reply then carries the fallback
// values "其他" and "否").
func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
	if _id == nil || reply == nil {
		return errors.New("service: Execute requires non-nil _id and reply")
	}
	// Log the id itself; logging the pointer would print only its address.
	log.Println("rpc接收到要识别的_id", *_id)

	mode, flag := Start(*_id)

	var label string
	switch mode {
	case QuoteMode_Rate:
		label = "费率"
	case QuoteMode_UnitPrice:
		label = "单价"
	case QuoteMode_Discount:
		label = "折扣率"
	case QuoteMode_Whole:
		label = "正常报价"
	default:
		label = "其他"
	}

	consortium := "否"
	if flag == 1 {
		consortium = "是"
	}

	*reply = map[string]string{
		"报价模式":  label,
		"中标联合体": consortium,
	}
	return nil
}

// Service is implemented by the component that turns a prepared BidInfo into
// a quote mode and an integer flag.
type Service interface {
	Execute(b *BidInfo) (string, int)
}

// BidInfo carries the document fields and the extracted table data handed to
// a Service.
type BidInfo struct {
	Id, Detail, Subtype string
	Bidamount           float64
	Type                int
	// TableKv holds one entry per parsed table; each entry is the list of
	// header-keyed rows produced by ParseTable.
	TableKv [][]map[string]string
}

// Start loads the document with the given id from the collection named by the
// "mongodb.main.collection" configuration key and runs Pretreatment on it with
// type 0. It returns ("", 0) when the document is missing or empty.
func Start(_id string) (string, int) {
	collection := g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()
	data, ok := Mgo_Main.FindById(collection, _id, SelectField)
	if !ok || data == nil || len(*data) == 0 {
		log.Println(_id, "没有找到标讯")
		return "", 0
	}
	return Pretreatment(_id, *data, 0)
}

// Pretreatment builds a BidInfo from the raw document m, normalises its detail
// text, extracts the first HTML table and passes the result to the configured
// service. It returns ("", 0) when m is nil or empty.
func Pretreatment(_id string, m map[string]interface{}, t int) (string, int) {
	// len of a nil map is 0, so this covers both nil and empty.
	if len(m) == 0 {
		log.Println(_id, "没有找到标讯")
		return "", 0
	}

	bi := &BidInfo{
		Id:        _id,
		Bidamount: gconv.Float64(m["bidamount"]),
		Type:      t,
	}
	// A missing or non-string field deliberately leaves the zero value "".
	bi.Detail, _ = m["detail"].(string)
	bi.Subtype, _ = m["subtype"].(string)

	// NOTE(review): in the reviewed source the search string below is a
	// literal line break, which makes this replacement a no-op. Presumably it
	// was an HTML line-break tag that was lost in extraction; confirm against
	// the repository before relying on it.
	bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "\n", "\n", -1))
	bi.Detail = colonSpacesRe.ReplaceAllString(bi.Detail, ":")
	bi.TableToSliceOfMaps(bi.Detail)
	//for i, table := range bi.TableKv {
	//	log.Printf("Table %d:\n", i+1)
	//	for _, row := range table {
	//		for k, v := range row {
	//			log.Printf("  %s: %s\n", k, v)
	//		}
	//	}
	//}
	return service.Execute(bi)
}

// TableToSliceOfMaps parses htmlContent, locates the first <table> element in
// document order and, if ParseTable yields at least one row for it, appends
// those rows to bi.TableKv. Later tables are not examined.
func (bi *BidInfo) TableToSliceOfMaps(htmlContent string) {
	defer Catch()

	doc, err := html.Parse(strings.NewReader(htmlContent))
	if err != nil {
		log.Println(bi.Id, "html parse error:", err)
		return
	}

	// found stops the traversal as soon as the first table has been handled.
	var found bool
	var traverse func(*html.Node)
	traverse = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "table" {
			if rows := bi.ParseTable(n); len(rows) > 0 {
				bi.TableKv = append(bi.TableKv, rows)
			}
			found = true
			return
		}
		for c := n.FirstChild; c != nil && !found; c = c.NextSibling {
			traverse(c)
		}
	}
	traverse(doc)
}

// ParseTable converts a table node into key/value rows. The first accepted
// row supplies the keys; every following accepted row becomes one map from
// trimmed header text to trimmed cell text. Cells beyond the header width are
// ignored.
//
// A row is skipped entirely when it contains a nested table or a cell whose
// rowspan or colspan is greater than 1. The result is nil when no data rows
// remain.
func (bi *BidInfo) ParseTable(table *html.Node) []map[string]string {
	var rows [][]string

	// collectCells appends the text of every td/th below n to row. It reports
	// false when the row has to be discarded.
	var collectCells func(n *html.Node, row []string) ([]string, bool)
	collectCells = func(n *html.Node, row []string) ([]string, bool) {
		// Only element nodes are inspected by tag name: a text node's Data is
		// its content, so a cell containing the word "table" must not be
		// mistaken for a nested table element.
		if n.Type == html.ElementNode {
			switch n.Data {
			case "table":
				return row, false
			case "td", "th":
				for _, attr := range n.Attr {
					if (attr.Key == "rowspan" || attr.Key == "colspan") && gconv.Int(attr.Val) > 1 {
						return row, false
					}
				}
				row = append(row, bi.extractText(n))
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			var ok bool
			if row, ok = collectCells(c, row); !ok {
				return row, false
			}
		}
		return row, true
	}

	// collectRows walks the table and gathers the cell texts of each tr.
	var collectRows func(*html.Node)
	collectRows = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "tr" {
			row, ok := collectCells(n, nil)
			if !ok {
				// Discarded rows are not descended into any further.
				return
			}
			if len(row) > 0 {
				rows = append(rows, row)
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			collectRows(c)
		}
	}
	collectRows(table)

	if len(rows) == 0 {
		return nil
	}

	// Trim the header cells once instead of once per data cell.
	headers := make([]string, len(rows[0]))
	for i, h := range rows[0] {
		headers[i] = strings.TrimSpace(h)
	}

	var result []map[string]string
	for _, cells := range rows[1:] {
		item := make(map[string]string, len(headers))
		for i, val := range cells {
			if i >= len(headers) {
				break
			}
			item[headers[i]] = strings.TrimSpace(val)
		}
		result = append(result, item)
	}
	return result
}

// extractText returns the concatenation, in document order, of all text nodes
// at or below n.
func (bi *BidInfo) extractText(n *html.Node) string {
	var sb strings.Builder
	var walk func(*html.Node)
	walk = func(cur *html.Node) {
		if cur.Type == html.TextNode {
			sb.WriteString(cur.Data)
		}
		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(n)
	return sb.String()
}