123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- package service
- import (
- . "app.yhyue.com/moapp/jybase/common"
- . "dataIdentify/db"
- "github.com/gogf/gf/v2/frame/g"
- "github.com/gogf/gf/v2/os/gctx"
- "github.com/gogf/gf/v2/util/gconv"
- "golang.org/x/net/html"
- "log"
- "regexp"
- "strings"
- )
- var (
- SelectField = map[string]interface{}{
- "_id": 1,
- "detail": 1,
- "subtype": 1,
- "bidamount": 1,
- }
- service Service = &Rule{}
- AllQuoteMode = map[string]bool{
- QuoteMode_Whole: true,
- QuoteMode_UnitPrice: true,
- QuoteMode_Rate: true,
- QuoteMode_Discount: true,
- }
- )
// Quote-mode identifiers. The values are the Chinese display names used
// throughout the package and must not be altered.
const (
	// QuoteMode_Whole is the whole-bid quote mode.
	QuoteMode_Whole = "整标报价模式"
	// QuoteMode_UnitPrice is the unit-price quote mode.
	QuoteMode_UnitPrice = "单价模式"
	// QuoteMode_Rate is the rate (fee percentage) quote mode.
	QuoteMode_Rate = "费率模式"
	// QuoteMode_Discount is the float-up / float-down quote mode.
	QuoteMode_Discount = "上浮下浮模式"
	// QuoteMode_Other is the catch-all quote mode; it is not listed in
	// AllQuoteMode.
	QuoteMode_Other = "其他报价模式"
)
- type DataIdentify struct {
- }
- func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
- log.Println("rpc接收到要识别的_id", _id)
- a, b := Start(*_id)
- *reply = map[string]string{}
- if a == QuoteMode_Rate {
- a = "费率"
- } else if a == QuoteMode_UnitPrice {
- a = "单价"
- } else if a == QuoteMode_Discount {
- a = "折扣率"
- } else if a == QuoteMode_Whole {
- a = "正常报价"
- } else {
- a = "其他"
- }
- (*reply)["报价模式"] = a
- if b == 1 {
- (*reply)["中标联合体"] = "是"
- } else {
- (*reply)["中标联合体"] = "否"
- }
- return nil
- }
- type Service interface {
- Execute(b *BidInfo) (string, int)
- }
// BidInfo carries the data of a single bid announcement through
// identification.
type BidInfo struct {
	// Id is the document ID of the announcement.
	Id string
	// Detail is the announcement body after the normalisation done in
	// Pretreatment.
	Detail string
	// Subtype is the "subtype" field of the source document.
	Subtype string
	// Bidamount is the "bidamount" field of the source document.
	Bidamount float64
	// Type is the caller-supplied value forwarded by Pretreatment.
	Type int
	// TableKv holds, per parsed table, one header->cell map for each data row.
	TableKv [][]map[string]string
}
- func Start(_id string) (string, int) {
- data, ok := Mgo_Main.FindById(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String(), _id, SelectField)
- if !ok || data == nil || len(*data) == 0 {
- log.Println(_id, "没有找到标讯")
- return "", 0
- }
- return Pretreatment(_id, *data, 0)
- }
- func Pretreatment(_id string, m map[string]interface{}, t int) (string, int) {
- if m == nil || len(m) == 0 {
- log.Println(_id, "没有找到标讯")
- return "", 0
- }
- bi := &BidInfo{
- Id: _id,
- Bidamount: gconv.Float64(m["bidamount"]),
- Type: t,
- }
- bi.Detail, _ = m["detail"].(string)
- bi.Subtype, _ = m["subtype"].(string)
- bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "<br/>", "\n", -1))
- bi.Detail = regexp.MustCompile("[::][ ]+").ReplaceAllString(bi.Detail, ":")
- bi.TableToSliceOfMaps(bi.Detail)
- //for i, table := range bi.TableKv {
- // log.Printf("Table %d:\n", i+1)
- // for _, row := range table {
- // for k, v := range row {
- // log.Printf(" %s: %s\n", k, v)
- // }
- // }
- //}
- return service.Execute(bi)
- }
- func (bi *BidInfo) TableToSliceOfMaps(htmlContent string) {
- defer Catch()
- doc, err := html.Parse(strings.NewReader(htmlContent))
- if err != nil {
- return
- }
- var found bool
- // 查找第一个<table>标签
- var traverse func(*html.Node)
- traverse = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "table" {
- result := bi.ParseTable(n)
- if len(result) > 0 {
- bi.TableKv = append(bi.TableKv, result)
- }
- found = true
- return
- }
- for c := n.FirstChild; c != nil && !found; c = c.NextSibling {
- traverse(c)
- }
- }
- traverse(doc)
- }
- // 解析HTML中的第一个<table>并将其转为KV结构
- func (bi *BidInfo) ParseTable(table *html.Node) []map[string]string {
- var result []map[string]string
- var rows [][]string
- // 遍历表格,提取单元格文本
- var visitNode func(*html.Node, []string) (bool, []string)
- visitNode = func(n *html.Node, row []string) (bool, []string) {
- if n.Data == "table" {
- return false, row
- }
- if n.Type == html.ElementNode && (n.Data == "td" || n.Data == "th") {
- for _, v := range n.Attr {
- if (v.Key == "rowspan" || v.Key == "colspan") && gconv.Int(v.Val) > 1 {
- return false, row
- }
- }
- row = append(row, bi.extractText(n))
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- var isOk bool
- isOk, row = visitNode(c, row)
- if !isOk {
- return false, row
- }
- }
- return true, row
- }
- var collectRows func(*html.Node)
- collectRows = func(n *html.Node) {
- if n.Type == html.ElementNode && (n.Data == "tr") {
- isOK, row := visitNode(n, nil)
- if !isOK {
- return
- }
- if len(row) > 0 {
- rows = append(rows, row)
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- collectRows(c)
- }
- }
- collectRows(table)
- if len(rows) < 1 {
- return result
- }
- headers := rows[0]
- for _, r := range rows[1:] {
- item := make(map[string]string)
- for i, val := range r {
- if i < len(headers) {
- key := headers[i]
- item[strings.TrimSpace(key)] = strings.TrimSpace(val)
- }
- }
- result = append(result, item)
- }
- return result
- }
- // 提取节点内所有文本
- func (bi *BidInfo) extractText(n *html.Node) string {
- var text string
- if n.Type == html.TextNode {
- text = n.Data
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- text += bi.extractText(c)
- }
- return text
- }
|