package service

import (
	"fmt"
	"log"
	"regexp"
	"strings"
	"sync"

	. "dataIdentify/db"

	"github.com/gogf/gf/v2/frame/g"
	"github.com/gogf/gf/v2/os/gctx"
	"github.com/gogf/gf/v2/util/gconv"
	"golang.org/x/net/html"
)

var (
	// SelectField is the field projection passed to Mgo_Main.FindById when a
	// bid announcement is loaded by Start.
	SelectField = map[string]interface{}{
		"_id":          1,
		"winnerorder":  1,
		"detail":       1,
		"subtype":      1,
		"bidamount":    1,
		"s_winner":     1,
		"com_package":  1,
		"multipackage": 1,
	}

	// service is the Service implementation that Pretreatment delegates to.
	service Service = &Rule{}

	// semicolonReg splits a "key:value" line of the extracted kv text.
	// NOTE(review): the character class lists ':' twice; one of them was
	// possibly a full-width colon (U+FF1A) before the file was re-encoded.
	// Confirm against version control.
	semicolonReg = regexp.MustCompile("[::]")

	// lineBreakTagReg matches the HTML line-break tag that Pretreatment turns
	// into "\n".
	// NOTE(review): the literal originally passed to strings.Replace was lost
	// when this file was extracted (an HTML tag was stripped from the string).
	// A <br> variant is assumed here; confirm against version control.
	lineBreakTagReg = regexp.MustCompile(`(?i)<br\s*/?>`)

	// clearPatternRegCache memoizes the compiled form of every clearPatterns
	// entry (pattern string -> *regexp.Regexp) so a pattern is compiled once
	// instead of on every Pretreatment call.
	clearPatternRegCache sync.Map

	// allQuoteMode is the set of quote modes other than QuoteMode_Other.
	allQuoteMode = map[string]bool{
		QuoteMode_Whole:     true,
		QuoteMode_UnitPrice: true,
		QuoteMode_Rate:      true,
		QuoteMode_Discount:  true,
	}
)

// Quote-mode identifiers. Execute maps them to the short labels that are sent
// back to RPC clients.
const (
	QuoteMode_Other     = "其他报价模式"
	QuoteMode_Whole     = "整标报价模式"
	QuoteMode_UnitPrice = "单价模式"
	QuoteMode_Rate      = "费率模式"
	QuoteMode_Discount  = "上浮下浮模式"
)

// DataIdentify is the RPC receiver exposing the identification service.
type DataIdentify struct {
}

// Execute identifies the bid announcement with the given _id and stores the
// result in reply under two keys: "报价模式" (quote mode label) and
// "中标联合体" ("是" when the fourth value returned by Start is 1, otherwise
// "否").
//
// It returns an error only when _id or reply is nil. A missing document is
// not an error: the reply then carries the fallback labels "其他" and "否".
func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
	if _id == nil || reply == nil {
		return fmt.Errorf("data identify: nil _id or reply")
	}
	// Log the id itself rather than the pointer address.
	log.Println("rpc接收到要识别的_id", *_id)

	_, quoteMode, _, consortium, _ := Start(*_id)

	var modeLabel string
	switch quoteMode {
	case QuoteMode_Rate:
		modeLabel = "费率"
	case QuoteMode_UnitPrice:
		modeLabel = "单价"
	case QuoteMode_Discount:
		modeLabel = "折扣率"
	case QuoteMode_Whole:
		modeLabel = "正常报价"
	default:
		modeLabel = "其他"
	}

	consortiumLabel := "否"
	if consortium == 1 {
		consortiumLabel = "是"
	}

	*reply = map[string]string{
		"报价模式":  modeLabel,
		"中标联合体": consortiumLabel,
	}
	return nil
}

// Service evaluates a prepared BidInfo.
//
// Within this file only the second result (the quote mode, one of the
// QuoteMode_* constants) and the fourth result (1 meaning the winner is a
// consortium) are interpreted; the meaning of the remaining values is defined
// by the implementation.
type Service interface {
	Execute(b *BidInfo) (bool, string, bool, int, bool)
}

// BidInfo is the pre-processed view of one bid announcement that is handed to
// Service.Execute. It is populated by Pretreatment.
type BidInfo struct {
	// Id is the document id; Detail is the cleaned body with line-break tags
	// turned into "\n" and surrounding whitespace trimmed; Subtype is the
	// document's "subtype" field.
	Id, Detail, Subtype string
	// OriginalDetail is the body after clearPatterns were removed but before
	// line-break replacement; it is the input of ParseTable.
	OriginalDetail string
	// FirstWinner is the "entname" of the first winnerorder entry with the
	// comma-separated parts matching
	// bidCommonwealth_firstWinnerOrderClearReg removed.
	FirstWinner string
	// KvText holds the lines of the extracted kv text whose value part is
	// neither empty nor "/".
	KvText []string
	// Bidamount is the document's "bidamount" converted to float64.
	Bidamount float64
	// Type is the t argument of Pretreatment, passed through unchanged.
	Type int
	// Multipackage is the number of entries in "com_package".
	Multipackage int
	// Winner is the winner of the only package when there is exactly one,
	// otherwise (or when that is empty) the document's "s_winner".
	Winner string
	// TableKv holds one map per data row of the first HTML table found in
	// OriginalDetail, keyed by the header cell of the same column.
	TableKv []map[string]string
}

// Start loads the announcement with the given _id from the collection named
// by the "mongodb.main.collection" config entry and runs Pretreatment on it
// with t = 0. When the document cannot be found it logs the fact and returns
// the zero result (false, "", false, 0, false).
func Start(_id string) (bool, string, bool, int, bool) {
	collection := g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()
	data, ok := Mgo_Main.FindById(collection, _id, SelectField)
	if !ok || data == nil || len(*data) == 0 {
		log.Println(_id, "没有找到标讯")
		return false, "", false, 0, false
	}
	return Pretreatment(_id, *data, 0)
}

// compiledClearPattern returns the compiled regexp for pattern, compiling it
// on first use. Like regexp.MustCompile it panics on an invalid pattern.
func compiledClearPattern(pattern string) *regexp.Regexp {
	if cached, ok := clearPatternRegCache.Load(pattern); ok {
		return cached.(*regexp.Regexp)
	}
	re := regexp.MustCompile(pattern)
	clearPatternRegCache.Store(pattern, re)
	return re
}

// Pretreatment builds a BidInfo from the raw document m and passes it to the
// package-level service.
//
// _id is the document id, m the document fields (see SelectField) and t is
// stored unchanged in BidInfo.Type. An empty or nil m is logged and yields the
// zero result (false, "", false, 0, false).
func Pretreatment(_id string, m map[string]interface{}, t int) (bool, string, bool, int, bool) {
	if len(m) == 0 {
		log.Println(_id, "没有找到标讯")
		return false, "", false, 0, false
	}

	bi := &BidInfo{
		Id:        _id,
		Bidamount: gconv.Float64(m["bidamount"]),
		Type:      t,
	}

	// Strip every clearPatterns match from the body.
	bi.Detail, _ = m["detail"].(string)
	for _, pattern := range clearPatterns {
		bi.Detail = compiledClearPattern(pattern).ReplaceAllString(bi.Detail, "")
	}
	bi.OriginalDetail = bi.Detail
	bi.Subtype, _ = m["subtype"].(string)
	bi.Detail = strings.TrimSpace(lineBreakTagReg.ReplaceAllString(bi.Detail, "\n"))

	// A single package carries its own winner; otherwise fall back to s_winner.
	packages, _ := m["com_package"].([]interface{})
	bi.Multipackage = len(packages)
	if len(packages) == 1 {
		only, _ := packages[0].(map[string]interface{})
		bi.Winner, _ = only["winner"].(string)
	}
	if bi.Winner == "" {
		bi.Winner = gconv.String(m["s_winner"])
	}

	if winnerorder := gconv.Maps(m["winnerorder"]); len(winnerorder) > 0 {
		bi.FirstWinner = strings.TrimSpace(gconv.String(winnerorder[0]["entname"]))
		parts := strings.Split(bi.FirstWinner, ",")
		kept := make([]string, 0, len(parts))
		for _, part := range parts {
			if bidCommonwealth_firstWinnerOrderClearReg.MatchString(part) {
				continue
			}
			kept = append(kept, part)
		}
		bi.FirstWinner = strings.Join(kept, ",")
		//if strings.Contains(bi.FirstWinner, ",") && strings.Contains(bi.Detail, strings.ReplaceAll(bi.FirstWinner, ",", "")) {
		//	bi.FirstWinner = ""
		//}
	}

	// Look the kv text up in the newer result collection first, then the older.
	extract, _ := Mgo_Extract.FindById("result_20220219", _id, `{"kvtext":1}`)
	if extract == nil || len(*extract) == 0 {
		extract, _ = Mgo_Extract.FindById("result_20220218", _id, `{"kvtext":1}`)
	}
	if extract != nil && len(*extract) > 0 {
		kvText, _ := (*extract)["kvtext"].(string)
		for _, line := range strings.Split(kvText, "\n") {
			fields := semicolonReg.Split(line, -1)
			if len(fields) < 2 {
				continue
			}
			// Skip keys whose value is blank or just a "/" placeholder.
			if value := strings.TrimSpace(fields[1]); value == "" || value == "/" {
				continue
			}
			bi.KvText = append(bi.KvText, line)
		}
	}

	// The error is deliberately ignored: when no table can be parsed TableKv
	// simply stays empty.
	_ = bi.ParseTable(bi.OriginalDetail)
	//for _, v := range bi.TableKv {
	//	for k, v := range v {
	//		log.Println(k, v)
	//	}
	//}
	return service.Execute(bi)
}

// ParseTable parses the first <table> in htmlContent and appends its data
// rows to bi.TableKv as key/value maps.
//
// The first collected row is used as the header row; every following row
// becomes one map from trimmed header text to trimmed cell text. Cells beyond
// the header width are dropped. It returns an error when the HTML cannot be
// parsed, when no <table> element exists, or when the table has no rows with
// cells.
//
// NOTE(review): the traversal descends into every descendant, so rows and
// cells of tables nested inside the first table are collected as well.
func (bi *BidInfo) ParseTable(htmlContent string) error {
	doc, err := html.Parse(strings.NewReader(htmlContent))
	if err != nil {
		return fmt.Errorf("parse html: %w", err)
	}

	// Depth-first, document-order search for the first <table> element.
	var findTable func(*html.Node) *html.Node
	findTable = func(n *html.Node) *html.Node {
		if n.Type == html.ElementNode && n.Data == "table" {
			return n
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if hit := findTable(c); hit != nil {
				return hit
			}
		}
		return nil
	}
	table := findTable(doc)
	if table == nil {
		return fmt.Errorf("未找到<table>标签")
	}

	// collectCells gathers the text of every td/th below n, in document order.
	var collectCells func(*html.Node, []string) []string
	collectCells = func(n *html.Node, cells []string) []string {
		if n.Type == html.ElementNode && (n.Data == "td" || n.Data == "th") {
			cells = append(cells, bi.extractText(n))
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			cells = collectCells(c, cells)
		}
		return cells
	}

	// collectRows records one cell slice per <tr> that has at least one cell.
	var rows [][]string
	var collectRows func(*html.Node)
	collectRows = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "tr" {
			if cells := collectCells(n, nil); len(cells) > 0 {
				rows = append(rows, cells)
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			collectRows(c)
		}
	}
	collectRows(table)
	if len(rows) < 1 {
		return fmt.Errorf("表格中没有数据")
	}

	headers := rows[0]
	for _, cells := range rows[1:] {
		item := make(map[string]string, len(headers))
		for i, cell := range cells {
			if i >= len(headers) {
				break
			}
			item[strings.TrimSpace(headers[i])] = strings.TrimSpace(cell)
		}
		bi.TableKv = append(bi.TableKv, item)
	}
	return nil
}

// extractText returns the concatenation, in document order, of all text nodes
// at or below n. A nil node yields "".
func (bi *BidInfo) extractText(n *html.Node) string {
	if n == nil {
		return ""
	}
	var sb strings.Builder
	var walk func(*html.Node)
	walk = func(cur *html.Node) {
		if cur.Type == html.TextNode {
			sb.WriteString(cur.Data)
		}
		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(n)
	return sb.String()
}