service.go 4.8 KB


  1. package service
  2. import (
  3. . "app.yhyue.com/moapp/jybase/common"
  4. . "dataIdentify/db"
  5. "github.com/gogf/gf/v2/frame/g"
  6. "github.com/gogf/gf/v2/os/gctx"
  7. "github.com/gogf/gf/v2/util/gconv"
  8. "golang.org/x/net/html"
  9. "log"
  10. "regexp"
  11. "strings"
  12. )
  13. var (
  14. SelectField = map[string]interface{}{
  15. "_id": 1,
  16. "detail": 1,
  17. "subtype": 1,
  18. "bidamount": 1,
  19. }
  20. service Service = &Rule{}
  21. AllQuoteMode = map[string]bool{
  22. QuoteMode_Whole: true,
  23. QuoteMode_UnitPrice: true,
  24. QuoteMode_Rate: true,
  25. QuoteMode_Discount: true,
  26. }
  27. )
  // Quote-mode labels. DataIdentify.Execute translates these into the shorter
  // labels it puts in the RPC reply.
  const (
  	// QuoteMode_Other is the "other" quote mode; it is not listed in AllQuoteMode.
  	QuoteMode_Other = "其他报价模式"
  	// QuoteMode_Whole is the whole-bid quote mode.
  	QuoteMode_Whole = "整标报价模式"
  	// QuoteMode_UnitPrice is the unit-price quote mode.
  	QuoteMode_UnitPrice = "单价模式"
  	// QuoteMode_Rate is the rate quote mode.
  	QuoteMode_Rate = "费率模式"
  	// QuoteMode_Discount is the float-up/float-down quote mode.
  	QuoteMode_Discount = "上浮下浮模式"
  )
  // DataIdentify is the stateless receiver type for the Execute RPC method.
  type DataIdentify struct{}
  37. func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
  38. log.Println("rpc接收到要识别的_id", _id)
  39. a, b := Start(*_id)
  40. *reply = map[string]string{}
  41. if a == QuoteMode_Rate {
  42. a = "费率"
  43. } else if a == QuoteMode_UnitPrice {
  44. a = "单价"
  45. } else if a == QuoteMode_Discount {
  46. a = "折扣率"
  47. } else if a == QuoteMode_Whole {
  48. a = "正常报价"
  49. } else {
  50. a = "其他"
  51. }
  52. (*reply)["报价模式"] = a
  53. if b == 1 {
  54. (*reply)["中标联合体"] = "是"
  55. } else {
  56. (*reply)["中标联合体"] = "否"
  57. }
  58. return nil
  59. }
  60. type Service interface {
  61. Execute(b *BidInfo) (string, int)
  62. }
  // BidInfo carries the fields of one record in the form handed to Service.
  type BidInfo struct {
  	// Id is the record id that was passed to Pretreatment.
  	Id string
  	// Detail is the record's "detail" text after Pretreatment's normalization.
  	Detail string
  	// Subtype is the record's "subtype" value.
  	Subtype string
  	// Bidamount is the record's "bidamount" converted to float64.
  	Bidamount float64
  	// Type is the caller-supplied type flag given to Pretreatment (Start passes 0).
  	Type int
  	// TableKv holds the parsed tables: one entry per table, each entry a list
  	// of rows mapping header text to cell text.
  	TableKv [][]map[string]string
  }
  69. func Start(_id string) (string, int) {
  70. data, ok := Mgo_Main.FindById(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String(), _id, SelectField)
  71. if !ok || data == nil || len(*data) == 0 {
  72. log.Println(_id, "没有找到标讯")
  73. return "", 0
  74. }
  75. return Pretreatment(_id, *data, 0)
  76. }
  77. func Pretreatment(_id string, m map[string]interface{}, t int) (string, int) {
  78. if m == nil || len(m) == 0 {
  79. log.Println(_id, "没有找到标讯")
  80. return "", 0
  81. }
  82. bi := &BidInfo{
  83. Id: _id,
  84. Bidamount: gconv.Float64(m["bidamount"]),
  85. Type: t,
  86. }
  87. bi.Detail, _ = m["detail"].(string)
  88. bi.Subtype, _ = m["subtype"].(string)
  89. bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "<br/>", "\n", -1))
  90. bi.Detail = regexp.MustCompile("[::][ ]+").ReplaceAllString(bi.Detail, ":")
  91. bi.TableToSliceOfMaps(bi.Detail)
  92. //for i, table := range bi.TableKv {
  93. // log.Printf("Table %d:\n", i+1)
  94. // for _, row := range table {
  95. // for k, v := range row {
  96. // log.Printf(" %s: %s\n", k, v)
  97. // }
  98. // }
  99. //}
  100. return service.Execute(bi)
  101. }
  102. func (bi *BidInfo) TableToSliceOfMaps(htmlContent string) {
  103. defer Catch()
  104. doc, err := html.Parse(strings.NewReader(htmlContent))
  105. if err != nil {
  106. return
  107. }
  108. var found bool
  109. // 查找第一个<table>标签
  110. var traverse func(*html.Node)
  111. traverse = func(n *html.Node) {
  112. if n.Type == html.ElementNode && n.Data == "table" {
  113. result := bi.ParseTable(n)
  114. if len(result) > 0 {
  115. bi.TableKv = append(bi.TableKv, result)
  116. }
  117. found = true
  118. return
  119. }
  120. for c := n.FirstChild; c != nil && !found; c = c.NextSibling {
  121. traverse(c)
  122. }
  123. }
  124. traverse(doc)
  125. }
  126. // 解析HTML中的第一个<table>并将其转为KV结构
  127. func (bi *BidInfo) ParseTable(table *html.Node) []map[string]string {
  128. var result []map[string]string
  129. var rows [][]string
  130. // 遍历表格,提取单元格文本
  131. var visitNode func(*html.Node, []string) (bool, []string)
  132. visitNode = func(n *html.Node, row []string) (bool, []string) {
  133. if n.Data == "table" {
  134. return false, row
  135. }
  136. if n.Type == html.ElementNode && (n.Data == "td" || n.Data == "th") {
  137. for _, v := range n.Attr {
  138. if (v.Key == "rowspan" || v.Key == "colspan") && gconv.Int(v.Val) > 1 {
  139. return false, row
  140. }
  141. }
  142. row = append(row, bi.extractText(n))
  143. }
  144. for c := n.FirstChild; c != nil; c = c.NextSibling {
  145. var isOk bool
  146. isOk, row = visitNode(c, row)
  147. if !isOk {
  148. return false, row
  149. }
  150. }
  151. return true, row
  152. }
  153. var collectRows func(*html.Node)
  154. collectRows = func(n *html.Node) {
  155. if n.Type == html.ElementNode && (n.Data == "tr") {
  156. isOK, row := visitNode(n, nil)
  157. if !isOK {
  158. return
  159. }
  160. if len(row) > 0 {
  161. rows = append(rows, row)
  162. }
  163. }
  164. for c := n.FirstChild; c != nil; c = c.NextSibling {
  165. collectRows(c)
  166. }
  167. }
  168. collectRows(table)
  169. if len(rows) < 1 {
  170. return result
  171. }
  172. headers := rows[0]
  173. for _, r := range rows[1:] {
  174. item := make(map[string]string)
  175. for i, val := range r {
  176. if i < len(headers) {
  177. key := headers[i]
  178. item[strings.TrimSpace(key)] = strings.TrimSpace(val)
  179. }
  180. }
  181. result = append(result, item)
  182. }
  183. return result
  184. }
  185. // 提取节点内所有文本
  186. func (bi *BidInfo) extractText(n *html.Node) string {
  187. var text string
  188. if n.Type == html.TextNode {
  189. text = n.Data
  190. }
  191. for c := n.FirstChild; c != nil; c = c.NextSibling {
  192. text += bi.extractText(c)
  193. }
  194. return text
  195. }