service.go 6.1 KB


  1. package service
  import (
  	"fmt"
  	"log"
  	"regexp"
  	"strings"
  	"sync"

  	"github.com/gogf/gf/v2/frame/g"
  	"github.com/gogf/gf/v2/os/gctx"
  	"github.com/gogf/gf/v2/util/gconv"
  	"golang.org/x/net/html"

  	. "dataIdentify/db"
  )
  13. var (
  14. SelectField = map[string]interface{}{
  15. "_id": 1,
  16. "winnerorder": 1,
  17. "detail": 1,
  18. "subtype": 1,
  19. "bidamount": 1,
  20. "s_winner": 1,
  21. "com_package": 1,
  22. "multipackage": 1,
  23. }
  24. service Service = &Rule{}
  25. semicolonReg = regexp.MustCompile("[::]")
  26. allQuoteMode = map[string]bool{
  27. QuoteMode_Whole: true,
  28. QuoteMode_UnitPrice: true,
  29. QuoteMode_Rate: true,
  30. QuoteMode_Discount: true,
  31. }
  32. )
  // Quote-mode labels produced by the identification rules. The string values
  // are runtime data and must not be altered.
  const (
  	// QuoteMode_Whole is the whole-bid quote mode.
  	QuoteMode_Whole = "整标报价模式"
  	// QuoteMode_UnitPrice is the unit-price mode.
  	QuoteMode_UnitPrice = "单价模式"
  	// QuoteMode_Rate is the rate (fee percentage) mode.
  	QuoteMode_Rate = "费率模式"
  	// QuoteMode_Discount is the float-up / float-down mode.
  	QuoteMode_Discount = "上浮下浮模式"
  	// QuoteMode_Other is the catch-all "other quote mode" label.
  	QuoteMode_Other = "其他报价模式"
  )
  // DataIdentify is the stateless receiver of the Execute method. Execute has
  // the (args, reply) error shape and logs an "rpc" message, so the type is
  // presumably registered as an RPC service — confirm at the registration site.
  type DataIdentify struct{}
  42. func (d *DataIdentify) Execute(_id *string, reply *map[string]string) error {
  43. log.Println("rpc接收到要识别的_id", _id)
  44. _, a, _, b, _ := Start(*_id)
  45. *reply = map[string]string{}
  46. if a == QuoteMode_Rate {
  47. a = "费率"
  48. } else if a == QuoteMode_UnitPrice {
  49. a = "单价"
  50. } else if a == QuoteMode_Discount {
  51. a = "折扣率"
  52. } else if a == QuoteMode_Whole {
  53. a = "正常报价"
  54. } else {
  55. a = "其他"
  56. }
  57. (*reply)["报价模式"] = a
  58. if b == 1 {
  59. (*reply)["中标联合体"] = "是"
  60. } else {
  61. (*reply)["中标联合体"] = "否"
  62. }
  63. return nil
  64. }
  65. type Service interface {
  66. Execute(b *BidInfo) (bool, string, bool, int, bool)
  67. }
  // BidInfo is the working record that Pretreatment assembles for one bid
  // document and passes to the Service implementation.
  type BidInfo struct {
  	// Id is the document _id.
  	Id string
  	// Detail is the cleaned detail text with "<br/>" turned into newlines and
  	// surrounding whitespace trimmed.
  	Detail string
  	// Subtype is the document's "subtype" field.
  	Subtype string
  	// OriginalDetail is the detail after the clear patterns were removed but
  	// before "<br/>" replacement; it is the input of ParseTable.
  	OriginalDetail string
  	// FirstWinner is the "entname" of the first winnerorder entry after
  	// filtering its comma-separated parts.
  	FirstWinner string
  	// KvText holds the kvtext lines whose value part is neither empty nor "/".
  	KvText []string
  	// Bidamount is the document's "bidamount" converted to float64.
  	Bidamount float64
  	// Type is the caller-supplied t argument of Pretreatment (Start passes 0).
  	Type int
  	// Multipackage is the number of entries in "com_package".
  	Multipackage int
  	// Winner is the winner of the single package, falling back to "s_winner".
  	Winner string
  	// TableKv holds one header-keyed map per data row of the first HTML table
  	// found in OriginalDetail.
  	TableKv []map[string]string
  }
  79. func Start(_id string) (bool, string, bool, int, bool) {
  80. data, ok := Mgo_Main.FindById(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String(), _id, SelectField)
  81. if !ok || data == nil || len(*data) == 0 {
  82. log.Println(_id, "没有找到标讯")
  83. return false, "", false, 0, false
  84. }
  85. return Pretreatment(_id, *data, 0)
  86. }
  87. func Pretreatment(_id string, m map[string]interface{}, t int) (bool, string, bool, int, bool) {
  88. if m == nil || len(m) == 0 {
  89. log.Println(_id, "没有找到标讯")
  90. return false, "", false, 0, false
  91. }
  92. bi := &BidInfo{
  93. Id: _id,
  94. Bidamount: gconv.Float64(m["bidamount"]),
  95. Type: t,
  96. }
  97. bi.Detail, _ = m["detail"].(string)
  98. for _, v := range clearPatterns {
  99. bi.Detail = regexp.MustCompile(v).ReplaceAllString(bi.Detail, "")
  100. }
  101. bi.OriginalDetail = bi.Detail
  102. bi.Subtype, _ = m["subtype"].(string)
  103. bi.Detail = strings.TrimSpace(strings.Replace(bi.Detail, "<br/>", "\n", -1))
  104. com_package, _ := m["com_package"].([]interface{})
  105. bi.Multipackage = len(com_package)
  106. if len(com_package) == 1 {
  107. first, _ := com_package[0].(map[string]interface{})
  108. bi.Winner, _ = first["winner"].(string)
  109. }
  110. if bi.Winner == "" {
  111. bi.Winner = gconv.String(m["s_winner"])
  112. }
  113. if winnerorder := gconv.Maps(m["winnerorder"]); len(winnerorder) > 0 {
  114. bi.FirstWinner = strings.TrimSpace(gconv.String(winnerorder[0]["entname"]))
  115. array := []string{}
  116. for _, v := range strings.Split(bi.FirstWinner, ",") {
  117. if bidCommonwealth_firstWinnerOrderClearReg.MatchString(v) {
  118. continue
  119. }
  120. array = append(array, v)
  121. }
  122. bi.FirstWinner = strings.Join(array, ",")
  123. //if strings.Contains(bi.FirstWinner, ",") && strings.Contains(bi.Detail, strings.ReplaceAll(bi.FirstWinner, ",", "")) {
  124. // bi.FirstWinner = ""
  125. //}
  126. }
  127. extract, _ := Mgo_Extract.FindById("result_20220219", _id, `{"kvtext":1}`)
  128. if extract == nil || len(*extract) == 0 {
  129. extract, _ = Mgo_Extract.FindById("result_20220218", _id, `{"kvtext":1}`)
  130. }
  131. if extract != nil && len(*extract) > 0 {
  132. kvText, _ := (*extract)["kvtext"].(string)
  133. for _, v := range strings.Split(kvText, "\n") {
  134. vs := semicolonReg.Split(v, -1)
  135. if len(vs) < 2 || (strings.TrimSpace(vs[1]) == "" || strings.TrimSpace(vs[1]) == "/") {
  136. continue
  137. }
  138. bi.KvText = append(bi.KvText, v)
  139. }
  140. }
  141. bi.ParseTable(bi.OriginalDetail)
  142. //for _, v := range bi.TableKv {
  143. // for k, v := range v {
  144. // log.Println(k, v)
  145. // }
  146. //}
  147. return service.Execute(bi)
  148. }
  149. // 解析HTML中的第一个<table>并将其转为KV结构
  150. func (bi *BidInfo) ParseTable(htmlContent string) error {
  151. doc, err := html.Parse(strings.NewReader(htmlContent))
  152. if err != nil {
  153. return err
  154. }
  155. var table *html.Node
  156. var found bool
  157. // 查找第一个<table>标签
  158. var traverse func(*html.Node)
  159. traverse = func(n *html.Node) {
  160. if n.Type == html.ElementNode && n.Data == "table" {
  161. table = n
  162. found = true
  163. return
  164. }
  165. for c := n.FirstChild; c != nil && !found; c = c.NextSibling {
  166. traverse(c)
  167. }
  168. }
  169. traverse(doc)
  170. if table == nil {
  171. return fmt.Errorf("未找到<table>标签")
  172. }
  173. var rows [][]string
  174. // 遍历表格,提取单元格文本
  175. var visitNode func(*html.Node, []string) []string
  176. visitNode = func(n *html.Node, row []string) []string {
  177. if n.Type == html.ElementNode && (n.Data == "td" || n.Data == "th") {
  178. row = append(row, bi.extractText(n))
  179. }
  180. for c := n.FirstChild; c != nil; c = c.NextSibling {
  181. row = visitNode(c, row)
  182. }
  183. return row
  184. }
  185. var collectRows func(*html.Node)
  186. collectRows = func(n *html.Node) {
  187. if n.Type == html.ElementNode && (n.Data == "tr") {
  188. row := visitNode(n, nil)
  189. if len(row) > 0 {
  190. rows = append(rows, row)
  191. }
  192. }
  193. for c := n.FirstChild; c != nil; c = c.NextSibling {
  194. collectRows(c)
  195. }
  196. }
  197. collectRows(table)
  198. if len(rows) < 1 {
  199. return fmt.Errorf("表格中没有数据")
  200. }
  201. headers := rows[0]
  202. for _, r := range rows[1:] {
  203. item := make(map[string]string)
  204. for i, val := range r {
  205. if i < len(headers) {
  206. key := headers[i]
  207. item[strings.TrimSpace(key)] = strings.TrimSpace(val)
  208. }
  209. }
  210. bi.TableKv = append(bi.TableKv, item)
  211. }
  212. return nil
  213. }
  214. // 提取节点内所有文本
  215. func (bi *BidInfo) extractText(n *html.Node) string {
  216. var text string
  217. if n.Type == html.TextNode {
  218. text = n.Data
  219. }
  220. for c := n.FirstChild; c != nil; c = c.NextSibling {
  221. text += bi.extractText(c)
  222. }
  223. return text
  224. }