analysis_report.go 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. package utility
  2. import (
  3. "context"
  4. "encoding/base64"
  5. "fmt"
  6. "github.com/PuerkitoBio/goquery"
  7. "github.com/gogf/gf/v2/container/garray"
  8. "github.com/gogf/gf/v2/frame/g"
  9. "github.com/mozillazg/go-pinyin"
  10. "os"
  11. "path/filepath"
  12. "strings"
  13. "unicode"
  14. )
  15. var imageOutDir = g.Cfg().MustGet(context.Background(), "analysisReport.images.generateDir", "").String()
  16. // AnalysisHtml 行业报告获取报告html内容并解析出png图片
  17. func AnalysisHtml(htmlFilePath string) (detail, desc string, imageUrl []string, err error) {
  18. htmlFile, ctx := &os.File{}, context.Background()
  19. htmlFile, err = os.Open(htmlFilePath)
  20. if err != nil {
  21. return "", "", nil, err
  22. }
  23. doc, err := goquery.NewDocumentFromReader(htmlFile)
  24. if err != nil {
  25. return "", "", nil, err
  26. }
  27. fileFullName := filepath.Base(htmlFilePath)
  28. fileName := strings.TrimSuffix(fileFullName, filepath.Ext(fileFullName))
  29. // 过滤条件:url为空
  30. imageUrl = garray.NewStrArrayFrom(doc.Find("img.canvas-img-mark").Map(func(i int, s *goquery.Selection) string {
  31. imgBase64, exists := s.Attr("src")
  32. if imgBase64 == "" || !exists {
  33. return ""
  34. }
  35. decodedData, err := base64.StdEncoding.DecodeString(strings.Replace(imgBase64, "data:image/png;base64,", "", 1))
  36. if err != nil {
  37. g.Log().Errorf(ctx, "decoding base64 data: %v", err)
  38. return ""
  39. }
  40. filePath := fmt.Sprintf("%s%s.png", imageOutDir, fmt.Sprintf("%s_%d", fileName, i))
  41. if err = os.WriteFile(filePath, decodedData, 0644); err != nil {
  42. g.Log().Errorf(ctx, "WriteFile png file error: %v", err)
  43. return ""
  44. }
  45. s.ReplaceWithHtml("<img class='canvas-img-mark-new' src='{{IMAGE_REPLACE}}'>")
  46. return fmt.Sprintf("%s_%d.png", fileName, i)
  47. })).Filter(func(index int, value string) bool {
  48. return value == ""
  49. }).Slice()
  50. // 获取替换过的html文本
  51. detail, err = doc.Html()
  52. desc = doc.Find("#project-one").Text()
  53. return
  54. }
  55. func NewResult(data interface{}, err error) Result {
  56. errCode := 0
  57. errMsg := ""
  58. if err != nil {
  59. errCode = -1
  60. errMsg = err.Error()
  61. }
  62. return Result{
  63. Error_code: errCode,
  64. Error_msg: errMsg,
  65. Data: data,
  66. }
  67. }
  68. func KeyPinyin(chineseString string) string {
  69. pinyinArgs := pinyin.NewArgs()
  70. pinyinArgs.Style = pinyin.Normal
  71. pinyinArgs.Heteronym = true
  72. var pinyinSlice []string
  73. for _, r := range chineseString {
  74. if unicode.Is(unicode.Han, r) {
  75. result := pinyin.Pinyin(string(r), pinyinArgs)
  76. pinyinSlice = append(pinyinSlice, result[0][0])
  77. } else {
  78. pinyinSlice = append(pinyinSlice, string(r))
  79. }
  80. }
  81. return strings.Join(pinyinSlice, "")
  82. }
  83. // 接口统一返回值
  84. type Result struct {
  85. Error_code int `json:"error_code"`
  86. Error_msg string `json:"error_msg"`
  87. Data interface{} `json:"data"`
  88. }