12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 |
- package utility
- import (
- "context"
- "encoding/base64"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "github.com/gogf/gf/v2/container/garray"
- "github.com/gogf/gf/v2/frame/g"
- "github.com/mozillazg/go-pinyin"
- "os"
- "path/filepath"
- "strings"
- "unicode"
- )
- var imageOutDir = g.Cfg().MustGet(context.Background(), "analysisReport.images.generateDir", "").String()
- // AnalysisHtml 行业报告获取报告html内容并解析出png图片
- func AnalysisHtml(htmlFilePath string) (detail, desc string, imageUrl []string, err error) {
- htmlFile, ctx := &os.File{}, context.Background()
- htmlFile, err = os.Open(htmlFilePath)
- if err != nil {
- return "", "", nil, err
- }
- doc, err := goquery.NewDocumentFromReader(htmlFile)
- if err != nil {
- return "", "", nil, err
- }
- fileFullName := filepath.Base(htmlFilePath)
- fileName := strings.TrimSuffix(fileFullName, filepath.Ext(fileFullName))
- // 过滤条件:url为空
- imageUrl = garray.NewStrArrayFrom(doc.Find("img.canvas-img-mark").Map(func(i int, s *goquery.Selection) string {
- imgBase64, exists := s.Attr("src")
- if imgBase64 == "" || !exists {
- return ""
- }
- decodedData, err := base64.StdEncoding.DecodeString(strings.Replace(imgBase64, "data:image/png;base64,", "", 1))
- if err != nil {
- g.Log().Errorf(ctx, "decoding base64 data: %v", err)
- return ""
- }
- filePath := fmt.Sprintf("%s%s.png", imageOutDir, fmt.Sprintf("%s_%d", fileName, i))
- if err = os.WriteFile(filePath, decodedData, 0644); err != nil {
- g.Log().Errorf(ctx, "WriteFile png file error: %v", err)
- return ""
- }
- s.ReplaceWithHtml("<img class='canvas-img-mark-new' src='{{IMAGE_REPLACE}}'>")
- return fmt.Sprintf("%s_%d.png", fileName, i)
- })).Filter(func(index int, value string) bool {
- return value == ""
- }).Slice()
- // 获取替换过的html文本
- detail, err = doc.Html()
- desc = doc.Find("#project-one").Text()
- return
- }
- func NewResult(data interface{}, err error) Result {
- errCode := 0
- errMsg := ""
- if err != nil {
- errCode = -1
- errMsg = err.Error()
- }
- return Result{
- Error_code: errCode,
- Error_msg: errMsg,
- Data: data,
- }
- }
- func KeyPinyin(chineseString string) string {
- pinyinArgs := pinyin.NewArgs()
- pinyinArgs.Style = pinyin.Normal
- pinyinArgs.Heteronym = true
- var pinyinSlice []string
- for _, r := range chineseString {
- if unicode.Is(unicode.Han, r) {
- result := pinyin.Pinyin(string(r), pinyinArgs)
- pinyinSlice = append(pinyinSlice, result[0][0])
- } else {
- pinyinSlice = append(pinyinSlice, string(r))
- }
- }
- return strings.Join(pinyinSlice, "")
- }
// Result is the unified return value of the API.
type Result struct {
	// Error_code is the numeric status, serialised as "error_code".
	Error_code int `json:"error_code"`
	// Error_msg is the error description, serialised as "error_msg".
	Error_msg string `json:"error_msg"`
	// Data is the response payload, serialised as "data".
	Data interface{} `json:"data"`
}
|