소스 검색

wip:识别文件格式

wangshan 1 년 전
부모
커밋
75332129ce
5개의 변경된 파일, 35개의 추가 및 5개의 삭제
  1. 2 1
      go.mod
  2. 4 2
      go.sum
  3. 1 1
      rpc/partnerlib/service/docDownload.go
  4. 14 1
      rpc/partnerlib/test/fileUpload_test.go
  5. 14 0
      rpc/partnerlib/util/util.go

+ 2 - 1
go.mod

@@ -6,9 +6,10 @@ toolchain go1.22.4
 
 
 require (
 require (
 	app.yhyue.com/moapp/jybase v0.0.0-20240523083821-42a82b37ae20
 	app.yhyue.com/moapp/jybase v0.0.0-20240523083821-42a82b37ae20
-	app.yhyue.com/moapp/jyfs v0.0.0-20240620115525-f330ca6510bd
+	app.yhyue.com/moapp/jyfs v0.0.0-20240620123357-8ef49ab459b5
 	app.yhyue.com/moapp/jypkg v1.21.4
 	app.yhyue.com/moapp/jypkg v1.21.4
 	github.com/gogf/gf/v2 v2.7.1
 	github.com/gogf/gf/v2 v2.7.1
+	github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
 	github.com/zeromicro/go-zero v1.6.4
 	github.com/zeromicro/go-zero v1.6.4
 	google.golang.org/grpc v1.63.2
 	google.golang.org/grpc v1.63.2
 	google.golang.org/protobuf v1.33.0
 	google.golang.org/protobuf v1.33.0

+ 4 - 2
go.sum

@@ -18,8 +18,8 @@ app.yhyue.com/moapp/jybase v0.0.0-20231025021840-2f91c944ecdd/go.mod h1:Hv9U/7oH
 app.yhyue.com/moapp/jybase v0.0.0-20240523083821-42a82b37ae20 h1:F1ZHkzo7yHp5eNrZDqQxaXMIKFQU72bsI1dMq3ztJLA=
 app.yhyue.com/moapp/jybase v0.0.0-20240523083821-42a82b37ae20 h1:F1ZHkzo7yHp5eNrZDqQxaXMIKFQU72bsI1dMq3ztJLA=
 app.yhyue.com/moapp/jybase v0.0.0-20240523083821-42a82b37ae20/go.mod h1:XHNATN6tsJKHdCB0DbUtFdPPHXexTUFyB3RlO+lUUoM=
 app.yhyue.com/moapp/jybase v0.0.0-20240523083821-42a82b37ae20/go.mod h1:XHNATN6tsJKHdCB0DbUtFdPPHXexTUFyB3RlO+lUUoM=
 app.yhyue.com/moapp/jyfs v0.0.0-20231024061508-480c270480d4/go.mod h1:61hzZ3dZHXL28BNl8BOgZsvM2S5UVY5YFzOkEUPrSu4=
 app.yhyue.com/moapp/jyfs v0.0.0-20231024061508-480c270480d4/go.mod h1:61hzZ3dZHXL28BNl8BOgZsvM2S5UVY5YFzOkEUPrSu4=
-app.yhyue.com/moapp/jyfs v0.0.0-20240620115525-f330ca6510bd h1:vOltixTxJDEZson0cLv/39Y/Z7jJerXOPWmHxgO7F+Y=
-app.yhyue.com/moapp/jyfs v0.0.0-20240620115525-f330ca6510bd/go.mod h1:61hzZ3dZHXL28BNl8BOgZsvM2S5UVY5YFzOkEUPrSu4=
+app.yhyue.com/moapp/jyfs v0.0.0-20240620123357-8ef49ab459b5 h1:cZ+dZVygrZdHQgzuzAKtD+JkND/QRhXg2vEhXmUJz3c=
+app.yhyue.com/moapp/jyfs v0.0.0-20240620123357-8ef49ab459b5/go.mod h1:61hzZ3dZHXL28BNl8BOgZsvM2S5UVY5YFzOkEUPrSu4=
 app.yhyue.com/moapp/jypkg v1.21.4 h1:NApb2EOlUkncX9yjMjKDFyOXKK66vOMJ3HprzMx8alc=
 app.yhyue.com/moapp/jypkg v1.21.4 h1:NApb2EOlUkncX9yjMjKDFyOXKK66vOMJ3HprzMx8alc=
 app.yhyue.com/moapp/jypkg v1.21.4/go.mod h1:wyJeNc8I9R5799tqch7n8SEZrB0s8nmNou0brBh91w4=
 app.yhyue.com/moapp/jypkg v1.21.4/go.mod h1:wyJeNc8I9R5799tqch7n8SEZrB0s8nmNou0brBh91w4=
 app.yhyue.com/moapp/message v0.0.0-20231204024949-8c7145bfc161 h1:WGi4OEIoqw6NpNFGioUEBZnjK9aBa+xJqf/5WY+QyhM=
 app.yhyue.com/moapp/message v0.0.0-20231204024949-8c7145bfc161 h1:WGi4OEIoqw6NpNFGioUEBZnjK9aBa+xJqf/5WY+QyhM=
@@ -1654,6 +1654,8 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
 github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
 github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk=
 github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk=
+github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
+github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
 github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
 github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
 github.com/shirou/gopsutil v2.19.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
 github.com/shirou/gopsutil v2.19.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
 github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
 github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=

+ 1 - 1
rpc/partnerlib/service/docDownload.go

@@ -75,7 +75,7 @@ func DocDownload(in *partnerlib.UserDownloadRequest) (res *partnerlib.UDRes, err
 							"docSize":   fmt.Sprintf("%d", _docInfo.DocFileSize),
 							"docSize":   fmt.Sprintf("%d", _docInfo.DocFileSize),
 						},
 						},
 						RawFileContent: b,
 						RawFileContent: b,
-						Source:         "docin",
+						Charset:        util.GetFileChardet(b),
 					})
 					})
 					if fileRes.OssDocId == "" {
 					if fileRes.OssDocId == "" {
 						err = fmt.Errorf("文档上传失败")
 						err = fmt.Errorf("文档上传失败")

+ 14 - 1
rpc/partnerlib/test/fileUpload_test.go

@@ -7,6 +7,7 @@ import (
 	"compress/gzip"
 	"compress/gzip"
 	"fmt"
 	"fmt"
 	"github.com/gogf/gf/v2/os/gctx"
 	"github.com/gogf/gf/v2/os/gctx"
+	"github.com/saintfish/chardet"
 	"github.com/zeromicro/go-zero/core/discov"
 	"github.com/zeromicro/go-zero/core/discov"
 	"github.com/zeromicro/go-zero/zrpc"
 	"github.com/zeromicro/go-zero/zrpc"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc"
@@ -37,6 +38,18 @@ func Test_FileUpload(t *testing.T) {
 	if err != nil {
 	if err != nil {
 		return
 		return
 	}
 	}
+	// 创建一个 chardet 检测器
+	detector := chardet.NewTextDetector()
+
+	// 检测文件编码
+	result, err := detector.DetectBest(data)
+	if err != nil {
+		fmt.Println("Failed to detect file encoding:", err)
+		return
+	}
+
+	// 输出编码结果
+	fmt.Printf("File encoding: %s (confidence: %d)", result.Charset, result.Confidence*100)
 	log.Println(count)
 	log.Println(count)
 	suffix := "txt"
 	suffix := "txt"
 	//获取附件后上传oss
 	//获取附件后上传oss
@@ -49,7 +62,7 @@ func Test_FileUpload(t *testing.T) {
 			"docSize":   "1024",
 			"docSize":   "1024",
 		},
 		},
 		RawFileContent: CompressWithGzip(data),
 		RawFileContent: CompressWithGzip(data),
-		Source:         "docin",
+		Charset:        result.Charset,
 	}
 	}
 	if fr != nil && len(fr.RawFileContent) > 0 {
 	if fr != nil && len(fr.RawFileContent) > 0 {
 		conf := zrpc.RpcClientConf{
 		conf := zrpc.RpcClientConf{

+ 14 - 0
rpc/partnerlib/util/util.go

@@ -5,6 +5,8 @@ import (
 	"app.yhyue.com/moapp/jypkg/common/src/qfw/util/jy"
 	"app.yhyue.com/moapp/jypkg/common/src/qfw/util/jy"
 	"crypto/md5"
 	"crypto/md5"
 	"encoding/hex"
 	"encoding/hex"
+	"fmt"
+	"github.com/saintfish/chardet"
 )
 )
 
 
 // sha1 加密
 // sha1 加密
@@ -23,3 +25,15 @@ func GetHashKey(bs []byte) string {
 func GetOrderCode() string {
 func GetOrderCode() string {
 	return <-jy.VarOrderCode.Pool
 	return <-jy.VarOrderCode.Pool
 }
 }
+
+func GetFileChardet(data []byte) string {
+	// GetFileChardet guesses the charset of data: start by creating a chardet text detector.
+	detector := chardet.NewTextDetector()
+	// Ask the detector for its single best match; on error, print it and return "" instead of a charset name.
+	result, err := detector.DetectBest(data)
+	if err != nil {
+		fmt.Println("Failed to detect file encoding:", err)
+		return ""
+	}
+	return result.Charset
+}