Browse Source

Rewrite quotePrintEncode, add tests.

Problems included:
 - UTF-8 text was broken.  The input string was iterated over as runes,
   and each rune was formatted as a single hex number, so any multi-byte
   character was encoded incorrectly.  E.g., the rune U+1F378 should be
   "=F0=9F=8D=B8", but came out as "=1F378".
 - Further, bytes less than 16 were also broken, because %X does not
   zero-pad to two hex digits.  E.g.,
     fmt.Sprintf("=%X", '\r') == "=D"   (should be "=0D")
 - Line breaks were not handled correctly; quoted-printable uses CRLF
   linebreaks.

Though the old approach wasn't entirely correct, it could have been
trivially fixed.  Thus, it's worth comparing benchmarks:

Old:
Benchmark_quotedPrintEncode    10000    125393 ns/op    9520 B/op    598 allocs/op

New:
Benchmark_quotedPrintEncode    500000      5325 ns/op      8 B/op      1 allocs/op
Jed Denlea 11 years ago
parent
commit
d8df992de1
2 changed files with 79 additions and 23 deletions
  1. 40 23
      email.go
  2. 39 0
      email_test.go

+ 40 - 23
email.go

@@ -190,45 +190,62 @@ type Attachment struct {
 
 // quotePrintEncode writes the quoted-printable text to the IO Writer (according to RFC 2045)
 func quotePrintEncode(w io.Writer, s string) error {
+	var buf [3]byte
 	mc := 0
-	for _, c := range s {
-		// Handle the soft break for the EOL, if needed
-		if mc == MaxLineLength-1 || (!isPrintable(c) && mc+len(fmt.Sprintf("%s%X", "=", c)) > MaxLineLength-1) {
-			if _, err := fmt.Fprintf(w, "%s", "=\r\n"); err != nil {
-				return err
-			}
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		// We're assuming Unix style text formats as input (LF line break), and
+		// quoted-printable uses CRLF line breaks. (Literal CRs will become
+		// "=0D", but probably shouldn't be there to begin with!)
+		if c == '\n' {
+			io.WriteString(w, "\r\n")
 			mc = 0
+			continue
 		}
-		// append the appropriate character
+
+		var nextOut []byte
 		if isPrintable(c) {
-			// Printable character
-			if _, err := fmt.Fprintf(w, "%s", string(c)); err != nil {
-				return err
-			}
-			// Reset the counter if we wrote a newline
-			if c == '\n' {
-				mc = 0
-			}
-			mc++
-			continue
+			nextOut = append(buf[:0], c)
 		} else {
-			// non-printable.. encode it (TODO)
-			es := fmt.Sprintf("%s%X", "=", c)
-			if _, err := fmt.Fprintf(w, "%s", es); err != nil {
+			nextOut = buf[:]
+			qpEscape(nextOut, c)
+		}
+
+		// Add a soft line break if the next (encoded) byte would push this line
+		// to or past the limit.
+		if mc+len(nextOut) >= MaxLineLength {
+			if _, err := io.WriteString(w, "=\r\n"); err != nil {
 				return err
 			}
-			// todo - increment correctly
-			mc += len(es)
+			mc = 0
+		}
+
+		if _, err := w.Write(nextOut); err != nil {
+			return err
 		}
+		mc += len(nextOut)
+	}
+	// No trailing end-of-line?? Soft line break, then. TODO: is this sane?
+	if mc > 0 {
+		io.WriteString(w, "=\r\n")
 	}
 	return nil
 }
 
 // isPrintable returns true if the rune given is "printable" according to RFC 2045, false otherwise
-func isPrintable(c rune) bool {
+func isPrintable(c byte) bool {
 	return (c >= '!' && c <= '<') || (c >= '>' && c <= '~') || (c == ' ' || c == '\n' || c == '\t')
 }
 
+// qpEscape is a helper function for quotePrintEncode which escapes a
+// non-printable byte. Expects len(dest) == 3.
+func qpEscape(dest []byte, c byte) {
+	const nums = "0123456789ABCDEF"
+	dest[0] = '='
+	dest[1] = nums[(c&0xf0)>>4]
+	dest[2] = nums[(c & 0xf)]
+}
+
 // base64Wrap encodes the attachment content, and wraps it according to RFC 2045 standards (every 76 chars)
 // The output is then written to the specified io.Writer
 func base64Wrap(w io.Writer, b []byte) {

+ 39 - 0
email_test.go

@@ -52,6 +52,45 @@ func Test_base64Wrap(t *testing.T) {
 	}
 }
 
+func Test_quotedPrintEncode(t *testing.T) {
+	var buf bytes.Buffer
+	text := "Dear reader!\n\n" +
+		"This is a test email to try and capture some of the corner cases that exist within\n" +
+		"the quoted-printable encoding.\n" +
+		"There are some wacky parts like =, and this input assumes UNIX line breaks so\r\n" +
+		"it can come out a little weird.  Also, we need to support unicode so here's a fish: 🐟\n"
+	expected := "Dear reader!\r\n\r\n" +
+		"This is a test email to try and capture some of the corner cases that exist=\r\n" +
+		" within\r\n" +
+		"the quoted-printable encoding.\r\n" +
+		"There are some wacky parts like =3D, and this input assumes UNIX line break=\r\n" +
+		"s so=0D\r\n" +
+		"it can come out a little weird.  Also, we need to support unicode so here's=\r\n" +
+		" a fish: =F0=9F=90=9F\r\n"
+
+	if err := quotePrintEncode(&buf, text); err != nil {
+		t.Fatal("quotePrintEncode: ", err)
+	}
+
+	if s := buf.String(); s != expected {
+		t.Errorf("quotedPrintEncode generated incorrect results: %#q != %#q", s, expected)
+	}
+}
+
+func Benchmark_quotedPrintEncode(b *testing.B) {
+	text := "Dear reader!\n\n" +
+		"This is a test email to try and capture some of the corner cases that exist within\n" +
+		"the quoted-printable encoding.\n" +
+		"There are some wacky parts like =, and this input assumes UNIX line breaks so\r\n" +
+		"it can come out a little weird.  Also, we need to support unicode so here's a fish: 🐟\n"
+
+	for i := 0; i <= b.N; i++ {
+		if err := quotePrintEncode(ioutil.Discard, text); err != nil {
+			panic(err)
+		}
+	}
+}
+
 func Benchmark_base64Wrap(b *testing.B) {
 	// Reasonable base case; 128K random bytes
 	file := make([]byte, 128*1024)