Ver código fonte

'Q' encode non-US-ASCII header values (RFC 2047)

* Read field value, and determine if an encoding is needed
* "Q" encode UTF-8 values
* Add test function
Inigo Lopez de Heredia 10 anos atrás
pai
commit
99bc1ec867
2 arquivos alterados com 83 adições e 1 exclusões
  1. 61 1
      email.go
  2. 22 0
      email_test.go

+ 61 - 1
email.go

@@ -326,8 +326,68 @@ func headerToBytes(buff *bytes.Buffer, header textproto.MIMEHeader) {
 			// bytes.Buffer.Write() never returns an error.
 			io.WriteString(buff, field)
 			io.WriteString(buff, ": ")
-			io.WriteString(buff, subval)
+			buff.Write(encodeHeader(field, subval))
 			io.WriteString(buff, "\r\n")
 		}
 	}
 }
+
+// encodeHeader returns the bytes to emit as the value of the given header field.
+// Content-Type and Content-Disposition values are passed through untouched, and so is
+// any value made up solely of bytes in the range ' ' through '~'. Every other value is
+// handed to encodeText and returned in its encoded form.
+func encodeHeader(field string, value string) []byte {
+	switch field {
+	case "Content-Type", "Content-Disposition":
+		return []byte(value)
+	}
+	for _, c := range []byte(value) {
+		if c < ' ' || c > '~' {
+			// First byte outside the plain range: the whole value gets encoded.
+			var encoded bytes.Buffer
+			encodeText(&encoded, value, true)
+			return encoded.Bytes()
+		}
+	}
+	return []byte(value)
+}
+
+// isQLiteral reports whether c may be written as itself inside a "Q" encoded-word.
+// RFC 2047 section 4.2 only allows printable ASCII other than '=', '?' and '_' to
+// stand for itself; SPACE has its own '_' shorthand and is handled by the caller.
+// The package-level isPrintable table is still consulted so that no byte the table
+// rejects is ever emitted literally.
+func isQLiteral(c byte) bool {
+	if c <= ' ' || c > '~' {
+		return false
+	}
+	if c == '=' || c == '?' || c == '_' {
+		return false
+	}
+	return isPrintable[c]
+}
+
+// encodeText performs a UTF-8 "Q" encoding on the given string, according to RFC 2047,
+// and writes the resulting encoded-word(s) to buff.
+//
+// If the encoded form of s would be longer than MaxLineLength, s is split in half on a
+// rune boundary (so multi-byte characters stay together) and each half is encoded
+// recursively. Every encoded-word except the very first one is preceded by "\r\n "
+// so that the header is folded onto a continuation line; first tells the function
+// whether it is producing that very first encoded-word.
+func encodeText(buff *bytes.Buffer, s string, first bool) {
+	// Length of the single encoded-word s would produce: the "=?UTF-8?Q?" prefix and
+	// "?=" suffix (12 bytes), one byte per literal or space, three per "=XX" escape.
+	encodedLen := len("=?UTF-8?Q?") + len("?=")
+	for i := 0; i < len(s); i++ {
+		if s[i] == ' ' || isQLiteral(s[i]) {
+			encodedLen++
+		} else {
+			encodedLen += 3 // 1:3 conversion rate for Q encoding.
+		}
+	}
+
+	if encodedLen > MaxLineLength {
+		// Split the text (keeping multi-byte characters together), and recurse.
+		// A single rune cannot be split any further; it falls through and is emitted
+		// as is, which guarantees that the recursion terminates.
+		if r := []rune(s); len(r) > 1 {
+			half := len(r) / 2
+			encodeText(buff, string(r[:half]), first)
+			encodeText(buff, string(r[half:]), false)
+			return
+		}
+	}
+
+	if !first {
+		buff.WriteString("\r\n ")
+	}
+	buff.WriteString("=?UTF-8?Q?")
+	for i := 0; i < len(s); i++ {
+		switch c := s[i]; {
+		case c == ' ':
+			// RFC 2047 shorthand: '_' always represents a space.
+			buff.WriteByte('_')
+		case isQLiteral(c):
+			buff.WriteByte(c)
+		default:
+			// Everything else, including '=', '?', '_' and control bytes, is escaped.
+			fmt.Fprintf(buff, "=%02X", c)
+		}
+	}
+	buff.WriteString("?=")
+}

+ 22 - 0
email_test.go

@@ -184,3 +184,25 @@ func Benchmark_base64Wrap(b *testing.B) {
 		base64Wrap(ioutil.Discard, file)
 	}
 }
+
+// Test_encodeHeader checks that encodeHeader leaves a plain ASCII subject untouched
+// and turns a long UTF-8 subject into folded "Q" encoded-words.
+func Test_encodeHeader(t *testing.T) {
+	cases := []struct {
+		subject  string
+		expected []byte
+	}{
+		{
+			// Plain ASCII (unchanged).
+			subject:  "Plain ASCII email subject, !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+			expected: []byte("Plain ASCII email subject, !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"),
+		},
+		{
+			// UTF-8 ('q' encoded).
+			subject: "UTF-8 email subject. It can contain é, ñ, or £. Long subject headers will be split in multiple lines!",
+			expected: []byte("=?UTF-8?Q?UTF-8_email_subject._It_c?=\r\n" +
+				" =?UTF-8?Q?an_contain_=C3=A9,_=C3=B1,_or_=C2=A3._Lo?=\r\n" +
+				" =?UTF-8?Q?ng_subject_headers_will_be_split_in_multiple_lines!?="),
+		},
+	}
+
+	for _, tc := range cases {
+		got := encodeHeader("Subject", tc.subject)
+		if !bytes.Equal(got, tc.expected) {
+			t.Errorf("encodeHeader generated incorrect results: %#q != %#q", got, tc.expected)
+		}
+	}
+}