Added files.

This commit is contained in:
2025-12-08 06:42:29 +02:00
commit a65a31fdac
109 changed files with 16539 additions and 0 deletions

118
internal/utf7/decoder.go Normal file
View File

@@ -0,0 +1,118 @@
package utf7
import (
"errors"
"strings"
"unicode/utf16"
"unicode/utf8"
)
// ErrInvalidUTF7 means that a decoder encountered invalid UTF-7.
//
// Decode returns it for illegal bytes outside a Base64 segment, unterminated
// or malformed Base64 segments, and adjacent Base64 segments. Input that is
// not valid UTF-8 is reported by Decode with a separate error value.
var ErrInvalidUTF7 = errors.New("utf7: invalid UTF-7")
// Decode decodes a string encoded with modified UTF-7.
//
// Note, raw UTF-8 is accepted.
func Decode(src string) (string, error) {
if !utf8.ValidString(src) {
return "", errors.New("invalid UTF-8")
}
var sb strings.Builder
sb.Grow(len(src))
ascii := true
for i := 0; i < len(src); i++ {
ch := src[i]
if ch < min || (ch > max && ch < utf8.RuneSelf) {
// Illegal code point in ASCII mode. Note, UTF-8 codepoints are
// always allowed.
return "", ErrInvalidUTF7
}
if ch != '&' {
sb.WriteByte(ch)
ascii = true
continue
}
// Find the end of the Base64 or "&-" segment
start := i + 1
for i++; i < len(src) && src[i] != '-'; i++ {
if src[i] == '\r' || src[i] == '\n' { // base64 package ignores CR and LF
return "", ErrInvalidUTF7
}
}
if i == len(src) { // Implicit shift ("&...")
return "", ErrInvalidUTF7
}
if i == start { // Escape sequence "&-"
sb.WriteByte('&')
ascii = true
} else { // Control or non-ASCII code points in base64
if !ascii { // Null shift ("&...-&...-")
return "", ErrInvalidUTF7
}
b := decode([]byte(src[start:i]))
if len(b) == 0 { // Bad encoding
return "", ErrInvalidUTF7
}
sb.Write(b)
ascii = false
}
}
return sb.String(), nil
}
// decode extracts UTF-16-BE bytes from base64 data and converts them to UTF-8.
//
// b64 is the content of one shifted segment without the surrounding '&' and
// '-'. It must not be empty: its last byte is read before any length check.
//
// A nil slice is returned if the encoding is invalid.
func decode(b64 []byte) []byte {
var b []byte
// Allocate a single block of memory large enough to store the Base64 data
// (if padding is required), UTF-16-BE bytes, and decoded UTF-8 bytes.
// Since a 2-byte UTF-16 sequence may expand into a 3-byte UTF-8 sequence,
// double the space allocation for UTF-8.
if n := len(b64); b64[n-1] == '=' {
// Padding must have been stripped by the encoder.
return nil
} else if n&3 == 0 {
// Already a whole number of 4-character groups; decode b64 in place.
b = make([]byte, b64Enc.DecodedLen(n)*3)
} else {
// Round n up to the next multiple of 4 (n&3 is evaluated before the
// subtraction).
n += 4 - n&3
b = make([]byte, n+b64Enc.DecodedLen(n)*3)
// Copy b64 to the front of b and follow it with at most two '='. If three
// bytes are missing, the last one stays zero and the Base64 decoder
// rejects it.
copy(b[copy(b, b64):n], []byte("=="))
// b64 now refers to the padded copy, b to the scratch space after it.
b64, b = b[:n], b[n:]
}
// Decode Base64 into the first 1/3rd of b
n, err := b64Enc.Decode(b, b64)
if err != nil || n&1 == 1 {
// An odd byte count cannot be a whole number of UTF-16 code units.
return nil
}
// Decode UTF-16-BE into the remaining 2/3rds of b
b, s := b[:n], b[n:]
j := 0
for i := 0; i < n; i += 2 {
r := rune(b[i])<<8 | rune(b[i+1])
if utf16.IsSurrogate(r) {
// A surrogate needs a second code unit; running out of input is an error.
if i += 2; i == n {
return nil
}
r2 := rune(b[i])<<8 | rune(b[i+1])
// DecodeRune yields U+FFFD when r and r2 do not form a valid pair.
if r = utf16.DecodeRune(r, r2); r == utf8.RuneError {
return nil
}
} else if min <= r && r <= max {
// Self-representing ASCII must not be Base64 encoded.
return nil
}
j += utf8.EncodeRune(s[j:], r)
}
return s[:j]
}

View File

@@ -0,0 +1,115 @@
package utf7_test
import (
"strings"
"testing"
"github.com/emersion/go-imap/v2/internal/utf7"
)
// decode is the table driving TestDecoder: in is passed to utf7.Decode, out is
// the expected result (empty for every failing case), and ok tells whether
// decoding is expected to succeed.
var decode = []struct {
in string
out string
ok bool
}{
// Basics (the inverse test on encode checks other valid inputs)
{"", "", true},
{"abc", "abc", true},
{"&-abc", "&abc", true},
{"abc&-", "abc&", true},
{"a&-b&-c", "a&b&c", true},
{"&ABk-", "\x19", true},
{"&AB8-", "\x1F", true},
{"ABk-", "ABk-", true},
{"&-,&-&AP8-&-", "&,&\u00FF&", true},
{"&-&-,&AP8-&-", "&&,\u00FF&", true},
{"abc &- &AP8A,wD,- &- xyz", "abc & \u00FF\u00FF\u00FF & xyz", true},
// Illegal code point in ASCII
{"\x00", "", false},
{"\x1F", "", false},
{"abc\n", "", false},
{"abc\x7Fxyz", "", false},
// Invalid UTF-8
{"\xc3\x28", "", false},
{"\xe2\x82\x28", "", false},
// Invalid Base64 alphabet
{"&/+8-", "", false},
{"&*-", "", false},
{"&ZeVnLIqe -", "", false},
// CR and LF in Base64
{"&ZeVnLIqe\r\n-", "", false},
{"&ZeVnLIqe\r\n\r\n-", "", false},
{"&ZeVn\r\n\r\nLIqe-", "", false},
// Padding not stripped
{"&AAAAHw=-", "", false},
{"&AAAAHw==-", "", false},
{"&AAAAHwB,AIA=-", "", false},
{"&AAAAHwB,AIA==-", "", false},
// One byte short
{"&2A-", "", false},
{"&2ADc-", "", false},
{"&AAAAHwB,A-", "", false},
{"&AAAAHwB,A=-", "", false},
{"&AAAAHwB,A==-", "", false},
{"&AAAAHwB,A===-", "", false},
{"&AAAAHwB,AI-", "", false},
{"&AAAAHwB,AI=-", "", false},
{"&AAAAHwB,AI==-", "", false},
// Implicit shift (Base64 segment without a closing '-')
{"&", "", false},
{"&Jjo", "", false},
{"Jjo&", "", false},
{"&Jjo&", "", false},
{"&Jjo!", "", false},
{"&Jjo+", "", false},
{"abc&Jjo", "", false},
// Null shift (a Base64 segment directly followed by another one)
{"&AGE-&Jjo-", "", false},
{"&U,BTFw-&ZeVnLIqe-", "", false},
// Long input with Base64 at the end
{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &2D3eCg- &2D3eCw- &2D3eDg-",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \U0001f60a \U0001f60b \U0001f60e", true},
// Long input in Base64 between short ASCII
{"00000000000000000000 &MEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEIwQjBCMEI- 00000000000000000000",
"00000000000000000000 " + strings.Repeat("\U00003042", 37) + " 00000000000000000000", true},
// ASCII in Base64 (self-representing characters must be written literally)
{"&AGE-", "", false}, // "a"
{"&ACY-", "", false}, // "&"
{"&AGgAZQBsAGwAbw-", "", false}, // "hello"
{"&JjoAIQ-", "", false}, // "\u263a!"
// Bad surrogate
{"&2AA-", "", false}, // U+D800
{"&2AD-", "", false}, // U+D800
{"&3AA-", "", false}, // U+DC00
{"&2AAAQQ-", "", false}, // U+D800 'A'
{"&2AD,,w-", "", false}, // U+D800 U+FFFF
{"&3ADYAA-", "", false}, // U+DC00 U+D800
}
// TestDecoder runs utf7.Decode over every entry of the decode table and checks
// both the returned string and whether an error was reported.
func TestDecoder(t *testing.T) {
	for _, tc := range decode {
		got, err := utf7.Decode(tc.in)
		if got != tc.out {
			t.Errorf("UTF7Decode(%+q) expected %+q; got %+q", tc.in, tc.out, got)
		}

		switch {
		case tc.ok && err != nil:
			t.Errorf("UTF7Decode(%+q) unexpected error; %v", tc.in, err)
		case !tc.ok && err == nil:
			t.Errorf("UTF7Decode(%+q) expected error", tc.in)
		}
	}
}

88
internal/utf7/encoder.go Normal file
View File

@@ -0,0 +1,88 @@
package utf7
import (
"strings"
"unicode/utf16"
"unicode/utf8"
)
// Encode encodes a string with modified UTF-7.
func Encode(src string) string {
var sb strings.Builder
sb.Grow(len(src))
for i := 0; i < len(src); {
ch := src[i]
if min <= ch && ch <= max {
sb.WriteByte(ch)
if ch == '&' {
sb.WriteByte('-')
}
i++
} else {
start := i
// Find the next printable ASCII code point
i++
for i < len(src) && (src[i] < min || src[i] > max) {
i++
}
sb.Write(encode([]byte(src[start:i])))
}
}
return sb.String()
}
// encode converts s from UTF-8 to UTF-16-BE, encodes the result as Base64,
// removes the padding, and wraps it in the UTF-7 shift characters '&' and '-'.
//
// Every byte of s that is not part of a valid UTF-8 sequence is encoded as
// U+FFFD. The returned slice is freshly allocated.
func encode(s []byte) []byte {
	// len(s)+4 is only a capacity hint: one-byte inputs (ASCII control
	// characters, invalid bytes) take two bytes in UTF-16, so append may still
	// have to grow the buffer.
	b := make([]byte, 0, len(s)+4)
	for len(s) > 0 {
		// DecodeRune reports (utf8.RuneError, 1) for any invalid or
		// out-of-range encoding, so r never exceeds utf8.MaxRune here.
		r, size := utf8.DecodeRune(s)
		s = s[size:]
		// EncodeRune returns a pair of U+FFFD unless r needs a surrogate pair.
		if r1, r2 := utf16.EncodeRune(r); r1 != utf8.RuneError {
			b = append(b, byte(r1>>8), byte(r1))
			r = r2
		}
		b = append(b, byte(r>>8), byte(r))
	}

	// Encode as base64, leaving one byte free at each end for the shifts.
	n := b64Enc.EncodedLen(len(b)) + 2
	b64 := make([]byte, n)
	b64Enc.Encode(b64[1:], b)

	// Strip padding: 0, 2 or 1 '=' for len(b)%3 == 0, 1 or 2 respectively.
	n -= 2 - (len(b)+2)%3
	b64 = b64[:n]

	// Add UTF-7 shifts
	b64[0] = '&'
	b64[n-1] = '-'
	return b64
}
// Escape copies src rune by rune and writes "&-" for every ampersand, the
// special UTF-7 marker. Non-ASCII text is left as raw UTF-8 rather than being
// Base64 encoded.
//
// Bytes that do not form valid UTF-8 come out as U+FFFD, because the string is
// traversed as runes.
func Escape(src string) string {
	var out strings.Builder
	out.Grow(len(src))

	for _, r := range src {
		if r == '&' {
			out.WriteString("&-")
			continue
		}
		out.WriteRune(r)
	}

	return out.String()
}

View File

@@ -0,0 +1,124 @@
package utf7_test
import (
"testing"
"github.com/emersion/go-imap/v2/internal/utf7"
)
// encode is the table driving TestEncoder: in is passed to utf7.Encode and out
// is the expected result. The ok field is not read by TestEncoder; it is false
// only for the inputs that are not valid UTF-8.
var encode = []struct {
in string
out string
ok bool
}{
// Printable ASCII
{"", "", true},
{"a", "a", true},
{"ab", "ab", true},
{"-", "-", true},
{"&", "&-", true},
{"&&", "&-&-", true},
{"&&&-&", "&-&-&--&-", true},
{"-&*&-", "-&-*&--", true},
{"a&b", "a&-b", true},
{"a&", "a&-", true},
{"&b", "&-b", true},
{"-a&", "-a&-", true},
{"&b-", "&-b-", true},
// Unicode range
{"\u0000", "&AAA-", true},
{"\n", "&AAo-", true},
{"\r", "&AA0-", true},
{"\u001F", "&AB8-", true},
{"\u0020", " ", true},
{"\u0025", "%", true},
{"\u0026", "&-", true},
{"\u0027", "'", true},
{"\u007E", "~", true},
{"\u007F", "&AH8-", true},
{"\u0080", "&AIA-", true},
{"\u00FF", "&AP8-", true},
{"\u07FF", "&B,8-", true},
{"\u0800", "&CAA-", true},
{"\uFFEF", "&,+8-", true},
{"\uFFFF", "&,,8-", true},
{"\U00010000", "&2ADcAA-", true},
{"\U0010FFFF", "&2,,f,w-", true},
// Padding (trailing number: '=' characters stripped from the Base64 output)
{"\x00\x1F", "&AAAAHw-", true}, // 2
{"\x00\x1F\x7F", "&AAAAHwB,-", true}, // 0
{"\x00\x1F\x7F\u0080", "&AAAAHwB,AIA-", true}, // 1
{"\x00\x1F\x7F\u0080\u00FF", "&AAAAHwB,AIAA,w-", true}, // 2
// Mix
{"a\x00", "a&AAA-", true},
{"\x00a", "&AAA-a", true},
{"&\x00", "&-&AAA-", true},
{"\x00&", "&AAA-&-", true},
{"a\x00&", "a&AAA-&-", true},
{"a&\x00", "a&-&AAA-", true},
{"&a\x00", "&-a&AAA-", true},
{"&\x00a", "&-&AAA-a", true},
{"\x00&a", "&AAA-&-a", true},
{"\x00a&", "&AAA-a&-", true},
{"ab&\uFFFF", "ab&-&,,8-", true},
{"a&b\uFFFF", "a&-b&,,8-", true},
{"&ab\uFFFF", "&-ab&,,8-", true},
{"ab\uFFFF&", "ab&,,8-&-", true},
{"a\uFFFFb&", "a&,,8-b&-", true},
{"\uFFFFab&", "&,,8-ab&-", true},
{"\x20\x25&\x27\x7E", " %&-'~", true},
{"\x1F\x20&\x7E\x7F", "&AB8- &-~&AH8-", true},
{"&\x00\x19\x7F\u0080", "&-&AAAAGQB,AIA-", true},
{"\x00&\x19\x7F\u0080", "&AAA-&-&ABkAfwCA-", true},
{"\x00\x19&\x7F\u0080", "&AAAAGQ-&-&AH8AgA-", true},
{"\x00\x19\x7F&\u0080", "&AAAAGQB,-&-&AIA-", true},
{"\x00\x19\x7F\u0080&", "&AAAAGQB,AIA-&-", true},
{"&\x00\x1F\x7F\u0080", "&-&AAAAHwB,AIA-", true},
{"\x00&\x1F\x7F\u0080", "&AAA-&-&AB8AfwCA-", true},
{"\x00\x1F&\x7F\u0080", "&AAAAHw-&-&AH8AgA-", true},
{"\x00\x1F\x7F&\u0080", "&AAAAHwB,-&-&AIA-", true},
{"\x00\x1F\x7F\u0080&", "&AAAAHwB,AIA-&-", true},
// Russian
{"\u041C\u0430\u043A\u0441\u0438\u043C \u0425\u0438\u0442\u0440\u043E\u0432",
"&BBwEMAQ6BEEEOAQ8- &BCUEOARCBEAEPgQy-", true},
// RFC 3501
// NOTE(review): the next two entries are identical; one looks redundant.
{"~peter/mail/\u53F0\u5317/\u65E5\u672C\u8A9E", "~peter/mail/&U,BTFw-/&ZeVnLIqe-", true},
{"~peter/mail/\u53F0\u5317/\u65E5\u672C\u8A9E", "~peter/mail/&U,BTFw-/&ZeVnLIqe-", true},
{"\u263A!", "&Jjo-!", true},
{"\u53F0\u5317\u65E5\u672C\u8A9E", "&U,BTF2XlZyyKng-", true},
// RFC 2152 (modified)
{"\u0041\u2262\u0391\u002E", "A&ImIDkQ-.", true},
{"Hi Mom -\u263A-!", "Hi Mom -&Jjo--!", true},
{"\u65E5\u672C\u8A9E", "&ZeVnLIqe-", true},
// 8->16 and 24->16 byte UTF-8 to UTF-16 conversion
{"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007", "&AAAAAQACAAMABAAFAAYABw-", true},
{"\u0800\u0801\u0802\u0803\u0804\u0805\u0806\u0807", "&CAAIAQgCCAMIBAgFCAYIBw-", true},
// Invalid UTF-8 (bad bytes are converted to U+FFFD)
{"\xC0\x80", "&,,3,,Q-", false}, // U+0000
{"\xF4\x90\x80\x80", "&,,3,,f,9,,0-", false}, // U+110000
{"\xF7\xBF\xBF\xBF", "&,,3,,f,9,,0-", false}, // U+1FFFFF
{"\xF8\x88\x80\x80\x80", "&,,3,,f,9,,3,,Q-", false}, // U+200000
{"\xF4\x8F\xBF\x3F", "&,,3,,f,9-?", false}, // U+10FFFF (bad byte)
{"\xF4\x8F\xBF", "&,,3,,f,9-", false}, // U+10FFFF (short)
{"\xF4\x8F", "&,,3,,Q-", false},
{"\xF4", "&,,0-", false},
{"\x00\xF4\x00", "&AAD,,QAA-", false},
}
// TestEncoder runs utf7.Encode over every entry of the encode table and
// compares the result with the expected output.
func TestEncoder(t *testing.T) {
	for _, tc := range encode {
		if got := utf7.Encode(tc.in); got != tc.out {
			t.Errorf("UTF7Encode(%+q) expected %+q; got %+q", tc.in, tc.out, got)
		}
	}
}

13
internal/utf7/utf7.go Normal file
View File

@@ -0,0 +1,13 @@
// Package utf7 implements the modified UTF-7 encoding defined in RFC 3501 section 5.1.3.
package utf7
import (
"encoding/base64"
)
// Inclusive bounds of the self-representing range: values in [min, max] are
// written directly instead of being placed in a Base64 segment.
//
// NOTE(review): these identifiers shadow the min and max builtins (Go 1.21+)
// throughout the package — confirm that is intended before using the builtins
// here.
const (
min = 0x20 // Minimum self-representing UTF-7 value
max = 0x7E // Maximum self-representing UTF-7 value
)
// b64Enc is the Base64 encoding used for shifted segments. Its alphabet is the
// standard one except that the last character is ',' instead of '/'. The
// encoding is created with base64.NewEncoding's default padding, so the
// encoder strips '=' after encoding and the decoder re-adds it before decoding.
var b64Enc = base64.NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,")