Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added support for all charsets that https://github.com/golang/text su… #4

Merged
merged 1 commit into from
May 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module github.com/on2itsecurity/parsemail

go 1.12

require golang.org/x/net v0.0.0-20200927032502-5d4f70055728
require (
golang.org/x/net v0.0.0-20200927032502-5d4f70055728
golang.org/x/text v0.3.0
)
23 changes: 19 additions & 4 deletions parsemail.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"time"

cs "golang.org/x/net/html/charset"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/transform"
)

const contentTypeMultipartMixed = "multipart/mixed"
Expand Down Expand Up @@ -328,8 +330,7 @@ func decodeMimeSentence(s string) string {
ss := strings.Split(s, " ")

for _, word := range ss {
dec := new(mime.WordDecoder)
w, err := dec.Decode(word)
w, err := mimeWordDecoder.Decode(word)
if err != nil {
if len(result) == 0 {
w = word
Expand Down Expand Up @@ -490,13 +491,27 @@ type headerParser struct {
err error
}

// mimeWordDecoder decodes RFC 2047 encoded-words using the charsets known to
// the IANA MIME index of golang.org/x/text. It is needed because the default
// address parser only understands utf-8, iso-8859-1, and us-ascii.
var mimeWordDecoder = &mime.WordDecoder{
	// CharsetReader wraps input in a reader that converts from charset to
	// UTF-8, or returns an error when the charset cannot be handled.
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		enc, err := ianaindex.MIME.Encoding(charset)
		if err != nil {
			return nil, err
		}
		if enc == nil {
			// The index can report a charset that is registered but has no
			// implementation as (nil, nil). Calling enc.NewDecoder() on that
			// nil interface would panic, so hand the label to the html
			// charset package instead: it either decodes the input or
			// returns an error for the unsupported charset.
			return cs.NewReaderLabel(charset, input)
		}
		return transform.NewReader(input, enc.NewDecoder()), nil
	},
}

// addressParser parses address headers with mimeWordDecoder so that encoded
// display names in charsets beyond the defaults are decoded as well.
var addressParser = mail.AddressParser{
	WordDecoder: mimeWordDecoder,
}

func (hp headerParser) parseAddress(s string) (ma *mail.Address) {
if hp.err != nil {
return nil
}

if strings.Trim(s, " \n") != "" {
ma, hp.err = mail.ParseAddress(s)
ma, hp.err = addressParser.Parse(s)

return ma
}
Expand All @@ -510,7 +525,7 @@ func (hp headerParser) parseAddressList(s string) (ma []*mail.Address) {
}

if strings.Trim(s, " \n") != "" {
ma, hp.err = mail.ParseAddressList(s)
ma, hp.err = addressParser.ParseList(s)
return
}

Expand Down
253 changes: 252 additions & 1 deletion parsemail_on2it_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,267 @@
package parsemail

// We add our tests in a separate file to prevent merge problems in case the original mainainer comes back.
// We add our tests in a separate file to prevent merge problems in case the original maintainer comes back.

import (
"encoding/base64"
"io/ioutil"
"net/mail"
"reflect"
"strings"
"testing"
"time"
)

// Test_decodeMimeSentence feeds decodeMimeSentence plain text and encoded-words
// in several charsets and compares the result with the expected UTF-8 string.
func Test_decodeMimeSentence(t *testing.T) {
	cases := []struct {
		label    string
		input    string
		expected string
	}{
		{label: "plain_ascii", input: `foo bar`, expected: `foo bar`},
		{label: "utf_8_bmp", input: `=?utf-8?Q?F=C3=B8=C3=B8_bar?=`, expected: `Føø bar`},
		{label: "utf_8_smp", input: `=?utf-8?Q?Cheers_=F0=9F=8D=BA!?=`, expected: `Cheers 🍺!`},
		{label: "windows-1251", input: `=?windows-1251?Q?John_=C4oe?=`, expected: `John Дoe`},
		{label: "windows-1252", input: `=?windows-1252?Q?John_Do=80?=`, expected: `John Do€`},
		{label: "iso-8859-15", input: `=?iso-8859-15?Q?John_Do=A4?=`, expected: `John Do€`},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			got := decodeMimeSentence(tc.input)
			if got == tc.expected {
				return
			}
			t.Errorf("decodeMimeSentence() = %v, want %v", got, tc.expected)
		})
	}
}

// Test_headerParser_parseAddress runs headerParser.parseAddress over single
// addresses whose display names use several charsets and compares the parsed
// result with the expected *mail.Address.
//
// NOTE(review): every address literal below reads `[email protected]`, which
// looks like an e-mail obfuscation placeholder rather than a real fixture —
// confirm the intended addresses against the repository.
func Test_headerParser_parseAddress(t *testing.T) {
	cases := []struct {
		label    string
		input    string
		expected *mail.Address
	}{
		{
			label:    "plain_ascii",
			input:    `[email protected]`,
			expected: &mail.Address{Address: `[email protected]`},
		},
		{
			label:    "utf_8_bmp",
			input:    `=?utf-8?Q?John_D=C3=B8e?= <[email protected]>`,
			expected: &mail.Address{Name: `John Døe`, Address: `[email protected]`},
		},
		{
			label:    "utf_8_smp",
			input:    `=?utf-8?Q?John_=F0=9F=8D=BA_Doe?= <[email protected]>`,
			expected: &mail.Address{Name: `John 🍺 Doe`, Address: `[email protected]`},
		},
		{
			label:    "windows-1251",
			input:    `=?windows-1251?Q?John_=C4oe?= <[email protected]>`,
			expected: &mail.Address{Name: `John Дoe`, Address: `[email protected]`},
		},
		{
			label:    "windows-1252",
			input:    `=?windows-1252?Q?John_Do=80?= <[email protected]>`,
			expected: &mail.Address{Name: `John Do€`, Address: `[email protected]`},
		},
		{
			label:    "iso-8859-15",
			input:    `=?iso-8859-15?Q?John_Do=A4?= <[email protected]>`,
			expected: &mail.Address{Name: `John Do€`, Address: `[email protected]`},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			var hp headerParser
			got := hp.parseAddress(tc.input)
			if reflect.DeepEqual(got, tc.expected) {
				return
			}
			t.Errorf("headerParser.parseAddress() = %v, want %v", got, tc.expected)
		})
	}
}

// Test_headerParser_parseAddressList runs headerParser.parseAddressList over
// address lists whose display names use several charsets, including one list
// that mixes charsets, and compares the parsed result with the expected
// slice of *mail.Address.
//
// NOTE(review): every address literal below reads `[email protected]`, which
// looks like an e-mail obfuscation placeholder rather than a real fixture —
// confirm the intended addresses against the repository.
func Test_headerParser_parseAddressList(t *testing.T) {
	cases := []struct {
		label    string
		input    string
		expected []*mail.Address
	}{
		{
			label: "plain_ascii_single",
			input: `[email protected]`,
			expected: []*mail.Address{
				{Address: `[email protected]`},
			},
		},
		{
			label: "utf_8_bmp_single",
			input: `=?utf-8?Q?John_D=C3=B8e?= <[email protected]>`,
			expected: []*mail.Address{
				{Name: `John Døe`, Address: `[email protected]`},
			},
		},
		{
			label: "utf_8_smp_single",
			input: `=?utf-8?Q?John_=F0=9F=8D=BA_Doe?= <[email protected]>`,
			expected: []*mail.Address{
				{Name: `John 🍺 Doe`, Address: `[email protected]`},
			},
		},
		{
			label: "windows-1251",
			input: `=?windows-1251?Q?John_=C4oe?= <[email protected]>`,
			expected: []*mail.Address{
				{Name: `John Дoe`, Address: `[email protected]`},
			},
		},
		{
			label: "windows-1252",
			input: `=?windows-1252?Q?John_Do=80?= <[email protected]>`,
			expected: []*mail.Address{
				{Name: `John Do€`, Address: `[email protected]`},
			},
		},
		{
			label: "iso-8859-15",
			input: `=?iso-8859-15?Q?John_Do=A4?= <[email protected]>`,
			expected: []*mail.Address{
				{Name: `John Do€`, Address: `[email protected]`},
			},
		},
		{
			label: "multiple_charsets",
			input: `[email protected],=?utf-8?Q?John_D=C3=B8e?= <[email protected]>,=?windows-1251?Q?John_=C4oe?= <[email protected]>`,
			expected: []*mail.Address{
				{Address: `[email protected]`},
				{Name: `John Døe`, Address: `[email protected]`},
				{Name: `John Дoe`, Address: `[email protected]`},
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.label, func(t *testing.T) {
			var hp headerParser
			got := hp.parseAddressList(tc.input)
			if reflect.DeepEqual(got, tc.expected) {
				return
			}
			t.Errorf("headerParser.parseAddressList() = %v, want %v", got, tc.expected)
		})
	}
}

func TestParseEmail_on2it(t *testing.T) {
var testData = map[string]struct {
mailData string
Expand Down