Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge changes from on2itsecurity #5

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
module github.com/patrick-othmer/parsemail
module github.com/on2itsecurity/parsemail

go 1.12
go 1.21

require golang.org/x/net v0.0.0-20200927032502-5d4f70055728
require (
golang.org/x/net v0.15.0
golang.org/x/text v0.13.0
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200927032502-5d4f70055728 h1:5wtQIAulKU5AbLQOkjxl32UufnIOqgBX72pS0AV14H0=
golang.org/x/net v0.0.0-20200927032502-5d4f70055728/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
158 changes: 132 additions & 26 deletions parsemail.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"time"

cs "golang.org/x/net/html/charset"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/transform"
)

const contentTypeMultipartMixed = "multipart/mixed"
Expand Down Expand Up @@ -274,10 +276,7 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str
return textBody, htmlBody, attachments, embeddedFiles, err
}
attachments = append(attachments, at)
if strings.Contains(contentType, "application") ||
isAttachmentByContentDisposition(part) {
continue
}
continue
}

encoding := part.Header.Get("Content-Transfer-Encoding")
Expand All @@ -288,10 +287,15 @@ func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody str
return textBody, htmlBody, attachments, embeddedFiles, err
}
} else if contentType == contentTypeMultipartMixed {
textBody, htmlBody, attachments, embeddedFiles, err = parseMultipartMixed(part, params["boundary"])
tb, hb, at, ef, err := parseMultipartMixed(part, params["boundary"])
if err != nil {
return textBody, htmlBody, attachments, embeddedFiles, err
}

htmlBody += hb
textBody += tb
embeddedFiles = append(embeddedFiles, ef...)
attachments = append(attachments, at...)
} else if contentType == contentTypeMultipartRelated {
textBody, htmlBody, attachments, embeddedFiles, err = parseMultipartRelated(part, params["boundary"])
if err != nil {
Expand Down Expand Up @@ -331,8 +335,9 @@ func decodeMimeSentence(s string) string {
ss := strings.Split(s, " ")

for _, word := range ss {
dec := new(mime.WordDecoder)
w, err := dec.Decode(word)
word = removeUnsupportedEncoding(word)

w, err := mimeWordDecoder.Decode(word)
if err != nil {
if len(result) == 0 {
w = word
Expand All @@ -347,6 +352,100 @@ func decodeMimeSentence(s string) string {
return strings.Join(result, "")
}

// removeUnsupportedEncodingForAddress sanitizes a single address header value
// before it is handed to the address parser.
//
// The value is split on spaces and every word shaped like a MIME encoded-word
// ("=?charset?encoding?text?=") is checked. A word is replaced with a quoted
// placeholder when its charset is empty, when its encoding marker is not
// exactly one character long, or when ianaindex.MIME has no encoder for the
// charset. The placeholder is wrapped in double quotes, unlike the one
// produced by removeUnsupportedEncoding. All other words are kept verbatim
// and the words are re-joined with single spaces.
func removeUnsupportedEncodingForAddress(s string) string {
	if s == "" {
		return s
	}

	words := strings.Split(s, " ")
	result := make([]string, 0, len(words))

	for _, word := range words {
		// The length guard is required: "=?=" satisfies both affix checks with
		// overlapping characters, and slicing it as word[2:len(word)-2] would
		// panic with an out-of-range slice expression.
		if len(word) < 4 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") {
			result = append(result, word)

			continue
		}

		validWord := word
		inner := word[2 : len(word)-2]

		// split inner "UTF-8?q?text" into "UTF-8", "q", and "text"
		charset, rest, _ := strings.Cut(inner, "?")
		if charset == "" {
			validWord = `"(removed text: non supported charset)"`
		}

		encoding, _, _ := strings.Cut(rest, "?")
		if len(encoding) != 1 {
			validWord = `"(removed text: non supported encoding)"`
		}

		if charset != "" {
			// The lookup error is intentionally dropped: a nil encoder is the
			// only signal used here to decide that the charset is unusable.
			if enc, _ := ianaindex.MIME.Encoding(charset); enc == nil {
				validWord = `"(removed text: non supported encoder)"`
			}
		}

		result = append(result, validWord)
	}

	return strings.Join(result, " ")
}

// removeUnsupportedEncodingForAddressList applies
// removeUnsupportedEncodingForAddress to every comma-separated entry of an
// address-list header value and joins the sanitized entries back together
// with commas. An empty input is returned unchanged.
func removeUnsupportedEncodingForAddressList(s string) string {
	if len(s) == 0 {
		return s
	}

	entries := strings.Split(s, ",")
	for i := range entries {
		entries[i] = removeUnsupportedEncodingForAddress(entries[i])
	}

	return strings.Join(entries, ",")
}

// removeUnsupportedEncoding checks a single word (decodeMimeSentence passes
// the space-separated words of a header value) and returns either the word
// itself or a placeholder text.
//
// Words that are not shaped like a MIME encoded-word
// ("=?charset?encoding?text?=") are returned unchanged. For encoded-words a
// placeholder is returned when the charset is empty, when the encoding marker
// is not exactly one character long, or when ianaindex.MIME has no encoder
// for the charset. Otherwise the original word is returned so the caller can
// decode it.
func removeUnsupportedEncoding(s string) string {
	// The length guard is required: "=?=" satisfies both affix checks with
	// overlapping characters, and slicing it as s[2:len(s)-2] would panic with
	// an out-of-range slice expression. It also covers the empty string.
	if len(s) < 4 || !strings.HasPrefix(s, "=?") || !strings.HasSuffix(s, "?=") {
		return s
	}

	inner := s[2 : len(s)-2]

	// split inner "UTF-8?q?text" into "UTF-8", "q", and "text"
	charset, rest, _ := strings.Cut(inner, "?")
	if charset == "" {
		return "(removed text: non supported charset)"
	}

	encoding, _, _ := strings.Cut(rest, "?")
	if len(encoding) != 1 {
		return "(removed text: non supported encoding)"
	}

	// The lookup error is intentionally dropped: a nil encoder is the only
	// signal used here to decide that the charset is unusable.
	if enc, _ := ianaindex.MIME.Encoding(charset); enc == nil {
		return "(removed text: non supported encoder)"
	}

	return s
}

func decodeHeaderMime(header mail.Header) (mail.Header, error) {
parsedHeader := map[string][]string{}

Expand All @@ -364,7 +463,7 @@ func decodeHeaderMime(header mail.Header) (mail.Header, error) {
}

// isEmbeddedFile reports whether a MIME part should be treated as an embedded
// file: either it declares a Content-Transfer-Encoding, or its
// Content-Disposition header starts with "inline; filename=".
//
// The prefix test uses strings.HasPrefix so that a missing or short
// Content-Disposition header yields false instead of panicking on an
// out-of-range slice expression.
func isEmbeddedFile(part *multipart.Part) bool {
	if part.Header.Get("Content-Transfer-Encoding") != "" {
		return true
	}

	return strings.HasPrefix(part.Header.Get("Content-Disposition"), "inline; filename=")
}

func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
Expand Down Expand Up @@ -399,21 +498,6 @@ func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {

// Everything that is not html or plain is treated as an attachment.
func isAttachment(part *multipart.Part) bool {
contentType := part.Header.Get("Content-Type")
if strings.Contains(contentType, ";") {
contentType = strings.SplitN(contentType, ";", 2)[0]
}

if contentType != "text/html" &&
contentType != "text/plain" &&
isAttachmentByContentDisposition(part) {
return true
}

return false
}

func isAttachmentByContentDisposition(part *multipart.Part) bool {
if part.Header.Get("Content-Disposition") != "" {
contentDisposition, _, err := mime.ParseMediaType(part.Header.Get("Content-Disposition"))
if err != nil {
Expand Down Expand Up @@ -462,7 +546,9 @@ func readAllDecode(content io.Reader, encoding, contentType string) ([]byte, err
}

cr, err := cs.NewReader(r, contentType)
if err != nil {
if err == io.EOF {
return []byte{}, nil
} else if err != nil {
return nil, err
}

Expand Down Expand Up @@ -506,13 +592,33 @@ type headerParser struct {
err error
}

// mimeWordDecoder decodes MIME encoded-words using the IANA MIME charset
// index. A custom CharsetReader is required because the default decoder only
// understands utf-8, iso-8859-1, and us-ascii.
var mimeWordDecoder = &mime.WordDecoder{
	CharsetReader: func(name string, src io.Reader) (io.Reader, error) {
		enc, err := ianaindex.MIME.Encoding(name)

		switch {
		case err != nil:
			// The charset name could not be resolved by the index.
			return nil, err
		case enc == nil:
			// The lookup succeeded but produced no usable encoding.
			return nil, fmt.Errorf("invalid encoding for charset %s", name)
		}

		decoder := enc.NewDecoder()

		return transform.NewReader(src, decoder), nil
	},
}

// addressParser parses address headers with mimeWordDecoder as its word
// decoder, so encoded display names are decoded by the same decoder that
// decodeMimeSentence uses.
var addressParser = mail.AddressParser{WordDecoder: mimeWordDecoder}

func (hp headerParser) parseAddress(s string) (ma *mail.Address) {
if hp.err != nil {
return nil
}

if strings.Trim(s, " \n") != "" {
ma, hp.err = mail.ParseAddress(s)
ma, hp.err = addressParser.Parse(removeUnsupportedEncodingForAddress(s))

return ma
}
Expand All @@ -526,7 +632,7 @@ func (hp headerParser) parseAddressList(s string) (ma []*mail.Address) {
}

if strings.Trim(s, " \n") != "" {
ma, hp.err = mail.ParseAddressList(s)
ma, hp.err = addressParser.ParseList(removeUnsupportedEncodingForAddressList(s))
return
}

Expand Down
Loading