Use a more general (and faster) method to sanitize URLs with credentials (#19239)

Use a more general method to sanitize URLs with credentials: it is simple and intuitive, faster, and it removes the credentials from all URLs, not only from pre-registered ones.
1 parent: 84038f3
Commit: c831681
Showing 12 changed files with 103 additions and 194 deletions.
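To make the commit message concrete before the diffs: the new helper scans arbitrary text for "scheme://user:pass@host" patterns and replaces the userinfo with a fixed placeholder. A minimal sketch, assuming the import path code.gitea.io/gitea/modules/util for the util package changed below; the URL and the token are made up for illustration:

```go
package main

import (
	"fmt"

	"code.gitea.io/gitea/modules/util" // assumed import path for the package shown in the diff below
)

func main() {
	// Credentials embedded anywhere in free-form text (e.g. a log line) are masked.
	msg := "clone failed: https://user:token@example.com/org/repo.git"
	fmt.Println(util.SanitizeCredentialURLs(msg))
	// Output: clone failed: https://sanitized-credential@example.com/org/repo.git
}
```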
@@ -5,59 +5,71 @@
 package util

 import (
-	"net/url"
-	"strings"
-)
+	"bytes"
+	"unicode"

-const (
-	userPlaceholder = "sanitized-credential"
-	unparsableURL   = "(unparsable url)"
+	"github.com/yuin/goldmark/util"
 )

 type sanitizedError struct {
-	err      error
-	replacer *strings.Replacer
+	err error
 }

 func (err sanitizedError) Error() string {
-	return err.replacer.Replace(err.err.Error())
+	return SanitizeCredentialURLs(err.err.Error())
 }

-// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
-func NewSanitizedError(err error, oldnew ...string) error {
-	return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
+func (err sanitizedError) Unwrap() error {
+	return err.err
 }

-// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
-func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
-	return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
+// SanitizeErrorCredentialURLs wraps the error and makes sure the returned error message doesn't contain sensitive credentials in URLs
+func SanitizeErrorCredentialURLs(err error) error {
+	return sanitizedError{err: err}
 }

-// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
-// If the url can't get parsed it gets replaced with a placeholder string.
-func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
-	return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
-}
+const userPlaceholder = "sanitized-credential"

-// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
-	old := u.String()
+var schemeSep = []byte("://")

-	if u.User != nil && usePlaceholder {
-		u.User = url.User(userPlaceholder)
-	} else {
-		u.User = nil
+// SanitizeCredentialURLs removes all credentials in URLs (starting with "scheme://") from the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
+func SanitizeCredentialURLs(s string) string {
+	bs := util.StringToReadOnlyBytes(s)
+	schemeSepPos := bytes.Index(bs, schemeSep)
+	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
+		return s // fast return if there is no URL scheme or no userinfo
 	}
-	return strings.NewReplacer(old, u.String())
-}
-
-// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-// If the url can't get parsed it gets replaced with a placeholder string
-func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
-	u, err := url.Parse(unsanitizedURL)
-	if err != nil {
-		// don't log the error, since it might contain unsanitized URL.
-		return strings.NewReplacer(unsanitizedURL, unparsableURL)
+	out := make([]byte, 0, len(bs)+len(userPlaceholder))
+	for schemeSepPos != -1 {
+		schemeSepPos += 3         // skip the "://"
+		sepAtPos := -1            // the possible '@' position: "https://foo@[^here]host"
+		sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
+	sepLoop:
+		for ; sepEndPos < len(bs); sepEndPos++ {
+			c := bs[sepEndPos]
+			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
+				continue
+			}
+			switch c {
+			case '@':
+				sepAtPos = sepEndPos
+			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
+				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
+			default:
+				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
+			}
+		}
+		// if there is '@', and the string is like "s://u@h", then hide the "u" part
+		if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
+			out = append(out, bs[:schemeSepPos]...)
+			out = append(out, userPlaceholder...)
+			out = append(out, bs[sepAtPos:sepEndPos]...)
+		} else {
+			out = append(out, bs[:sepEndPos]...)
+		}
+		bs = bs[sepEndPos:]
+		schemeSepPos = bytes.Index(bs, schemeSep)
 	}
-	return NewURLSanitizer(u, usePlaceholder)
+	out = append(out, bs...)
+	return util.BytesToReadOnlyString(out)
 }
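For errors, the new SanitizeErrorCredentialURLs keeps the original error reachable through Unwrap while sanitizing only the rendered message. A small usage sketch, under the same import-path assumption as above; the error text and credentials are illustrative:

```go
package main

import (
	"errors"
	"fmt"

	"code.gitea.io/gitea/modules/util" // assumed import path for the package in the diff above
)

func main() {
	base := errors.New("fetch failed for https://ci:hunter2@example.com/org/repo.git")

	wrapped := util.SanitizeErrorCredentialURLs(base)
	fmt.Println(wrapped)
	// fetch failed for https://sanitized-credential@example.com/org/repo.git

	// The wrapper implements Unwrap, so the original error is still matchable.
	fmt.Println(errors.Is(wrapped, base)) // true
}
```

Compared with the old replacer-based helpers, nothing has to be URL-parsed up front and no per-URL replacer has to be carried around: the whole message is scanned once when Error() is called.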
@@ -11,154 +11,65 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-func TestNewSanitizedError(t *testing.T) {
-	err := errors.New("error while secret on test")
-	err2 := NewSanitizedError(err)
-	assert.Equal(t, err.Error(), err2.Error())
-
-	cases := []struct {
-		input    error
-		oldnew   []string
-		expected string
-	}{
-		// case 0
-		{
-			errors.New("error while secret on test"),
-			[]string{"secret", "replaced"},
-			"error while replaced on test",
-		},
-		// case 1
-		{
-			errors.New("error while sec-ret on test"),
-			[]string{"secret", "replaced"},
-			"error while sec-ret on test",
-		},
-	}
-
-	for n, c := range cases {
-		err := NewSanitizedError(c.input, c.oldnew...)
-
-		assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n)
-	}
+func TestSanitizeErrorCredentialURLs(t *testing.T) {
+	err := errors.New("error with https://a@b.com")
+	se := SanitizeErrorCredentialURLs(err)
+	assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
 }

-func TestNewStringURLSanitizer(t *testing.T) {
+func TestSanitizeCredentialURLs(t *testing.T) {
 	cases := []struct {
-		input       string
-		placeholder bool
-		expected    string
+		input    string
+		expected string
 	}{
-		// case 0
-		{
-			"https://github.com/go-gitea/test_repo.git",
-			true,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 1
-		{
-			"https://github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 2
-		{
-			"https://user@github.com/go-gitea/test_repo.git",
-			true,
-			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
-		},
-		// case 3
-		{
-			"https://user@github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 4
-		{
-			"https://user:password@github.com/go-gitea/test_repo.git",
-			true,
-			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
-		},
-		// case 5
-		{
-			"https://user:password@github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 6
-		{
-			"https://gi\nthub.com/go-gitea/test_repo.git",
-			false,
-			unparsableURL,
-		},
-	}
-
-	for n, c := range cases {
-		// uses NewURLSanitizer internally
-		result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input)
-
-		assert.Equal(t, c.expected, result, "case %d: error should match", n)
-	}
-}
-
-func TestNewStringURLSanitizedError(t *testing.T) {
-	cases := []struct {
-		input       string
-		placeholder bool
-		expected    string
-	}{
-		// case 0
-		{
-			"https://github.com/go-gitea/test_repo.git",
-			true,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 1
-		{
-			"https://github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 2
-		{
-			"https://user@github.com/go-gitea/test_repo.git",
-			true,
-			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
-		},
-		// case 3
-		{
-			"https://user@github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 4
-		{
-			"https://user:password@github.com/go-gitea/test_repo.git",
-			true,
-			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
-		},
-		// case 5
-		{
-			"https://user:password@github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 6
-		{
-			"https://gi\nthub.com/go-gitea/test_repo.git",
-			false,
-			unparsableURL,
-		},
-	}
-
-	encloseText := func(input string) string {
-		return "test " + input + " test"
-	}
-
-	for n, c := range cases {
-		err := errors.New(encloseText(c.input))
-
-		result := NewStringURLSanitizedError(err, c.input, c.placeholder)
-
-		assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n)
+		{
+			"ftp://x@",
+			"ftp://" + userPlaceholder + "@",
+		},
+		{
+			"ftp://x/@",
+			"ftp://x/@",
+		},
+		{
+			"ftp://u@x/@", // test multiple @ chars
+			"ftp://" + userPlaceholder + "@x/@",
+		},
+		{
+			"😊ftp://u@x😊", // test unicode
+			"😊ftp://" + userPlaceholder + "@x😊",
+		},
+		{
+			"://@",
+			"://@",
+		},
+		{
+			"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
+			"//u:p@h",
+		},
+		{
+			"s://u@h", // the minimal pattern to be sanitized
+			"s://" + userPlaceholder + "@h",
+		},
+		{
+			"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
+			"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
+		},
+	}
+
+	for n, c := range cases {
+		result := SanitizeCredentialURLs(c.input)
+		assert.Equal(t, c.expected, result, "case %d: error should match", n)
 	}
 }
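Since the sanitizer keys purely on the scheme://…@ shape, it also rewrites userinfo that is not actually secret (for example the literal git user in ssh:// URLs), and it leaves credential-looking text without a scheme alone, as the //u:p@h case above shows. A hypothetical extra test in the same table-driven style, not part of this commit, that documents the ssh case:

```go
package util

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestSanitizeCredentialURLsExtra is a hypothetical addition (not in this commit) illustrating
// that non-secret userinfo is also masked and that scheme-less text is left untouched.
func TestSanitizeCredentialURLsExtra(t *testing.T) {
	assert.Equal(t,
		"ssh://"+userPlaceholder+"@example.com:2222/org/repo.git",
		SanitizeCredentialURLs("ssh://git@example.com:2222/org/repo.git"))
	assert.Equal(t,
		"no scheme here, so no change: user:pass@example.com",
		SanitizeCredentialURLs("no scheme here, so no change: user:pass@example.com"))
}
```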