-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use a more general (and faster) method to sanitize URLs with credentials #19239
Merged
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
3d3f223
Use a more general (and faster) method to sanitize URLs with credentials
wxiaoguang 26cf692
Update modules/util/sanitize.go
wxiaoguang 7c7116c
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang ded6969
fix edge case "://@"
wxiaoguang dafa3f0
fix typo
wxiaoguang 9580607
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang 80719d7
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang ca05837
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang fe5e461
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang 9f5fa58
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang 26f05b5
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang fc167f3
Update modules/util/sanitize.go
wxiaoguang 70bc63d
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang 59046a5
add a test for string "//u:p@h", which is not treated as a "valid" UR…
wxiaoguang 0252a7b
Merge branch 'refactor-url-sanitizer' of github.com:wxiaoguang/gitea …
wxiaoguang 74ea0d5
Merge branch 'main' into refactor-url-sanitizer
wxiaoguang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,59 +5,71 @@ | |
package util | ||
|
||
import ( | ||
"net/url" | ||
"strings" | ||
) | ||
"bytes" | ||
"unicode" | ||
|
||
const ( | ||
userPlaceholder = "sanitized-credential" | ||
unparsableURL = "(unparsable url)" | ||
"github.com/yuin/goldmark/util" | ||
) | ||
|
||
type sanitizedError struct { | ||
err error | ||
replacer *strings.Replacer | ||
err error | ||
} | ||
|
||
func (err sanitizedError) Error() string { | ||
return err.replacer.Replace(err.err.Error()) | ||
return SanitizeCredentialURLs(err.err.Error()) | ||
} | ||
|
||
// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text. | ||
func NewSanitizedError(err error, oldnew ...string) error { | ||
return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)} | ||
func (err sanitizedError) Unwrap() error { | ||
return err.err | ||
} | ||
|
||
// NewURLSanitizedError wraps an error and replaces the url credential or removes them. | ||
func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error { | ||
return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)} | ||
// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs | ||
func SanitizeErrorCredentialURLs(err error) error { | ||
return sanitizedError{err: err} | ||
} | ||
|
||
// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them. | ||
// If the url can't get parsed it gets replaced with a placeholder string. | ||
func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error { | ||
return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)} | ||
} | ||
const userPlaceholder = "sanitized-credential" | ||
|
||
// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed. | ||
func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer { | ||
old := u.String() | ||
var schemeSep = []byte("://") | ||
|
||
if u.User != nil && usePlaceholder { | ||
u.User = url.User(userPlaceholder) | ||
} else { | ||
u.User = nil | ||
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com" | ||
func SanitizeCredentialURLs(s string) string { | ||
bs := util.StringToReadOnlyBytes(s) | ||
schemeSepPos := bytes.Index(bs, schemeSep) | ||
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 { | ||
return s // fast return if there is no URL scheme or no userinfo | ||
} | ||
return strings.NewReplacer(old, u.String()) | ||
} | ||
|
||
// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed. | ||
// If the url can't get parsed it gets replaced with a placeholder string | ||
func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer { | ||
u, err := url.Parse(unsanitizedURL) | ||
if err != nil { | ||
// don't log the error, since it might contain unsanitized URL. | ||
return strings.NewReplacer(unsanitizedURL, unparsableURL) | ||
out := make([]byte, 0, len(bs)+len(userPlaceholder)) | ||
for schemeSepPos != -1 { | ||
schemeSepPos += 3 // skip the "://" | ||
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host" | ||
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test" | ||
sepLoop: | ||
for ; sepEndPos < len(bs); sepEndPos++ { | ||
c := bs[sepEndPos] | ||
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') { | ||
continue | ||
} | ||
switch c { | ||
case '@': | ||
sepAtPos = sepEndPos | ||
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%': | ||
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars | ||
default: | ||
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop | ||
} | ||
} | ||
// if there is '@', and the string is like "s://u@h", then hide the "u" part | ||
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 { | ||
out = append(out, bs[:schemeSepPos]...) | ||
out = append(out, userPlaceholder...) | ||
out = append(out, bs[sepAtPos:sepEndPos]...) | ||
} else { | ||
out = append(out, bs[:sepEndPos]...) | ||
} | ||
bs = bs[sepEndPos:] | ||
schemeSepPos = bytes.Index(bs, schemeSep) | ||
} | ||
return NewURLSanitizer(u, usePlaceholder) | ||
out = append(out, bs...) | ||
return util.BytesToReadOnlyString(out) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,154 +11,65 @@ import ( | |
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestNewSanitizedError(t *testing.T) { | ||
err := errors.New("error while secret on test") | ||
err2 := NewSanitizedError(err) | ||
assert.Equal(t, err.Error(), err2.Error()) | ||
|
||
cases := []struct { | ||
input error | ||
oldnew []string | ||
expected string | ||
}{ | ||
// case 0 | ||
{ | ||
errors.New("error while secret on test"), | ||
[]string{"secret", "replaced"}, | ||
"error while replaced on test", | ||
}, | ||
// case 1 | ||
{ | ||
errors.New("error while sec-ret on test"), | ||
[]string{"secret", "replaced"}, | ||
"error while sec-ret on test", | ||
}, | ||
} | ||
|
||
for n, c := range cases { | ||
err := NewSanitizedError(c.input, c.oldnew...) | ||
|
||
assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n) | ||
} | ||
func TestSanitizeErrorCredentialURLs(t *testing.T) { | ||
err := errors.New("error with https://[email protected]") | ||
se := SanitizeErrorCredentialURLs(err) | ||
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error()) | ||
} | ||
|
||
func TestNewStringURLSanitizer(t *testing.T) { | ||
func TestSanitizeCredentialURLs(t *testing.T) { | ||
cases := []struct { | ||
input string | ||
placeholder bool | ||
expected string | ||
input string | ||
expected string | ||
}{ | ||
// case 0 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
true, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 1 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 2 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 3 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 4 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 5 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"ftp://x@", | ||
"ftp://" + userPlaceholder + "@", | ||
}, | ||
// case 6 | ||
{ | ||
"https://gi\nthub.com/go-gitea/test_repo.git", | ||
false, | ||
unparsableURL, | ||
"ftp://x/@", | ||
"ftp://x/@", | ||
}, | ||
} | ||
|
||
for n, c := range cases { | ||
// uses NewURLSanitizer internally | ||
result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input) | ||
|
||
assert.Equal(t, c.expected, result, "case %d: error should match", n) | ||
} | ||
} | ||
|
||
func TestNewStringURLSanitizedError(t *testing.T) { | ||
cases := []struct { | ||
input string | ||
placeholder bool | ||
expected string | ||
}{ | ||
// case 0 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
true, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 1 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"ftp://u@x/@", // test multiple @ chars | ||
"ftp://" + userPlaceholder + "@x/@", | ||
}, | ||
// case 2 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
"😊ftp://u@x😊", // test unicode | ||
"😊ftp://" + userPlaceholder + "@x😊", | ||
}, | ||
// case 3 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"://@", | ||
"://@", | ||
}, | ||
// case 4 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string | ||
"//u:p@h", | ||
}, | ||
// case 5 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"s://u@h", // the minimal pattern to be sanitized | ||
"s://" + userPlaceholder + "@h", | ||
}, | ||
// case 6 | ||
{ | ||
"https://gi\nthub.com/go-gitea/test_repo.git", | ||
false, | ||
unparsableURL, | ||
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and [email protected]", | ||
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and [email protected]", | ||
}, | ||
} | ||
|
||
encloseText := func(input string) string { | ||
return "test " + input + " test" | ||
} | ||
|
||
for n, c := range cases { | ||
err := errors.New(encloseText(c.input)) | ||
|
||
result := NewStringURLSanitizedError(err, c.input, c.placeholder) | ||
|
||
assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n) | ||
result := SanitizeCredentialURLs(c.input) | ||
assert.Equal(t, c.expected, result, "case %d: error should match", n) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We still need these normal cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which case? I think I have covered most.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, why are we deleting these original test cases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of the code has been rewritten, and the new cases cover the old ones. If you feel that something is missing, please just point it out and add it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And many of the cases are indeed the old cases, for example, these `github.com` cases.