-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[URI] Do not attempt multiple verification attempts if host is non-resolvable #3656
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,13 +2,18 @@ package uri | |
|
||
import ( | ||
"context" | ||
"errors" | ||
"io" | ||
"net" | ||
"net/http" | ||
"net/url" | ||
"strings" | ||
"time" | ||
|
||
regexp "github.com/wasilibs/go-re2" | ||
|
||
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" | ||
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" | ||
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors" | ||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" | ||
) | ||
|
@@ -19,85 +24,114 @@ type Scanner struct { | |
} | ||
|
||
// Ensure the Scanner satisfies the interface at compile time. | ||
var _ detectors.Detector = (*Scanner)(nil) | ||
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil) | ||
var _ interface { | ||
detectors.Detector | ||
detectors.CustomFalsePositiveChecker | ||
} = (*Scanner)(nil) | ||
|
||
var ( | ||
keyPat = regexp.MustCompile(`\b(?:https?:)?\/\/[\S]{3,50}:([\S]{3,50})@[-.%\w\/:]+\b`) | ||
|
||
// TODO: make local addr opt-out | ||
defaultClient = detectors.DetectorHttpClientWithNoLocalAddresses | ||
|
||
hostNotFoundCache = simple.NewCache[struct{}]() | ||
Comment on lines
+37
to
+38
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this would benefit from being centralized and shared by all detectors. A domain that's found to not exist for Azure Container Registry, Azure OpenAI, JDBC (below), etc. is applicable to any other detector.
|
||
) | ||
|
||
// Keywords are used for efficiently pre-filtering chunks. | ||
// Use identifiers in the secret preferably, or the provider name. | ||
func (s Scanner) Keywords() []string { | ||
return []string{"http"} | ||
return []string{"http://", "https://"} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I can tell, |
||
} | ||
|
||
func (s Scanner) Type() detectorspb.DetectorType { | ||
return detectorspb.DetectorType_URI | ||
} | ||
|
||
func (s Scanner) Description() string { | ||
return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction." | ||
} | ||
|
||
// FromData will find and optionally verify URI secrets in a given set of bytes. | ||
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { | ||
logger := logContext.AddLogger(ctx).Logger().WithName("uri") | ||
dataStr := string(data) | ||
|
||
matches := keyPat.FindAllStringSubmatch(dataStr, -1) | ||
|
||
for _, match := range matches { | ||
|
||
uriMatches := make(map[string]string) | ||
for _, matches := range keyPat.FindAllStringSubmatch(dataStr, -1) { | ||
uriMatch := matches[0] | ||
if !s.allowKnownTestSites { | ||
if strings.Contains(match[0], "httpbin.org") { | ||
if strings.Contains(uriMatch, "httpbin.org") { | ||
continue | ||
} | ||
if strings.Contains(match[0], "httpwatch.com") { | ||
if strings.Contains(uriMatch, "httpwatch.com") { | ||
continue | ||
} | ||
} | ||
|
||
urlMatch := match[0] | ||
password := match[1] | ||
|
||
password := matches[1] | ||
// Skip findings where the password only has "*" characters, this is a redacted password | ||
// Also include the url encoded "*" characters: "%2A" | ||
if strings.Trim(password, "*") == "" || strings.Trim(password, "%2A") == "" { | ||
continue | ||
} | ||
|
||
parsedURL, err := url.Parse(urlMatch) | ||
uriMatches[uriMatch] = password | ||
} | ||
|
||
for uri, password := range uriMatches { | ||
parsedURL, err := url.Parse(uri) | ||
if err != nil { | ||
// URL is invalid. | ||
continue | ||
} | ||
// URL does not contain a password. | ||
if _, ok := parsedURL.User.Password(); !ok { | ||
continue | ||
} | ||
|
||
rawURL, _ := url.Parse(urlMatch) | ||
rawURLStr := rawURL.String() | ||
// Safe, I think? (https://github.com/golang/go/issues/38351) | ||
rawUrl := *parsedURL | ||
rawUrlWithPath := rawUrl.String() | ||
// Removing the path causes possible deduplication issues if some paths have basic auth and some do not. | ||
rawURL.Path = "" | ||
rawUrl.Path = "" | ||
rawUrlWithoutPath := rawUrl.String() | ||
|
||
s1 := detectors.Result{ | ||
r := detectors.Result{ | ||
DetectorType: detectorspb.DetectorType_URI, | ||
Raw: []byte(rawURL.String()), | ||
RawV2: []byte(rawURLStr), | ||
Redacted: detectors.RedactURL(*rawURL), | ||
Raw: []byte(rawUrlWithoutPath), | ||
RawV2: []byte(rawUrlWithPath), | ||
Redacted: detectors.RedactURL(*parsedURL), | ||
} | ||
|
||
if verify { | ||
hostname := parsedURL.Hostname() | ||
if hostNotFoundCache.Exists(hostname) { | ||
logger.V(3).Info("Skipping uri: no such host", "host", hostname) | ||
continue | ||
} | ||
|
||
if s.client == nil { | ||
s.client = defaultClient | ||
} | ||
isVerified, verificationError := verifyURL(ctx, s.client, parsedURL) | ||
s1.Verified = isVerified | ||
s1.SetVerificationError(verificationError, password) | ||
isVerified, vErr := verifyURL(ctx, s.client, parsedURL) | ||
r.Verified = isVerified | ||
if vErr != nil { | ||
var dnsErr *net.DNSError | ||
if errors.As(vErr, &dnsErr) && dnsErr.IsNotFound { | ||
hostNotFoundCache.Set(hostname, struct{}{}) | ||
} | ||
r.SetVerificationError(vErr, password) | ||
} | ||
} | ||
|
||
if !s1.Verified { | ||
if !r.Verified { | ||
// Skip unverified findings where the password starts with a `$` - it's almost certainly a variable. | ||
if strings.HasPrefix(password, "$") { | ||
continue | ||
} | ||
} | ||
|
||
results = append(results, s1) | ||
results = append(results, r) | ||
} | ||
|
||
return results, nil | ||
|
@@ -118,16 +152,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro | |
u.User = nil | ||
nonCredentialedURL := u.String() | ||
|
||
req, err := http.NewRequestWithContext(ctx, "GET", credentialedURL, nil) | ||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, credentialedURL, nil) | ||
if err != nil { | ||
return false, err | ||
} | ||
req = req.WithContext(ctx) | ||
|
||
credentialedRes, err := client.Do(req) | ||
if err != nil { | ||
return false, err | ||
} | ||
credentialedRes.Body.Close() | ||
defer func() { | ||
_, _ = io.Copy(io.Discard, credentialedRes.Body) | ||
_ = credentialedRes.Body.Close() | ||
}() | ||
|
||
// If the credentialed URL returns a non 2XX code, we can assume it's a false positive. | ||
if credentialedRes.StatusCode < 200 || credentialedRes.StatusCode > 299 { | ||
|
@@ -136,16 +173,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro | |
|
||
time.Sleep(time.Millisecond * 10) | ||
|
||
req, err = http.NewRequestWithContext(ctx, "GET", nonCredentialedURL, nil) | ||
req, err = http.NewRequestWithContext(ctx, http.MethodGet, nonCredentialedURL, nil) | ||
if err != nil { | ||
return false, err | ||
} | ||
req = req.WithContext(ctx) | ||
|
||
nonCredentialedRes, err := client.Do(req) | ||
if err != nil { | ||
return false, err | ||
} | ||
nonCredentialedRes.Body.Close() | ||
defer func() { | ||
_, _ = io.Copy(io.Discard, nonCredentialedRes.Body) | ||
_ = nonCredentialedRes.Body.Close() | ||
}() | ||
|
||
// If the non-credentialed URL returns a non 400-428 code and basic auth header, we can assume it's verified now. | ||
if nonCredentialedRes.StatusCode >= 400 && nonCredentialedRes.StatusCode < 429 { | ||
|
@@ -156,11 +196,3 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro | |
|
||
return false, nil | ||
} | ||
|
||
func (s Scanner) Type() detectorspb.DetectorType { | ||
return detectorspb.DetectorType_URI | ||
} | ||
|
||
func (s Scanner) Description() string { | ||
return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction." | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any reason why we ignore all false positives for this detector with custom logic?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect there were too many results being filtered by the wordlist, so someone decided to do this workaround instead of removing the problematic entries.
See #3246