Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[URI] Do not attempt multiple verification attempts if host is non-resolvable #3656

Merged
merged 1 commit into from
Jan 15, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 72 additions & 40 deletions pkg/detectors/uri/uri.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@ package uri

import (
"context"
"errors"
"io"
"net"
"net/http"
"net/url"
"strings"
"time"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
Expand All @@ -19,85 +24,114 @@ type Scanner struct {
}

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var _ interface {
detectors.Detector
detectors.CustomFalsePositiveChecker
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason why we ignore all false positives for this detector with custom logic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect there were too many results being filtered by the wordlist, so someone decided to do this workaround instead of removing the problematic entries.

See #3246

} = (*Scanner)(nil)

var (
keyPat = regexp.MustCompile(`\b(?:https?:)?\/\/[\S]{3,50}:([\S]{3,50})@[-.%\w\/:]+\b`)

// TODO: make local addr opt-out
defaultClient = detectors.DetectorHttpClientWithNoLocalAddresses

hostNotFoundCache = simple.NewCache[struct{}]()
Comment on lines +37 to +38
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would benefit from being centralized and shared by all detectors. A domain that's found to not exist for Azure Container Registry, Azure OpenAI, JDBC (below), etc. is applicable to any other detector.

Found unverified result πŸ·πŸ”‘β“
Verification issue: lookup your_server.database.windows.net: no such host
Detector Type: JDBC
Decoder Type: PLAIN
Raw result: jdbc:sqlserver://your_server.database.windows.net:1433;
Commit: aa081336863641da6061a892c7304f9823b4e8d6
Commit_source: refs/remotes/origin/pull/2/head (hidden ref)
Email: Brian Gianforcaro <[email protected]>
File: Java/sample_java.java
Line: 11
Link: https://github.com/Azure/azure-sql-database-samples/blob/aa081336863641da6061a892c7304f9823b4e8d6/Java/sample_java.java#L11
Repository: https://github.com/Azure/azure-sql-database-samples.git
Timestamp: 2015-09-28 17:26:38 +0000

)

// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
func (s Scanner) Keywords() []string {
return []string{"http"}
return []string{"http://", "https://"}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, http:// and https:// are more correct and efficient keywords.

}

func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_URI
}

func (s Scanner) Description() string {
return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction."
}

// FromData will find and optionally verify URI secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
logger := logContext.AddLogger(ctx).Logger().WithName("uri")
dataStr := string(data)

matches := keyPat.FindAllStringSubmatch(dataStr, -1)

for _, match := range matches {

uriMatches := make(map[string]string)
for _, matches := range keyPat.FindAllStringSubmatch(dataStr, -1) {
uriMatch := matches[0]
if !s.allowKnownTestSites {
if strings.Contains(match[0], "httpbin.org") {
if strings.Contains(uriMatch, "httpbin.org") {
continue
}
if strings.Contains(match[0], "httpwatch.com") {
if strings.Contains(uriMatch, "httpwatch.com") {
continue
}
}

urlMatch := match[0]
password := match[1]

password := matches[1]
// Skip findings where the password only has "*" characters, this is a redacted password
// Also include the url encoded "*" characters: "%2A"
if strings.Trim(password, "*") == "" || strings.Trim(password, "%2A") == "" {
continue
}

parsedURL, err := url.Parse(urlMatch)
uriMatches[uriMatch] = password
}

for uri, password := range uriMatches {
parsedURL, err := url.Parse(uri)
if err != nil {
// URL is invalid.
continue
}
// URL does not contain a password.
if _, ok := parsedURL.User.Password(); !ok {
continue
}

rawURL, _ := url.Parse(urlMatch)
rawURLStr := rawURL.String()
// Safe, I think? (https://github.com/golang/go/issues/38351)
rawUrl := *parsedURL
rawUrlWithPath := rawUrl.String()
// Removing the path causes possible deduplication issues if some paths have basic auth and some do not.
rawURL.Path = ""
rawUrl.Path = ""
rawUrlWithoutPath := rawUrl.String()

s1 := detectors.Result{
r := detectors.Result{
DetectorType: detectorspb.DetectorType_URI,
Raw: []byte(rawURL.String()),
RawV2: []byte(rawURLStr),
Redacted: detectors.RedactURL(*rawURL),
Raw: []byte(rawUrlWithoutPath),
RawV2: []byte(rawUrlWithPath),
Redacted: detectors.RedactURL(*parsedURL),
}

if verify {
hostname := parsedURL.Hostname()
if hostNotFoundCache.Exists(hostname) {
logger.V(3).Info("Skipping uri: no such host", "host", hostname)
continue
}

if s.client == nil {
s.client = defaultClient
}
isVerified, verificationError := verifyURL(ctx, s.client, parsedURL)
s1.Verified = isVerified
s1.SetVerificationError(verificationError, password)
isVerified, vErr := verifyURL(ctx, s.client, parsedURL)
r.Verified = isVerified
if vErr != nil {
var dnsErr *net.DNSError
if errors.As(vErr, &dnsErr) && dnsErr.IsNotFound {
hostNotFoundCache.Set(hostname, struct{}{})
}
r.SetVerificationError(vErr, password)
}
}

if !s1.Verified {
if !r.Verified {
// Skip unverified findings where the password starts with a `$` - it's almost certainly a variable.
if strings.HasPrefix(password, "$") {
continue
}
}

results = append(results, s1)
results = append(results, r)
}

return results, nil
Expand All @@ -118,16 +152,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro
u.User = nil
nonCredentialedURL := u.String()

req, err := http.NewRequestWithContext(ctx, "GET", credentialedURL, nil)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, credentialedURL, nil)
if err != nil {
return false, err
}
req = req.WithContext(ctx)

credentialedRes, err := client.Do(req)
if err != nil {
return false, err
}
credentialedRes.Body.Close()
defer func() {
_, _ = io.Copy(io.Discard, credentialedRes.Body)
_ = credentialedRes.Body.Close()
}()

// If the credentialed URL returns a non 2XX code, we can assume it's a false positive.
if credentialedRes.StatusCode < 200 || credentialedRes.StatusCode > 299 {
Expand All @@ -136,16 +173,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro

time.Sleep(time.Millisecond * 10)

req, err = http.NewRequestWithContext(ctx, "GET", nonCredentialedURL, nil)
req, err = http.NewRequestWithContext(ctx, http.MethodGet, nonCredentialedURL, nil)
if err != nil {
return false, err
}
req = req.WithContext(ctx)

nonCredentialedRes, err := client.Do(req)
if err != nil {
return false, err
}
nonCredentialedRes.Body.Close()
defer func() {
_, _ = io.Copy(io.Discard, nonCredentialedRes.Body)
_ = nonCredentialedRes.Body.Close()
}()

// If the non-credentialed URL returns a non 400-428 code and basic auth header, we can assume it's verified now.
if nonCredentialedRes.StatusCode >= 400 && nonCredentialedRes.StatusCode < 429 {
Expand All @@ -156,11 +196,3 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro

return false, nil
}

func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_URI
}

func (s Scanner) Description() string {
return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction."
}
Loading