Skip to content

Commit

Permalink
optimize the SCM URL sanitizer regex
Browse files Browse the repository at this point in the history
\w+ is too greedy for large strings that don't contain URLs
  • Loading branch information
ryanpetrello committed Mar 11, 2020
1 parent 208dbc1 commit 7e3865c
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion awx/main/redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class UriCleaner(object):
REPLACE_STR = REPLACE_STR
SENSITIVE_URI_PATTERN = re.compile(r'(\w+:(\/?\/?)[^\s]+)', re.MULTILINE) # NOQA
SENSITIVE_URI_PATTERN = re.compile(r'((http|https|ssh|git):(\/?\/?)[^\s]+)', re.MULTILINE) # NOQA

@staticmethod
def remove_sensitive(cleartext):
Expand Down
7 changes: 7 additions & 0 deletions awx/main/tests/unit/test_redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,10 @@ def test_uri_scm_cleartext_redact_and_replace(test_data):
# Ensure the host didn't get redacted
assert redacted_str.count(uri.host) == test_data['host_occurrences']


@pytest.mark.timeout(1)
def test_large_string_performance():
    """Guard against slow sanitizing of a long string that holds no URL.

    The payload is a single run of ``x`` characters: no whitespace and no
    URI scheme. The call must complete within the timeout set by the
    decorator, and the result must have the same length as the input.
    """
    size = 100000
    payload = 'x' * size
    assert len(UriCleaner.remove_sensitive(payload)) == size

0 comments on commit 7e3865c

Please sign in to comment.