Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add is_normalized and starts_with to paths module #514

Merged
merged 2 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions docs/paths_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,33 @@ Returns `True` if `path` is an absolute path.
`True` if `path` is an absolute path.


<a id="paths.is_normalized"></a>

## paths.is_normalized

<pre>
paths.is_normalized(<a href="#paths.is_normalized-str">str</a>, <a href="#paths.is_normalized-look_for_same_level_references">look_for_same_level_references</a>)
</pre>

Returns true if the passed path doesn't contain uplevel references "..".

Also checks for single-dot references "." if look_for_same_level_references
is `True`.


**PARAMETERS**


| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="paths.is_normalized-str"></a>str | The path string to check. | none |
| <a id="paths.is_normalized-look_for_same_level_references"></a>look_for_same_level_references | If True, single-dot references "." are rejected in addition to uplevel references "..". | `True` |

**RETURNS**

True if the path is normalized, False otherwise.


<a id="paths.join"></a>

## paths.join
Expand Down Expand Up @@ -239,3 +266,24 @@ the leading dot). The returned tuple always satisfies the relationship
`root + ext == p`.


<a id="paths.starts_with"></a>

## paths.starts_with

<pre>
paths.starts_with(<a href="#paths.starts_with-path_a">path_a</a>, <a href="#paths.starts_with-path_b">path_b</a>)
</pre>

Returns True if and only if path_b is an ancestor of path_a or refers to the same path. Both paths are normalized before they are compared.

Does not handle OS dependent case-insensitivity.

**PARAMETERS**


| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="paths.starts_with-path_a"></a>path_a | The path to check. | none |
| <a id="paths.starts_with-path_b"></a>path_b | The candidate ancestor path. An empty string matches every path. | none |


78 changes: 78 additions & 0 deletions lib/paths.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,67 @@ def _normalize(path):

return path or "."

# Scanner states used by _is_normalized. Each one describes the path segment
# that is currently being read.
_BASE = 0  # The segment contains something other than just "." or "..".
_SEPARATOR = 1  # At the start of the path or right after a "/"; the segment is empty so far.
_DOT = 2  # The segment read so far is exactly ".".
_DOTDOT = 3  # The segment read so far is exactly "..".

def _is_normalized(str, look_for_same_level_references = True):
    """Checks that a path has no ".." segments and, optionally, no "." segments.

    The path is scanned once, character by character, while tracking whether
    the segment currently being read is empty, ".", ".." or anything else.
    Repeated, leading and trailing slashes are not treated as a violation.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True (the default), single-dot
        segments "." are rejected in addition to uplevel segments "..".

    Returns:
      True if the path contains none of the rejected segments, False otherwise.
    """
    state = _SEPARATOR
    for c in str.elems():
        if c == "/":
            # Only "/" is treated as a separator, whatever the host OS, in line
            # with the other functions in this module. Bazel's native check
            # uses the host operating system instead, see
            # https://github.com/bazelbuild/bazel/blob/34e3159e342a58b66353cca57ebe0b3d3ad85ff0/src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java#L728
            if state == _DOTDOT:
                # ".." segment found.
                return False
            if state == _DOT and look_for_same_level_references:
                # "." segment found.
                return False
            state = _SEPARATOR
        elif c == ".":
            if state == _SEPARATOR:
                state = _DOT
            elif state == _DOT:
                state = _DOTDOT
            else:
                # A dot inside a longer name ("a.b", "...") is an ordinary character.
                state = _BASE
        else:
            state = _BASE

    # The final segment has no trailing separator, so it is judged here.
    if state == _DOTDOT:
        return False
    return not (state == _DOT and look_for_same_level_references)

def _relativize(path, start):
"""Returns the portion of `path` that is relative to `start`.

Expand Down Expand Up @@ -230,13 +291,30 @@ def _split_extension(p):
dot_distance_from_end = len(b) - last_dot_in_basename
return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])

def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is an ancestor of path_a.

    Both paths are normalized before they are compared, and the comparison is
    made on whole path segments: "foo/bar" starts with "foo" but not with "fo".
    A path is also considered to start with itself.

    Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path to check.
      path_b: The candidate ancestor path. An empty string matches every path.

    Returns:
      True if `path_a` is `path_b` or is located below it, False otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True
    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)
    if not norm_a.startswith(norm_b):
        # Also covers the case where norm_b is longer than norm_a.
        return False
    if len(norm_a) == len(norm_b):
        return True

    # norm_b is a proper string prefix of norm_a; it is an ancestor only if the
    # prefix stops at a segment boundary. If norm_b itself ends in "/" (a root
    # such as "/"), the boundary is its last character; otherwise it is the
    # character that follows the prefix in norm_a.
    return norm_b.endswith("/") or norm_a[len(norm_b)] == "/"

# Public API of this module: each field exposes the private function of the
# same name (without the leading underscore).
paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    join = _join,
    normalize = _normalize,
    is_normalized = _is_normalized,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)
76 changes: 76 additions & 0 deletions tests/paths_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,55 @@ def _normalize_test(ctx):

normalize_test = unittest.make(_normalize_test)

def _is_normalized_test(ctx):
    """Unit tests for paths.is_normalized."""
    env = unittest.begin(ctx)

    # Try the most basic cases.
    asserts.true(env, paths.is_normalized(""))
    asserts.false(env, paths.is_normalized("."))
    asserts.false(env, paths.is_normalized(".."))
    asserts.true(env, paths.is_normalized("/"))
    asserts.true(env, paths.is_normalized("/tmp"))
    asserts.true(env, paths.is_normalized("tmp"))
    asserts.true(env, paths.is_normalized("c:/"))
    asserts.false(env, paths.is_normalized("../a"))
    asserts.false(env, paths.is_normalized("a/.."))

    # Adjacent slashes are not flagged.
    asserts.true(env, paths.is_normalized("foo//bar"))
    asserts.true(env, paths.is_normalized("foo////bar"))

    # "." segments are flagged by default, wherever they appear.
    asserts.false(env, paths.is_normalized("foo/./bar"))
    asserts.false(env, paths.is_normalized("./foo/bar"))
    asserts.false(env, paths.is_normalized("foo/bar/."))
    asserts.false(env, paths.is_normalized("/."))

    # ".." segments are flagged, wherever they appear.
    asserts.false(env, paths.is_normalized("foo/../bar"))
    asserts.false(env, paths.is_normalized("foo/bar/.."))
    asserts.false(env, paths.is_normalized("foo/.."))
    asserts.false(env, paths.is_normalized("foo/bar/../.."))
    asserts.false(env, paths.is_normalized("foo/../.."))
    asserts.false(env, paths.is_normalized("/foo/../.."))
    asserts.false(env, paths.is_normalized("a/b/../../../../c/d/.."))

    # Any number of leading slashes is accepted.
    asserts.true(env, paths.is_normalized("/foo"))
    asserts.true(env, paths.is_normalized("//foo"))
    asserts.true(env, paths.is_normalized("///foo"))

    # Trailing slashes are accepted.
    asserts.true(env, paths.is_normalized("/"))
    asserts.true(env, paths.is_normalized("foo/"))
    asserts.true(env, paths.is_normalized("foo/bar/"))

    # Dots that are only part of a longer segment name are not references.
    asserts.true(env, paths.is_normalized(".hidden"))
    asserts.true(env, paths.is_normalized("..foo"))
    asserts.true(env, paths.is_normalized("foo.."))
    asserts.true(env, paths.is_normalized("foo/.../bar"))
    asserts.true(env, paths.is_normalized("foo/bar.txt"))

    # With look_for_same_level_references = False, "." segments are accepted
    # but ".." segments are still flagged.
    asserts.true(env, paths.is_normalized(".", look_for_same_level_references = False))
    asserts.true(env, paths.is_normalized("./foo/bar", look_for_same_level_references = False))
    asserts.true(env, paths.is_normalized("foo/./bar", look_for_same_level_references = False))
    asserts.true(env, paths.is_normalized("foo/bar/.", look_for_same_level_references = False))
    asserts.false(env, paths.is_normalized("..", look_for_same_level_references = False))
    asserts.false(env, paths.is_normalized("foo/./../bar", look_for_same_level_references = False))

    return unittest.end(env)

is_normalized_test = unittest.make(_is_normalized_test)

def _relativize_test(ctx):
"""Unit tests for paths.relativize."""
env = unittest.begin(ctx)
Expand Down Expand Up @@ -276,6 +325,31 @@ def _split_extension_test(ctx):

split_extension_test = unittest.make(_split_extension_test)

def _starts_with_test(ctx):
    """Unit tests for paths.starts_with."""
    env = unittest.begin(ctx)

    # The empty string matches every path, but "." is not treated the same way.
    asserts.true(env, paths.starts_with("foo", ""))
    asserts.false(env, paths.starts_with("foo", "."))

    # Try some regular cases.
    asserts.true(env, paths.starts_with("foo/bar", "foo"))
    asserts.false(env, paths.starts_with("foo/bar", "fo"))
    asserts.true(env, paths.starts_with("foo/bar/baz", "foo/bar"))
    asserts.true(env, paths.starts_with("foo/bar/baz", "foo"))
    asserts.true(env, paths.starts_with("/foo/bar", "/foo"))

    # A path starts with itself.
    asserts.true(env, paths.starts_with("foo", "foo"))
    asserts.true(env, paths.starts_with("foo/bar", "foo/bar"))

    # A longer path is never an ancestor of a shorter one.
    asserts.false(env, paths.starts_with("foo", "foo/bar"))

    # Matching is done on whole segments, not on characters.
    asserts.false(env, paths.starts_with("foobar/baz", "foo"))

    # Try a case where a parent directory is normalized away.
    asserts.true(env, paths.starts_with("foo/bar/../baz", "foo"))

    # Relative paths work, as long as they share a common start.
    asserts.true(env, paths.starts_with("../foo/bar/baz/file", "../foo/bar/baz"))
    asserts.true(env, paths.starts_with("../foo/bar/baz/file", "../foo/bar"))

    return unittest.end(env)

starts_with_test = unittest.make(_starts_with_test)

def paths_test_suite():
"""Creates the test targets and test suite for paths.bzl tests."""
unittest.suite(
Expand All @@ -285,7 +359,9 @@ def paths_test_suite():
is_absolute_test,
join_test,
normalize_test,
is_normalized_test,
relativize_test,
replace_extension_test,
split_extension_test,
starts_with_test,
)