Skip to content

Commit

Permalink
Reimplement canonical in terms of GetFinalPathNameByHandleW on Windows.
Browse files Browse the repository at this point in the history
This moves the common part of v3 and v4 canonical() to a separate
function and changes the Windows implementation to use
GetFinalPathNameByHandleW system call. As a side effect, this converts
drive names to upper case, which makes paths more interoperable.

Additionally, on POSIX systems, avoid adding a trailing directory
separator if the input path has one (which may be the case in v4). This
is consistent with libstdc++ and MSVC implementations of std::filesystem.

Fixes #325.
  • Loading branch information
Lastique committed Sep 16, 2024
1 parent 01cadd0 commit 11beaba
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 124 deletions.
7 changes: 7 additions & 0 deletions doc/release_history.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@
</tr>
</table>

<h2>1.87.0</h2>
<ul>
<li>As was announced in 1.84.0, Windows versions prior to 10 are no longer supported.</li>
<li>On Windows, <code>canonical</code> is now based on the <code>GetFinalPathNameByHandleW</code> WinAPI function. As a side effect, drive letters are converted to upper case, which makes the resulting paths more interoperable. (<a href="https://github.com/boostorg/filesystem/issues/325">#325</a>)</li>
<li><b>v4:</b> <code>canonical</code> no longer produces a trailing directory separator in the resulting path, if the input path has one.</li>
</ul>

<h2>1.86.0</h2>
<ul>
<li><code>is_empty</code> operation is now better protected against concurrent filesystem modifications.</li>
Expand Down
229 changes: 112 additions & 117 deletions src/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,11 @@ namespace {
// The number of retries remove_all should make if it detects that the directory it is about to enter has been replaced with a symlink or a regular file
BOOST_CONSTEXPR_OR_CONST unsigned int remove_all_directory_replaced_retry_count = 5u;

#if defined(BOOST_POSIX_API)

// Size of a small buffer for a path that can be placed on stack, in character code units
BOOST_CONSTEXPR_OR_CONST std::size_t small_path_size = 1024u;

#if defined(BOOST_POSIX_API)

// Absolute maximum path length, in character code units, that we're willing to accept from various system calls.
// This value is arbitrary, it is supposed to be a hard limit to avoid memory exhaustion
// in some of the algorithms below in case of some corrupted or maliciously broken filesystem.
Expand Down Expand Up @@ -2383,7 +2383,7 @@ inline path convert_nt_path_to_win32_path(const wchar_t* nt_path, std::size_t si
(
// Check if the following is a drive letter
(
detail::is_letter(nt_path[pos]) && nt_path[pos + 1u] == colon &&
nt_path[pos + 1u] == colon && detail::is_letter(nt_path[pos]) &&
((size - pos) == 2u || detail::is_directory_separator(nt_path[pos + 2u]))
) ||
// Check for an "incorrect" syntax for UNC path junction points
Expand Down Expand Up @@ -2565,15 +2565,11 @@ path absolute_v4(path const& p, path const& base, system::error_code* ec)
return res;
}

BOOST_FILESYSTEM_DECL
path canonical_v3(path const& p, path const& base, system::error_code* ec)
namespace {

inline path canonical_common(path& source, system::error_code* ec)
{
path source(detail::absolute_v3(p, base, ec));
if (ec && *ec)
{
return_empty_path:
return path();
}
#if defined(BOOST_POSIX_API)

system::error_code local_ec;
file_status st(detail::status_impl(source, &local_ec));
Expand All @@ -2590,7 +2586,9 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
BOOST_FILESYSTEM_THROW(filesystem_error("boost::filesystem::canonical", source, local_ec));

*ec = local_ec;
goto return_empty_path;

return_empty_path:
return path();
}

path root(source.root_path());
Expand All @@ -2602,6 +2600,8 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
{
for (path::iterator itr(source.begin()), end(source.end()); itr != end; path_algorithms::increment_v4(itr))
{
if (itr->empty())
continue;
if (path_algorithms::compare_v4(*itr, dot_p) == 0)
continue;
if (path_algorithms::compare_v4(*itr, dot_dot_p) == 0)
Expand All @@ -2624,12 +2624,6 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)

path_algorithms::append_v4(result, *itr);

// If we don't have an absolute path yet then don't check symlink status.
// This avoids checking "C:" which is "the current directory on drive C"
// and hence not what we want to check/resolve here.
if (!result.is_absolute())
continue;

st = detail::symlink_status_impl(result, ec);
if (ec && *ec)
goto return_empty_path;
Expand All @@ -2656,7 +2650,7 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(link, *itr);
}
source = link;
source = std::move(link);
root = source.root_path();
}
else // link is relative
Expand All @@ -2672,7 +2666,7 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(new_source, *itr);
}
source = new_source;
source = std::move(new_source);
}

// symlink causes scan to be restarted
Expand All @@ -2688,131 +2682,132 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)

BOOST_ASSERT_MSG(result.is_absolute(), "canonical() implementation error; please report");
return result;
}

BOOST_FILESYSTEM_DECL
path canonical_v4(path const& p, path const& base, system::error_code* ec)
{
path source(detail::absolute_v4(p, base, ec));
if (ec && *ec)
{
return_empty_path:
return path();
}
#else // defined(BOOST_POSIX_API)

system::error_code local_ec;
file_status st(detail::status_impl(source, &local_ec));
unique_handle h(create_file_handle(
source.c_str(),
FILE_READ_ATTRIBUTES | FILE_READ_EA,
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
nullptr, // lpSecurityAttributes
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS));

if (st.type() == fs::file_not_found)
{
local_ec = system::errc::make_error_code(system::errc::no_such_file_or_directory);
goto fail_local_ec;
}
else if (local_ec)
DWORD err;
if (!h)
{
fail_local_ec:
if (!ec)
BOOST_FILESYSTEM_THROW(filesystem_error("boost::filesystem::canonical", source, local_ec));
err = ::GetLastError();

*ec = local_ec;
goto return_empty_path;
fail_err:
emit_error(err, source, ec, "boost::filesystem::canonical");
return path();
}

path root(source.root_path());
path const& dot_p = dot_path();
path const& dot_dot_p = dot_dot_path();
unsigned int symlinks_allowed = symloop_max;
path result;
WCHAR path_small_buf[small_path_size];
std::unique_ptr< WCHAR[] > path_large_buf;
WCHAR* path_buf = path_small_buf;
DWORD path_buf_size = sizeof(path_small_buf) / sizeof(*path_small_buf);
DWORD flags = FILE_NAME_NORMALIZED;
DWORD path_len;

while (true)
{
for (path::iterator itr(source.begin()), end(source.end()); itr != end; path_algorithms::increment_v4(itr))
path_len = ::GetFinalPathNameByHandleW(h.get(), path_buf, path_buf_size, flags);
if (path_len > 0)
{
if (path_algorithms::compare_v4(*itr, dot_p) == 0)
continue;
if (path_algorithms::compare_v4(*itr, dot_dot_p) == 0)
{
if (path_algorithms::compare_v4(result, root) != 0)
result.remove_filename_and_trailing_separators();
continue;
}
if (path_len < path_buf_size)
break;

if (itr->size() == 1u && detail::is_directory_separator(itr->native()[0]))
// The buffer is not large enough, path_len is the required buffer size, including the terminating zero
path_large_buf.reset(new WCHAR[path_len]);
path_buf = path_large_buf.get();
path_buf_size = path_len;
}
else
{
err = ::GetLastError();
if (BOOST_UNLIKELY(err == ERROR_PATH_NOT_FOUND && (flags & VOLUME_NAME_NT) == 0u))
{
// Convert generic separator returned by the iterator for the root directory to
// the preferred separator. This is important on Windows, as in some cases,
// like paths for network shares and cloud storage mount points GetFileAttributesW
// will return "file not found" if the path contains forward slashes.
result += path::preferred_separator;
// We don't need to check for a symlink after adding a separator.
// Drive letter does not exist for the file, obtain an NT path for it
flags |= VOLUME_NAME_NT;
continue;
}

path_algorithms::append_v4(result, *itr);

// If we don't have an absolute path yet then don't check symlink status.
// This avoids checking "C:" which is "the current directory on drive C"
// and hence not what we want to check/resolve here.
if (!result.is_absolute())
continue;

st = detail::symlink_status_impl(result, ec);
if (ec && *ec)
goto return_empty_path;
goto fail_err;
}
}

if (is_symlink(st))
path result;
if ((flags & VOLUME_NAME_NT) == 0u)
{
// If the input path did not contain a long path prefix, convert
// "\\?\X:" to "X:" and "\\?\UNC\" to "\\". Otherwise, preserve the prefix.
const path::value_type* source_str = source.c_str();
if (source.size() < 4u ||
source_str[0] != path::preferred_separator ||
source_str[1] != path::preferred_separator ||
source_str[2] != questionmark ||
source_str[3] != path::preferred_separator)
{
if (path_len >= 6u &&
path_buf[0] == path::preferred_separator &&
path_buf[1] == path::preferred_separator &&
path_buf[2] == questionmark &&
path_buf[3] == path::preferred_separator)
{
if (symlinks_allowed == 0)
{
local_ec = system::errc::make_error_code(system::errc::too_many_symbolic_link_levels);
goto fail_local_ec;
}

--symlinks_allowed;

path link(detail::read_symlink(result, ec));
if (ec && *ec)
goto return_empty_path;
result.remove_filename_and_trailing_separators();

if (link.is_absolute())
if (path_buf[5] == colon && detail::is_letter(path_buf[4]))
{
for (path_algorithms::increment_v4(itr); itr != end; path_algorithms::increment_v4(itr))
{
if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(link, *itr);
}
source = link;
root = source.root_path();
path_buf += 4;
path_len -= 4u;
}
else // link is relative
else if (path_len >= 8u &&
(path_buf[4] == L'U' || path_buf[4] == L'u') &&
(path_buf[5] == L'N' || path_buf[5] == L'n') &&
(path_buf[6] == L'C' || path_buf[6] == L'c') &&
path_buf[7] == path::preferred_separator)
{
link.remove_trailing_separator();
if (path_algorithms::compare_v4(link, dot_p) == 0)
continue;

path new_source(result);
path_algorithms::append_v4(new_source, link);
for (path_algorithms::increment_v4(itr); itr != end; path_algorithms::increment_v4(itr))
{
if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(new_source, *itr);
}
source = new_source;
path_buf += 6;
path_len -= 6u;
path_buf[0] = path::preferred_separator;
}

// symlink causes scan to be restarted
goto restart_scan;
}
}

break;

restart_scan:
result.clear();
result.assign(path_buf, path_buf + path_len);
}
else
{
// Convert NT path to a Win32 path
result.assign(L"\\\\?\\GLOBALROOT");
result.concat(path_buf, path_buf + path_len);
}

BOOST_ASSERT_MSG(result.is_absolute(), "canonical() implementation error; please report");
return result;

#endif // defined(BOOST_POSIX_API)
}

} // unnamed namespace

BOOST_FILESYSTEM_DECL
path canonical_v3(path const& p, path const& base, system::error_code* ec)
{
path source(detail::absolute_v3(p, base, ec));
if (ec && *ec)
return path();

return detail::canonical_common(source, ec);
}

BOOST_FILESYSTEM_DECL
path canonical_v4(path const& p, path const& base, system::error_code* ec)
{
path source(detail::absolute_v4(p, base, ec));
if (ec && *ec)
return path();

return detail::canonical_common(source, ec);
}

BOOST_FILESYSTEM_DECL
Expand Down
25 changes: 18 additions & 7 deletions test/operations_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1781,15 +1781,9 @@ void canonical_basic_tests()
BOOST_TEST(ok);

// non-symlink tests; also see canonical_symlink_tests()
#if BOOST_FILESYSTEM_VERSION == 3
BOOST_TEST_EQ(fs::canonical(""), fs::current_path());
BOOST_TEST_EQ(fs::canonical("", fs::current_path()), fs::current_path());
BOOST_TEST_EQ(fs::canonical("", ""), fs::current_path());
#else
BOOST_TEST_EQ(fs::canonical(""), fs::current_path() / fs::path());
BOOST_TEST_EQ(fs::canonical("", fs::current_path()), fs::current_path() / fs::path());
BOOST_TEST_EQ(fs::canonical("", ""), fs::current_path() / fs::path());
#endif
BOOST_TEST_EQ(fs::canonical(fs::current_path()), fs::current_path());
BOOST_TEST_EQ(fs::canonical(fs::current_path(), ""), fs::current_path());
BOOST_TEST_EQ(fs::canonical(fs::current_path(), "no-such-file"), fs::current_path());
Expand Down Expand Up @@ -1836,6 +1830,23 @@ void canonical_basic_tests()
// Test Windows long paths
fs::path long_path = make_long_path(dir / L"f0");
BOOST_TEST_EQ(fs::canonical(long_path), long_path);

// Test that canonical() consistently converts drive letters to upper case.
// https://github.com/boostorg/filesystem/issues/325
{
fs::path::string_type root_path = dir.root_path().native(), lc_root_path, uc_root_path;
for (fs::path::string_type::value_type c : root_path)
{
fs::path::string_type::value_type lc = c, uc = c;
if (lc >= L'A' && lc <= L'Z')
lc += L'a' - L'A';
if (uc >= L'a' && uc <= L'z')
uc -= L'a' - L'A';
lc_root_path.push_back(lc);
uc_root_path.push_back(uc);
}
BOOST_TEST_EQ(fs::canonical(fs::path(lc_root_path) / dir.relative_path()), fs::canonical(fs::path(uc_root_path) / dir.relative_path()));
}
#endif
}

Expand Down Expand Up @@ -2829,7 +2840,7 @@ void weakly_canonical_basic_tests()
BOOST_TEST_EQ(fs::weakly_canonical("..//foo", d1), dir / "foo");

#ifdef BOOST_WINDOWS_API
BOOST_TEST_EQ(fs::weakly_canonical("c:/no-such/foo/bar"), fs::path("c:/no-such/foo/bar"));
BOOST_TEST_EQ(fs::weakly_canonical("c:/no-such/foo/bar"), fs::path("C:/no-such/foo/bar"));

// Test Windows long paths
fs::path long_path = make_long_path(dir / L"f0");
Expand Down

0 comments on commit 11beaba

Please sign in to comment.