Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cufile version #565

Merged
merged 16 commits into from
Dec 5, 2024
17 changes: 16 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ if(KvikIO_CUDA_SUPPORT)
else()
set(cuFile_FOUND 1)

# Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND)
# Check API support
try_compile(
cuFile_BATCH_API_FOUND SOURCE_FROM_CONTENT
batch.cpp
Expand Down Expand Up @@ -109,6 +109,20 @@ if(KvikIO_CUDA_SUPPORT)
OUTPUT_VARIABLE stream_output
)
message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}")
# Probe whether the installed cuFile headers/library provide `cuFileGetVersion()` by
# compiling and linking a tiny program that calls it. The result is stored in
# `cuFile_VERSION_API_FOUND`, and the compiler/linker output in `version_output`.
try_compile(
cuFile_VERSION_API_FOUND SOURCE_FROM_CONTENT
version.cpp
[[#include <cufile.h>
int main() {
int version;
cuFileGetVersion(&version);
return 0;
}
]]
LINK_LIBRARIES CUDA::cuFile rt ${CMAKE_DL_LIBS}
OUTPUT_VARIABLE version_output
)
# Report the outcome of the probe in the configure log.
message(STATUS "Found cuFile Version API: ${cuFile_VERSION_API_FOUND}")
endif()
endif()

Expand Down Expand Up @@ -154,6 +168,7 @@ target_compile_definitions(
$<$<BOOL:${cuFile_FOUND}>:KVIKIO_CUFILE_FOUND>
$<$<BOOL:${cuFile_BATCH_API_FOUND}>:KVIKIO_CUFILE_BATCH_API_FOUND>
$<$<BOOL:${cuFile_STREAM_API_FOUND}>:KVIKIO_CUFILE_STREAM_API_FOUND>
$<$<BOOL:${cuFile_VERSION_API_FOUND}>:KVIKIO_CUFILE_VERSION_API_FOUND>
)

set_target_properties(
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ int main()
cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads()
<< " threads): " << read << endl;
}
if (kvikio::is_batch_and_stream_available() && !kvikio::defaults::is_compat_mode_preferred()) {
if (kvikio::is_batch_api_available() && !kvikio::defaults::is_compat_mode_preferred()) {
std::cout << std::endl;
Timer timer;
// Here we use the batch API to read "/tmp/test-file" into `b_dev` by
Expand Down
29 changes: 13 additions & 16 deletions cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,20 @@ class FileHandle {
*/
bool is_compat_mode_preferred_for_async(CompatMode requested_compat_mode)
{
if (!defaults::is_compat_mode_preferred(requested_compat_mode)) {
if (!is_batch_and_stream_available()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing cuFile batch or stream library symbol.");
}

// When checking for availability, we also check if cuFile's config file exist. This is
// because even when the stream API is available, it doesn't work if no config file exist.
if (config_path().empty()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing cuFile configuration file.");
}

return false;
if (defaults::is_compat_mode_preferred(requested_compat_mode)) { return true; }

if (!is_stream_api_available()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing the cuFile stream api.");
}

return true;
// When checking for availability, we also check if cuFile's config file exists. This is
madsbk marked this conversation as resolved.
Show resolved Hide resolved
// because even when the stream API is available, it doesn't work if no config file exists.
madsbk marked this conversation as resolved.
Show resolved Hide resolved
if (config_path().empty()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing cuFile configuration file.");
}
return false;
}

public:
Expand Down Expand Up @@ -670,7 +667,7 @@ class FileHandle {
*/
[[nodiscard]] bool is_compat_mode_preferred_for_async() const noexcept
{
static bool is_extra_symbol_available = is_batch_and_stream_available();
static bool is_extra_symbol_available = is_stream_api_available();
static bool is_config_path_empty = config_path().empty();
return is_compat_mode_preferred() || !is_extra_symbol_available || is_config_path_empty;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/kvikio/remote_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace detail {
* @note Is not thread-safe.
*/
class BounceBufferH2D {
CUstream _stream; // The CUDA steam to use.
CUstream _stream; // The CUDA stream to use.
CUdeviceptr _dev; // The output device buffer.
AllocRetain::Alloc _host_buffer; // The host buffer to bounce data on.
std::ptrdiff_t _dev_offset{0}; // Number of bytes written to `_dev`.
Expand Down
99 changes: 66 additions & 33 deletions cpp/include/kvikio/shim/cufile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ class cuFileAPI {
decltype(cuFileDriverOpen)* DriverOpen{nullptr};
decltype(cuFileDriverClose)* DriverClose{nullptr};

// Don't call `GetVersion` directly, use `cuFileAPI::instance().version`.
decltype(cuFileGetVersion)* GetVersion{nullptr};

public:
bool stream_available = false;
int version{0};

private:
#ifdef KVIKIO_CUFILE_FOUND
Expand Down Expand Up @@ -88,33 +91,39 @@ class cuFileAPI {
get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize));
get_symbol(DriverSetMaxPinnedMemSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxPinnedMemSize));

#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp));
get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit));
get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus));
get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel));
get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy));
#endif

#ifdef KVIKIO_CUFILE_STREAM_API_FOUND
get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync));
get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync));
get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister));
get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister));
#ifdef KVIKIO_CUFILE_VERSION_API_FOUND
try {
void* s{};
get_symbol(s, lib, "cuFileReadAsync");
stream_available = true;
} catch (const std::runtime_error&) {
get_symbol(GetVersion, lib, KVIKIO_STRINGIFY(cuFileGetVersion));
int ver;
CUfileError_t const error = GetVersion(&ver);
if (error.err == CU_FILE_SUCCESS) { version = ver; }
} catch (std::runtime_error const&) {
}
#endif

// Some symbols were introduced in later versions, so version guards are required.
// Note: `version` is 0 for cuFile versions prior to v1.8 because `cuFileGetVersion`
// did not exist. As a result, the batch and stream APIs are not loaded in versions
// 1.6 and 1.7, respectively, even though they are available. This trade-off is made
// for improved robustness.
if (version >= 1060) {
get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp));
get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit));
get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus));
get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel));
get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy));
}
if (version >= 1070) {
get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync));
get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync));
get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister));
get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister));
}

// cuFile is supposed to open and close the driver automatically but
// because of a bug in cuFile v1.4 (CUDA v11.8) it sometimes segfaults:
// <https://github.com/rapidsai/kvikio/issues/159>.
// We use the stream API as a version indicator of cuFile since it was introduced
// in cuFile v1.7 (CUDA v12.2).
if (!stream_available) { driver_open(); }
if (version < 1050) { driver_open(); }
}

// Notice, we have to close the driver at program exit (if we opened it) even though we are
Expand All @@ -124,7 +133,7 @@ class cuFileAPI {
// [1] <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization>
~cuFileAPI()
{
if (!stream_available) { driver_close(); }
if (version < 1050) { driver_close(); }
}
#else
cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); }
Expand Down Expand Up @@ -205,25 +214,49 @@ inline bool is_cufile_available()
}

/**
* @brief Check if cuFile's batch and stream API is available
* @brief Get cufile version (or zero if older than v1.8).
*
* Technically, the batch API is available in CUDA 12.1 but since there is no good
* way to check CUDA version using the driver API, we check for the existing of the
* `cuFileReadAsync` symbol, which is defined in CUDA 12.2+.
* The version is returned as (1000*major + 10*minor). E.g., cufile v1.8.0 would
* be represented by 1080.
*
* @return The boolean answer
* Notice, this is not the version of the CUDA toolkit. cufile is part of the
* toolkit but follows its own version scheme.
*
* @return The version (1000*major + 10*minor) or zero if older than 1080.
*/
#if defined(KVIKIO_CUFILE_STREAM_API_FOUND) && defined(KVIKIO_CUFILE_STREAM_API_FOUND)
inline bool is_batch_and_stream_available() noexcept
#ifdef KVIKIO_CUFILE_FOUND
inline int cufile_version()
{
try {
return is_cufile_available() && cuFileAPI::instance().stream_available;
} catch (const std::runtime_error&) {
return false;
return cuFileAPI::instance().version;
} catch (std::runtime_error const&) {
return 0;
}
}
#else
constexpr bool is_batch_and_stream_available() { return false; }
// Fallback used when KvikIO is compiled without cuFile: no version can be queried,
// so always report 0. The return type is `int` to match the cuFile-enabled overload.
constexpr int cufile_version() { return 0; }
madsbk marked this conversation as resolved.
Show resolved Hide resolved
#endif

/**
 * @brief Report whether the cuFile batch API can be used.
 *
 * The answer is derived from `cufile_version()`. Because `cuFileGetVersion()` was
 * first introduced in cufile v1.8 (CTK v12.3), older releases report version zero,
 * so this returns false for them even though the batch API itself has existed
 * since v1.6.
 *
 * @return True if the detected cufile version is at least v1.6 (encoded as 1060).
 */
inline bool is_batch_api_available() noexcept
{
  // Versions are encoded as (1000*major + 10*minor); 1060 corresponds to v1.6.
  constexpr int min_batch_api_version{1060};
  return cufile_version() >= min_batch_api_version;
}

/**
 * @brief Report whether the cuFile stream (async) API can be used.
 *
 * The answer is derived from `cufile_version()`. Because `cuFileGetVersion()` was
 * first introduced in cufile v1.8 (CTK v12.3), older releases report version zero,
 * so this returns false for them even though the stream API itself has existed
 * since v1.7.
 *
 * @return True if the detected cufile version is at least v1.7 (encoded as 1070).
 */
inline bool is_stream_api_available() noexcept
{
  // Versions are encoded as (1000*major + 10*minor); 1070 corresponds to v1.7.
  constexpr int min_stream_api_version{1070};
  return cufile_version() >= min_stream_api_version;
}

} // namespace kvikio
7 changes: 5 additions & 2 deletions cpp/include/kvikio/shim/cufile_h_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ CUfileError_t cuFileDriverSetMaxPinnedMemSize(...);

#endif
bdice marked this conversation as resolved.
Show resolved Hide resolved

// If the Batch API isn't defined, we define some of the data types here.
// If some cufile APIs aren't defined, we define some of the data types here.
// Notice, this doesn't need to be ABI compatible with the cufile definitions and
// the lack of definitions is not a problem because the linker will never look for
// these symbols because the "real" function calls are made through the shim instance.
Expand Down Expand Up @@ -105,10 +105,13 @@ CUfileError_t cuFileBatchIOCancel(...);
CUfileError_t cuFileBatchIODestroy(...);
#endif

// If the Stream API isn't defined, we define some of the data types here.
#ifndef KVIKIO_CUFILE_STREAM_API_FOUND
CUfileError_t cuFileReadAsync(...);
CUfileError_t cuFileWriteAsync(...);
CUfileError_t cuFileStreamRegister(...);
CUfileError_t cuFileStreamDeregister(...);
#endif

#ifndef KVIKIO_CUFILE_VERSION_API_FOUND
CUfileError_t cuFileGetVersion(...);
#endif
5 changes: 5 additions & 0 deletions python/kvikio/kvikio/_lib/cufile_driver.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@ from libcpp cimport bool


cdef extern from "<kvikio/shim/cufile.hpp>" nogil:
cdef int cpp_libcufile_version "kvikio::cufile_version"() except +
cdef void cpp_driver_open "kvikio::cuFileAPI::instance().driver_open"() except +
cdef void cpp_driver_close "kvikio::cuFileAPI::instance().driver_close"() except +


# Python-visible wrapper around the C++ `kvikio::cufile_version()` declared above.
# Returns the raw integer version unchanged; splitting it into (major, minor) is
# left to the caller.
def libcufile_version() -> int:
return cpp_libcufile_version()


def driver_open():
cpp_driver_open()

Expand Down
6 changes: 6 additions & 0 deletions python/kvikio/kvikio/benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def drop_vm_cache() -> None:
def pprint_sys_info() -> None:
"""Pretty print system information"""

version = kvikio.cufile_driver.libcufile_version()
props = kvikio.cufile_driver.DriverProperties()
try:
import pynvml
Expand All @@ -41,6 +42,10 @@ def pprint_sys_info() -> None:
gpu_name = f"{pynvml.nvmlDeviceGetName(dev)} (dev #0)"
mem_total = format_bytes(pynvml.nvmlDeviceGetMemoryInfo(dev).total)
bar1_total = format_bytes(pynvml.nvmlDeviceGetBAR1MemoryInfo(dev).bar1Total)
if version == (0, 0):
libcufile_version = "unknown"
madsbk marked this conversation as resolved.
Show resolved Hide resolved
else:
libcufile_version = f"{version[0]}.{version[1]}"
madsbk marked this conversation as resolved.
Show resolved Hide resolved
gds_version = "N/A (Compatibility Mode)"
if props.is_gds_available:
gds_version = f"v{props.major_version}.{props.minor_version}"
Expand All @@ -61,6 +66,7 @@ def pprint_sys_info() -> None:
print(f"GPU | {gpu_name}")
print(f"GPU Memory Total | {mem_total}")
print(f"BAR1 Memory Total | {bar1_total}")
print(f"libcufile version | {libcufile_version}")
print(f"GDS driver | {gds_version}")
print(f"GDS config.json | {gds_config_json_path}")

Expand Down
19 changes: 19 additions & 0 deletions python/kvikio/kvikio/cufile_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# See file LICENSE for terms.

import atexit
from typing import Tuple

from kvikio._lib import cufile_driver # type: ignore

Expand All @@ -10,6 +11,24 @@
DriverProperties = cufile_driver.DriverProperties


def libcufile_version() -> Tuple[int, int]:
"""Get the libcufile version (or (0, 0) if older than v1.8).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor note that this returns (0, 0) and not 0 for old versions. I would suggest removing the parenthetical here and instead specifying that behavior in the Notes below.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed to:

def libcufile_version() -> Tuple[int, int]:
    """Get the libcufile version.

    Returns (0, 0) for cuFile versions prior to v1.8.

    Notes
    -----
    This is not the version of the CUDA toolkit. cufile is part of the
    toolkit but follows its own version scheme.

    Returns
    -------
    The version as a tuple (MAJOR, MINOR).
    """


Notes
-----
This is not the version of the CUDA toolkit. cufile is part of the
toolkit but follows its own version scheme.

Returns
-------
The version as a tuple (MAJOR, MINOR).
"""
v = cufile_driver.libcufile_version()
major = v // 1000
minor = (v % 1000) // 10
return (major, minor)
madsbk marked this conversation as resolved.
Show resolved Hide resolved


def driver_open() -> None:
"""Open the cuFile driver

Expand Down
6 changes: 6 additions & 0 deletions python/kvikio/tests/test_cufile_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import kvikio.cufile_driver


def test_version():
    """The libcufile version unpacks into two non-negative components."""
    version = kvikio.cufile_driver.libcufile_version()
    major, minor = version
    # Both components must be zero or greater.
    for component in (major, minor):
        assert component >= 0


@pytest.mark.cufile
def test_open_and_close():
kvikio.cufile_driver.driver_open()
Expand Down
Loading