diff --git a/build.sh b/build.sh index ab3bd0e7a89..5136ebb3d3d 100755 --- a/build.sh +++ b/build.sh @@ -147,7 +147,7 @@ function buildLibCudfJniInDocker { -DCUDF_USE_ARROW_STATIC=ON \ -DCUDF_ENABLE_ARROW_S3=OFF \ -DBUILD_TESTS=OFF \ - -DPER_THREAD_DEFAULT_STREAM=ON \ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON \ -DRMM_LOGGING_LEVEL=OFF \ -DBUILD_SHARED_LIBS=OFF && \ cmake --build . --parallel ${PARALLEL_LEVEL} && \ @@ -274,7 +274,7 @@ if buildAll || hasArg libcudf; then -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ - -DPER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ ${CMAKE_ARGS} diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index e946a24bfeb..20bf2afa957 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -183,6 +183,7 @@ outputs: - test -f $PREFIX/include/cudf/lists/gather.hpp - test -f $PREFIX/include/cudf/lists/list_view.hpp - test -f $PREFIX/include/cudf/lists/lists_column_view.hpp + - test -f $PREFIX/include/cudf/lists/list_view.hpp - test -f $PREFIX/include/cudf/lists/sorting.hpp - test -f $PREFIX/include/cudf/lists/stream_compaction.hpp - test -f $PREFIX/include/cudf/merge.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6a08637dc11..e2fd8ce56ee 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -47,7 +47,14 @@ option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF) option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF) option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" ON) -option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) +option( + CUDF_USE_PER_THREAD_DEFAULT_STREAM + "Build cuDF with 
per-thread default stream, including passing the per-thread default + stream to external libraries." + OFF +) +option(PER_THREAD_DEFAULT_STREAM "[DEPRECATED] Build with per-thread default stream" OFF) +mark_as_advanced(FORCE PER_THREAD_DEFAULT_STREAM) option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF) # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking @@ -57,6 +64,16 @@ option(CUDA_ENABLE_LINEINFO # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) +# PER_THREAD_DEFAULT_STREAM will be replaced with CUDF_USE_PER_THREAD_DEFAULT_STREAM +if(PER_THREAD_DEFAULT_STREAM) + set(CUDF_USE_PER_THREAD_DEFAULT_STREAM ON) + message( + DEPRECATION + "CUDF: PER_THREAD_DEFAULT_STREAM is deprecated, and will be removed in a future release, + please use CUDF_USE_PER_THREAD_DEFAULT_STREAM instead." 
+ ) +endif() + message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}") @@ -64,7 +81,7 @@ message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}") message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}") message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}") message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}") -message(VERBOSE "CUDF: Build with per-thread default stream: ${PER_THREAD_DEFAULT_STREAM}") +message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}") message( VERBOSE "CUDF: Disable warnings generated from deprecated declarations: ${DISABLE_DEPRECATION_WARNING}" @@ -580,8 +597,10 @@ if(JITIFY_USE_CACHE) endif() # Per-thread default stream -if(PER_THREAD_DEFAULT_STREAM) - target_compile_definitions(cudf PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) +if(CUDF_USE_PER_THREAD_DEFAULT_STREAM) + target_compile_definitions( + cudf PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM CUDF_USE_PER_THREAD_DEFAULT_STREAM + ) endif() # Disable NVTX if necessary diff --git a/cpp/cmake/thirdparty/get_nvcomp.cmake b/cpp/cmake/thirdparty/get_nvcomp.cmake index d0007f93628..03213da7278 100644 --- a/cpp/cmake/thirdparty/get_nvcomp.cmake +++ b/cpp/cmake/thirdparty/get_nvcomp.cmake @@ -35,7 +35,7 @@ function(find_and_configure_nvcomp VERSION_MIN VERSION_MAX) endif() # Per-thread default stream - if(TARGET nvcomp AND PER_THREAD_DEFAULT_STREAM) + if(TARGET nvcomp AND CUDF_USE_PER_THREAD_DEFAULT_STREAM) target_compile_definitions(nvcomp PRIVATE CUDA_API_PER_THREAD_DEFAULT_STREAM) endif() endfunction() diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index 62d97081b75..9551b1f05df 100644 --- 
a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #pragma once #include +#include #include @@ -38,7 +39,7 @@ namespace avro { table_with_metadata read_avro( std::unique_ptr&& source, avro_reader_options const& options, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace avro diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 2174b688da2..79fcf4bd916 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ class reader { * @return The set of columns along with table metadata */ table_with_metadata read(orc_reader_options const& options, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream = cudf::default_stream_value); }; /** diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 9af2e3f278d..a88dddb8dd0 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ class reader { * @return The set of columns along with table metadata */ table_with_metadata read(parquet_reader_options const& options, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream = cudf::default_stream_value); }; /** diff --git a/cpp/include/cudf/utilities/default_stream.hpp b/cpp/include/cudf/utilities/default_stream.hpp index 3d031f09837..94bc01787e3 100644 --- a/cpp/include/cudf/utilities/default_stream.hpp +++ b/cpp/include/cudf/utilities/default_stream.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,22 @@ #pragma once +#include + namespace cudf { +/** + * @brief Default stream for cudf + * + * Use this value to ensure the correct stream is used when compiled with per + * thread default stream. + */ +#if defined(CUDF_USE_PER_THREAD_DEFAULT_STREAM) +static const rmm::cuda_stream_view default_stream_value{rmm::cuda_stream_per_thread}; +#else +static constexpr rmm::cuda_stream_view default_stream_value{}; +#endif + /** * @brief Check if per-thread default stream is enabled. 
* diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index e4ee37e6cee..9159990f7bb 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -155,7 +156,7 @@ table_with_metadata read_avro(avro_reader_options const& options, CUDF_EXPECTS(datasources.size() == 1, "Only a single source is currently supported."); - return avro::read_avro(std::move(datasources[0]), options, rmm::cuda_stream_default, mr); + return avro::read_avro(std::move(datasources[0]), options, cudf::default_stream_value, mr); } compression_type infer_compression_type(compression_type compression, source_info const& info) @@ -345,7 +346,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_ auto datasources = make_datasources(options.get_source()); auto reader = std::make_unique( - std::move(datasources), options, rmm::cuda_stream_default, mr); + std::move(datasources), options, cudf::default_stream_value, mr); return reader->read(options); } @@ -363,7 +364,7 @@ void write_orc(orc_writer_options const& options, rmm::mr::device_memory_resourc CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); auto writer = std::make_unique( - std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, rmm::cuda_stream_default, mr); + std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::default_stream_value, mr); writer->write(options.get_table()); } @@ -380,7 +381,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); writer = std::make_unique( - std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, rmm::cuda_stream_default, mr); + std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::default_stream_value, mr); } /** @@ -455,7 +456,7 @@ std::unique_ptr> write_parquet(parquet_writer_options const auto sinks = 
make_datasinks(options.get_sink()); auto writer = std::make_unique( - std::move(sinks), options, io_detail::SingleWriteMode::YES, rmm::cuda_stream_default, mr); + std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::default_stream_value, mr); writer->write(options.get_table(), options.get_partitions()); @@ -473,7 +474,7 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co auto sinks = make_datasinks(options.get_sink()); writer = std::make_unique( - std::move(sinks), options, io_detail::SingleWriteMode::NO, rmm::cuda_stream_default, mr); + std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::default_stream_value, mr); } /** diff --git a/java/README.md b/java/README.md index ea1b9e3e4e4..05a24c1d3d3 100644 --- a/java/README.md +++ b/java/README.md @@ -101,7 +101,7 @@ Since the PTDS option is for each compilation unit, it should be done at the sam whole codebase. To enable PTDS, first build cuDF: ```shell script cd src/cudf/cpp/build -cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON +cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON make -j`nproc` make install ``` @@ -109,7 +109,7 @@ make install then build the jar: ```shell script cd src/cudf/java -mvn clean install -DPER_THREAD_DEFAULT_STREAM=ON +mvn clean install -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON ``` ## GPUDirect Storage (GDS) diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index d21010ba30e..ee11922bfcf 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -61,7 +61,7 @@ cmake .. -G"${CMAKE_GENERATOR}" \ -DCUDF_USE_ARROW_STATIC=ON \ -DCUDF_ENABLE_ARROW_S3=OFF \ -DBUILD_TESTS=$BUILD_CPP_TESTS \ - -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS \ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS \ -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL \ -DBUILD_SHARED_LIBS=OFF @@ -75,7 +75,7 @@ cmake --install . 
###### Build cudf jar ###### BUILD_ARG="-Dmaven.repo.local=\"$WORKSPACE/.m2\"\ -DskipTests=$SKIP_JAVA_TESTS\ - -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS\ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS\ -DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME\ -DCUDF_JNI_LIBCUDF_STATIC=ON\ -DUSE_GDS=$ENABLE_GDS -Dtest=*,!CuFileTest" diff --git a/java/pom.xml b/java/pom.xml index 31a79ec9801..be6a63b5cba 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -165,6 +165,7 @@ OFF OFF OFF + ${PER_THREAD_DEFAULT_STREAM} OFF ALL OFF @@ -385,7 +386,7 @@ - + diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 3a375412bbd..5d349b0c383 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -38,15 +38,29 @@ project( option(USE_NVTX "Build with NVTX support" ON) option(BUILD_SHARED_LIBS "Build cuDF JNI shared libraries" ON) option(BUILD_TESTS "Configure CMake to build tests" ON) -option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) +option(PER_THREAD_DEFAULT_STREAM "[DEPRECATED] Build with per-thread default stream" OFF) +mark_as_advanced(FORCE PER_THREAD_DEFAULT_STREAM) +option(CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF) option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF) +# PER_THREAD_DEFAULT_STREAM will be replaced with CUDF_USE_PER_THREAD_DEFAULT_STREAM +if(PER_THREAD_DEFAULT_STREAM) + set(CUDF_USE_PER_THREAD_DEFAULT_STREAM ON) + message( + DEPRECATION + "CUDF: PER_THREAD_DEFAULT_STREAM is deprecated, and will be removed in a future release, + please use CUDF_USE_PER_THREAD_DEFAULT_STREAM instead." 
+ ) +endif() + message(VERBOSE "CUDF_JNI: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF_JNI: Build cuDF JNI shared libraries: ${BUILD_SHARED_LIBS}") message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}") -message(VERBOSE "CUDF_JNI: Build with per-thread default stream: ${PER_THREAD_DEFAULT_STREAM}") +message(VERBOSE + "CUDF_JNI: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}" +) message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}") message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}") @@ -80,9 +94,9 @@ if(NOT USE_NVTX) target_compile_definitions(cudfjni PUBLIC NVTX_DISABLE) endif() -if(PER_THREAD_DEFAULT_STREAM) +if(CUDF_USE_PER_THREAD_DEFAULT_STREAM) message(STATUS "Using per-thread default stream") - add_compile_definitions(CUDA_API_PER_THREAD_DEFAULT_STREAM) + add_compile_definitions(CUDA_API_PER_THREAD_DEFAULT_STREAM CUDF_USE_PER_THREAD_DEFAULT_STREAM) endif() # ##################################################################################################