-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b59498f
commit d8505c9
Showing
140 changed files
with
33,009 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
cmake_minimum_required(VERSION 3.18) | ||
|
||
# Used for config file generation. | ||
# PROJECT_NAME is normally only defined after some project() call has run, so it being | ||
# undefined at this point indicates this file is the top-level CMakeLists.txt rather than | ||
# one pulled in by a parent project. NOT_SUBPROJECT later gates the install/export rules. | ||
if(NOT DEFINED PROJECT_NAME) | ||
set(NOT_SUBPROJECT ON) | ||
else() | ||
set(NOT_SUBPROJECT OFF) | ||
endif() | ||
|
||
# Command line options | ||
option(BUILD_EXAMPLES "Build examples" OFF) | ||
option(BUILD_TESTS "Build unit tests" OFF) | ||
option(BUILD_BENCHMARKS "Build benchmarks" OFF) | ||
option(BUILD_DOCS "Build documentation. Mutually exclusive with all other options" OFF) | ||
option(BUILD_32_BIT "Build with 32-bit indexing support" OFF) | ||
option(MULTI_GPU "Multi-GPU support" OFF) | ||
option(EN_VISUALIZATION "Enable visualization support" OFF) | ||
# option() takes (<variable> "<help text>" [value]); without a help string, OFF was parsed as the help text | ||
option(EN_CUTLASS "Enable CUTLASS support" OFF) | ||
# GPU_ARCH holds a semicolon-separated list rather than a boolean, so declare it as a STRING cache | ||
# entry instead of an option(). An empty/false value means "auto-detect". The DEFINED guard keeps | ||
# any value already provided on the command line or by a parent project. | ||
if(NOT DEFINED GPU_ARCH) | ||
  set(GPU_ARCH "" CACHE STRING "List of GPU architectures to build for, separated by semicolon") | ||
endif() | ||
|
||
# Building documentation is mutually exclusive with everything else, and doesn't require CUDA | ||
if (BUILD_DOCS) | ||
project(MATX_DOCS) | ||
add_subdirectory(docs) | ||
return() | ||
endif() | ||
|
||
# This needs to go after BUILD_DOCS check so it doesn't look for CUDA if we're just building docs | ||
project(MATX | ||
LANGUAGES CUDA CXX | ||
DESCRIPTION "A modern and efficient header-only C++ library for numerical computing on GPU" | ||
VERSION 0.0.13 | ||
HOMEPAGE_URL "https://github.com/NVIDIA/MatX") | ||
|
||
# In an upcoming CMake it will have the capability to auto-detect GPU architectures. For now, rapids-cmake has a utility | ||
# function to do it, so we grab that as a dependency. The user can optionally override GPU_ARCH to specify | ||
# their own | ||
# Fetch the rapids-cmake bootstrap script once per build tree. The download status is checked | ||
# because a failed file(DOWNLOAD) can leave an empty file behind, which include() would then | ||
# accept silently and defer the error to the include(rapids-*) calls that follow. | ||
set(MATX_RAPIDS_BOOTSTRAP "${CMAKE_BINARY_DIR}/RAPIDS.cmake") | ||
if(NOT EXISTS "${MATX_RAPIDS_BOOTSTRAP}") | ||
  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake | ||
       "${MATX_RAPIDS_BOOTSTRAP}" | ||
       STATUS MATX_RAPIDS_DOWNLOAD_STATUS) | ||
  # STATUS is a two-element list: numeric code (0 == success) followed by a message | ||
  list(GET MATX_RAPIDS_DOWNLOAD_STATUS 0 MATX_RAPIDS_DOWNLOAD_CODE) | ||
  if(NOT MATX_RAPIDS_DOWNLOAD_CODE EQUAL 0) | ||
    # Remove the partial/empty file so the next configure retries the download | ||
    file(REMOVE "${MATX_RAPIDS_BOOTSTRAP}") | ||
    message(FATAL_ERROR "Failed to download RAPIDS.cmake: ${MATX_RAPIDS_DOWNLOAD_STATUS}") | ||
  endif() | ||
endif() | ||
include("${MATX_RAPIDS_BOOTSTRAP}") | ||
|
||
include(rapids-cmake) | ||
include(rapids-cpm) | ||
include(rapids-export) | ||
include(rapids-find) | ||
|
||
rapids_cmake_write_version_file(include/version_config.h) | ||
|
||
# Figure out what GPU arch they're on if it's not specified. If we can't figure it out and it's not specified, fall back to 70;80 | ||
if(NOT GPU_ARCH) | ||
include(rapids-cuda) | ||
set(CMAKE_CUDA_ARCHITECTURES "NATIVE") | ||
rapids_cuda_init_architectures(MATX) | ||
if (NOT CMAKE_CUDA_ARCHITECTURES) | ||
message(STATUS "Tried to detect GPU architecture of current machine, but failed. Falling back to 70;80") | ||
set(CMAKE_CUDA_ARCHITECTURES "70;80") | ||
else() | ||
message(STATUS "GPU_ARCH not specified. Using GPU architectures of this machine for building (${CMAKE_CUDA_ARCHITECTURES})") | ||
endif() | ||
else() | ||
message(STATUS "Using GPU architectures ${GPU_ARCH}") | ||
set(CMAKE_CUDA_ARCHITECTURES ${GPU_ARCH}) | ||
endif() | ||
|
||
|
||
# MatX requires C++17 to build. Enforce on all libraries pulled in as well. | ||
# CMAKE_CXX_STANDARD / CMAKE_CUDA_STANDARD initialize the CXX_STANDARD / CUDA_STANDARD | ||
# properties of targets created after this point. | ||
set (CMAKE_CXX_STANDARD 17) | ||
set (CMAKE_CUDA_STANDARD 17) | ||
# CUDA_CXX_STANDARD is not a variable CMake itself reads; it is kept only in case other | ||
# project files reference it. | ||
set (CUDA_CXX_STANDARD 17) | ||
|
||
# CPM is required for all package management | ||
include(cmake/GetCPM.cmake) | ||
# Helper for selecting build type | ||
include(cmake/BuildType.cmake) | ||
|
||
rapids_find_package( | ||
CUDAToolkit 11.5 REQUIRED | ||
BUILD_EXPORT_SET matx-exports | ||
INSTALL_EXPORT_SET matx-exports) | ||
|
||
rapids_cpm_init() | ||
|
||
# Create our transitive target to pass build properties to external users and our own build environment | ||
add_library(matx INTERFACE) | ||
add_library(matx::matx ALIAS matx) | ||
target_include_directories(matx INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" | ||
"$<INSTALL_INTERFACE:include>") | ||
target_include_directories(matx INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/kernels>" | ||
"$<INSTALL_INTERFACE:include/kernels>") | ||
target_compile_features(matx INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>) | ||
target_compile_options(matx INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>) | ||
|
||
# Set flags for compiling tests faster | ||
set(MATX_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} --threads 0) | ||
# Add debug/line info when no build type is selected or for Debug builds. The expansion is | ||
# quoted so that an empty CMAKE_BUILD_TYPE still leaves STREQUAL with a left-hand operand | ||
# instead of producing a malformed if() condition. | ||
if (NOT CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "Debug") | ||
  set(MATX_CUDA_FLAGS ${MATX_CUDA_FLAGS} -g -lineinfo) | ||
endif() | ||
|
||
# Set preferred compiler warning flags | ||
set(WARN_FLAGS -Wall | ||
-Wextra | ||
-Werror all-warnings | ||
-Wcast-align | ||
-Wunused | ||
-Wconversion | ||
-Wno-unknown-pragmas | ||
-Wnon-virtual-dtor | ||
-Wshadow) | ||
|
||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") | ||
set(WARN_FLAGS ${WARN_FLAGS} | ||
-Wmisleading-indentation | ||
-Wduplicated-cond | ||
-Wduplicated-branches | ||
-Wlogical-op | ||
-Wnull-dereference | ||
-Wuseless-cast) | ||
endif() | ||
|
||
# CUTLASS slows down compile times when used, so leave it as optional for now | ||
if (EN_CUTLASS) | ||
include(cmake/GetCUTLASS.cmake) | ||
set (CUTLASS_INC ${cutlass_SOURCE_DIR}/include/ ${cutlass_SOURCE_DIR}/tools/util/include/) | ||
target_compile_definitions(matx INTERFACE ENABLE_CUTLASS=1) | ||
else() | ||
set (CUTLASS_INC "") | ||
target_compile_definitions(matx INTERFACE ENABLE_CUTLASS=0) | ||
endif() | ||
|
||
if (MULTI_GPU) | ||
include(cmake/FindNvshmem.cmake) | ||
find_package(Nvshmem REQUIRED) | ||
endif() | ||
|
||
# Find python3 and pybind11 for generating unit tests and benchmarks | ||
if (BUILD_BENCHMARKS OR BUILD_TESTS OR EN_VISUALIZATION OR BUILD_EXAMPLES) | ||
include(cmake/GetPyBind11.cmake) | ||
find_package(Python3 REQUIRED COMPONENTS Interpreter Development) | ||
find_package(pybind11 REQUIRED) | ||
|
||
# Check for python libs | ||
include(cmake/CheckPythonLibs.cmake) | ||
check_python_libs("numpy") | ||
|
||
# Required by pybind | ||
# https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater- | ||
# visibility-than-the-type-of-its-field-someclass-member-wattributes | ||
target_compile_options(matx INTERFACE -fvisibility=hidden) | ||
target_link_libraries(matx INTERFACE pybind11::embed) | ||
|
||
# Visualization requires Python libraries | ||
if (EN_VISUALIZATION) | ||
check_python_libs("plotly" "pandas") | ||
endif() | ||
endif() | ||
|
||
# Build config files if the user isn't adding this as a subdirectory. At this point our transitive target | ||
# should have all build properties needed based on the options passed in | ||
if (NOT_SUBPROJECT) | ||
include(GNUInstallDirs) | ||
include(CMakePackageConfigHelpers) | ||
|
||
install(TARGETS matx EXPORT matx-exports) | ||
install(DIRECTORY include/ DESTINATION include) | ||
install(FILES ${CMAKE_BINARY_DIR}/include/version_config.h DESTINATION include) | ||
|
||
set(doc_string | ||
[=[ | ||
Provide targets for MatX. | ||
|
||
[MatX](https://github.com/NVIDIA/MatX) provides a Python-like syntax for near-native speed | ||
numerical computing on NVIDIA GPUs. | ||
]=]) | ||
|
||
rapids_export( | ||
INSTALL matx | ||
EXPORT_SET matx-exports | ||
GLOBAL_TARGETS matx | ||
NAMESPACE matx:: | ||
DOCUMENTATION doc_string) | ||
|
||
# build export targets | ||
rapids_export( | ||
BUILD matx | ||
EXPORT_SET matx-exports | ||
GLOBAL_TARGETS matx | ||
NAMESPACE matx:: | ||
DOCUMENTATION doc_string) | ||
endif() | ||
|
||
|
||
|
||
# Select the index width macro from the BUILD_32_BIT option. The chosen macro (INDEX_32_BIT or | ||
# INDEX_64_BIT) is defined twice: directory-wide via add_definitions(), and on the matx | ||
# INTERFACE target so that anything linking matx receives it as well. | ||
if (BUILD_32_BIT) | ||
add_definitions(-DINDEX_32_BIT) | ||
target_compile_definitions(matx INTERFACE INDEX_32_BIT) | ||
else() | ||
add_definitions(-DINDEX_64_BIT) | ||
target_compile_definitions(matx INTERFACE INDEX_64_BIT) | ||
endif() | ||
|
||
if (BUILD_EXAMPLES) | ||
add_subdirectory(examples) | ||
endif() | ||
|
||
if (BUILD_BENCHMARKS) | ||
include(cmake/GetNVBench.cmake) | ||
add_subdirectory(bench) | ||
endif() | ||
|
||
if (BUILD_TESTS) | ||
include(cmake/GetGTest.cmake) | ||
add_subdirectory(test) | ||
endif() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
//////////////////////////////////////////////////////////////////////////////// | ||
// BSD 3-Clause License | ||
// | ||
// Copyright (c) 2021, NVIDIA Corporation | ||
// All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions are met: | ||
// | ||
// 1. Redistributions of source code must retain the above copyright notice, this | ||
// list of conditions and the following disclaimer. | ||
// | ||
// 2. Redistributions in binary form must reproduce the above copyright notice, | ||
// this list of conditions and the following disclaimer in the documentation | ||
// and/or other materials provided with the distribution. | ||
// | ||
// 3. Neither the name of the copyright holder nor the names of its | ||
// contributors may be used to endorse or promote products derived from | ||
// this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
///////////////////////////////////////////////////////////////////////////////// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# MatX - Matrix Primitives Library | ||
|
||
MatX is a modern C++ library for numerical computing on NVIDIA GPUs. Near-native performance can be achieved while using a simple syntax common in higher-level languages such as Python or MATLAB. | ||
|
||
 | ||
|
||
The above image shows the Python (Numpy) version of an FFT resampler next to the MatX version. The total runtimes of the NumPy version, CuPy version, | ||
and MatX version are shown below: | ||
|
||
* Python/Numpy: **4500ms** (Xeon(R) CPU E5-2698 v4 @ 2.20GHz) | ||
* CuPy: **10.6ms** (A100) | ||
* MatX: **2.54ms** (A100) | ||
|
||
While the code complexity and length are roughly the same, the MatX version shows a **1771x** speedup over the Numpy version, and is over **4x** faster than | ||
the CuPy version on the same GPU. | ||
|
||
Key features include: | ||
|
||
* :zap: MatX is fast. By using existing, optimized libraries as a backend, and efficient kernel generation when needed, no hand-optimizations | ||
are necessary | ||
|
||
* :open_hands: MatX is easy to learn. Users familiar with high-level languages will pick up the syntax quickly | ||
|
||
* :bookmark_tabs: MatX easily integrates with existing libraries and code | ||
|
||
* :sparkler: Visualize data from the GPU right on a web browser | ||
|
||
* :arrow_up_down: IO capabilities for reading/writing files | ||
|
||
# Requirements | ||
MatX is using bleeding edge features in the CUDA compilers and libraries. For this reason, a minimum of CUDA 11.4 and g++9 is required currently. | ||
|
||
# Documentation | ||
Documentation for MatX can be built locally as shown below. | ||
|
||
# Supported Data Types | ||
MatX supports all types that use standard C++ operators for math (+, -, etc). Unit tests are run against all common types shown below. | ||
|
||
* Integer: int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t | ||
* Floating Point: matxFp16 (fp16), matxBf16 (bfloat16), float, double | ||
* Complex: matxFp16Complex, matxBf16Complex, cuda::std::complex<float>, cuda::std::complex<double> | ||
|
||
Since CUDA half precision types (``__half`` and ``__nv_bfloat16``) do not support all C++ operators on the host side, MatX provides the ``matxFp16`` and | ||
``matxBf16`` types for scalars, and ``matxFp16Complex`` and ``matxBf16Complex`` for complex types. These wrappers are needed so that tensor | ||
views can be evaluated on both the host and device, regardless of CUDA or hardware support. When possible, the half types will use hardware- | ||
accelerated intrinsics automatically. Existing code using ``__half`` and ``__nv_bfloat16`` may be converted to the ``matx`` equivalent types directly | ||
and leverage all operators. | ||
|
||
# Building | ||
MatX is a header-only library that does not require compiling to be used in your applications. However, unit tests, benchmarks, | ||
and examples must be compiled. CPM is used as a package manager for CMake to download and configure any dependencies. If MatX is to | ||
be used in an air-gapped environment, CPM [can be configured](https://github.com/cpm-cmake/CPM.cmake#cpm_source_cache) to search locally for files. | ||
Depending on what options are enabled, compiling could take very long without parallelism enabled. Using the ``-j`` flag on ``make`` is | ||
suggested with the highest number your system will accommodate. | ||
|
||
To build all components, issue the standard cmake build commands in a cloned repo: | ||
|
||
``` | ||
mkdir build && cd build | ||
cmake -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DBUILD_EXAMPLES=ON .. | ||
make -j | ||
``` | ||
|
||
By default CMake will target the GPU architecture(s) of the system you're compiling on. If you wish to target other architectures, pass the | ||
``GPU_ARCH`` option with a semicolon-separated list of architectures to build for (e.g. ``-DGPU_ARCH="70;80"``). | ||
|
||
By default nothing is compiled. If you wish to compile certain options, use the CMake flags below with ON or OFF values: | ||
|
||
``` | ||
BUILD_TESTS | ||
BUILD_BENCHMARKS | ||
BUILD_EXAMPLES | ||
BUILD_DOCS | ||
``` | ||
|
||
For example, to disable unit test building: | ||
``` | ||
mkdir build && cd build | ||
cmake -DBUILD_TESTS=OFF .. | ||
make -j | ||
``` | ||
|
||
Note that if documentation is selected all other build options are off. This eases the dependencies needed to build documentation | ||
so large libraries such as CUDA don't need to be installed. | ||
|
||
## CMake | ||
MatX uses CMake as a first-class build generator, and therefore provides the proper config files to include into your own project. There are | ||
typically two ways to do this: adding a subdirectory and finding the package. | ||
|
||
### MatX As A Subdirectory | ||
Adding the subdirectory is useful if you include the MatX | ||
source into the directory structure of your project. Using this method, you can simply add the MatX directory: | ||
|
||
``` | ||
add_subdirectory(path/to/matx) | ||
``` | ||
|
||
### MatX Installed To The System | ||
The other option is to install MatX and use the configuration file provided after building. This is typically done in a way similar to what is | ||
shown below: | ||
|
||
``` | ||
cd /path/to/matx | ||
mkdir build && cd build | ||
cmake .. | ||
make && make install | ||
``` | ||
|
||
If you have the correct permissions, the headers and cmake packages will be installed on your system in the expected paths for your operating | ||
system. With the package installed you can use ``find_package`` as follows: | ||
|
||
``` | ||
find_package(matx CONFIG REQUIRED) | ||
``` | ||
|
||
An example of using this method can be found in the examples/cmake_sample_project directory | ||
|
||
### MatX CMake Targets | ||
Once either of the two methods above is done, you can use the transitive target ``matx::matx`` in your library inside of ``target_link_libraries``. | ||
MatX may add other optional targets in the future inside the matx:: namespace as well. | ||
|
||
# Unit Tests | ||
MatX contains a suite of unit tests to test functionality of the primitive functions, plus end-to-end tests of example code. | ||
MatX uses [pybind11](https://github.com/pybind/pybind11) to generate some of the unit test inputs and outputs. This avoids | ||
the need to store large test vector files in git; instead, they can be generated as needed. | ||
|
||
To run the unit tests, from the cmake build directory run: | ||
``` | ||
make test | ||
``` | ||
|
||
This will execute all unit tests defined. If you wish to execute a subset of tests, or run with different options, you | ||
may run test/matx_test directly with parameters defined by [Google Test](https://github.com/google/googletest). To run matx_test | ||
directly, you must be inside the build/test directory for the correct paths to be set. For example, | ||
to run only tests with the name FFT: | ||
|
||
``` | ||
cd build/test | ||
./matx_test --gtest_filter="*FFT*" | ||
``` | ||
|
||
# Quick Start | ||
A [quick start guide](docs/quickstart.rst) can be found in the docs directory |
Oops, something went wrong.