Initial commit from internal repo
cliffburdick committed Oct 25, 2021
1 parent b59498f commit d8505c9
Showing 140 changed files with 33,009 additions and 0 deletions.
CMakeLists.txt (213 additions, 0 deletions)
@@ -0,0 +1,213 @@
cmake_minimum_required(VERSION 3.18)

# Used for config file generation
if(NOT DEFINED PROJECT_NAME)
set(NOT_SUBPROJECT ON)
else()
set(NOT_SUBPROJECT OFF)
endif()

# Command line options
option(BUILD_EXAMPLES "Build examples" OFF)
option(BUILD_TESTS "Build unit tests" OFF)
option(BUILD_BENCHMARKS "Build benchmarks" OFF)
option(BUILD_DOCS "Build documentation. Mutually exclusive with all other options" OFF)
option(BUILD_32_BIT "Build with 32-bit indexing support" OFF)
option(MULTI_GPU "Multi-GPU support" OFF)
option(EN_VISUALIZATION "Enable visualization support" OFF)
option(EN_CUTLASS "Enable CUTLASS support" OFF)
option(GPU_ARCH "List of GPU architectures to build for, separated by semicolon" OFF)

# Building documentation is mutually exclusive with everything else, and doesn't require CUDA
if (BUILD_DOCS)
project(MATX_DOCS)
add_subdirectory(docs)
return()
endif()

# This needs to go after BUILD_DOCS check so it doesn't look for CUDA if we're just building docs
project(MATX
LANGUAGES CUDA CXX
DESCRIPTION "A modern and efficient header-only C++ library for numerical computing on GPU"
VERSION 0.0.13
HOMEPAGE_URL "https://github.com/NVIDIA/MatX")

# An upcoming CMake release will be able to auto-detect GPU architectures. For now, rapids-cmake has a utility
# function to do it, so we grab that as a dependency. The user can optionally set GPU_ARCH to specify
# their own architectures
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake
${CMAKE_BINARY_DIR}/RAPIDS.cmake)
include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)

include(rapids-cmake)
include(rapids-cpm)
include(rapids-export)
include(rapids-find)

rapids_cmake_write_version_file(include/version_config.h)

# Figure out what GPU architecture they're on if it's not specified. If we can't detect it, fall back to 70;80
if(NOT GPU_ARCH)
include(rapids-cuda)
set(CMAKE_CUDA_ARCHITECTURES "NATIVE")
rapids_cuda_init_architectures(MATX)
if (NOT CMAKE_CUDA_ARCHITECTURES)
message(STATUS "Tried to detect GPU architecture of current machine, but failed. Falling back to 70;80")
set(CMAKE_CUDA_ARCHITECTURES "70;80")
else()
message(STATUS "GPU_ARCH not specified. Using GPU architectures of this machine for building (${CMAKE_CUDA_ARCHITECTURES})")
endif()
else()
message(STATUS "Using GPU architectures ${GPU_ARCH}")
set(CMAKE_CUDA_ARCHITECTURES ${GPU_ARCH})
endif()


# MatX requires C++17 to build. Enforce on all libraries pulled in as well
set (CMAKE_CXX_STANDARD 17)
set (CUDA_CXX_STANDARD 17)

# CPM is required for all package management
include(cmake/GetCPM.cmake)
# Helper for selecting build type
include(cmake/BuildType.cmake)

rapids_find_package(
CUDAToolkit 11.5 REQUIRED
BUILD_EXPORT_SET matx-exports
INSTALL_EXPORT_SET matx-exports)

rapids_cpm_init()

# Create our transitive target to pass build properties to external users and our own build environment
add_library(matx INTERFACE)
add_library(matx::matx ALIAS matx)
target_include_directories(matx INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>")
target_include_directories(matx INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/kernels>"
"$<INSTALL_INTERFACE:include/kernels>")
target_compile_features(matx INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
target_compile_options(matx INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

# Set flags for compiling tests faster
set(MATX_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} --threads 0)
if (NOT CMAKE_BUILD_TYPE OR ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(MATX_CUDA_FLAGS ${MATX_CUDA_FLAGS} -g -lineinfo)
endif()

# Set preferred compiler warning flags
set(WARN_FLAGS -Wall
-Wextra
-Werror all-warnings
-Wcast-align
-Wunused
-Wconversion
-Wno-unknown-pragmas
-Wnon-virtual-dtor
-Wshadow)

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(WARN_FLAGS ${WARN_FLAGS}
-Wmisleading-indentation
-Wduplicated-cond
-Wduplicated-branches
-Wlogical-op
-Wnull-dereference
-Wuseless-cast)
endif()

# CUTLASS slows down compile times when used, so leave it as optional for now
if (EN_CUTLASS)
include(cmake/GetCUTLASS.cmake)
set (CUTLASS_INC ${cutlass_SOURCE_DIR}/include/ ${cutlass_SOURCE_DIR}/tools/util/include/)
target_compile_definitions(matx INTERFACE ENABLE_CUTLASS=1)
else()
set (CUTLASS_INC "")
target_compile_definitions(matx INTERFACE ENABLE_CUTLASS=0)
endif()

if (MULTI_GPU)
include(cmake/FindNvshmem.cmake)
find_package(Nvshmem REQUIRED)
endif()

# Find python3 and pybind11 for generating unit tests and benchmarks
if (BUILD_BENCHMARKS OR BUILD_TESTS OR EN_VISUALIZATION OR BUILD_EXAMPLES)
include(cmake/GetPyBind11.cmake)
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
find_package(pybind11 REQUIRED)

# Check for python libs
include(cmake/CheckPythonLibs.cmake)
check_python_libs("numpy")

# Required by pybind
# https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes
target_compile_options(matx INTERFACE -fvisibility=hidden)
target_link_libraries(matx INTERFACE pybind11::embed)

# Visualization requires Python libraries
if (EN_VISUALIZATION)
check_python_libs("plotly" "pandas")
endif()
endif()

# Build config files if the user isn't adding this as a subdirectory. At this point our transitive target
# should have all build properties needed based on the options passed in
if (NOT_SUBPROJECT)
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

install(TARGETS matx EXPORT matx-exports)
install(DIRECTORY include/ DESTINATION include)
install(FILES ${CMAKE_BINARY_DIR}/include/version_config.h DESTINATION include)

set(doc_string
[=[
Provide targets for MatX.

[MatX](https://github.com/NVIDIA/MatX) provides a Python-like syntax for near-native speed
numerical computing on NVIDIA GPUs.
]=])

rapids_export(
INSTALL matx
EXPORT_SET matx-exports
GLOBAL_TARGETS matx
NAMESPACE matx::
DOCUMENTATION doc_string)

# build export targets
rapids_export(
BUILD matx
EXPORT_SET matx-exports
GLOBAL_TARGETS matx
NAMESPACE matx::
DOCUMENTATION doc_string)
endif()



if (BUILD_32_BIT)
add_definitions(-DINDEX_32_BIT)
target_compile_definitions(matx INTERFACE INDEX_32_BIT)
else()
add_definitions(-DINDEX_64_BIT)
target_compile_definitions(matx INTERFACE INDEX_64_BIT)
endif()

if (BUILD_EXAMPLES)
add_subdirectory(examples)
endif()

if (BUILD_BENCHMARKS)
include(cmake/GetNVBench.cmake)
add_subdirectory(bench)
endif()

if (BUILD_TESTS)
include(cmake/GetGTest.cmake)
add_subdirectory(test)
endif()

LICENSE (31 additions, 0 deletions)
@@ -0,0 +1,31 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2021, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////
README.md (143 additions, 0 deletions)
@@ -0,0 +1,143 @@
# MatX - Matrix Primitives Library

MatX is a modern C++ library for numerical computing on NVIDIA GPUs. Near-native performance can be achieved while using a simple syntax common in higher-level languages such as Python or MATLAB.

![FFT resampler](docs/img/fft_resamp.PNG)

The above image shows the Python (NumPy) version of an FFT resampler next to the MatX version. The total runtimes of the NumPy, CuPy,
and MatX versions are shown below:

* Python/Numpy: **4500ms** (Xeon(R) CPU E5-2698 v4 @ 2.20GHz)
* CuPy: **10.6ms** (A100)
* MatX: **2.54ms** (A100)

While the code complexity and length are roughly the same, the MatX version shows a **1771x** speedup over the NumPy version and is over **4x** faster than
the CuPy version on the same GPU.

Key features include:

* :zap: MatX is fast. It uses existing, optimized libraries as a backend and generates efficient kernels when needed, so no hand-optimization
is necessary

* :open_hands: MatX is easy to learn. Users familiar with high-level languages will pick up the syntax quickly

* :bookmark_tabs: MatX easily integrates with existing libraries and code

* :sparkler: Visualize data from the GPU right in a web browser

* :arrow_up_down: IO capabilities for reading/writing files

# Requirements
MatX uses bleeding-edge features in the CUDA compilers and libraries. For this reason, CUDA 11.4 and g++ 9 are currently the minimum required versions.

# Documentation
Documentation for MatX can be built locally by enabling the ``BUILD_DOCS`` CMake option, as shown below.
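
A minimal sketch of a documentation-only build (the exact docs target generated may vary):

```
mkdir build && cd build
cmake -DBUILD_DOCS=ON ..
make
```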

# Supported Data Types
MatX supports all types that use standard C++ operators for math (+, -, etc). Unit tests are run against all common types shown below.

* Integer: int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
* Floating Point: matxFp16 (fp16), matxBf16 (bfloat16), float, double
* Complex: matxFp16Complex, matxBf16Complex, cuda::std::complex<float>, cuda::std::complex<double>

Since CUDA half precision types (``__half`` and ``__nv_bfloat16``) do not support all C++ operators on the host side, MatX provides the ``matxFp16`` and
``matxBf16`` types for scalars, and ``matxFp16Complex`` and ``matxBf16Complex`` for complex types. These wrappers are needed so that tensor
views can be evaluated on both the host and device, regardless of CUDA or hardware support. When possible, the half types will use
hardware-accelerated intrinsics automatically. Existing code using ``__half`` and ``__nv_bfloat16`` may be converted to the ``matx`` equivalent types
directly and leverage all operators.

# Building
MatX is a header-only library that does not require compilation for use in your applications. However, unit tests, benchmarks,
and examples must be compiled. CPM is used as a package manager for CMake to download and configure any dependencies. If MatX is to
be used in an air-gapped environment, CPM [can be configured](https://github.com/cpm-cmake/CPM.cmake#cpm_source_cache) to search locally for files.
Depending on which options are enabled, compiling can take a very long time without parallelism. Using the ``-j`` flag on ``make`` is
suggested with the highest number your system will accommodate.
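
As a sketch, CPM's source cache can be pointed at a pre-populated local directory at configure time (the path below is hypothetical):

```
cmake -DCPM_SOURCE_CACHE=/path/to/cpm-cache ..
```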

To build all components, issue the standard cmake build commands in a cloned repo:

```
mkdir build && cd build
cmake -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DBUILD_EXAMPLES=ON ..
make -j
```

By default CMake will target the GPU architecture(s) of the system you're compiling on. If you wish to target other architectures, pass the
CMAKE_CUDA_ARCHITECTURES flag with a list of architectures to build for.
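
For example, the architecture list can be supplied at configure time through the ``GPU_ARCH`` option defined in the top-level CMakeLists.txt, which forwards it to ``CMAKE_CUDA_ARCHITECTURES``:

```
cmake -DGPU_ARCH="70;80" ..
```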

By default nothing is compiled. If you wish to compile certain options, use the CMake flags below with ON or OFF values:

```
BUILD_TESTS
BUILD_BENCHMARKS
BUILD_EXAMPLES
BUILD_DOCS
```

For example, to disable unit test building:
```
mkdir build && cd build
cmake -DBUILD_TESTS=OFF ..
make -j
```

Note that if documentation is selected, all other build options are turned off. This reduces the dependencies needed to build the documentation,
so large libraries such as CUDA don't need to be installed.

## CMake
MatX uses CMake as a first-class build generator, and therefore provides the proper config files to include in your own project. There are
typically two ways to do this: adding MatX as a subdirectory or finding the installed package.

### MatX As A Subdirectory
Adding the subdirectory is useful if you include the MatX
source into the directory structure of your project. Using this method, you can simply add the MatX directory:

```
add_subdirectory(path/to/matx)
```

### MatX Installed To The System
The other option is to install MatX and use the configuration file provided after building. This is typically done in a way similar to what is
shown below:

```
cd /path/to/matx
mkdir build && cd build
cmake ..
make && make install
```

If you have the correct permissions, the headers and cmake packages will be installed on your system in the expected paths for your operating
system. With the package installed you can use ``find_package`` as follows:

```
find_package(matx CONFIG REQUIRED)
```

An example of using this method can be found in the ``examples/cmake_sample_project`` directory.
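
If MatX was installed to a non-standard prefix, CMake may need to be told where to look for the package; a hypothetical example:

```
cmake -DCMAKE_PREFIX_PATH=/opt/matx ..
```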

### MatX CMake Targets
Once either of the two methods above is done, you can link the transitive target ``matx::matx`` into your library or executable with ``target_link_libraries``.
MatX may add other optional targets in the future inside the matx:: namespace as well.
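
A minimal sketch of linking against MatX (the target and source file names are hypothetical):

```
find_package(matx CONFIG REQUIRED)   # or add_subdirectory(path/to/matx)
add_executable(my_app main.cu)
target_link_libraries(my_app PRIVATE matx::matx)
```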

# Unit Tests
MatX contains a suite of unit tests to test functionality of the primitive functions, plus end-to-end tests of example code.
MatX uses [pybind11](https://github.com/pybind/pybind11) to generate some of the unit test inputs and outputs. This avoids
the need to store large test vector files in git; instead, they are generated as needed.

To run the unit tests, from the cmake build directory run:
```
make test
```

This will execute all unit tests defined. If you wish to execute a subset of tests, or run with different options, you
may run test/matx_test directly with parameters defined by [Google Test](https://github.com/google/googletest). To run matx_test
directly, you must be inside the build/test directory for the correct paths to be set. For example,
to run only tests with the name FFT:

```
cd build/test
./matx_test --gtest_filter="*FFT*"
```
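
Equivalently, the whole suite can be driven through CTest from the CMake build directory (a sketch):

```
ctest --output-on-failure
```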

# Quick Start
A [quick start guide](docs/quickstart.rst) can be found in the docs directory.