Skip to content

Commit

Permalink
Docs: File headers
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Jan 16, 2025
1 parent 37c0581 commit e1ac216
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 8 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
"blas",
"CCCL",
"constexpr",
"cublas",
"CUDA",
"Kahan",
"openmp",
Expand Down
13 changes: 10 additions & 3 deletions reduce_bench.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* @date 04/09/2019
* @file reduce_bench.cpp
* @brief Benchmarking parallel reductions
* @author Ash Vardanian
*/
#include <cstdlib> // Accessing environment variables
#include <new> // `std::launder`

Expand Down Expand Up @@ -128,17 +134,18 @@ int main(int argc, char **argv) {
->UseRealTime();
#endif // defined(__AVX512F__)

// CUDA
// CUDA
#if defined(__CUDACC__)
if (cuda_device_count()) {
bm::RegisterBenchmark("cub@cuda", &make<cuda_cub_t>)->MinTime(10)->UseRealTime();
bm::RegisterBenchmark("warps@cuda", &make<cuda_warps_t>)->MinTime(10)->UseRealTime();
bm::RegisterBenchmark("thrust@cuda", &make<cuda_thrust_t>)->MinTime(10)->UseRealTime();
} else
} else {
fmt::print("No CUDA capable devices found!\n");
}
#endif

// OpenCL
// OpenCL
#if defined(__OPENCL__)
for (auto tgt : ocl_targets) {
for (auto kernel_name : opencl_t::kernels_k) {
Expand Down
6 changes: 6 additions & 0 deletions reduce_cpu.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* @date 04/09/2019
* @file reduce_cpu.hpp
* @brief Parallel reduction with SIMD and multicore acceleration
* @author Ash Vardanian
*/
#pragma once
#include <cstring> // `std::memcpy`
#include <execution> // `std::execution::par_unseq`
Expand Down
6 changes: 6 additions & 0 deletions reduce_cublas.cuh
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* @date 04/09/2019
* @file reduce_cublas.cuh
* @brief cuBLAS-based reductions
* @author Ash Vardanian
*/
#pragma once
#include <cublas_api.h>
#include <cuda_runtime_api.h>
Expand Down
6 changes: 6 additions & 0 deletions reduce_cuda.cuh
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* @date 04/09/2019
* @file reduce_cuda.cuh
* @brief Pure CUDA, CUB, and Thrust-based reductions
* @author Ash Vardanian
*/
#pragma once
#include <cuda_runtime_api.h>
#include <mma.h> // `wmma::`
Expand Down
11 changes: 6 additions & 5 deletions reduce_opencl.cl
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// Project: SandboxGPUs.
// Author: Ash Vardanian.
// Created: 04/09/2019.
// Copyright: Check "License" file.
//
/**
* @date 04/09/2019
* @file reduce_opencl.cl
* @brief OpenCL kernels for reduction algorithms
* @author Ash Vardanian
*/

/**
* Most of the algorithms here have the following properties:
Expand Down
6 changes: 6 additions & 0 deletions reduce_opencl.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* @date 04/09/2019
* @file reduce_opencl.hpp
* @brief OpenCL host code for parallel reductions
* @author Ash Vardanian
*/
#pragma once
#include <fstream>
#include <sstream>
Expand Down

0 comments on commit e1ac216

Please sign in to comment.