Skip to content

Commit

Permalink
Merge pull request cms-sw#17 from jsamudio/dev_PFClusterPortableColle…
Browse files Browse the repository at this point in the history
…ction

Implement CUDA Portable Collection for PF Clusters
  • Loading branch information
jsamudio authored Jun 23, 2023
2 parents d3eb59b + 35bd2b2 commit e4b3841
Show file tree
Hide file tree
Showing 15 changed files with 1,445 additions and 419 deletions.
103 changes: 103 additions & 0 deletions CUDADataFormats/Common/interface/PortableCollectionCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#ifndef CUDADataFormats_Common_interface_PortableCollectionCommon_h
#define CUDADataFormats_Common_interface_PortableCollectionCommon_h

#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <utility>

namespace portablecollection {

// Compile-time loop: invokes f once for every index Start, Start + Inc, ... that is smaller than End.
// Each call receives the index as a std::integral_constant<std::size_t, I>, so the callee may use it
// wherever a constant expression is required (e.g. as a template argument).
// Note: if there are other uses for this, it could be moved to a central place
template <std::size_t Start, std::size_t End, std::size_t Inc = 1, typename F>
constexpr void constexpr_for(F&& f) {
  if constexpr (Start >= End) {
    // range exhausted: nothing left to call
    return;
  } else {
    using Index = std::integral_constant<std::size_t, Start>;
    f(Index{});
    // recurse on the remaining indices, handing the same callable along
    constexpr_for<Start + Inc, End, Inc>(std::forward<F>(f));
  }
}

// One element of a multi-layout collection: holds the layout of type T found at position Idx,
// together with a view built from that layout.
//
// Template parameters:
//   Idx  position of this leaf inside the collection (also the index used to pick its size)
//   T    layout type; must provide nested View and ConstView types, a (std::byte*, int32_t)
//        constructor, and View must be constructible from the layout
template <std::size_t Idx, typename T>
struct CollectionLeaf {
  // Make sure types are not void.
  static_assert(not std::is_same<T, void>::value);

  using Layout = T;
  using View = typename Layout::View;
  using ConstView = typename Layout::ConstView;

  CollectionLeaf() = default;

  // Build the layout over `buffer` with `elements` elements, then the view over that layout.
  CollectionLeaf(std::byte* buffer, int32_t elements) : layout_(buffer, elements), view_(layout_) {}

  // Same as above, but the number of elements is taken from sizes[Idx].
  template <std::size_t N>
  CollectionLeaf(std::byte* buffer, std::array<int32_t, N> const& sizes)
      : layout_(buffer, sizes[Idx]), view_(layout_) {
    // sizes[Idx] is read above, so the array needs at least Idx + 1 entries
    // (N == Idx would index one past the end).
    static_assert(N > Idx, "the sizes array has no entry for this leaf");
  }

  // Declaration order matters: view_ is initialised from layout_.
  // The trailing `//` and `//!` markers are kept verbatim
  // (presumably ROOT dictionary annotations - confirm before changing them).
  Layout layout_;  //
  View view_;      //!
};

// Recursive case: the leaf for (Idx, T) followed by the implementation of the remaining types.
// The bases are initialised in declaration order, so the leaf's layout is already built when the
// remaining types are constructed starting at the byte following it.
template <std::size_t Idx, typename T, typename... Args>
struct CollectionImpl : public CollectionLeaf<Idx, T>, public CollectionImpl<Idx + 1, Args...> {
private:
  using Head = CollectionLeaf<Idx, T>;
  using Tail = CollectionImpl<Idx + 1, Args...>;

public:
  CollectionImpl() = default;

  // Single-size form: only this leaf is built over the buffer; the tail is default-constructed.
  CollectionImpl(std::byte* buffer, int32_t elements) : Head(buffer, elements) {}

  // Array form: this leaf uses sizes[Idx]; the tail starts where this leaf's layout ends.
  template <std::size_t N>
  CollectionImpl(std::byte* buffer, std::array<int32_t, N> const& sizes)
      : Head(buffer, sizes), Tail(Head::layout_.metadata().nextByte(), sizes) {}
};

// Terminal case: the last type of the list, nothing follows it.
template <std::size_t Idx, typename T>
struct CollectionImpl<Idx, T> : public CollectionLeaf<Idx, T> {
private:
  using Head = CollectionLeaf<Idx, T>;

public:
  CollectionImpl() = default;

  CollectionImpl(std::byte* buffer, int32_t elements) : Head(buffer, elements) {}

  template <std::size_t N>
  CollectionImpl(std::byte* buffer, std::array<int32_t, N> const& sizes) : Head(buffer, sizes) {
    // the last leaf must consume the last entry of the sizes array
    static_assert(N == Idx + 1);
  }
};

// Convenience entry point: the CollectionImpl chain for Layouts..., numbered from index 0.
template <typename... Layouts>
struct Collections : CollectionImpl<0, Layouts...> {};

// alias for the type found at the (zero-based) position Idx in Args...
template <std::size_t Idx, typename... Args>
using TypeResolver = std::tuple_element_t<Idx, std::tuple<Args...>>;

// number of elements of Args... that are exactly the type T (0 for an empty pack)
template <typename T, typename... Args>
inline constexpr std::size_t typeCount = (std::size_t{0} + ... + static_cast<std::size_t>(std::is_same_v<T, Args>));

// count the elements of Args...
// (every element is counted: this is plain sizeof...(Args), no filtering of void is done here)
template <typename... Args>
inline constexpr std::size_t membersCount = sizeof...(Args);

// TupleTypeIndex<T, std::tuple<Args...>>::value is the zero-based position of T in Args... .
//
// T is required to occur exactly once:
//   - if the tuple is not empty and T is missing or repeated, a static_assert fires (hard error,
//     not SFINAE-friendly);
//   - only when the primary template is selected (e.g. an empty std::tuple<>) is there simply no
//     `value` member.
template <typename T, typename Tuple>
struct TupleTypeIndex {};

// T is the first element: it must not occur again in the remaining ones.
template <typename T, typename... Args>
struct TupleTypeIndex<T, std::tuple<T, Args...>> {
  static_assert(typeCount<T, Args...> == 0, "the requested type appears more than once among the arguments");
  static constexpr std::size_t value = 0;
};

// T is not the first element: it must occur exactly once in the remaining ones.
template <typename T, typename U, typename... Args>
struct TupleTypeIndex<T, std::tuple<U, Args...>> {
  static_assert(not std::is_same_v<T, U>);
  // two separate checks, so that the diagnostic names the actual problem
  static_assert(typeCount<T, Args...> != 0, "the requested type does not appear among the arguments");
  static_assert(typeCount<T, Args...> <= 1, "the requested type appears more than once among the arguments");
  static constexpr std::size_t value = 1 + TupleTypeIndex<T, std::tuple<Args...>>::value;
};

// Shorthand that packs Args... into a std::tuple and forwards to TupleTypeIndex.
// If the type T occurs exactly once in Args..., TypeIndex has a static member value with the corresponding index.
// For an empty Args... there is no such data member; for a non-empty Args... where T is missing or repeated,
// the static_asserts inside TupleTypeIndex fail when the type is instantiated.
template <typename T, typename... Args>
using TypeIndex = TupleTypeIndex<T, std::tuple<Args...>>;

// return the (zero-based) index where the type T occurs in Args...
// (reads TypeIndex<T, Args...>::value, so it does not compile when that member does not exist)
template <typename T, typename... Args>
inline constexpr std::size_t typeIndex = TypeIndex<T, Args...>::value;

} // namespace portablecollection

#endif // CUDADataFormats_Common_interface_PortableCollectionCommon_h
207 changes: 207 additions & 0 deletions CUDADataFormats/Common/interface/PortableDeviceCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

#include <cassert>
#include <cstdlib>
#include <optional>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "CUDADataFormats/Common/interface/PortableCollectionCommon.h"

namespace cms::cuda {

Expand Down Expand Up @@ -62,6 +64,211 @@ namespace cms::cuda {
View view_; //!
};

//generic SoA-Based product in device memory
template <typename T0, typename... Args>
class PortableDeviceMultiCollection {
template <typename T>
static constexpr std::size_t count_t_ = portablecollection::typeCount<T, T0, Args...>;

template <typename T>
static constexpr std::size_t index_t_ = portablecollection::typeIndex<T, T0, Args...>;

static constexpr std::size_t members_ = sizeof...(Args) + 1;

public:
using Buffer = cms::cuda::device::unique_ptr<std::byte[]>;
using Implementation = portablecollection::CollectionImpl<0, T0, Args...>;

using SizesArray = std::array<int32_t, members_>;

template <std::size_t Idx = 0>
using Layout = portablecollection::TypeResolver<Idx, T0, Args...>;

template <std::size_t Idx = 0UL>
using View = typename std::tuple_element<Idx, std::tuple<T0, Args...>>::type::View;

template <std::size_t Idx = 0UL>
using ConstView = typename std::tuple_element<Idx, std::tuple<T0, Args...>>::type::ConstView;

private:
template <std::size_t Idx>
using Leaf = portablecollection::CollectionLeaf<Idx, Layout<Idx>>;

template <std::size_t Idx>
Leaf<Idx>& get() {
return static_cast<Leaf<Idx>&>(impl_);
}

template <std::size_t Idx>
Leaf<Idx> const& get() const {
return static_cast<Leaf<Idx> const&>(impl_);
}

template <typename T>
Leaf<index_t_ <T>>& get() {
return static_cast<Leaf<index_t_<T>>&>(impl_);
}

template <typename T>
Leaf<index_t_<T>> const& get() const {
return static_cast<Leaf<index_t_<T>> const&>(impl_);
}

public:
PortableDeviceMultiCollection() = default;

PortableDeviceMultiCollection(int32_t elements, cudaStream_t stream)
: buffer_{cms::cuda::make_device_unique<std::byte[]>(Layout<>::computeDataSize(elements), stream)},
impl_{buffer_.get(), elements} {
assert(reinterpret_cast<uintptr_t>(buffer_.get()) % Layout<>::alignment == 0);
static_assert(members_ == 1);
}

static int32_t computeDataSize(const SizesArray& sizes) {
int32_t ret = 0;
portablecollection::constexpr_for<0, members_>(
[&sizes, &ret](auto i) { ret += Layout<i>::computeDataSize(sizes[i]); });
return ret;
}

PortableDeviceMultiCollection(const SizesArray& sizes, cudaStream_t stream)
// allocate device memory asynchronously on the given work queue
: buffer_{cms::cuda::make_device_unique<std::byte[]>(computeDataSize(sizes), stream)},
impl_{buffer_.get(), sizes} {
portablecollection::constexpr_for<0, members_>(
[&](auto i) { assert(reinterpret_cast<uintptr_t>(buffer_.get()) % Layout<i>::alignment == 0); });
constexpr auto alignment = Layout<0>::alignment;
portablecollection::constexpr_for<1, members_>(
[&alignment](auto i) { static_assert(alignment == Layout<i>::alignment); });
}

// non-copyable
PortableDeviceMultiCollection(PortableDeviceMultiCollection const&) = delete;
PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection const&) = delete;

// movable
PortableDeviceMultiCollection(PortableDeviceMultiCollection&&) = default;
PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection&&) = default;

// default destructor
~PortableDeviceMultiCollection() = default;

// access the View by index
template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
View<Idx>& view() {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const& view() const {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const& const_view() const {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
View<Idx>& operator*() {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const& operator*() const {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
View<Idx>* operator->() {
return &get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const* operator->() const {
return &get<Idx>().view_;
}

// access the View by type
template <typename T>
typename T::View& view() {
return get<T>().view_;
}

template <typename T>
typename T::ConstView const& view() const {
return get<T>().view_;
}

template <typename T>
typename T::ConstView const& const_view() const {
return get<T>().view_;
}

template <typename T>
typename T::View& operator*() {
return get<T>().view_;
}

template <typename T>
typename T::ConstView const& operator*() const {
return get<T>().view_;
}

template <typename T>
typename T::View* operator->() {
return &get<T>().view_;
}

template <typename T>
typename T::ConstView const* operator->() const {
return &get<T>().view_;
}

// access the Buffer
Buffer& buffer() { return buffer_; }
Buffer const& buffer() const { return buffer_; }
Buffer const& const_buffer() const { return buffer_; }

// Extract the sizes array
SizesArray sizes() const {
SizesArray ret;
portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get<i>().layout_.metadata().size(); });
return ret;
}

size_t bufferSize() const {
SizesArray layoutSize;
size_t bytes;
bytes = 0;
portablecollection::constexpr_for<0, members_>([&](auto i) {
layoutSize[i] = get<i>().layout_.metadata().byteSize();
bytes += layoutSize[i];
});
return bytes;
}


private:
Buffer buffer_; //!
Implementation impl_; // (serialized: this is where the layouts live)
};

// Singleton case does not need to be aliased. A special template covers it.

// This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail.
// Each alias below fixes the number of layouts (2 to 5) and names the matching PortableDeviceMultiCollection.
template <typename T0, typename T1>
using PortableDeviceCollection2 = PortableDeviceMultiCollection<T0, T1>;

template <typename T0, typename T1, typename T2>
using PortableDeviceCollection3 = PortableDeviceMultiCollection<T0, T1, T2>;

template <typename T0, typename T1, typename T2, typename T3>
using PortableDeviceCollection4 = PortableDeviceMultiCollection<T0, T1, T2, T3>;

template <typename T0, typename T1, typename T2, typename T3, typename T4>
using PortableDeviceCollection5 = PortableDeviceMultiCollection<T0, T1, T2, T3, T4>;

} // namespace cms::cuda

#endif // CUDADataFormats_Common_interface_PortableDeviceCollection_h
Loading

0 comments on commit e4b3841

Please sign in to comment.