[WIP] Added skeleton of batch based GPU assignment #2820

Open · wants to merge 1 commit into base: master
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -131,6 +131,7 @@ set(XTENSOR_HEADERS
${XTENSOR_INCLUDE_DIR}/xtensor/xcomplex.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xcontainer.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xcsv.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xdevice.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xdynamic_view.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xeval.hpp
${XTENSOR_INCLUDE_DIR}/xtensor/xexception.hpp
14 changes: 13 additions & 1 deletion include/xtensor/xassign.hpp
@@ -168,6 +168,17 @@ namespace xt
static void run_impl(E1& e1, const E2& e2, std::false_type);
};

/*******************
* device_assigner *
*******************/

class device_assigner
{
public:

template <class E1, class E2>
static void run(E1& e1, const E2& e2)
{
// Load the rhs expression into a device batch and store it back
// into the lhs container's buffer.
e1.store_device(e2.load_device());
}
};

/*************************
* strided_loop_assigner *
*************************/
@@ -463,7 +474,8 @@ namespace xt
// in compilation error for expressions that do not provide a SIMD interface.
// simd_assign is true if simd_linear_assign() or simd_linear_assign(de1, de2)
// is true.
linear_assigner<simd_assign>::run(de1, de2);
//linear_assigner<simd_assign>::run(de1, de2);
device_assigner::run(de1, de2);
}
else
{
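The hunk above swaps the SIMD linear path for the device path unconditionally, leaving the old call commented out. A minimal sketch of an opt-in dispatch, assuming the XTENSOR_DEVICE_ASSIGN macro already hinted at in xdevice.hpp (hypothetical, not part of this commit):

    #ifdef XTENSOR_DEVICE_ASSIGN
        device_assigner::run(de1, de2);
    #else
        linear_assigner<simd_assign>::run(de1, de2);
    #endif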
16 changes: 16 additions & 0 deletions include/xtensor/xcontainer.hpp
@@ -27,6 +27,7 @@
#include "xstrides.hpp"
#include "xtensor_config.hpp"
#include "xtensor_forward.hpp"
#include "xdevice.hpp"

namespace xt
{
@@ -112,6 +113,8 @@ namespace xt
using reverse_linear_iterator = typename iterable_base::reverse_linear_iterator;
using const_reverse_linear_iterator = typename iterable_base::const_reverse_linear_iterator;

using container_device_return_type_t = host_device_batch<value_type>;

static_assert(static_layout != layout_type::any, "Container layout can never be layout_type::any!");

size_type size() const noexcept;
@@ -187,6 +190,19 @@
container_simd_return_type_t<storage_type, value_type, requested_type>
/*simd_return_type<requested_type>*/ load_simd(size_type i) const;

template<class device_batch>
void store_device(device_batch&& e)
{
//check length matching
e.store_host(storage().data());
}

container_device_return_type_t load_device() const
{
auto ptr = data();
return container_device_return_type_t(ptr, size());
}

linear_iterator linear_begin() noexcept;
linear_iterator linear_end() noexcept;

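A usage sketch of the two members added above, assuming an xt::xarray<float> on the host backend (illustrative, not taken from the diff):

    xt::xarray<float> a = {1.f, 2.f, 3.f};
    xt::xarray<float> b = xt::zeros_like(a);
    // load_device() copies a's buffer into a host_device_batch<float>;
    // store_device() copies that batch back into b's storage.
    b.store_device(a.load_device());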
86 changes: 86 additions & 0 deletions include/xtensor/xdevice.hpp
@@ -0,0 +1,86 @@
#ifndef XTENSOR_DEVICE_HPP
#define XTENSOR_DEVICE_HPP

#include <memory>
#include <algorithm>
#include <functional>
#include <vector>

namespace xt
{
/**
* Device implementation for the various operations. All device-specific code
* goes in here, disabled via a macro where the syntax would be invalid for the
* host compiler (as may be needed for SYCL or CUDA).
*/
//#ifdef XTENSOR_DEVICE_ASSIGN
template<class T>
class host_device_batch
{
public:
host_device_batch(const T* ptr, std::size_t size)
{
// Copy the data to the device; a CUDA implementation would be nearly identical.
m_data.resize(size);
std::copy(ptr, ptr + size, std::begin(m_data));
}
template<class A>
host_device_batch& operator+(const host_device_batch<A>& rhs)
{
//CUDA impl = thrust::transform(m_data.begin(), m_data.end(), rhs.m_data().begin(), m_data.end(), thrust::plus<T>{});
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::plus<T>{});
return *this;
}
template<class A>
host_device_batch& operator-(const host_device_batch<A>& rhs)
{
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::minus<T>{});
return *this;
}
template<class A>
host_device_batch& operator*(const host_device_batch<A>& rhs)
{
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::multiplies<T>{});
return *this;
}
template<class A>
host_device_batch& operator/(const host_device_batch<A>& rhs)
{
std::transform(std::begin(m_data), std::end(m_data), std::begin(rhs.m_data), std::begin(m_data), std::divides<T>{});
return *this;
}
void store_host(T* dst)
{
std::copy(std::begin(m_data), std::end(m_data), dst);
}
private:
//CUDA impl = thrust::device_vector<T> m_data;
std::vector<T> m_data;

// Grant the operators above access to m_data across instantiations
// (needed when the operand's value type A differs from T).
template <class A>
friend class host_device_batch;
};
//#endif

// template<class T>
// class cuda_device_batch : public batch<host_device_batch<T>>
// {
// public:

// };

// template<class T>
// class intel_device_batch : public batch<host_device_batch<T>>
// {
// public:

// };

// template<class T>
// class opencl_device_batch : public batch<host_device_batch<T>>
// {
// public:

// };
}

#endif
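A self-contained sketch of the batch in isolation, assuming the class compiles as shown; note that the arithmetic operators accumulate into the left-hand operand rather than returning a fresh batch:

    #include <cassert>
    #include "xtensor/xdevice.hpp"

    int main()
    {
        float lhs[] = {1.f, 2.f, 3.f};
        const float rhs[] = {4.f, 5.f, 6.f};
        xt::host_device_batch<float> a(lhs, 3);
        xt::host_device_batch<float> b(rhs, 3);
        a + b;                // element-wise add, written back into a
        a.store_host(lhs);    // copy the (host-emulated) device data back
        assert(lhs[1] == 7.f);
        return 0;
    }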
20 changes: 20 additions & 0 deletions include/xtensor/xfunction.hpp
@@ -31,6 +31,7 @@
#include "xstrides.hpp"
#include "xtensor_simd.hpp"
#include "xutils.hpp"
#include "xdevice.hpp"

namespace xt
{
@@ -283,6 +284,7 @@ namespace xt
using const_iterator = typename iterable_base::const_iterator;
using reverse_iterator = typename iterable_base::reverse_iterator;
using const_reverse_iterator = typename iterable_base::const_reverse_iterator;
using device_return_type = host_device_batch<value_type>;

template <class Func, class... CTA, class U = std::enable_if_t<!std::is_base_of<std::decay_t<Func>, self_type>::value>>
xfunction(Func&& f, CTA&&... e) noexcept;
@@ -361,6 +363,8 @@ namespace xt
template <class align, class requested_type = value_type, std::size_t N = xt_simd::simd_traits<requested_type>::size>
simd_return_type<requested_type> load_simd(size_type i) const;

device_return_type load_device() const;

const tuple_type& arguments() const noexcept;

const functor_type& functor() const noexcept;
@@ -385,6 +389,9 @@ namespace xt
template <class align, class requested_type, std::size_t N, std::size_t... I>
auto load_simd_impl(std::index_sequence<I...>, size_type i) const;

template <std::size_t... I>
inline auto load_device_impl(std::index_sequence<I...>) const;

template <class Func, std::size_t... I>
const_stepper build_stepper(Func&& f, std::index_sequence<I...>) const noexcept;

@@ -844,6 +851,12 @@ namespace xt
return operator()();
}

template <class F, class... CT>
inline auto xfunction<F, CT...>::load_device() const -> device_return_type
{
return load_device_impl(std::make_index_sequence<sizeof...(CT)>());
}

template <class F, class... CT>
template <class align, class requested_type, std::size_t N>
inline auto xfunction<F, CT...>::load_simd(size_type i) const -> simd_return_type<requested_type>
@@ -912,6 +925,13 @@ namespace xt
return m_f.simd_apply((std::get<I>(m_e).template load_simd<align, requested_type>(i))...);
}

template <class F, class... CT>
template <std::size_t... I>
inline auto xfunction<F, CT...>::load_device_impl(std::index_sequence<I...>) const
{
return m_f.device_apply((std::get<I>(m_e).load_device())...);
}

template <class F, class... CT>
template <class Func, std::size_t... I>
inline auto xfunction<F, CT...>::build_stepper(Func&& f, std::index_sequence<I...>) const noexcept
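Putting the pieces together: load_device() on an xfunction recursively loads each operand and feeds the resulting batches to the functor's device_apply. A sketch using only the members added in this diff (functor name resolution happens inside the expression):

    xt::xarray<float> a = {1.f, 2.f};
    xt::xarray<float> b = {3.f, 4.f};
    xt::xarray<float> c = xt::zeros_like(a);
    auto expr = a + b;                  // lazy xfunction; nothing evaluated yet
    c.store_device(expr.load_device()); // device_apply over both operand batches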
54 changes: 39 additions & 15 deletions include/xtensor/xmath.hpp
@@ -81,21 +81,27 @@ namespace xt
XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned long long);


#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \
struct NAME##_fun \
{ \
template <class T> \
constexpr auto operator()(const T& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
template <class B> \
constexpr auto simd_apply(const B& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
#define XTENSOR_UNARY_MATH_FUNCTOR(NAME) \
struct NAME##_fun \
{ \
template <class T> \
constexpr auto operator()(const T& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
template <class B> \
constexpr auto simd_apply(const B& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
template <class B> \
constexpr auto device_apply(const B& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
}

#define XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING(NAME) \
@@ -113,6 +119,12 @@
using math::NAME; \
return NAME(arg); \
} \
template <class B> \
constexpr auto device_apply(const B& arg) const \
{ \
using math::NAME; \
return NAME(arg); \
} \
}

#define XTENSOR_BINARY_MATH_FUNCTOR(NAME) \
@@ -130,6 +142,12 @@
using math::NAME; \
return NAME(arg1, arg2); \
} \
template <class B> \
constexpr auto device_apply(const B& arg1, const B& arg2) const \
{ \
using math::NAME; \
return NAME(arg1, arg2); \
} \
}

#define XTENSOR_TERNARY_MATH_FUNCTOR(NAME) \
@@ -147,6 +165,12 @@
using math::NAME; \
return NAME(arg1, arg2, arg3); \
} \
template <class B> \
constexpr auto device_apply(const B& arg1, const B& arg2, const B& arg3) const \
{ \
using math::NAME; \
return NAME(arg1, arg2, arg3); \
} \
}

namespace math
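As a concrete instance, XTENSOR_UNARY_MATH_FUNCTOR(sqrt) now emits, next to the existing operator() and simd_apply, a device overload of the same shape (expansion sketch; a sqrt overload for the batch type is still missing from the skeleton):

    template <class B>
    constexpr auto device_apply(const B& arg) const
    {
        using math::sqrt;
        return sqrt(arg);
    }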
5 changes: 5 additions & 0 deletions include/xtensor/xoperation.hpp
@@ -79,6 +79,11 @@ namespace xt
{ \
return (arg1 OP arg2); \
} \
template <class B> \
constexpr auto device_apply(B&& arg1, const B&& arg2) const \
{ \
return (arg1 OP arg2); \
} \
}

namespace detail
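Expansion sketch of what this macro emits for OP '+' (the functor name below is illustrative): the two operand batches are forwarded to host_device_batch::operator+, which accumulates into the left-hand batch.

    struct plus_fun_sketch
    {
        template <class B1, class B2>
        constexpr auto device_apply(B1&& arg1, B2&& arg2) const
        {
            return (std::forward<B1>(arg1) + std::forward<B2>(arg2));
        }
    };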
1 change: 1 addition & 0 deletions test/CMakeLists.txt
@@ -183,6 +183,7 @@ set(XTENSOR_TESTS
test_xcomplex.cpp
test_xcsv.cpp
test_xdatesupport.cpp
test_xdevice_assign.cpp
test_xdynamic_view.cpp
test_xfunctor_adaptor.cpp
test_xfixed.cpp
39 changes: 39 additions & 0 deletions test/test_xdevice_assign.cpp
@@ -0,0 +1,39 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
* Copyright (c) QuantStack *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/


#include <algorithm>

#include "xtensor/xarray.hpp"
#include "xtensor/xfixed.hpp"
#include "xtensor/xnoalias.hpp"
#include "xtensor/xstrided_view.hpp"
#include "xtensor/xtensor.hpp"
#include "xtensor/xview.hpp"

#include "test_common_macros.hpp"

namespace xt
{
TEST(test_xdevice, basic_xfunction)
{
std::vector<double> expectation = {2,3,4,5,6};

xt::xarray<float> a = {1., 2., 3., 4., 5.};
xt::xarray<float> b = xt::ones_like(a);
auto c = xt::xtensor<float, 1>::from_shape(a.shape());
c = a + b;
for (std::size_t i = 0; i < expectation.size(); i++)
{
ASSERT_EQ(c(i), expectation.at(i));
}
}
}