[Eager] Support SelectedRows MergeAdd case #39449

Merged

26 commits
460996f
add trace op
phlrain Jan 25, 2022
1136133
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jan 25, 2022
635b4d9
bug fix
phlrain Jan 25, 2022
aa5866a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jan 25, 2022
aea8814
bug fix; test=develop
phlrain Jan 25, 2022
79cdc15
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jan 25, 2022
8fe7d06
thrust bug fix; test=develop
phlrain Jan 28, 2022
e8e55f6
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jan 28, 2022
11b622d
remove useless register; test=develop
phlrain Jan 29, 2022
89f2f5b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jan 29, 2022
b8f5e72
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jan 29, 2022
7bd1f82
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Feb 1, 2022
86bb360
fix bug; test=develop
phlrain Feb 1, 2022
67f37d1
update trace kernel; test=develop
phlrain Feb 9, 2022
6d8f113
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Feb 9, 2022
4abe6bf
move kernel args to trace_sig; test=develop
phlrain Feb 9, 2022
64fc1c9
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Feb 9, 2022
25b6ddf
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Feb 9, 2022
b44386f
try to fix trace kernel conflict; test=develop
phlrain Feb 9, 2022
1801f63
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Feb 9, 2022
d0083e0
Refactor SelectedRows MergeAdd func by using template
veyron95 Feb 10, 2022
d6fbf25
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Feb 10, 2022
2d35720
Add GetInnerMutable func instead of modify GetInnerMutableTensor
veyron95 Feb 11, 2022
d8d684b
Updated PADDLE_ENFORCE statement
veyron95 Feb 11, 2022
7350686
Remove useless PADDLE_ENFORCE statement
veyron95 Feb 11, 2022
ccf7d89
Polish Code
veyron95 Feb 11, 2022
6 changes: 3 additions & 3 deletions paddle/fluid/eager/grad_tensor_holder.cc
@@ -77,9 +77,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
if (buffer_tensor.is_dense_tensor()) {
paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
} else {
-   PADDLE_THROW(paddle::platform::errors::Fatal(
-       "We don't support Selected Rows merge for now, support it later "
-       "and make all kinds of grads can be merged."));
+   buffer_tensor =
+       std::move(*paddle::imperative::SelectedRowsMerge<
+                 paddle::experimental::Tensor>(t, buffer_tensor));
}
}
}
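Before this change, the SelectedRows-into-SelectedRows branch above threw the fatal "We don't support Selected Rows merge for now" error; it now merges the incoming gradient into the buffer. A minimal sketch of the caller-visible behavior, reusing the test-style setup shown later in this PR (t1 and t2 are paddle::experimental::Tensor objects backed by pten::SelectedRows, slot_meta as in the tests):

// Hedged sketch, not PR code: accumulating two SelectedRows grads
// into the same holder slot.
GradTensorHolder holder({slot_meta, slot_meta});
holder.add(/*slot_id=*/0, /*rank=*/0, t1, false);
holder.add(/*slot_id=*/0, /*rank=*/0, t2, false);  // used to throw; now merges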
@@ -20,6 +20,7 @@
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/selected_rows.h"

#include "paddle/pten/core/kernel_registry.h"

@@ -102,3 +103,69 @@ TEST(GradTensorHolder, Interfaces) {
CHECK_EQ(holder_et0_ptr[0], 1.0f);
CHECK_EQ(holder_et1_ptr[0], 30.0f);
}

TEST(GradTensorHolder, SelectedRowsMergeAdd) {
pten::CPUPlace cpu;

std::vector<int64_t> rows{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
int64_t table_size = 10;
int64_t embedding_width = 10;

auto sr1 = std::make_shared<pten::SelectedRows>(rows, table_size);
auto sr2 = std::make_shared<pten::SelectedRows>(rows, table_size);

// initialize a sparse table 1
sr1->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr1[i * embedding_width + j] = static_cast<float>(i);
}
}

// initialize a sparse table 2
sr2->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr2[i * embedding_width + j] = static_cast<float>(i);
}
}
// wrap the two SelectedRows into paddle::experimental::Tensor objects
paddle::experimental::Tensor t1(sr1);
paddle::experimental::Tensor t2(sr2);

// construct an empty GradTensorHolder
GradSlotMeta slot_meta;
slot_meta.Init(1);
GradTensorHolder grad_tensor_holder =
GradTensorHolder({slot_meta, slot_meta});

// accumulation
grad_tensor_holder.add(0, 0, t1, false);
grad_tensor_holder.add(0, 0, t2, false);

// Buffers()
const auto& buffers = grad_tensor_holder.Buffers();
CHECK_EQ(static_cast<int>(buffers.size()), 2);
CHECK_EQ(static_cast<int>(buffers[0].size()), 1);
CHECK_EQ(static_cast<int>(buffers[1].size()), 1);

// operator[]
const auto& holder_et0 = grad_tensor_holder[0][0];

auto* tmp_buffer_tensor =
static_cast<pten::SelectedRows*>(holder_et0.impl().get());
auto* tmp_buffer_data_sr =
tmp_buffer_tensor->mutable_value()->mutable_data<float>(cpu);

// verify the MergeAdd result (accumulation result)
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
EXPECT_EQ(tmp_buffer_data_sr[i * embedding_width + j],
(static_cast<float>(i) + static_cast<float>(i)));
}
}
}
4 changes: 2 additions & 2 deletions paddle/fluid/imperative/CMakeLists.txt
@@ -44,9 +44,9 @@ if(WITH_GLOO)
endif()

if(NOT WITH_ASCEND_CL)
- cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function)
+ cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function pten_tensor)
else()
- cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner)
+ cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner pten_tensor)
endif()

add_subdirectory(tests)
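The pten_tensor dependency is new in both branches: gradient_accumulator now includes paddle/pten/api/include/tensor.h and operates on paddle::experimental::Tensor directly, as the gradient_accumulator.h diff below shows.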
50 changes: 37 additions & 13 deletions paddle/fluid/imperative/gradient_accumulator.cc
@@ -242,6 +242,23 @@ TType& GetInnerTensor(const paddle::experimental::Tensor& src) {
return *src_tensor;
}

+ template <typename TType>
+ TType* GetEmptyInnerTensor(paddle::experimental::Tensor* dst) {
+   PADDLE_ENFORCE_EQ(
+       dst->defined(), false,
+       platform::errors::Fatal(
+           "The underlying Tensor implementation should be nullptr"));
+   dst->set_impl(std::make_shared<TType>());
+   auto* dst_tensor = static_cast<TType*>(dst->impl().get());
+   return dst_tensor;
+ }
+
+ template <typename TType>
+ TType* GetEmptyInnerTensor(paddle::imperative::VariableWrapper* dst) {
+   auto* dst_tensor = dst->MutableVar()->GetMutable<TType>();
+   return dst_tensor;
+ }

template <typename VarType>
void TensorAdd(const VarType& src, VarType* dst) {
pten::DenseTensor* dst_tensor = GetInnerMutableTensor<pten::DenseTensor>(dst);
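The two GetEmptyInnerTensor overloads above give the templated SelectedRowsMerge (next hunk) a single way to create its output tensor for either front end. A hedged usage sketch; the local variable names here are illustrative only:

// Eager path: dst is a not-yet-defined paddle::experimental::Tensor,
// so a fresh pten::SelectedRows impl is installed and returned.
paddle::experimental::Tensor out;
pten::SelectedRows* out_rows = GetEmptyInnerTensor<pten::SelectedRows>(&out);

// Legacy imperative path: dst is a VariableWrapper; the SelectedRows
// slot of its inner Variable is created on demand and returned.
auto wrapper = std::make_shared<VariableWrapper>("Temp");
pten::SelectedRows* wrapper_rows =
    GetEmptyInnerTensor<pten::SelectedRows>(wrapper.get());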
@@ -469,23 +486,25 @@ template void SelectedRowsAddTensor(
// Note(chenweihang): when two selected rows need to be added,
// adding one to another is not equal to merging the two selected rows
// into one and then adding it to an empty selected rows; the latter is correct
- std::shared_ptr<VariableWrapper> SelectedRowsMerge(
-     const framework::Variable& src1, const framework::Variable& src2) {
-   auto& src_selected_rows1 = src1.Get<pten::SelectedRows>();
-   auto& src_selected_rows2 = src2.Get<pten::SelectedRows>();
+ template <typename ReturnVarType, typename VarType>
+ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
+                                                  const VarType& src2) {
+   const pten::SelectedRows& src_selected_rows1 =
+       GetInnerTensor<pten::SelectedRows>(src1);
+   const pten::SelectedRows& src_selected_rows2 =
+       GetInnerTensor<pten::SelectedRows>(src2);

auto place = src_selected_rows1.value().place();
auto data_type = src_selected_rows1.value().type();
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();

std::vector<const pten::SelectedRows*> src_selected_rows;
src_selected_rows.emplace_back(&src_selected_rows1);
src_selected_rows.emplace_back(&src_selected_rows2);
- auto dst_var = std::make_shared<VariableWrapper>("Temp");
- auto* dst_selected_rows =
-     dst_var->MutableVar()->GetMutable<pten::SelectedRows>();

+ auto dst_var = std::make_shared<ReturnVarType>("Temp");
+ pten::SelectedRows* dst_selected_rows =
+     GetEmptyInnerTensor<pten::SelectedRows>(dst_var.get());

#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
@@ -510,12 +529,17 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge(
#endif

#undef PADDLE_SELECTED_ROWS_ADD

PADDLE_THROW(platform::errors::InvalidArgument(
"Not supported data type %s for SelectedRowsMerge",
framework::DataTypeToString(data_type)));
}

+ template std::shared_ptr<paddle::experimental::Tensor> SelectedRowsMerge(
+     const paddle::experimental::Tensor& src1,
+     const paddle::experimental::Tensor& src2);
+ template std::shared_ptr<paddle::imperative::VariableWrapper> SelectedRowsMerge(
+     const framework::Variable& src1, const framework::Variable& src2);

void VariableWrapperAdd(std::shared_ptr<VariableWrapper> var,
VariableWrapper* dst_var, bool unchange_input) {
auto& src = var->Var();
@@ -542,7 +566,7 @@ void VariableWrapperAdd(std::shared_ptr<VariableWrapper> var,
*dst = std::move(*(var->MutableVar()));
}
} else if (src.IsType<pten::SelectedRows>()) {
- auto temp = SelectedRowsMerge(src, *dst);
+ auto temp = SelectedRowsMerge<VariableWrapper>(src, *dst);
*dst = std::move(*(temp->MutableVar()));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
@@ -598,7 +622,7 @@ void GradientAccumulator::AccumulateGrad() {
SelectedRowsAddToTensor(*dst, src);
*dst = std::move(*src);
} else if (src->IsType<pten::SelectedRows>()) {
- auto temp = SelectedRowsMerge(*src, *dst);
+ auto temp = SelectedRowsMerge<VariableWrapper>(*src, *dst);
*dst = std::move(*(temp->MutableVar()));
}
} else {
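The Note above is the key subtlety: when row indices overlap, adding one SelectedRows into the other in place is not the same as merging both into a fresh output where duplicate rows are summed. A self-contained toy illustration of the intended MergeAdd semantics (plain C++, no Paddle types; in the real code this work is done by the MergeAdd functor dispatched through the PADDLE_SELECTED_ROWS_ADD macro above):

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // Two "selected rows" inputs, as (row index, value) pairs.
  std::vector<std::pair<int, float>> src1{{0, 1.0f}, {2, 2.0f}};
  std::vector<std::pair<int, float>> src2{{2, 10.0f}, {3, 3.0f}};

  // MergeAdd semantics: values belonging to the same row are summed.
  std::map<int, float> merged;
  for (const auto& kv : src1) merged[kv.first] += kv.second;
  for (const auto& kv : src2) merged[kv.first] += kv.second;

  for (const auto& kv : merged) {
    std::printf("row %d -> %.1f\n", kv.first, kv.second);
  }
  // Output: row 0 -> 1.0, row 2 -> 12.0, row 3 -> 3.0
  return 0;
}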
6 changes: 5 additions & 1 deletion paddle/fluid/imperative/gradient_accumulator.h
@@ -17,10 +17,10 @@
#include <memory>
#include <utility>
#include <vector>

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/imperative/hooks.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/pten/api/include/tensor.h"

namespace paddle {
namespace imperative {
@@ -164,6 +164,10 @@ class SortedGradientAccumulator : public GradientAccumulator {
std::vector<SavedVarInfo> tmp_grad_vars_;
};

+ template <typename ReturnVarType, typename VarType>
+ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
+                                                  const VarType& src2);

template <typename VarType>
void SelectedRowsAddToTensor(const VarType& src, VarType* dst);

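With the templated SelectedRowsMerge declared above, both front ends share one implementation; the return type is chosen by the first template argument and the input type is deduced. A usage sketch mirroring the call sites in this PR:

// Eager API: merge two paddle::experimental::Tensor objects backed by
// pten::SelectedRows; returns std::shared_ptr<paddle::experimental::Tensor>.
auto merged = paddle::imperative::SelectedRowsMerge<
    paddle::experimental::Tensor>(t1, t2);

// Legacy path: merge two framework::Variable objects; returns
// std::shared_ptr<VariableWrapper>, as in VariableWrapperAdd above.
auto temp = paddle::imperative::SelectedRowsMerge<VariableWrapper>(src, *dst);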
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/tests/CMakeLists.txt
@@ -12,7 +12,7 @@ else()
endif(WIN32)


- cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function)
+ cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function pten_tensor pten_api pten_api_utils)
cc_test(test_layer SRCS test_layer.cc DEPS layer proto_desc operator op_registry variable_helper mul_op memcpy)
cc_test(test_prepare_op SRCS test_prepare_op.cc DEPS prepared_operator op_info split_op layer concat_and_split activation_op place)
cc_test(test_tracer SRCS test_tracer.cc DEPS tracer layer proto_desc operator op_registry variable_helper mul_op reduce_sum_op elementwise_add_op memcpy)
51 changes: 51 additions & 0 deletions paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -28,6 +28,57 @@ namespace framework = paddle::framework;
namespace paddle {
namespace imperative {

TEST(Test__SelectedRowsMerge_Test, SelectedRowsMerge) {
pten::CPUPlace cpu;

std::vector<int64_t> rows{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
int64_t table_size = 10;
int64_t embedding_width = 10;

auto sr1 = std::make_shared<pten::SelectedRows>(rows, table_size);
auto sr2 = std::make_shared<pten::SelectedRows>(rows, table_size);

// initialize a sparse table 1
sr1->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr1[i * embedding_width + j] = static_cast<float>(i);
}
}

// initialize a sparse table 2
sr2->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr2[i * embedding_width + j] = static_cast<float>(i);
}
}
// wrap the two SelectedRows into paddle::experimental::Tensor objects
paddle::experimental::Tensor t1(sr1);
paddle::experimental::Tensor t2(sr2);

// call SelectedRowsMerge
auto new_buffer =
paddle::imperative::SelectedRowsMerge<paddle::experimental::Tensor>(t1,
t2);
auto* new_buffer_tensor =
static_cast<pten::SelectedRows*>(new_buffer->impl().get());
auto* new_buffer_data_sr1 =
new_buffer_tensor->mutable_value()->mutable_data<float>(cpu);

// verify the MergeAdd result
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
EXPECT_EQ(new_buffer_data_sr1[i * embedding_width + j],
(static_cast<float>(i) + static_cast<float>(i)));
}
}
}

template <typename Place1, typename Place2, typename T>
int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) {
framework::Variable var1;