
[Eager] Support SelectedRows MergeAdd case (#39449)
* Refactor SelectedRows MergeAdd func by using template

* Add GetInnerMutable func instead of modifying GetInnerMutableTensor

* Updated PADDLE_ENFORCE statement

* Remove useless PADDLE_ENFORCE statement

* Polish Code
veyron95 authored Feb 15, 2022
1 parent f73f5b0 commit 6549a04
Showing 7 changed files with 166 additions and 20 deletions.
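
In short: SelectedRowsMerge is now templated over its container types, so one merge routine serves both the legacy imperative path (framework::Variable in, VariableWrapper out) and the new eager path (paddle::experimental::Tensor in and out). A condensed sketch of the pattern follows; the names mirror the diff below, but this is not the literal source, and the per-device MergeAdd dispatch and error handling are elided:

// Condensed sketch of the templated merge introduced by this commit.
template <typename ReturnVarType, typename VarType>
std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
                                                 const VarType& src2) {
  // Read the pten::SelectedRows held by either container type.
  const pten::SelectedRows& rows1 = GetInnerTensor<pten::SelectedRows>(src1);
  const pten::SelectedRows& rows2 = GetInnerTensor<pten::SelectedRows>(src2);
  // Create the output container and its empty inner SelectedRows.
  auto dst = std::make_shared<ReturnVarType>("Temp");
  pten::SelectedRows* out = GetEmptyInnerTensor<pten::SelectedRows>(dst.get());
  // ... scatter::MergeAdd then accumulates {rows1, rows2} into *out ...
  return dst;
}
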
6 changes: 3 additions & 3 deletions paddle/fluid/eager/grad_tensor_holder.cc
@@ -78,9 +78,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
if (buffer_tensor.is_dense_tensor()) {
paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"We don't support Selected Rows merge for now, support it later "
"and make all kinds of grads can be merged."));
buffer_tensor =
std::move(*paddle::imperative::SelectedRowsMerge<
paddle::experimental::Tensor>(t, buffer_tensor));
}
}
}
@@ -20,6 +20,7 @@
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/selected_rows.h"

#include "paddle/pten/core/kernel_registry.h"

@@ -102,3 +103,69 @@ TEST(GradTensorHolder, Interfaces) {
CHECK_EQ(holder_et0_ptr[0], 1.0f);
CHECK_EQ(holder_et1_ptr[0], 30.0f);
}

TEST(GradTensorHolder, SelectedRowsMergeAdd) {
pten::CPUPlace cpu;

std::vector<int64_t> rows{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
int64_t table_size = 10;
int64_t embedding_width = 10;

auto sr1 = std::make_shared<pten::SelectedRows>(rows, table_size);
auto sr2 = std::make_shared<pten::SelectedRows>(rows, table_size);

// initialize sparse table 1
sr1->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr1[i * embedding_width + j] = static_cast<float>(i);
}
}

// initialize sparse table 2
sr2->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr2[i * embedding_width + j] = static_cast<float>(i);
}
}
// construct two pten::Tensors from the SelectedRows
paddle::experimental::Tensor t1(sr1);
paddle::experimental::Tensor t2(sr2);

// Construct an empty GradTensorHolder
GradSlotMeta slot_meta;
slot_meta.Init(1);
GradTensorHolder grad_tensor_holder =
GradTensorHolder({slot_meta, slot_meta});

// accumulation
grad_tensor_holder.add(0, 0, t1, false);
grad_tensor_holder.add(0, 0, t2, false);

// Buffers()
const auto& buffers = grad_tensor_holder.Buffers();
CHECK_EQ(static_cast<int>(buffers.size()), 2);
CHECK_EQ(static_cast<int>(buffers[0].size()), 1);
CHECK_EQ(static_cast<int>(buffers[1].size()), 1);

// operator[]
const auto& holder_et0 = grad_tensor_holder[0][0];

auto* tmp_buffer_tensor =
static_cast<pten::SelectedRows*>(holder_et0.impl().get());
auto* tmp_buffer_data_sr =
tmp_buffer_tensor->mutable_value()->mutable_data<float>(cpu);

// verify the MergeAdd result (accumulation result)
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
EXPECT_EQ(tmp_buffer_data_sr[i * embedding_width + j],
(static_cast<float>(i) + static_cast<float>(i)));
}
}
}
4 changes: 2 additions & 2 deletions paddle/fluid/imperative/CMakeLists.txt
@@ -44,9 +44,9 @@ if(WITH_GLOO)
endif()

if(NOT WITH_ASCEND_CL)
cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function)
cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function pten_tensor)
else()
cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner)
cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner pten_tensor)
endif()

add_subdirectory(tests)
50 changes: 37 additions & 13 deletions paddle/fluid/imperative/gradient_accumulator.cc
@@ -243,6 +243,23 @@ TType& GetInnerTensor(const paddle::experimental::Tensor& src) {
return *src_tensor;
}

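// Creates an empty TType (e.g. pten::SelectedRows) and installs it as the
// implementation of an eager Tensor; the Tensor must be undefined on entry.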
template <typename TType>
TType* GetEmptyInnerTensor(paddle::experimental::Tensor* dst) {
PADDLE_ENFORCE_EQ(
dst->defined(), false,
platform::errors::Fatal(
"The underlying Tensor implementation should be nullptr"));
dst->set_impl(std::make_shared<TType>());
auto* dst_tensor = static_cast<TType*>(dst->impl().get());
return dst_tensor;
}

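// VariableWrapper overload: returns the TType held by the wrapper's
// underlying Variable, default-constructing it if the Variable is empty.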
template <typename TType>
TType* GetEmptyInnerTensor(paddle::imperative::VariableWrapper* dst) {
auto* dst_tensor = dst->MutableVar()->GetMutable<TType>();
return dst_tensor;
}

template <typename VarType>
void TensorAdd(const VarType& src, VarType* dst) {
pten::DenseTensor* dst_tensor = GetInnerMutableTensor<pten::DenseTensor>(dst);
@@ -473,13 +490,14 @@ template void SelectedRowsAddTensor(
// Note(chenweihang): when two selected rows need to be added,
// adding one to the other is not equivalent to merging the two
// selected rows into one and then adding that to an empty selected
// rows; the latter is the correct approach
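// For example (illustrative values, not from the source): if src1 and
// src2 both contain row 0, with values [1.0] and [2.0], MergeAdd unions
// the row indices and sums duplicates, yielding a single row 0 with
// value [3.0]; naively appending one rows list into the other would not.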
std::shared_ptr<VariableWrapper> SelectedRowsMerge(
const framework::Variable& src1, const framework::Variable& src2) {
auto& src_selected_rows1 = src1.Get<pten::SelectedRows>();
auto& src_selected_rows2 = src2.Get<pten::SelectedRows>();
template <typename ReturnVarType, typename VarType>
std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
const VarType& src2) {
const pten::SelectedRows& src_selected_rows1 =
GetInnerTensor<pten::SelectedRows>(src1);
const pten::SelectedRows& src_selected_rows2 =
GetInnerTensor<pten::SelectedRows>(src2);

auto place = src_selected_rows1.value().place();
auto data_type =
framework::TransToProtoVarType(src_selected_rows1.value().dtype());
@@ -488,9 +506,10 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge(
std::vector<const pten::SelectedRows*> src_selected_rows;
src_selected_rows.emplace_back(&src_selected_rows1);
src_selected_rows.emplace_back(&src_selected_rows2);
auto dst_var = std::make_shared<VariableWrapper>("Temp");
auto* dst_selected_rows =
dst_var->MutableVar()->GetMutable<pten::SelectedRows>();

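// The output container is now obtained via GetEmptyInnerTensor, so this
// code path serves both VariableWrapper and eager Tensor outputs.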
auto dst_var = std::make_shared<ReturnVarType>("Temp");
pten::SelectedRows* dst_selected_rows =
GetEmptyInnerTensor<pten::SelectedRows>(dst_var.get());

#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
@@ -515,12 +534,17 @@
#endif

#undef PADDLE_SELECTED_ROWS_ADD

PADDLE_THROW(platform::errors::InvalidArgument(
"Not supported data type %s for SelectedRowsMerge",
framework::DataTypeToString(data_type)));
}

template std::shared_ptr<paddle::experimental::Tensor> SelectedRowsMerge(
const paddle::experimental::Tensor& src1,
const paddle::experimental::Tensor& src2);
template std::shared_ptr<paddle::imperative::VariableWrapper> SelectedRowsMerge(
const framework::Variable& src1, const framework::Variable& src2);

void VariableWrapperAdd(std::shared_ptr<VariableWrapper> var,
VariableWrapper* dst_var, bool unchange_input) {
auto& src = var->Var();
@@ -547,7 +571,7 @@ void VariableWrapperAdd(std::shared_ptr<VariableWrapper> var,
*dst = std::move(*(var->MutableVar()));
}
} else if (src.IsType<pten::SelectedRows>()) {
auto temp = SelectedRowsMerge(src, *dst);
auto temp = SelectedRowsMerge<VariableWrapper>(src, *dst);
*dst = std::move(*(temp->MutableVar()));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
@@ -603,7 +627,7 @@ void GradientAccumulator::AccumulateGrad() {
SelectedRowsAddToTensor(*dst, src);
*dst = std::move(*src);
} else if (src->IsType<pten::SelectedRows>()) {
auto temp = SelectedRowsMerge(*src, *dst);
auto temp = SelectedRowsMerge<VariableWrapper>(*src, *dst);
*dst = std::move(*(temp->MutableVar()));
}
} else {
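
With the template in place, both accumulation paths call the same routine; a condensed usage sketch (variable names are illustrative, taken from the call sites above):

// Legacy imperative path: framework::Variable inputs, VariableWrapper output.
auto merged_var = SelectedRowsMerge<VariableWrapper>(src, *dst);

// Eager path: paddle::experimental::Tensor in and out (see the
// GradTensorHolder change above).
auto merged_tensor =
    SelectedRowsMerge<paddle::experimental::Tensor>(t, buffer_tensor);
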
6 changes: 5 additions & 1 deletion paddle/fluid/imperative/gradient_accumulator.h
@@ -17,10 +17,10 @@
#include <memory>
#include <utility>
#include <vector>

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/imperative/hooks.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/pten/api/include/tensor.h"

namespace paddle {
namespace imperative {
@@ -164,6 +164,10 @@ class SortedGradientAccumulator : public GradientAccumulator {
std::vector<SavedVarInfo> tmp_grad_vars_;
};

template <typename ReturnVarType, typename VarType>
std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
const VarType& src2);

template <typename VarType>
void SelectedRowsAddToTensor(const VarType& src, VarType* dst);

2 changes: 1 addition & 1 deletion paddle/fluid/imperative/tests/CMakeLists.txt
@@ -12,7 +12,7 @@ else()
endif(WIN32)


cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function)
cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function pten_tensor pten_api pten_api_utils)
cc_test(test_layer SRCS test_layer.cc DEPS layer proto_desc operator op_registry variable_helper mul_op memcpy)
cc_test(test_prepare_op SRCS test_prepare_op.cc DEPS prepared_operator op_info split_op layer concat_and_split activation_op place)
cc_test(test_tracer SRCS test_tracer.cc DEPS tracer layer proto_desc operator op_registry variable_helper mul_op reduce_sum_op elementwise_add_op memcpy)
51 changes: 51 additions & 0 deletions paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -29,6 +29,57 @@ namespace framework = paddle::framework;
namespace paddle {
namespace imperative {

TEST(Test__SelectedRowsMerge_Test, SelectedRowsMerge) {
pten::CPUPlace cpu;

std::vector<int64_t> rows{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
int64_t table_size = 10;
int64_t embedding_width = 10;

auto sr1 = std::make_shared<pten::SelectedRows>(rows, table_size);
auto sr2 = std::make_shared<pten::SelectedRows>(rows, table_size);

// initialize sparse table 1
sr1->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr1[i * embedding_width + j] = static_cast<float>(i);
}
}

// initialize sparse table 2
sr2->mutable_value()->Resize(
pten::framework::make_ddim({table_size, embedding_width}));
auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu);
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
data_sr2[i * embedding_width + j] = static_cast<float>(i);
}
}
// construct two pten::Tensors from the SelectedRows
paddle::experimental::Tensor t1(sr1);
paddle::experimental::Tensor t2(sr2);

// call SelectedRowsMerge
auto new_buffer =
paddle::imperative::SelectedRowsMerge<paddle::experimental::Tensor>(t1,
t2);
auto* new_buffer_tensor =
static_cast<pten::SelectedRows*>(new_buffer->impl().get());
auto* new_buffer_data_sr1 =
new_buffer_tensor->mutable_value()->mutable_data<float>(cpu);

// verify the MergeAdd result
for (int64_t i = 0; i < table_size; ++i) {
for (int64_t j = 0; j < embedding_width; ++j) {
EXPECT_EQ(new_buffer_data_sr1[i * embedding_width + j],
(static_cast<float>(i) + static_cast<float>(i)));
}
}
}

template <typename Place1, typename Place2, typename T>
int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) {
framework::Variable var1;