Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[AutoParallel] convert distensor for eager custom op #59137

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 0 additions & 41 deletions paddle/fluid/eager/custom_operator/custom_operator_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -472,47 +472,6 @@ std::tuple<bool, bool, phi::distributed::ProcessMesh> PrepareCtxForAutoParallel(
x.emplace_back(t);
}
}
const phi::distributed::ProcessMesh* mesh = nullptr;
for (auto& input : x) {
if (input.is_dist_tensor()) {
mesh = &(
std::dynamic_pointer_cast<phi::distributed::DistTensor>(input.impl())
->dist_attr()
.process_mesh());
break;
}
}

if (mesh) {
for (auto& input : x) {
if (input.is_dist_tensor()) {
PADDLE_ENFORCE_EQ(
std::dynamic_pointer_cast<phi::distributed::DistTensor>(
input.impl())
->dist_attr()
.process_mesh(),
*mesh,
phi::errors::InvalidArgument(
"Input %s has different mesh. However all inputs should "
"have the same mesh.",
input.name()));
} else {
PADDLE_ENFORCE_EQ(
phi::DenseTensor::classof(input.impl().get()),
true,
phi::errors::InvalidArgument("Failed to convert input %s impl "
"to phi::distributed::DistTensor "
"as it's not phi::DenseTensor.",
input.name()));
phi::distributed::TensorDistAttr dist_attr(
phi::vectorize(input.impl()->dims()));
dist_attr.set_process_mesh(*mesh);
auto dense_t = std::static_pointer_cast<phi::DenseTensor>(input.impl());
input.set_impl(
std::make_shared<phi::distributed::DistTensor>(dense_t, dist_attr));
}
}
}

run_auto_parallel = paddle::experimental::AllInputsAreDistTensor(x);
rank_is_in_current_mesh = true;
Expand Down
39 changes: 36 additions & 3 deletions paddle/fluid/pybind/eager_functions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -578,14 +578,47 @@ static PyObject* eager_api_run_custom_op(PyObject* self,
<< " to CustomOpKernelContext. Add vector<Tensor> size = "
<< ctx.InputRangeAt(i).second - ctx.InputRangeAt(i).first;
} else {
paddle::Tensor tensor =
std::move(CastPyArg2Tensor(obj, i + 1)); // NOLINT
ctx.EmplaceBackInput(std::move(tensor));
const paddle::Tensor& tensor = CastPyArg2Tensor(obj, i + 1); // NOLINT
ctx.EmplaceBackInput(tensor);
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add Tensor for general case.";
}
}

const phi::distributed::ProcessMesh* mesh = nullptr;
if (InputsContainDistTensor(&mesh, *(ctx.AllMutableInput()))) {
ctx.AllMutableInput()->clear();
for (size_t i = 0; i < inputs.size(); ++i) {
const auto& input = inputs.at(i);
// Parse op_type first, so that use i + 1
PyObject* obj = PyTuple_GET_ITEM(args, i + 1);
// Emplace Py_None from python, this means optional inputs passed to C++,
// use one un-initialized tensor to indicate both Tensor and
// vector<Tensor> inputs.
if (obj == Py_None) {
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add un-initialized tensor "
"because the optional input is None";
ctx.EmplaceBackInput(std::move(paddle::Tensor()));
continue;
}
if (paddle::framework::detail::IsDuplicableVar(input)) {
std::vector<paddle::Tensor> tensors =
std::move(CastPyArg2VectorOfTensor(obj, i + 1, mesh)); // NOLINT
ctx.EmplaceBackInputs(std::move(tensors));
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add vector<Tensor> size = "
<< ctx.InputRangeAt(i).second - ctx.InputRangeAt(i).first;
} else {
const paddle::Tensor& tensor = CastPyArg2Tensor(obj, i + 1); // NOLINT
ConvertAllInputsToDistTensor(mesh, tensor);
ctx.EmplaceBackInput(tensor);
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add Tensor for general case.";
}
}
}

// Parse op_type and inputs first, so that use 1 + inputs.size() + i
int attr_start_idx = static_cast<int>(1 + inputs.size());
for (size_t i = 0; i < attrs.size(); ++i) {
Expand Down
7 changes: 3 additions & 4 deletions paddle/fluid/pybind/inference_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,8 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
}
}

void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
paddle::Tensor &&paddle_tensor) {
void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
paddle::Tensor &paddle_tensor) { // NOLINT
std::vector<int> shape;
for (int i = 0; i < paddle_tensor.dims().size(); ++i) {
shape.push_back(paddle_tensor.dims()[i]); // NOLINT
Expand Down Expand Up @@ -1245,8 +1245,7 @@ void BindPaddleInferTensor(py::module *m) {
.def("_share_external_data_paddle_tensor_bind",
[](paddle_infer::Tensor &self, const py::handle &input) {
PyObject *obj = input.ptr();
PaddleTensorShareExternalData(self,
std::move(CastPyArg2Tensor(obj, 0)));
PaddleTensorShareExternalData(self, CastPyArg2Tensor(obj, 0));
})
.def("copy_to_cpu", &PaddleInferTensorToNumpy)
.def("shape", &paddle_infer::Tensor::shape)
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/api/ext/op_meta_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class PADDLE_API CustomOpKernelContext {
CustomOpKernelContext() = default;

void EmplaceBackInput(Tensor&& input);
void EmplaceBackInput(const Tensor& input);
void EmplaceBackInputs(const std::vector<Tensor>& inputs);
void EmplaceBackOutput(Tensor&& output);
void EmplaceBackOutputs(const std::vector<Tensor>& outputs);
Expand Down
6 changes: 6 additions & 0 deletions paddle/phi/api/lib/op_meta_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ void CustomOpKernelContext::EmplaceBackInput(Tensor&& input) {
input_range_.emplace_back(index, index + 1);
}

void CustomOpKernelContext::EmplaceBackInput(const Tensor& input) {
size_t index = inputs_.size();
inputs_.emplace_back(input);
input_range_.emplace_back(index, index + 1);
}

void CustomOpKernelContext::EmplaceBackInputs(
const std::vector<Tensor>& inputs) {
size_t index = inputs_.size();
Expand Down
2 changes: 1 addition & 1 deletion paddle/utils/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void ShareTensor(PyObject* src, PyObject* dst) {
}
}

paddle::Tensor CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos) {
paddle::Tensor& CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里为什么要改成引用

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

因为需要对PyObject*里的Tensor做原位修改,如果返回复制对象,则不能做原位修改。

if (PyObject_TypeCheck(obj, p_tensor_type) ||
PyObject_TypeCheck(obj, p_string_tensor_type)) {
return reinterpret_cast<TensorObject*>(obj)->tensor;
Expand Down
2 changes: 1 addition & 1 deletion paddle/utils/pybind.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ bool PyCheckTensor(PyObject* obj);
void ShareTensor(PyObject* src, PyObject* dst);

// Internal use only, to expose the Tensor type to Python.
paddle::Tensor CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos);
paddle::Tensor& CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos);

// Internal use only, to expose the Tensor type to Python.
PyObject* ToPyObject(const paddle::Tensor& value,
Expand Down