Skip to content

Commit

Permalink
Fix xpu error message (#28061) (#28092)
Browse files Browse the repository at this point in the history
* fix error message, test=kunlun

* fix, test=kunlun
  • Loading branch information
MrChengmo authored Oct 19, 2020
1 parent 5c2852a commit 91727ac
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 26 deletions.
86 changes: 65 additions & 21 deletions paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,53 +86,97 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
int r = xpu::matrix_vector_add_grad(
dev_ctx.x_context(), dout->data<T>(), dout->data<T>(),
dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of ElementWiseAddOp, error "
"message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error "
"message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is "
"properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
return;
}

if (dx == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
}

if (dy == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
} else {
if (len != n) {
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data),
len * sizeof(float)),
XPU_SUCCESS, platform::errors::External(
XPU_SUCCESS, platform::errors::ResourceExhausted(
"XPU has no enough memory"));
}
}

int r = xpu::elementwise_add_grad(
dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/,
dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of ElementWiseAddOp, error "
"message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error message: "
"RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}

if ((dy != nullptr) && (len != n)) {
r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n,
post, xpu::ElementwiseOp::ASSIGN);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of ElementWiseAddOp, error "
"message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error "
"message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is "
"properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
dev_ctx.Wait();
xpu_free(dy_data);
}
Expand Down
20 changes: 18 additions & 2 deletions paddle/fluid/operators/optimizers/momentum_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,24 @@ class MomentumOpXPUKernel : public framework::OpKernel<T> {
dev_ctx.x_context(), param->data<float>(), velocity->data<float>(),
grad->data<float>(), lr, use_nesterov, mu, param_out->numel(),
param_out->data<float>(), velocity_out->data<float>());
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::PermissionDenied("XPU kernel error!"));
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of MomentumOp, error message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of MomentumOp, error message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of MomentumOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
}
};
} // namespace operators
Expand Down
22 changes: 19 additions & 3 deletions paddle/fluid/operators/optimizers/sgd_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,25 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
auto &dev_ctx = ctx.template device_context<DeviceContext>();
int r = xpu::sgd(dev_ctx.x_context(), sz, grad_data, param_data, lr,
out_data);
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::PermissionDenied("XPU kernel error!"));
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of SgdOp, error message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of SgdOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of SgdOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."));
}
} else {
PADDLE_ENFORCE_EQ(false, true,
platform::errors::PermissionDenied(
Expand Down

0 comments on commit 91727ac

Please sign in to comment.