Skip to content

Commit

Permalink
part-3 cherry-pick from: add check for c_embedding (PaddlePaddle#55621)
Browse files Browse the repository at this point in the history
  • Loading branch information
ForFishes authored and wentaoyu committed Nov 23, 2023
1 parent ded4c9d commit f457d4e
Show file tree
Hide file tree
Showing 10 changed files with 35 additions and 18 deletions.
4 changes: 4 additions & 0 deletions paddle/fluid/operators/collective/c_embedding_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ class CEmbeddingOpMaker : public framework::OpProtoAndCheckerMaker {
"(int64, default 0), The starting index is indeed, "
"and the out-of-bounds will be set to 0 ")
.SetDefault(0);
AddAttr<int64_t>("vocab_size",
"(int64, default -1), The total vocabulary size to check"
"the out-of-bounds ids. If it is -1, no check will be ")
.SetDefault(-1);
AddComment(R"DOC(
c_embedding Operator.
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/api/yaml/legacy_backward.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
backward : batch_norm_double_grad

- backward_op : c_embedding_grad
forward : c_embedding (Tensor weight, Tensor x, int64_t start_index=0) -> Tensor(out)
forward : c_embedding (Tensor weight, Tensor x, int64_t start_index=0, int64_t vocab_size=-1) -> Tensor(out)
args : (Tensor weight, Tensor x, Tensor out_grad, int64_t start_index=0)
output : Tensor(weight_grad)
infer_meta :
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/api/yaml/legacy_ops.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,14 @@
func : c_concat

- op : c_embedding
args : (Tensor weight, Tensor x, int64_t start_index=0)
args : (Tensor weight, Tensor x, int64_t start_index=0, int64_t vocab_size=-1)
output : Tensor(out)
infer_meta :
func : CEmbeddingInferMeta
param : [weight, x, start_index]
kernel :
func : c_embedding
param : [weight, x, start_index]
param : [weight, x, start_index, vocab_size]
data_type : weight
backward : c_embedding_grad

Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/c_embedding_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ void CEmbeddingKernel(const Context& ctx,
const DenseTensor& w,
const DenseTensor& ids,
int64_t start_index,
int64_t vocab_size,
DenseTensor* out);

} // namespace phi
1 change: 1 addition & 0 deletions paddle/phi/kernels/cpu/c_embedding_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ void CEmbeddingKernel(const Context& ctx,
const DenseTensor& w,
const DenseTensor& ids,
int64_t start_index,
int64_t vocab_size,
DenseTensor* out) {
VLOG(10) << "table_dims:" << w.dims();
const T* table_data = w.data<T>();
Expand Down
19 changes: 12 additions & 7 deletions paddle/phi/kernels/gpu/c_embedding_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,23 @@ __global__ void CEmbedding(T* out,
const int64_t N,
const int64_t start_idx,
const int64_t end_idx,
const int64_t vocab_size,
const int64_t limit) {
CUDA_KERNEL_LOOP(i, limit) {
size_t row = i / columns;
size_t col = i % columns;
auto id = ids[row];

PADDLE_ENFORCE(
id >= 0 && (vocab_size < 0 || id < vocab_size),
"The index is out of bounds, "
"please check whether the dimensions of index and "
"input meet the requirements. It should "
"be less than [%d] and greater than or equal to 0, but received [%d]",
vocab_size,
id);
if (id >= start_idx && id < end_idx) {
auto real_idx = id - start_idx;
PADDLE_ENFORCE(real_idx < N,
"The index is out of bounds, "
"please check whether the dimensions of index and "
"input meet the requirements. It should "
"be less than [%d], but received [%d]",
N,
real_idx);
out[i] = table[real_idx * columns + col];
} else {
out[i] = static_cast<T>(0);
Expand All @@ -63,6 +65,7 @@ void CEmbeddingKernel(const Context& ctx,
const DenseTensor& w,
const DenseTensor& ids,
int64_t start_index,
int64_t vocab_size,
DenseTensor* out) {
size_t N = w.dims()[0];
size_t D = w.dims()[1];
Expand All @@ -87,6 +90,7 @@ void CEmbeddingKernel(const Context& ctx,
D,
N,
start_index,
vocab_size,
end_idx,
limit);

Expand All @@ -99,6 +103,7 @@ void CEmbeddingKernel(const Context& ctx,
D,
N,
start_index,
vocab_size,
end_idx,
limit);
} else {
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/xpu/c_embedding_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void CEmbeddingKernel(const Context& dev_ctx,
const DenseTensor& w,
const DenseTensor& ids,
int64_t start_index,
int64_t vocab_size,
DenseTensor* out) {
const T* table_data = w.data<T>();
T* output_data = dev_ctx.template Alloc<T>(out);
Expand Down
3 changes: 2 additions & 1 deletion paddle/phi/ops/compat/c_embedding_sig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

namespace phi {
// Maps the fluid `c_embedding` op onto the phi kernel signature.
// KernelSignature takes exactly four argument groups:
//   (kernel_name, inputs, attrs, outputs).
// The newly added "vocab_size" attribute therefore belongs in the SAME
// attrs list as "start_index" — passing it as a separate brace-list would
// shift it into the outputs slot and leave {"Out"} as a stray argument.
KernelSignature CEmbeddingOpArgumentMapping(const ArgumentMappingContext& ctx) {
  return KernelSignature(
      "c_embedding", {"W", "Ids"}, {"start_index", "vocab_size"}, {"Out"});
}

KernelSignature CEmbeddingGradOpArgumentMapping(
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/layers/mpu/mp_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def __init__(
self._size = [per_part_size, embedding_dim]
self._weight_attr = weight_attr
self._name = name
self.num_embeddings = num_embeddings

if self.is_mp and paddle.in_dynamic_mode():
with get_rng_state_tracker().rng_state():
Expand Down Expand Up @@ -161,6 +162,7 @@ def forward(self, x):
self.weight,
x,
start_index=self.vocab_start_index,
vocab_size=self.num_embeddings,
name=self._name,
)
output = mp_ops._mp_allreduce(
Expand Down
16 changes: 9 additions & 7 deletions python/paddle/distributed/fleet/layers/mpu/mp_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

import paddle
from paddle import _legacy_C_ops
from paddle import _C_ops, _legacy_C_ops
from paddle.autograd import PyLayer
from paddle.base.data_feeder import check_dtype, check_variable_and_dtype
from paddle.distributed import collective
Expand Down Expand Up @@ -330,7 +330,7 @@ def _mp_allreduce(
return out


def _c_lookup_table(table, index, start_index=0, name=None):
def _c_lookup_table(table, index, start_index=0, vocab_size=-1, name=None):
"""
Lookup table according to index.
Expand All @@ -345,9 +345,7 @@ def _c_lookup_table(table, index, start_index=0, name=None):
Tensor.
"""
if in_dynamic_mode():
return _legacy_C_ops.c_embedding(
table, index, "start_index", start_index
)
return _C_ops.c_embedding(table, index, start_index, vocab_size)
else:
op_type = 'c_embedding'
helper = LayerHelper(op_type, **locals())
Expand All @@ -358,7 +356,7 @@ def _c_lookup_table(table, index, start_index=0, name=None):
type='c_embedding',
inputs={'Ids': index, 'W': table},
outputs={'Out': tmp},
attrs={"start_index": start_index},
attrs={"start_index": start_index, "vocab_size": vocab_size},
)
return tmp

Expand Down Expand Up @@ -684,7 +682,11 @@ def _parallel_embedding(
main_block.vars[weight.name].is_distributed = True

output_parallel = _c_lookup_table(
weight, x, start_index=vocab_start_index, name=name
weight,
x,
start_index=vocab_start_index,
vocab_size=origin_size[0],
name=name,
)
out = _mp_allreduce(
output_parallel,
Expand Down

0 comments on commit f457d4e

Please sign in to comment.