From f86d35a269a94e8af7bec5945be01ab0acd76730 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Mon, 16 Apr 2018 17:11:11 +0800
Subject: [PATCH 1/9] add sharable tensor

---
 paddle/fluid/framework/tensor.h      | 29 ++++++++++++++++++++++++++
 paddle/fluid/framework/tensor_impl.h | 31 ++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 6f878541e6de1d..1e5c68a1b9d4bb 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -98,6 +98,9 @@ class Tensor {
   /*! The internal of two tensors share the same memory block. */
   inline Tensor& ShareDataWith(const Tensor& src);
 
+  /*! Share part of the memory of the two tensors */
+  inline Tensor& ShareDataWith(Tensor* src, size_t offset);
+
   /**
    * @brief   Return a sub-tensor of the given tensor.
    *
@@ -176,6 +179,32 @@ class Tensor {
     std::type_index type_;
   };
 
+  template <typename Place>
+  struct SharedPlaceholderImpl : public Placeholder {
+    SharedPlaceholderImpl(Place place, uint8_t* data, size_t size,
+                          std::type_index type)
+        : ptr_(data), place_(place), size_(size), type_(type) {}
+
+    virtual size_t size() const { return size_; }
+    virtual platform::Place place() const { return place_; }
+    virtual void* ptr() const { return static_cast<void*>(ptr_); }
+    virtual std::type_index type() const { return type_; }
+    virtual void set_type(std::type_index type) { type_ = type; }
+    virtual void set_place(platform::Place place) { place_ = place; }
+
+    /*! the pointer of memory block. */
+    uint8_t* ptr_;
+
+    /*! the place of memory block. */
+    platform::Place place_;
+
+    /*! the size of memory block. */
+    size_t size_;
+
+    /* the current type of memory */
+    std::type_index type_;
+  };
+
   /*! holds the memory block if allocated. */
   std::shared_ptr<Placeholder> holder_;
 
diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index f49d1a47a325b2..98d53fd1e7db95 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -162,6 +162,37 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
   return *this;
 }
 
+inline Tensor& Tensor::ShareDataWith(Tensor* src, size_t offset) {
+  // NOTE: data size is determined by current tensor shape and data type
+  src->check_memory_size();
+  PADDLE_ENFORCE_EQ(src->type(), this->type(),
+                    "tensor data type must be the same when sharing data");
+  auto place = src->place();
+  auto type = src->type();
+  size_t size = src->numel() * SizeOfType(src->type());
+  auto* ref = static_cast<uint8_t*>(src->mutable_data(place)) + offset;
+  if (platform::is_cpu_place(place)) {
+    holder_.reset(new SharedPlaceholderImpl<platform::CPUPlace>(
+        boost::get<platform::CPUPlace>(place), ref, size, type));
+  } else if (platform::is_gpu_place(place) ||
+             platform::is_cuda_pinned_place(place)) {
+#ifndef PADDLE_WITH_CUDA
+    PADDLE_THROW(
+        "CUDAPlace or CUDAPinnedPlace is not supported in CPU-only mode.");
+  }
+#else
+    if (platform::is_gpu_place(place)) {
+      holder_.reset(new SharedPlaceholderImpl<platform::CUDAPlace>(
+          boost::get<platform::CUDAPlace>(place), ref, size, type));
+    } else if (platform::is_cuda_pinned_place(place)) {
+      holder_.reset(new SharedPlaceholderImpl<platform::CUDAPinnedPlace>(
+          boost::get<platform::CUDAPinnedPlace>(place), ref, size, type));
+    }
+  }
+#endif
+  return *this;
+}
+
 inline Tensor Tensor::Slice(int begin_idx, int end_idx) const {
   check_memory_size();
   PADDLE_ENFORCE_GE(begin_idx, 0,

From 04c559e3aad8510fb6abfb9e469449913971266c Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Mon, 16 Apr 2018 20:32:18 +0800
Subject: [PATCH 2/9] wip split byref op

---
 paddle/fluid/framework/tensor.h             |  10 +-
 paddle/fluid/framework/tensor_impl.h        |   4 +-
 paddle/fluid/operators/split_byref_op.cc    | 101 ++++++++++++++++++++
 paddle/fluid/operators/split_byref_op.cu.cc |  18 ++++
 paddle/fluid/operators/split_byref_op.h     |  43 +++++++++
 paddle/fluid/operators/split_op.cc          |  15 ---
 paddle/fluid/operators/split_op.h           |  15 +++
 7 files changed, 185 insertions(+), 21 deletions(-)
 create mode 100644 paddle/fluid/operators/split_byref_op.cc
 create mode 100644 paddle/fluid/operators/split_byref_op.cu.cc
 create mode 100644 paddle/fluid/operators/split_byref_op.h

diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 1e5c68a1b9d4bb..f30dcc000b7142 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -99,7 +99,7 @@ class Tensor {
   inline Tensor& ShareDataWith(const Tensor& src);
 
   /*! Share part of the memory of the two tensors */
-  inline Tensor& ShareDataWith(Tensor* src, size_t offset);
+  inline Tensor& ShareDataWith(const Tensor* src, size_t offset);
 
   /**
    * @brief   Return a sub-tensor of the given tensor.
@@ -181,19 +181,21 @@ class Tensor {
   template <typename Place>
   struct SharedPlaceholderImpl : public Placeholder {
-    SharedPlaceholderImpl(Place place, uint8_t* data, size_t size,
+    SharedPlaceholderImpl(Place place, const uint8_t* data, size_t size,
                           std::type_index type)
         : ptr_(data), place_(place), size_(size), type_(type) {}
 
     virtual size_t size() const { return size_; }
     virtual platform::Place place() const { return place_; }
-    virtual void* ptr() const { return static_cast<void*>(ptr_); }
+    virtual void* ptr() const {
+      return const_cast<void*>(static_cast<const void*>(ptr_));
+    }
     virtual std::type_index type() const { return type_; }
     virtual void set_type(std::type_index type) { type_ = type; }
     virtual void set_place(platform::Place place) { place_ = place; }
 
     /*! the pointer of memory block. */
-    uint8_t* ptr_;
+    const uint8_t* ptr_;
 
     /*! the place of memory block. */
     platform::Place place_;
 
diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index 98d53fd1e7db95..a177ef74166f20 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -162,7 +162,7 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
   return *this;
 }
 
-inline Tensor& Tensor::ShareDataWith(Tensor* src, size_t offset) {
+inline Tensor& Tensor::ShareDataWith(const Tensor* src, size_t offset) {
   // NOTE: data size is determined by current tensor shape and data type
   src->check_memory_size();
   PADDLE_ENFORCE_EQ(src->type(), this->type(),
@@ -170,7 +170,7 @@ inline Tensor& Tensor::ShareDataWith(const Tensor* src, size_t offset) {
   auto place = src->place();
   auto type = src->type();
   size_t size = src->numel() * SizeOfType(src->type());
-  auto* ref = static_cast<uint8_t*>(src->mutable_data(place)) + offset;
+  auto* ref = src->data<uint8_t>() + offset;
   if (platform::is_cpu_place(place)) {
     holder_.reset(new SharedPlaceholderImpl<platform::CPUPlace>(
         boost::get<platform::CPUPlace>(place), ref, size, type));
diff --git a/paddle/fluid/operators/split_byref_op.cc b/paddle/fluid/operators/split_byref_op.cc
new file mode 100644
index 00000000000000..7413ce3e9ce60e
--- /dev/null
+++ b/paddle/fluid/operators/split_byref_op.cc
@@ -0,0 +1,101 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/split_byref_op.h"
+#include "paddle/fluid/operators/split_op.h"
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+
+class SplitByrefOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SplitOp should not be null.");
+    PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
+                      "Outputs(Out) of SplitOp should not be empty.");
+    auto in_dims = ctx->GetInputDim("X");
+    auto outs_names = ctx->Outputs("Out");
+    size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num"));
+    std::vector<int> sections = static_cast<std::vector<int>>(
+        ctx->Attrs().Get<std::vector<int>>("sections"));
+    const size_t outs_number = outs_names.size();
+    std::vector<framework::DDim> outs_dims;
+    outs_dims.reserve(outs_number);
+
+    if (num > 0) {
+      int64_t in_axis_dim = in_dims[0];
+      PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
+                        "tensor split does not result"
+                        " in an equal division");
+      size_t out_axis_dim = in_axis_dim / num;
+      for (size_t i = 0; i < outs_number; ++i) {
+        auto dim = in_dims;
+        dim[0] = out_axis_dim;
+        outs_dims.push_back(dim);
+      }
+    } else if (sections.size() > 0) {
+      PADDLE_ENFORCE_EQ(sections.size(), outs_number,
+                        "tensor split sections size "
+                        "should be equal to output size.");
+      for (size_t i = 0; i < outs_number; ++i) {
+        auto dim = in_dims;
+        dim[0] = sections[i];
+        outs_dims.push_back(dim);
+      }
+    }
+    ctx->SetOutputsDim("Out", outs_dims);
+  }
+};
+
+class SplitByrefOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SplitByrefOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "(Tensor) Input tensor of the split operator.");
+    AddOutput("Out", "(Tensor) Output tensors of the split operator.")
+        .AsDuplicable();
+    AddComment(R"DOC(
+SplitByref operator
+
+Split the source tensor into several tensors along axis 0. No copy is
+performed by this operator; the output tensors share the source tensor's
+blocks of memory.
+)DOC");
+    AddAttr<std::vector<int>>("sections",
+                              "(vector<int>) "
+                              "the length of each output along the "
+                              "specified axis.")
+        .SetDefault(std::vector<int>{});
+    AddAttr<int>("num",
+                 "(int, default 0)"
+                 "Number of sub-tensors. This must evenly divide "
+                 "Input.dims()[axis]")
+        .SetDefault(0);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+// NOTE: concat op default axis must be 0!
+USE_CPU_ONLY_OP(concat);
+
+REGISTER_OPERATOR(split_byref, ops::SplitByrefOp, ops::SplitByrefOpMaker,
+                  ops::SplitGradMaker);
+REGISTER_OP_CPU_KERNEL(
+    split_byref,
+    ops::SplitByrefOpKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/fluid/operators/split_byref_op.cu.cc b/paddle/fluid/operators/split_byref_op.cu.cc
new file mode 100644
index 00000000000000..1faf4f55dd54a2
--- /dev/null
+++ b/paddle/fluid/operators/split_byref_op.cu.cc
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/split_byref_op.h"
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    split, ops::SplitByrefOpKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h
new file mode 100644
index 00000000000000..7c3ab1c1b9d955
--- /dev/null
+++ b/paddle/fluid/operators/split_byref_op.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename DeviceContext, typename T>
+class SplitByrefOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in = ctx.Input<framework::Tensor>("X");
+    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
+    auto in_stride = framework::stride_numel(in->dims());
+    auto place = ctx.GetPlace();
+
+    size_t input_offset = 0;
+    for (size_t i = 0; i < outs.size(); ++i) {
+      // NOTE: no need to call mutable_data here to allocate memory.
+      auto* out = outs[i];
+      out->ShareDataWith(in, input_offset);
+      input_offset += out->numel() * framework::SizeOfType(out->type());
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc
index e745509ec8c1f2..a4398df36bcc2d 100644
--- a/paddle/fluid/operators/split_op.cc
+++ b/paddle/fluid/operators/split_op.cc
@@ -108,21 +108,6 @@ This operator splits the input tensor into multiple sub-tensors.
   }
 };
 
-class SplitGradMaker : public framework::SingleGradOpDescMaker {
- public:
-  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
-
- protected:
-  std::unique_ptr<framework::OpDesc> Apply() const override {
-    auto op = new framework::OpDesc();
-    op->SetType("concat");
-    op->SetInput("X", OutputGrad("Out"));
-    op->SetOutput("Out", InputGrad("X"));
-    op->SetAttrMap(Attrs());
-    return std::unique_ptr<framework::OpDesc>(op);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
 
diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h
index e2c41f44ab3ea3..f0c417c70521b1 100644
--- a/paddle/fluid/operators/split_op.h
+++ b/paddle/fluid/operators/split_op.h
@@ -44,5 +44,20 @@ class SplitOpKernel : public framework::OpKernel<T> {
   }
 };
 
+class SplitGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto op = new framework::OpDesc();
+    op->SetType("concat");
+    op->SetInput("X", OutputGrad("Out"));
+    op->SetOutput("Out", InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(op);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle

From 948628563f5313bce5c497c4cfd80f3d3d7774f8 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Tue, 17 Apr 2018 15:04:10 +0800
Subject: [PATCH 3/9] update

---
 paddle/fluid/operators/split_byref_op.cu.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/split_byref_op.cu.cc b/paddle/fluid/operators/split_byref_op.cu.cc
index 1faf4f55dd54a2..5ee6186f3541b7 100644
--- a/paddle/fluid/operators/split_byref_op.cu.cc
+++ b/paddle/fluid/operators/split_byref_op.cu.cc
@@ -15,4 +15,5 @@ limitations under the License. */
 #include "paddle/fluid/operators/split_byref_op.h"
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
-    split, ops::SplitByrefOpKernel<paddle::platform::CUDADeviceContext, float>);
+    split_byref,
+    ops::SplitByrefOpKernel<paddle::platform::CUDADeviceContext, float>);

From 0c6eef3e58b3cdc182d1d8531eb227abc065857f Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Tue, 17 Apr 2018 15:48:05 +0800
Subject: [PATCH 4/9] add split by ref test

---
 python/paddle/fluid/tests/unittests/test_split_op.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py
index 887bdfe8b36088..5a7123c36b17a3 100644
--- a/python/paddle/fluid/tests/unittests/test_split_op.py
+++ b/python/paddle/fluid/tests/unittests/test_split_op.py
@@ -19,7 +19,6 @@
 class TestSplitOp(OpTest):
     def setUp(self):
-        self.op_type = "split"
         axis = 1
         x = np.random.random((4, 5, 6)).astype('float32')
         out = np.split(x, [2, 3], axis)
@@ -28,6 +27,9 @@ def setUp(self):
         self.outputs = {'Out': [('out%d' % i, out[i]) \
             for i in xrange(len(out))]}
 
+    def _set_op_type(self):
+        self.op_type = "split"
+
     def test_check_output(self):
         self.check_output()
 
@@ -35,5 +37,10 @@ def test_check_grad(self):
         self.check_grad(['X'], ['out0', 'out1', 'out2'])
 
 
+class TestSplitByrefOp(OpTest):
+    def _set_op_type(self):
+        self.op_type = "split_byref"
+
+
 if __name__ == '__main__':
     unittest.main()

From ed89b7b7e6f7651a852be5cafdc1264f46bed65a Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Tue, 17 Apr 2018 17:23:02 +0800
Subject: [PATCH 5/9] dist train use split_by_ref

---
 python/paddle/fluid/distribute_transpiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/distribute_transpiler.py b/python/paddle/fluid/distribute_transpiler.py
index aa15392d7e4901..0c21f859a7a205 100644
--- a/python/paddle/fluid/distribute_transpiler.py
+++ b/python/paddle/fluid/distribute_transpiler.py
@@ -824,7 +824,7 @@ def _append_split_op(self, program, gradblocks):
         for v in splited_vars:
             sections.append(v.shape[0])
         program.global_block().append_op(
-            type="split",
+            type="split_byref",
             inputs={"X": orig_var},
             outputs={"Out": splited_vars},
             attrs={"sections": sections}  # assume split evenly

From 69188e59811d03be9d4e90ccf15d1203684a4607 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Tue, 17 Apr 2018 20:02:44 +0800
Subject: [PATCH 6/9] fix ut

---
 python/paddle/fluid/tests/unittests/test_split_op.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py
index 5a7123c36b17a3..eb49a53e54f4bd 100644
--- a/python/paddle/fluid/tests/unittests/test_split_op.py
+++ b/python/paddle/fluid/tests/unittests/test_split_op.py
@@ -19,6 +19,7 @@
 class TestSplitOp(OpTest):
     def setUp(self):
+        self._set_op_type()
         axis = 1
         x = np.random.random((4, 5, 6)).astype('float32')
         out = np.split(x, [2, 3], axis)

From 788636f078fae8b9b68e3afcf8e0eee5f52bc4fc Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Wed, 18 Apr 2018 13:28:41 +0800
Subject: [PATCH 7/9] update by comments

---
 paddle/fluid/framework/tensor.h         |  3 ---
 paddle/fluid/framework/tensor_impl.h    | 31 -------------------------
 paddle/fluid/operators/split_byref_op.h |  7 +++---
 3 files changed, 3 insertions(+), 38 deletions(-)

diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index f30dcc000b7142..5a6b24bfafbe76 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -98,9 +98,6 @@ class Tensor {
   /*! The internal of two tensors share the same memory block. */
   inline Tensor& ShareDataWith(const Tensor& src);
 
-  /*! Share part of the memory of the two tensors */
-  inline Tensor& ShareDataWith(const Tensor* src, size_t offset);
-
   /**
    * @brief   Return a sub-tensor of the given tensor.
    *
diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index a177ef74166f20..f49d1a47a325b2 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -162,37 +162,6 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
   return *this;
 }
 
-inline Tensor& Tensor::ShareDataWith(const Tensor* src, size_t offset) {
-  // NOTE: data size is determined by current tensor shape and data type
-  src->check_memory_size();
-  PADDLE_ENFORCE_EQ(src->type(), this->type(),
-                    "tensor data type must be the same when sharing data");
-  auto place = src->place();
-  auto type = src->type();
-  size_t size = src->numel() * SizeOfType(src->type());
-  auto* ref = src->data<uint8_t>() + offset;
-  if (platform::is_cpu_place(place)) {
-    holder_.reset(new SharedPlaceholderImpl<platform::CPUPlace>(
-        boost::get<platform::CPUPlace>(place), ref, size, type));
-  } else if (platform::is_gpu_place(place) ||
-             platform::is_cuda_pinned_place(place)) {
-#ifndef PADDLE_WITH_CUDA
-    PADDLE_THROW(
-        "CUDAPlace or CUDAPinnedPlace is not supported in CPU-only mode.");
-  }
-#else
-    if (platform::is_gpu_place(place)) {
-      holder_.reset(new SharedPlaceholderImpl<platform::CUDAPlace>(
-          boost::get<platform::CUDAPlace>(place), ref, size, type));
-    } else if (platform::is_cuda_pinned_place(place)) {
-      holder_.reset(new SharedPlaceholderImpl<platform::CUDAPinnedPlace>(
-          boost::get<platform::CUDAPinnedPlace>(place), ref, size, type));
-    }
-  }
-#endif
-  return *this;
-}
-
 inline Tensor Tensor::Slice(int begin_idx, int end_idx) const {
   check_memory_size();
   PADDLE_ENFORCE_GE(begin_idx, 0,
diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h
index 7c3ab1c1b9d955..9b54c7c74acb51 100644
--- a/paddle/fluid/operators/split_byref_op.h
+++ b/paddle/fluid/operators/split_byref_op.h
@@ -26,15 +26,14 @@ class SplitByrefOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<framework::Tensor>("X");
     auto outs = ctx.MultiOutput<framework::Tensor>("Out");
-    auto in_stride = framework::stride_numel(in->dims());
     auto place = ctx.GetPlace();
 
-    size_t input_offset = 0;
+    size_t row_offset = 0;
     for (size_t i = 0; i < outs.size(); ++i) {
       // NOTE: no need to call mutable_data here to allocate memory.
       auto* out = outs[i];
-      out->ShareDataWith(in, input_offset);
-      input_offset += out->numel() * framework::SizeOfType(out->type());
+      *out = std::move(in->Slice(row_offset, out->dims()[0]));
+      row_offset += out->dims()[0];
     }
   }
 };

From 184835856c94043a5c27f5da3921cdaba433273c Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Wed, 18 Apr 2018 14:44:17 +0800
Subject: [PATCH 8/9] fix copy size

---
 paddle/fluid/operators/detail/sendrecvop_utils.cc | 9 +++++----
 paddle/fluid/operators/split_byref_op.h           | 3 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc
index 16c612c45a37dd..69fcffe9bc3400 100644
--- a/paddle/fluid/operators/detail/sendrecvop_utils.cc
+++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc
@@ -82,7 +82,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
         platform::CPUPlace cpu;
         auto& gpu_dev_ctx =
             static_cast<const platform::CUDADeviceContext&>(ctx);
-        auto copy_size = tensor.memory_size();
+        auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type());
         payload = memory::Alloc(cpu, copy_size);
 
         memory::Copy(cpu, payload,
@@ -99,7 +99,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       } else {
         payload = tensor.data<void>();
       }
-      payload_size = tensor.memory_size();
+      payload_size = tensor.numel() * framework::SizeOfType(tensor.type());
       e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size);
     } break;
     case framework::proto::VarType_Type_SELECTED_ROWS: {
@@ -118,7 +118,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
         platform::CPUPlace cpu;
         auto& gpu_dev_ctx =
             static_cast<const platform::CUDADeviceContext&>(ctx);
-        auto copy_size = tensor->memory_size();
+        auto copy_size =
+            tensor->numel() * framework::SizeOfType(tensor->type());
         payload = memory::Alloc(cpu, copy_size);
         memory::Copy(cpu, payload,
                      boost::get<platform::CUDAPlace>(tensor->place()),
@@ -133,7 +134,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       } else {
         payload = slr->mutable_value()->data<void>();
       }
-      payload_size = tensor->memory_size();
+      payload_size = tensor->numel() * framework::SizeOfType(tensor->type());
       e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size);
     } break;
     default:
diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h
index 9b54c7c74acb51..a3aad68ea736e2 100644
--- a/paddle/fluid/operators/split_byref_op.h
+++ b/paddle/fluid/operators/split_byref_op.h
@@ -32,7 +32,8 @@ class SplitByrefOpKernel : public framework::OpKernel<T> {
     for (size_t i = 0; i < outs.size(); ++i) {
       // NOTE: no need to call mutable_data here to allocate memory.
       auto* out = outs[i];
-      *out = std::move(in->Slice(row_offset, out->dims()[0]));
+      VLOG(3) << "splitting by ref: " << row_offset << " " << out->dims()[0];
+      *out = std::move(in->Slice(row_offset, row_offset + out->dims()[0]));
       row_offset += out->dims()[0];
     }
   }

From ff0d9341ead47b7880d8d34e600b6bcd6a31c52e Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Wed, 18 Apr 2018 18:46:21 +0800
Subject: [PATCH 9/9] remove not used code

---
 paddle/fluid/framework/tensor.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 5a6b24bfafbe76..6f878541e6de1d 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -176,34 +176,6 @@ class Tensor {
     std::type_index type_;
   };
 
-  template <typename Place>
-  struct SharedPlaceholderImpl : public Placeholder {
-    SharedPlaceholderImpl(Place place, const uint8_t* data, size_t size,
-                          std::type_index type)
-        : ptr_(data), place_(place), size_(size), type_(type) {}
-
-    virtual size_t size() const { return size_; }
-    virtual platform::Place place() const { return place_; }
-    virtual void* ptr() const {
-      return const_cast<void*>(static_cast<const void*>(ptr_));
-    }
-    virtual std::type_index type() const { return type_; }
-    virtual void set_type(std::type_index type) { type_ = type; }
-    virtual void set_place(platform::Place place) { place_ = place; }
-
-    /*! the pointer of memory block. */
-    const uint8_t* ptr_;
-
-    /*! the place of memory block. */
-    platform::Place place_;
-
-    /*! the size of memory block. */
-    size_t size_;
-
-    /* the current type of memory */
-    std::type_index type_;
-  };
-
   /*! holds the memory block if allocated. */
   std::shared_ptr<Placeholder> holder_;
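
---
Post-series note: after the review in PATCH 7/9, the zero-copy split no longer
needs the custom SharedPlaceholderImpl added in PATCH 1/9. Each output simply
becomes a Slice of the input along axis 0, so every output aliases the input's
allocation and only the row offset differs; the shared_ptr holder keeps the
memory block alive as long as any slice still uses it. The sketch below is a
minimal, self-contained illustration of that sharing scheme only -- it does
not use Paddle's real Tensor API, and TensorView, SplitByRef, and every other
name in it are invented for this note.

// split_byref_sketch.cc -- row-wise splitting by reference over a contiguous
// row-major buffer. Compile with: g++ -std=c++11 split_byref_sketch.cc
#include <cassert>
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

struct TensorView {
  std::shared_ptr<float> holder;  // shared allocation, like Tensor::holder_
  size_t offset;                  // element offset into the allocation
  size_t rows;                    // rows visible through this view
  size_t row_width;               // elements per row (product of dims[1:])

  float* data() const { return holder.get() + offset; }
};

// Each output views rows [row_offset, row_offset + sections[i]) of the input;
// no element is copied, mirroring *out = in->Slice(begin, end) in the kernel.
std::vector<TensorView> SplitByRef(const TensorView& in,
                                   const std::vector<size_t>& sections) {
  std::vector<TensorView> outs;
  size_t row_offset = 0;
  for (size_t rows : sections) {
    assert(row_offset + rows <= in.rows);
    outs.push_back({in.holder, in.offset + row_offset * in.row_width, rows,
                    in.row_width});
    row_offset += rows;  // advance by whole rows, as the kernel does
  }
  return outs;
}

int main() {
  TensorView t;
  t.offset = 0;
  t.rows = 4;
  t.row_width = 3;
  t.holder = std::shared_ptr<float>(new float[t.rows * t.row_width](),
                                    std::default_delete<float[]>());
  for (size_t i = 0; i < t.rows * t.row_width; ++i) {
    t.data()[i] = static_cast<float>(i);
  }

  auto parts = SplitByRef(t, {1, 3});
  parts[1].data()[0] = 42.0f;        // writes through to the source tensor
  std::cout << t.data()[3] << "\n";  // prints 42: the memory is shared
  return 0;
}

This aliasing is also why PATCH 8/9 changes the serializer to compute the
copy size as numel() * SizeOfType(type()) instead of memory_size(): a sliced
output still points into the holder of the whole input, so its memory_size()
reflects the remainder of the shared block, while numel() covers exactly the
rows that belong to the slice.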