[VirtualMachine] Zero copy in set_input when input is DLTensor (#11003)
* implemented a method for creating an NDArray from an external DLTensor

* set input without copying when the source is a DLTensor

* code cleanup

* updated description and comments after review

Co-authored-by: Valery Chernov <[email protected]>
vvchernov and Valery Chernov authored Apr 15, 2022
1 parent 351f31b commit fafabc9
Showing 4 changed files with 67 additions and 8 deletions.
18 changes: 18 additions & 0 deletions include/tvm/runtime/ndarray.h
@@ -155,6 +155,24 @@ class NDArray : public ObjectRef {
*/
TVM_DLL static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
Optional<String> mem_scope = NullOpt);
/*!
* \brief Create an NDArray backed by an external DLTensor.
*
* This allows us to create an NDArray using memory
* allocated by an external source. Responsibility for keeping
* the memory alive lies with the external source.
* \param dl_tensor The external DLTensor to create the view from. No data is copied.
* \return The created NDArray view.
*/
TVM_DLL static NDArray FromExternalDLTensor(const DLTensor& dl_tensor);
/*!
* \brief Create a new NDArray with data copied from a DLTensor.
*
* \param dl_tensor The DLTensor to copy from.
* \param dev The device on which the created NDArray is allocated.
* \return The created NDArray holding its own copy of the data.
*/
TVM_DLL static NDArray NewFromDLTensor(DLTensor* dl_tensor, Device dev);
/*!
* \brief Create a NDArray backed by a dlpack tensor.
*
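For orientation, here is a minimal sketch of how these two factory methods might be called from C++ once this change lands. The buffer, shape, and device values are hypothetical; only FromExternalDLTensor and NewFromDLTensor come from this commit:

    #include <tvm/runtime/ndarray.h>

    #include <vector>

    int main() {
      std::vector<float> buffer(6, 1.0f);   // externally owned memory
      std::vector<int64_t> shape = {2, 3};  // hypothetical 2x3 tensor

      DLTensor dl_tensor;
      dl_tensor.data = buffer.data();
      dl_tensor.device = {kDLCPU, 0};
      dl_tensor.ndim = 2;
      dl_tensor.dtype = {kDLFloat, 32, 1};  // float32
      dl_tensor.shape = shape.data();
      dl_tensor.strides = nullptr;          // compact row-major layout
      dl_tensor.byte_offset = 0;

      // Zero copy: the NDArray aliases `buffer`, which must outlive the view.
      tvm::runtime::NDArray view =
          tvm::runtime::NDArray::FromExternalDLTensor(dl_tensor);

      // Copy: the NDArray owns freshly allocated memory on the given device.
      tvm::runtime::NDArray copy =
          tvm::runtime::NDArray::NewFromDLTensor(&dl_tensor, {kDLCPU, 0});
      return 0;
    }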
4 changes: 4 additions & 0 deletions python/tvm/runtime/vm.py
@@ -426,6 +426,10 @@ def _setup_device(self, dev, memory_cfg):

def set_input(self, func_name, *args, **kwargs):
"""Set the input to a function.
If device type and device id for input tensor are the same as
for target one the zero copy is used. It means that internal
tensor is reference to memory allocated by input one.
Otherwise new internal NDarray is created and data is copied
Parameters
----------
31 changes: 31 additions & 0 deletions src/runtime/ndarray.cc
@@ -121,6 +121,13 @@ struct NDArray::Internal {
}
delete ptr;
}
// Deleter for an NDArray backed by an external DLTensor.
// The memory was allocated externally, and responsibility for freeing
// it is assumed to lie with the external owner, so only the container
// object itself is deleted here.
static void SelfDeleter(Object* ptr_obj) {
auto* ptr = static_cast<NDArray::Container*>(ptr_obj);
delete ptr;
}
// Local create function which allocates tensor metadata
// but does not allocate space for the data.
static NDArray Create(ShapeTuple shape, DLDataType dtype, Device dev) {
@@ -198,6 +205,30 @@ NDArray NDArray::Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional<
return ret;
}

NDArray NDArray::FromExternalDLTensor(const DLTensor& dl_tensor) {
NDArray::Container* data = new NDArray::Container();

data->SetDeleter(Internal::SelfDeleter);
data->dl_tensor = dl_tensor;
std::vector<ShapeTuple::index_type> shape;
shape.resize(data->dl_tensor.ndim);
shape.assign(data->dl_tensor.shape, data->dl_tensor.shape + data->dl_tensor.ndim);
data->shape_ = ShapeTuple(shape);
data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data());

return NDArray(GetObjectPtr<Object>(data));
}

NDArray NDArray::NewFromDLTensor(DLTensor* tensor, Device dev) {
std::vector<int64_t> shape;
for (int64_t i = 0; i < tensor->ndim; i++) {
shape.push_back(tensor->shape[i]);
}
NDArray ary = NDArray::Empty(shape, tensor->dtype, dev);
ary.CopyFrom(tensor);
return ary;
}

NDArray NDArray::FromDLPack(DLManagedTensor* tensor) {
NDArray::Container* data = new NDArray::Container();
// construct header
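The key contract of FromExternalDLTensor above is aliasing rather than ownership: writes through the returned view land in the external buffer, and the buffer must stay alive for the lifetime of the view. A hedged illustration with a hypothetical CPU buffer (field order follows the DLTensor definition):

    std::vector<float> buf(4, 0.0f);
    int64_t shape[1] = {4};
    // data, device, ndim, dtype, shape, strides, byte_offset
    DLTensor t{buf.data(), {kDLCPU, 0}, 1, {kDLFloat, 32, 1}, shape, nullptr, 0};
    tvm::runtime::NDArray view = tvm::runtime::NDArray::FromExternalDLTensor(t);
    static_cast<float*>(view->data)[0] = 42.0f;
    // buf[0] is now 42.0f: no copy was made, so `buf` must outlive `view`.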
22 changes: 14 additions & 8 deletions src/runtime/vm/vm.cc
@@ -70,8 +70,15 @@ inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev) {
if (src->IsInstance<NDArray::ContainerType>()) {
auto nd_array = Downcast<NDArray>(src);
// TODO(mbs): Should respect device id also.
if (nd_array->device.device_type != dev.device_type) {
VLOG(2) << "copying from " << nd_array->device.device_type << " to " << dev.device_type;
// TODO(vvchernov): it still does not work for different device id
// due to simple implementation of Get() and AllocDataSpace() methods
// see tvm/src/runtime/c_runtime_api.cc: L139
// tvm/src/runtime/cpu_device_api.cc: L47
if (nd_array->device.device_type != dev.device_type ||
nd_array->device.device_id != dev.device_id) {
VLOG(2) << "copying from " << nd_array->device.device_type << "["
<< nd_array->device.device_id << "] to " << dev.device_type << "[" << dev.device_id
<< "]";
return nd_array.CopyTo(dev);
}
return src;
@@ -303,13 +310,12 @@ void VirtualMachine::SetInputTensorWithIndex(std::vector<ObjectRef>& tensors,
if (inp_tensor.type_code() == kTVMDLTensorHandle) {
// Automatically convert input DLTensors to NDArray
DLTensor* tensor = inp_tensor;
std::vector<int64_t> shape;
for (int64_t i = 0; i < tensor->ndim; i++) {
shape.push_back(tensor->shape[i]);
if (dev.device_type == tensor->device.device_type &&
dev.device_id == tensor->device.device_id) {
tensors[index] = NDArray::FromExternalDLTensor(*tensor);
} else {
tensors[index] = NDArray::NewFromDLTensor(tensor, dev);
}
NDArray ary = NDArray::Empty(shape, tensor->dtype, dev);
ary.CopyFrom(tensor);
tensors[index] = ary;
} else {
tensors[index] = CopyTo(inp_tensor, dev);
}
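Finally, a hedged sketch of how an embedder might reach the new zero-copy branch through the VM's public interface. Here `vm_module` (an already-loaded VM executable module) and `input` (a DLTensor on the VM's input device) are assumptions, and the function name "main" is illustrative:

    // A DLTensor* travels through the FFI as kTVMDLTensorHandle, so
    // SetInputTensorWithIndex takes the DLTensor branch shown above.
    tvm::runtime::PackedFunc set_input = vm_module.GetFunction("set_input");
    set_input("main", &input);  // zero copy when input.device matches the VM device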
