diff --git a/include/tvm/runtime/ndarray.h b/include/tvm/runtime/ndarray.h
index a4c285e3dd08..e80ed5fb1f8f 100644
--- a/include/tvm/runtime/ndarray.h
+++ b/include/tvm/runtime/ndarray.h
@@ -155,6 +155,24 @@ class NDArray : public ObjectRef {
    */
   TVM_DLL static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
                                Optional<String> mem_scope = NullOpt);
+  /*!
+   * \brief Create a NDArray backed by an external DLTensor.
+   *
+   * This allows us to create a NDArray using the memory
+   * allocated by an external source. Responsibility for retaining
+   * the memory lies with the external source.
+   * \param dl_tensor The DLTensor to create the view from.
+   * \return The created NDArray view.
+   */
+  TVM_DLL static NDArray FromExternalDLTensor(const DLTensor& dl_tensor);
+  /*!
+   * \brief Create new NDArray, data is copied from DLTensor.
+   *
+   * \param dl_tensor The DLTensor to copy from.
+   * \param dev device location of the created NDArray.
+   * \return The created NDArray with copied data.
+   */
+  TVM_DLL static NDArray NewFromDLTensor(DLTensor* dl_tensor, Device dev);
   /*!
    * \brief Create a NDArray backed by a dlpack tensor.
    *
diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py
index 0592368f6b0a..6e59c3455a91 100644
--- a/python/tvm/runtime/vm.py
+++ b/python/tvm/runtime/vm.py
@@ -426,6 +426,10 @@ def _setup_device(self, dev, memory_cfg):
 
     def set_input(self, func_name, *args, **kwargs):
         """Set the input to a function.
+        If the device type and device id of an input tensor match
+        those of the target device, zero copy is used: the internal
+        tensor is a reference to the memory allocated by the input one.
+        Otherwise a new internal NDArray is created and the data is copied.
 
         Parameters
         ----------
diff --git a/src/runtime/ndarray.cc b/src/runtime/ndarray.cc
index 3b75540f8763..f44dc86f902a 100644
--- a/src/runtime/ndarray.cc
+++ b/src/runtime/ndarray.cc
@@ -121,6 +121,13 @@ struct NDArray::Internal {
     }
     delete ptr;
   }
+  // Deleter for NDArray based on external DLTensor
+  // The memory is allocated from outside and it is assumed that
+  // responsibility for its freeing is also outside
+  static void SelfDeleter(Object* ptr_obj) {
+    auto* ptr = static_cast<NDArray::Container*>(ptr_obj);
+    delete ptr;
+  }
   // Local create function which allocates tensor metadata
   // but does not allocate space for the data.
   static NDArray Create(ShapeTuple shape, DLDataType dtype, Device dev) {
@@ -198,6 +205,30 @@ NDArray NDArray::Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional<
   return ret;
 }
 
+NDArray NDArray::FromExternalDLTensor(const DLTensor& dl_tensor) {
+  NDArray::Container* data = new NDArray::Container();
+
+  data->SetDeleter(Internal::SelfDeleter);
+  data->dl_tensor = dl_tensor;
+  std::vector<ShapeTuple::index_type> shape;
+  shape.resize(data->dl_tensor.ndim);
+  shape.assign(data->dl_tensor.shape, data->dl_tensor.shape + data->dl_tensor.ndim);
+  data->shape_ = ShapeTuple(shape);
+  data->dl_tensor.shape = const_cast<ShapeTuple::index_type*>(data->shape_.data());
+
+  return NDArray(GetObjectPtr<Object>(data));
+}
+
+NDArray NDArray::NewFromDLTensor(DLTensor* tensor, Device dev) {
+  std::vector<int64_t> shape;
+  for (int64_t i = 0; i < tensor->ndim; i++) {
+    shape.push_back(tensor->shape[i]);
+  }
+  NDArray ary = NDArray::Empty(shape, tensor->dtype, dev);
+  ary.CopyFrom(tensor);
+  return ary;
+}
+
 NDArray NDArray::FromDLPack(DLManagedTensor* tensor) {
   NDArray::Container* data = new NDArray::Container();
   // construct header
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc
index 38d793606dc4..41b9395237ee 100644
--- a/src/runtime/vm/vm.cc
+++ b/src/runtime/vm/vm.cc
@@ -70,8 +70,15 @@ inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev) {
   if (src->IsInstance<NDArray::ContainerType>()) {
     auto nd_array = Downcast<NDArray>(src);
     // TODO(mbs): Should respect device id also.
-    if (nd_array->device.device_type != dev.device_type) {
-      VLOG(2) << "copying from " << nd_array->device.device_type << " to " << dev.device_type;
+    // TODO(vvchernov): it still does not work for different device id
+    // due to simple implementation of Get() and AllocDataSpace() methods
+    // see tvm/src/runtime/c_runtime_api.cc: L139
+    // tvm/src/runtime/cpu_device_api.cc: L47
+    if (nd_array->device.device_type != dev.device_type ||
+        nd_array->device.device_id != dev.device_id) {
+      VLOG(2) << "copying from " << nd_array->device.device_type << "["
+              << nd_array->device.device_id << "] to " << dev.device_type << "[" << dev.device_id
+              << "]";
       return nd_array.CopyTo(dev);
     }
     return src;
@@ -303,13 +310,12 @@ void VirtualMachine::SetInputTensorWithIndex(std::vector<ObjectRef>& tensors,
   if (inp_tensor.type_code() == kTVMDLTensorHandle) {
     // Automatically convert input DLTensors to NDArray
     DLTensor* tensor = inp_tensor;
-    std::vector<int64_t> shape;
-    for (int64_t i = 0; i < tensor->ndim; i++) {
-      shape.push_back(tensor->shape[i]);
+    if (dev.device_type == tensor->device.device_type &&
+        dev.device_id == tensor->device.device_id) {
+      tensors[index] = NDArray::FromExternalDLTensor(*tensor);
+    } else {
+      tensors[index] = NDArray::NewFromDLTensor(tensor, dev);
     }
-    NDArray ary = NDArray::Empty(shape, tensor->dtype, dev);
-    ary.CopyFrom(tensor);
-    tensors[index] = ary;
   } else {
     tensors[index] = CopyTo(inp_tensor, dev);
   }