Skip to content

Commit

Permalink
add create_tensor and move tensor bind to execute
Browse files Browse the repository at this point in the history
  • Loading branch information
chraac committed Sep 10, 2024
1 parent 44269d7 commit c34eecb
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 48 deletions.
5 changes: 2 additions & 3 deletions ggml/src/ggml-qnn/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,8 @@ class ggml_qnn_graph {

QNN_LOG_DEBUG("graph name %s, build_graph start", _graph_name.c_str());
_op_config = op_constructor(_graph_name);
_op_config->create_tensors(_device, _graph_handle, _qnn_instance, tensor_inputs.size(), tensor_outputs.size());
if (!_op_config->bind_tensors(tensor_inputs, tensor_outputs)) {
QNN_LOG_ERROR("graph name %s, bind tensors failed\n", _graph_name.c_str());
if (!_op_config->create_tensors(_device, _graph_handle, _qnn_instance, tensor_inputs, tensor_outputs)) {
QNN_LOG_ERROR("graph name %s, create_tensors failed\n", _graph_name.c_str());
return false;
}

Expand Down
66 changes: 42 additions & 24 deletions ggml/src/ggml-qnn/op-config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,52 @@ void ggml_qnn_single_op_config::add_scalar_param(const std::string &name, const
_parameters.push_back(param);
}

void ggml_qnn_single_op_config::create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance, const size_t input_count,
const size_t output_count) {
_tensor_inputs.resize(input_count);
_qnn_tensor_inputs.resize(input_count);
// Creates one ggml_qnn_tensor for every entry of tensor_inputs and tensor_outputs and
// caches both the wrapper and its Qnn_Tensor_t in the member vectors.
//
// device, graph_handle, qnn_instance: forwarded unchanged to each ggml_qnn_tensor constructor.
// tensor_inputs / tensor_outputs: ggml tensors describing the operands; inputs are named
//   "src<i>" and outputs "dst<i>".
// Returns true when every tensor was created, false as soon as one create_tensor() call fails.
//
// NOTE(review): on failure only _tensor_inputs / _tensor_outputs are cleared;
// _qnn_tensor_inputs / _qnn_tensor_outputs keep the size set by resize() — confirm callers
// never read them after a false return.
// NOTE(review): this text comes from a commit-diff view. A few lines below reference
// input_count / output_count, which are not parameters of this signature, and repeat the
// neighbouring statement; they look like the removed side of the hunk — confirm against
// the real file before treating them as live code.
bool ggml_qnn_single_op_config::create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) {
int tensor_rank = 0;
// Use the largest ggml_n_dims() over all inputs and outputs as the rank passed to every
// create_tensor() call below.
for (auto tensor : tensor_inputs) {
tensor_rank = std::max(tensor_rank, ggml_n_dims(tensor));
}
for (auto tensor : tensor_outputs) {
tensor_rank = std::max(tensor_rank, ggml_n_dims(tensor));
}

_tensor_inputs.resize(tensor_inputs.size());
_qnn_tensor_inputs.resize(tensor_inputs.size());
// Scratch buffer reused for every generated tensor name.
char buffer[GGML_MAX_NAME] = {};
// NOTE(review): next line uses input_count (not declared in this signature) — presumably
// removed-side diff residue.
for (size_t i = 0; i < input_count; i++) {
for (size_t i = 0; i < tensor_inputs.size(); i++) {
snprintf(buffer, GGML_MAX_NAME, "src%d", (int)i);
// NOTE(review): next line duplicates the make_shared call that follows it — presumably
// removed-side diff residue.
_tensor_inputs[i] = std::make_shared<ggml_qnn_tensor>(std::string(buffer), device, graph_handle, qnn_instance);
auto tensor = std::make_shared<ggml_qnn_tensor>(std::string(buffer), device, graph_handle, qnn_instance);
// Second argument `true` marks the tensor as an input.
if (!tensor->create_tensor(tensor_inputs[i], true, tensor_rank)) {
QNN_LOG_ERROR("create input tensor %s failed\n", buffer);
// Drop the wrappers created so far; _qnn_tensor_inputs is left as resized above.
_tensor_inputs.clear();
return false;
}

_qnn_tensor_inputs[i] = tensor->get_qnn_tensor();
_tensor_inputs[i] = tensor;
}

// NOTE(review): the next three lines use output_count (not declared in this signature) and
// repeat the resize/loop that follows — presumably removed-side diff residue.
_tensor_outputs.resize(output_count);
_qnn_tensor_outputs.resize(output_count);
for (size_t i = 0; i < output_count; i++) {
_tensor_outputs.resize(tensor_outputs.size());
_qnn_tensor_outputs.resize(tensor_outputs.size());
for (size_t i = 0; i < tensor_outputs.size(); i++) {
snprintf(buffer, GGML_MAX_NAME, "dst%d", (int)i);
// NOTE(review): next line duplicates the make_shared call that follows it — presumably
// removed-side diff residue.
_tensor_outputs[i] = std::make_shared<ggml_qnn_tensor>(std::string(buffer), device, graph_handle, qnn_instance);
auto tensor = std::make_shared<ggml_qnn_tensor>(std::string(buffer), device, graph_handle, qnn_instance);
// Second argument `false` marks the tensor as an output.
if (!tensor->create_tensor(tensor_outputs[i], false, tensor_rank)) {
QNN_LOG_ERROR("create output tensor %s failed\n", buffer);
// Roll back both wrapper lists; the Qnn_Tensor_t vectors are not cleared here.
_tensor_inputs.clear();
_tensor_outputs.clear();
return false;
}

_qnn_tensor_outputs[i] = tensor->get_qnn_tensor();
_tensor_outputs[i] = tensor;
}

return true;
}

bool ggml_qnn_single_op_config::add_nodes(Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance) {
Expand All @@ -54,20 +83,9 @@ bool ggml_qnn_single_op_config::bind_tensors(const ggml_tensor_array_t &tensor_i
const ggml_tensor_array_t &tensor_outputs) {
GGML_ASSERT(tensor_inputs.size() == _tensor_inputs.size());
GGML_ASSERT(tensor_outputs.size() == _tensor_outputs.size());

int tensor_rank = 0;

// get the max tensor rank
for (auto tensor : tensor_inputs) {
tensor_rank = std::max(tensor_rank, ggml_n_dims(tensor));
}
for (auto tensor : tensor_outputs) {
tensor_rank = std::max(tensor_rank, ggml_n_dims(tensor));
}

for (size_t i = 0; i < tensor_inputs.size(); i++) {
auto *ggml_tensor = tensor_inputs[i];
if (!_tensor_inputs[i]->bind_ggml_tensor(ggml_tensor, true, tensor_rank)) {
if (!_tensor_inputs[i]->bind_ggml_tensor(ggml_tensor)) {
QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
return false;
}
Expand All @@ -77,7 +95,7 @@ bool ggml_qnn_single_op_config::bind_tensors(const ggml_tensor_array_t &tensor_i

for (size_t i = 0; i < tensor_outputs.size(); i++) {
auto *ggml_tensor = tensor_outputs[i];
if (!_tensor_outputs[i]->bind_ggml_tensor(ggml_tensor, false, tensor_rank)) {
if (!_tensor_outputs[i]->bind_ggml_tensor(ggml_tensor)) {
QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
return false;
}
Expand Down
22 changes: 14 additions & 8 deletions ggml/src/ggml-qnn/op-config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ namespace qnn {
class ggml_qnn_op_config {
public:
virtual ~ggml_qnn_op_config() {}
virtual void create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance, const size_t input_count,
const size_t output_count) = 0;
// Pure virtual: creates the op's tensors from the given ggml input/output tensor arrays on
// the supplied device / graph handle / QNN instance. The bool result reports success; the
// ggml_qnn_single_op_config implementation returns false when creating any tensor fails.
virtual bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance, const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) = 0;
virtual std::vector<Qnn_Tensor_t> &get_qnn_input_tensors() = 0;
virtual std::vector<Qnn_Tensor_t> &get_qnn_output_tensors() = 0;
virtual bool add_nodes(Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance) = 0;
Expand All @@ -31,8 +31,8 @@ class ggml_qnn_single_op_config : public ggml_qnn_op_config {
_name(name), _package_name(package_name), _op_type(op_type) {}

void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
void create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance,
const size_t input_count, const size_t output_count) override;
// Creates one QNN tensor per entry of tensor_inputs / tensor_outputs (defined in
// op-config.cpp). Returns false if any tensor cannot be created, true otherwise.
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs, const ggml_tensor_array_t &tensor_outputs) override;
bool add_nodes(Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance) override;
bool bind_tensors(const ggml_tensor_array_t &tensor_inputs, const ggml_tensor_array_t &tensor_outputs) override;
void unbind_tensors() override;
Expand All @@ -58,8 +58,10 @@ class ggml_qnn_single_op_config : public ggml_qnn_op_config {

class ggml_qnn_list_op_config : public ggml_qnn_op_config {
public:
void create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance,
const size_t input_count, const size_t output_count) override {}
// No-op for the list config: every argument is ignored and success is always reported.
// NOTE(review): nothing is forwarded to the operations held in _operations — confirm that
// their tensors are created elsewhere.
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs, const ggml_tensor_array_t &tensor_outputs) override {
return true;
}

// Returns the QNN input tensors of the first operation in _operations.
// NOTE(review): front() on an empty std::vector is undefined behaviour — confirm that
// _operations is guaranteed non-empty before this is called.
std::vector<Qnn_Tensor_t> &get_qnn_input_tensors() override { return _operations.front()->get_qnn_input_tensors(); }

Expand All @@ -81,7 +83,11 @@ class ggml_qnn_list_op_config : public ggml_qnn_op_config {
return true;
}

void unbind_tensors() override {}
// Forwards unbind_tensors() to every operation stored in _operations, in container order.
void unbind_tensors() override {
for (auto &op : _operations) {
op->unbind_tensors();
}
}

private:
std::vector<std::unique_ptr<ggml_qnn_op_config>> _operations;
Expand Down
30 changes: 17 additions & 13 deletions ggml/src/ggml-qnn/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,7 @@ class ggml_qnn_tensor {

// Explicitly calls reset() on _qnn_rpc_buffer when the tensor wrapper is destroyed.
~ggml_qnn_tensor() { _qnn_rpc_buffer.reset(); }

bool bind_ggml_tensor(ggml_tensor *tensor, bool is_input, int prev_max_rank) {
if (_tensor) {
if (_tensor != tensor) {
QNN_LOG_WARN("tensor %s has been bound to another ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return false;
}
QNN_LOG_INFO("tensor %s already bound to same ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return true;
}

bool create_tensor(const ggml_tensor *tensor, bool is_input, int prev_max_rank) {
update_params_from_ggml_tensor(tensor, is_input, prev_max_rank);

if (!QNN_TENSOR_GET_ID(_qnn_tensor)) {
Expand All @@ -59,6 +48,21 @@ class ggml_qnn_tensor {
QNN_TENSOR_GET_ID(qnn_tensor), QNN_TENSOR_GET_RANK(qnn_tensor));
}

return true;
}

bool bind_ggml_tensor(ggml_tensor *tensor) {
if (_tensor) {
if (_tensor != tensor) {
QNN_LOG_WARN("tensor %s has been bound to another ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return false;
}
QNN_LOG_INFO("tensor %s already bound to same ggml tensor %s", _tensor_name.c_str(),
ggml_get_name(_tensor));
return true;
}

if (should_use_mem_handle()) {
if (!_qnn_rpc_buffer) {
auto qnn_rpc_buffer = std::make_unique<ggml_qnn_rpc_buffer>(
Expand Down Expand Up @@ -167,7 +171,7 @@ class ggml_qnn_tensor {
return true;
}

void update_params_from_ggml_tensor(ggml_tensor *tensor, bool is_input, int prev_max_rank) {
void update_params_from_ggml_tensor(const ggml_tensor *tensor, bool is_input, int prev_max_rank) {
_dimensions[0] = (uint32_t)tensor->ne[0];
_dimensions[1] = (uint32_t)tensor->ne[1];
_dimensions[2] = (uint32_t)tensor->ne[2];
Expand Down

0 comments on commit c34eecb

Please sign in to comment.