set qnn_instance only at constructor
chraac committed Sep 19, 2024
1 parent 3b69e71 commit ed181a1
Showing 5 changed files with 82 additions and 66 deletions.
13 changes: 8 additions & 5 deletions ggml/src/ggml-qnn/backend-ops.cpp
@@ -127,9 +127,10 @@ std::string get_graph_key(const std::string &op_name, const std::array<ggml_tens
qnn::ggml_op_constructor_t generate_common_op_constructor(const std::string &op_name) {
if (op_name == QNN_OP_MAT_MUL) {
// For QNN_OP_MAT_MUL, we need to transpose the input tensor
return [](const std::string &name) -> std::unique_ptr<qnn::ggml_qnn_op_config> {
auto config =
std::make_unique<qnn::ggml_qnn_single_op_config>(name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL);
return [](const std::string &name,
std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::unique_ptr<qnn::ggml_qnn_op_config> {
auto config = std::make_unique<qnn::ggml_qnn_single_op_config>(name, QNN_OP_PACKAGE_NAME_QTI_AISW,
QNN_OP_MAT_MUL, qnn_instance);
Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
scalar.dataType = QNN_DATATYPE_BOOL_8;
scalar.bool8Value = true;
@@ -139,8 +140,10 @@ qnn::ggml_op_constructor_t generate_common_op_constructor(const std::string &op_
};
}

return [op_name](const std::string &name) -> std::unique_ptr<qnn::ggml_qnn_op_config> {
return std::make_unique<qnn::ggml_qnn_single_op_config>(name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_name);
return [op_name](const std::string &name,
std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::unique_ptr<qnn::ggml_qnn_op_config> {
return std::make_unique<qnn::ggml_qnn_single_op_config>(name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_name,
qnn_instance);
};
}

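A side note on the pattern this hunk adopts: qnn::ggml_op_constructor_t now receives the qnn_instance as a second argument, so the instance crosses into the op config exactly once, at construction. The sketch below mirrors that shape under stand-in types; my_instance, my_op_config, and make_op_constructor are illustrative names, not part of this commit.

#include <functional>
#include <iostream>
#include <memory>
#include <string>

// Stand-ins for qnn::qnn_instance and qnn::ggml_qnn_op_config.
struct my_instance {};

struct my_op_config {
    my_op_config(std::string name, std::shared_ptr<my_instance> instance)
        : _name(std::move(name)), _instance(std::move(instance)) {}

    std::string _name;
    std::shared_ptr<my_instance> _instance;  // held for the config's whole lifetime
};

// Mirrors the updated ggml_op_constructor_t alias: the instance is a parameter
// of the factory call, not of every later method.
using op_constructor_t =
    std::function<std::unique_ptr<my_op_config>(const std::string &, std::shared_ptr<my_instance>)>;

op_constructor_t make_op_constructor(const std::string &op_type) {
    // op_type is captured by value, as in the second lambda of the hunk, so the
    // functor stays valid after this function returns.
    return [op_type](const std::string &name,
                     std::shared_ptr<my_instance> instance) -> std::unique_ptr<my_op_config> {
        return std::make_unique<my_op_config>(name + ":" + op_type, std::move(instance));
    };
}

int main() {
    auto instance = std::make_shared<my_instance>();
    auto ctor = make_op_constructor("MatMul");
    auto config = ctor("graph_key", instance);  // the single injection point
    std::cout << config->_name << '\n';
    return 0;
}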
9 changes: 5 additions & 4 deletions ggml/src/ggml-qnn/graph.hpp
@@ -15,7 +15,8 @@

namespace qnn {

using ggml_op_constructor_t = std::function<std::unique_ptr<qnn::ggml_qnn_op_config>(const std::string &)>;
using ggml_op_constructor_t =
std::function<std::unique_ptr<qnn::ggml_qnn_op_config>(const std::string &, std::shared_ptr<qnn::qnn_instance>)>;

class ggml_qnn_graph {
public:
@@ -90,13 +91,13 @@ class ggml_qnn_graph {
}

QNN_LOG_DEBUG("graph name %s, build_graph start", _graph_name.c_str());
_op_config = op_constructor(_graph_name);
if (!_op_config->create_tensors(_device, _graph_handle, _qnn_instance, tensor_inputs, tensor_outputs)) {
_op_config = op_constructor(_graph_name, _qnn_instance);
if (!_op_config->create_tensors(_device, _graph_handle, tensor_inputs, tensor_outputs)) {
QNN_LOG_ERROR("graph name %s, create_tensors failed\n", _graph_name.c_str());
return false;
}

if (!_op_config->add_op_to_graph(_graph_handle, _qnn_instance)) {
if (!_op_config->add_op_to_graph(_graph_handle)) {
QNN_LOG_ERROR("graph name %s, add nodes failed\n", _graph_name.c_str());
return false;
}
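For orientation, the sketch below traces the call sequence build_graph follows after this change, assuming simplified stand-ins (instance_t, graph_handle_t, and op_config_t are placeholders for the real QNN types): the constructor functor is the one hand-off point, and the later calls need only the graph handle because the config reads its stored member.

#include <memory>
#include <string>

struct instance_t {};           // stand-in for qnn::qnn_instance
using graph_handle_t = void *;  // stand-in for Qnn_GraphHandle_t

struct op_config_t {
    op_config_t(std::string name, std::shared_ptr<instance_t> instance)
        : _name(std::move(name)), _instance(std::move(instance)) {}

    // Neither call takes the instance any more; both use the stored member.
    bool create_tensors(graph_handle_t) const { return _instance != nullptr; }
    bool add_op_to_graph(graph_handle_t) const { return _instance != nullptr; }

    std::string _name;
    std::shared_ptr<instance_t> _instance;
};

bool build_graph_sketch(const std::string &graph_name,
                        std::shared_ptr<instance_t> qnn_instance, graph_handle_t handle) {
    // Matches _op_config = op_constructor(_graph_name, _qnn_instance);
    auto op_config = std::make_unique<op_config_t>(graph_name, qnn_instance);
    return op_config->create_tensors(handle) && op_config->add_op_to_graph(handle);
}

int main() {
    auto instance = std::make_shared<instance_t>();
    return build_graph_sketch("my_graph", instance, nullptr) ? 0 : 1;
}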
67 changes: 32 additions & 35 deletions ggml/src/ggml-qnn/op-config.cpp
@@ -62,15 +62,15 @@ bool bind_tensors(const qnn::ggml_tensor_array_t &ggml_tensors, qnn::ggml_qnn_te
class ggml_qnn_connectable_op_config : public qnn::ggml_qnn_op_config_base {
public:
explicit ggml_qnn_connectable_op_config(const std::string &name, const std::string &package_name,
const std::string &op_type) :
ggml_qnn_op_config_base(name, package_name, op_type) {}
const std::string &op_type,
std::shared_ptr<qnn::qnn_instance> qnn_instance) :
ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}

bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn::qnn_instance> qnn_instance, const qnn::ggml_tensor_array_t &tensor_inputs,
const qnn::ggml_tensor_array_t &tensor_inputs,
const qnn::ggml_tensor_array_t &tensor_outputs) override {
GGML_UNUSED(device);
GGML_UNUSED(graph_handle);
GGML_UNUSED(qnn_instance);
GGML_UNUSED(tensor_inputs);
GGML_UNUSED(tensor_outputs);
return true;
@@ -97,41 +97,42 @@ void ggml_qnn_op_config_base::add_scalar_param(const std::string &name, const Qn
param.paramType = QNN_PARAMTYPE_SCALAR;
param.name = _param_names.back().c_str();
param.scalarParam = scalar;
_scalar_parameters.push_back(param);
_qnn_parameters.push_back(param);
}

bool ggml_qnn_op_config_base::add_tensor_param(const std::string &name, const std::vector<uint32_t> &dims,
const uint8_t *data, const ggml_type data_type, QNNBackend device,
Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance) {
ggml_qnn_dimension_array_t dimensions;
for (size_t i = 0; i < GGML_MAX_DIMS; i++) {
dimensions[i] = i < dims.size() ? dims[i] : 1;
}

auto param_tensor = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::PARAMETER, name, dimensions, data_type,
(int)dims.size(), device, graph_handle, qnn_instance);
bool ggml_qnn_op_config_base::add_tensor_param(const std::string &name, const ggml_qnn_dimension_array_t &dimensions,
int rank, const uint8_t *data, const ggml_type data_type,
QNNBackend device, Qnn_GraphHandle_t graph_handle) {
auto param_tensor = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::PARAMETER, name + "_tensor", dimensions,
data_type, rank, device, graph_handle, _qnn_instance);
if (!param_tensor->alloc_qnn_tensor_id()) {
QNN_LOG_ERROR("parameter tensor alloc_qnn_tensor_id failed\n");
return false;
}

size_t data_size = ggml_type_size(data_type);
for (auto dim : dims) {
data_size *= dim;
for (int i = 0; i < rank; i++) {
data_size *= dimensions[i];
}

GGML_ASSERT(data_size > 0);
if (!param_tensor->bind_buffer(const_cast<uint8_t *>(data), data_size)) {
QNN_LOG_ERROR("parameter tensor bind_buffer failed\n");
return false;
}

_tensor_parameters.push_back(param_tensor);
_param_names.push_back(name);
Qnn_Param_t param = QNN_PARAM_INIT;
param.paramType = QNN_PARAMTYPE_TENSOR;
param.name = _param_names.back().c_str();
param.tensorParam = param_tensor->get_qnn_tensor();
_qnn_parameters.push_back(param);
return true;
}

bool ggml_qnn_op_config_base::add_op_to_graph(Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance) {
auto qnn_interface = qnn_instance->get_qnn_interface();
bool ggml_qnn_op_config_base::add_op_to_graph(Qnn_GraphHandle_t graph_handle) {
auto qnn_interface = _qnn_instance->get_qnn_interface();

for (size_t i = 0; i < _tensor_inputs.size(); i++) {
auto tensor = _tensor_inputs[i];
@@ -195,8 +196,8 @@ Qnn_OpConfig_t ggml_qnn_op_config_base::get_op_config() {
op_config.name = _name.c_str();
op_config.packageName = _package_name.c_str();
op_config.typeName = _op_type.c_str();
op_config.numOfParams = (uint32_t)_scalar_parameters.size();
op_config.params = _scalar_parameters.data();
op_config.numOfParams = (uint32_t)_qnn_parameters.size();
op_config.params = _qnn_parameters.data();
op_config.numOfInputs = (uint32_t)_qnn_tensor_inputs.size();
op_config.inputTensors = _qnn_tensor_inputs.data();
op_config.numOfOutputs = (uint32_t)_qnn_tensor_outputs.size();
Expand All @@ -205,23 +206,21 @@ Qnn_OpConfig_t ggml_qnn_op_config_base::get_op_config() {
}

bool ggml_qnn_single_op_config::create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) {
const auto tensor_rank = get_rank(tensor_inputs, tensor_outputs);
tensor_common_params params = { "src", tensor_rank, device, graph_handle, qnn_instance };
tensor_common_params params = { "src", tensor_rank, device, graph_handle, _qnn_instance };
create_tensors_from_ggml_tensor(params, true, tensor_inputs, _tensor_inputs, _qnn_tensor_inputs);
params.name_prefix = "dst";
create_tensors_from_ggml_tensor(params, false, tensor_outputs, _tensor_outputs, _qnn_tensor_outputs);
return true;
}

bool ggml_qnn_matmul_op_config::create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) {
const auto tensor_rank = get_rank(tensor_inputs, tensor_outputs);
tensor_common_params params = { "src", tensor_rank, device, graph_handle, qnn_instance };
tensor_common_params params = { "src", tensor_rank, device, graph_handle, _qnn_instance };
create_tensors_from_ggml_tensor(params, true, tensor_inputs, _tensor_inputs, _qnn_tensor_inputs);

// create intermediate tensor
@@ -235,18 +234,18 @@ bool ggml_qnn_matmul_op_config::create_tensors(QNNBackend device, Qnn_GraphHandl
};
auto intermediate_tensor =
std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, "intermediate", dimensions,
first_ggml_tensor->type, tensor_rank, device, graph_handle, qnn_instance);
first_ggml_tensor->type, tensor_rank, device, graph_handle, _qnn_instance);

// create mat_mul
auto mat_mul =
std::make_shared<ggml_qnn_connectable_op_config>(_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL);
auto mat_mul = std::make_shared<ggml_qnn_connectable_op_config>(_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL,
_qnn_instance);
params.name_prefix = "dst";
create_tensors_from_ggml_tensor(params, false, tensor_outputs, mat_mul->get_output_tensors(),
mat_mul->get_qnn_output_tensors());

// create transpose
auto transpose = std::make_shared<ggml_qnn_connectable_op_config>(_name + "_trans", QNN_OP_PACKAGE_NAME_QTI_AISW,
QNN_OP_TRANSPOSE);
QNN_OP_TRANSPOSE, _qnn_instance);

// set transpose parameters
transpose->add_scalar_param("perm", QNN_SCALAR_INIT);
@@ -267,10 +266,8 @@ bool ggml_qnn_matmul_op_config::create_tensors(QNNBackend device, Qnn_GraphHandl
return true;
}

bool ggml_qnn_matmul_op_config::add_op_to_graph(Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance) {
return _transpose->add_op_to_graph(graph_handle, qnn_instance) &&
_mat_mul->add_op_to_graph(graph_handle, qnn_instance);
bool ggml_qnn_matmul_op_config::add_op_to_graph(Qnn_GraphHandle_t graph_handle) {
return _transpose->add_op_to_graph(graph_handle) && _mat_mul->add_op_to_graph(graph_handle);
}

bool ggml_qnn_matmul_op_config::bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) {
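The net effect of the op-config.cpp changes: scalar and tensor parameters now accumulate in a single _qnn_parameters vector, and add_tensor_param takes a fixed-size dimension array plus an explicit rank instead of a std::vector of dims. Below is a compressed, self-contained sketch of the new flow; QNN tensor allocation and Qnn_Param_t are faked with stand-ins (param_t, op_config_sketch) for brevity.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

constexpr int MAX_DIMS = 4;
using dimension_array_t = int64_t[MAX_DIMS];  // mirrors ggml_qnn_dimension_array_t

// Stand-in for Qnn_Param_t; the real code stores a tagged union.
struct param_t {
    std::string name;
    bool is_tensor = false;
};

struct op_config_sketch {
    std::vector<param_t> _qnn_parameters;  // scalar and tensor params, one vector
    std::vector<std::string> _param_names;

    bool add_tensor_param(const std::string &name, const dimension_array_t &dimensions,
                          int rank, size_t element_size) {
        // Byte size is derived from the first `rank` entries only, replacing the
        // old range-for over a std::vector of dims.
        size_t data_size = element_size;
        for (int i = 0; i < rank; i++) {
            data_size *= (size_t)dimensions[i];
        }
        assert(data_size > 0);

        _param_names.push_back(name);
        _qnn_parameters.push_back(param_t{_param_names.back(), /*is_tensor=*/true});
        return true;
    }
};

int main() {
    op_config_sketch config;
    dimension_array_t dims = {4, 1, 1, 1};
    config.add_tensor_param("perm", dims, /*rank=*/1, sizeof(uint32_t));
    return config._qnn_parameters.size() == 1 ? 0 : 1;
}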
39 changes: 21 additions & 18 deletions ggml/src/ggml-qnn/op-config.hpp
@@ -18,11 +18,11 @@ class ggml_qnn_op_config {
public:
virtual ~ggml_qnn_op_config() {}
virtual bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance, const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) = 0;
virtual std::vector<Qnn_Tensor_t> &get_qnn_input_tensors() = 0;
virtual std::vector<Qnn_Tensor_t> &get_qnn_output_tensors() = 0;
virtual bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance) = 0;
virtual bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) = 0;
virtual bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) = 0;
virtual bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) = 0;
virtual void unbind_input_tensors() = 0;
@@ -32,14 +32,14 @@ class ggml_qnn_op_config {
class ggml_qnn_op_config_base : public ggml_qnn_op_config {
public:
explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
const std::string &op_type) :
_name(name), _package_name(package_name), _op_type(op_type) {}
const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance) :
_name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}

void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
bool add_tensor_param(const std::string &name, const std::vector<uint32_t> &dims, const uint8_t *data,
const ggml_type data_type, QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance);
bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance) override;
bool add_tensor_param(const std::string &name, const ggml_qnn_dimension_array_t &dimensions, int rank,
const uint8_t *data, const ggml_type data_type, QNNBackend device,
Qnn_GraphHandle_t graph_handle);
bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
void unbind_input_tensors() override;
@@ -53,12 +53,13 @@ class ggml_qnn_op_config_base : public ggml_qnn_op_config {
std::string _name;
std::string _package_name;
std::string _op_type;
std::shared_ptr<qnn_instance> _qnn_instance;
ggml_qnn_tensor_array_t _tensor_inputs;
ggml_qnn_tensor_array_t _tensor_outputs;
ggml_qnn_tensor_array_t _tensor_parameters;
std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;
ggml_qnn_tensor_array_t _tensor_parameters;
std::vector<Qnn_Param_t> _scalar_parameters;
std::vector<Qnn_Param_t> _qnn_parameters;
std::vector<std::string> _param_names;

DISABLE_COPY(ggml_qnn_op_config_base);
@@ -68,11 +69,11 @@ class ggml_qnn_op_config_base : public ggml_qnn_op_config {
class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
public:
explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
const std::string &op_type) :
ggml_qnn_op_config_base(name, package_name, op_type) {}
const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance) :
ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}

bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs, const ggml_tensor_array_t &tensor_outputs) override;
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) override;

private:
DISABLE_COPY(ggml_qnn_single_op_config);
@@ -81,11 +82,12 @@ class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {

class ggml_qnn_matmul_op_config : public ggml_qnn_op_config {
public:
ggml_qnn_matmul_op_config(const std::string &name) : _name(name) {}
ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance) :
_name(name), _qnn_instance(qnn_instance) {}

bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance,
const ggml_tensor_array_t &tensor_inputs, const ggml_tensor_array_t &tensor_outputs) override;
bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance) override;
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, const ggml_tensor_array_t &tensor_inputs,
const ggml_tensor_array_t &tensor_outputs) override;
bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
void unbind_input_tensors() override;
@@ -95,6 +97,7 @@ class ggml_qnn_matmul_op_config : public ggml_qnn_op_config {

private:
std::string _name;
std::shared_ptr<qnn_instance> _qnn_instance;
std::shared_ptr<ggml_qnn_op_config> _transpose;
std::shared_ptr<ggml_qnn_op_config> _mat_mul;
ggml_qnn_tensor_array_t _tensor_inputs;
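Taken together, the header changes are plain constructor injection: the base class stores the shared_ptr, every subclass forwards it, and the virtual methods lose their qnn_instance parameter. A minimal sketch of the ownership pattern under stand-in types (instance_t and the class names below are illustrative):

#include <memory>
#include <string>

struct instance_t {};  // stand-in for qnn::qnn_instance

// Mirrors ggml_qnn_op_config_base: the instance is a member, set once.
class op_config_base {
public:
    op_config_base(std::string name, std::shared_ptr<instance_t> instance)
        : _name(std::move(name)), _instance(std::move(instance)) {}
    virtual ~op_config_base() = default;

protected:
    std::string _name;
    std::shared_ptr<instance_t> _instance;  // injected once, read by every method
};

// Mirrors ggml_qnn_single_op_config: subclasses just forward the instance.
class single_op_config : public op_config_base {
public:
    single_op_config(std::string name, std::shared_ptr<instance_t> instance)
        : op_config_base(std::move(name), std::move(instance)) {}
};

int main() {
    auto instance = std::make_shared<instance_t>();
    single_op_config config("mat_mul", instance);
    // `instance` is now co-owned by `config`, so it outlives any graph that
    // the config helped build.
    return 0;
}

Storing a shared_ptr rather than a raw pointer means each op config co-owns the instance, which removes any question of the instance being torn down while a built graph still needs it.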
20 changes: 16 additions & 4 deletions ggml/src/ggml-qnn/tensor.hpp
@@ -16,6 +16,7 @@

namespace qnn {

static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS should be 4");
using ggml_qnn_dimension_array_t = int64_t[GGML_MAX_DIMS];

class ggml_qnn_tensor {
@@ -197,10 +198,21 @@ class ggml_qnn_tensor {

void update_params_from_ggml_tensor(tensor_type_t tensor_type, const ggml_qnn_dimension_array_t &dimensions,
ggml_type data_type, int rank) {
_dimensions[0] = (uint32_t)dimensions[0];
_dimensions[1] = (uint32_t)dimensions[1];
_dimensions[2] = (uint32_t)dimensions[2];
_dimensions[3] = (uint32_t)dimensions[3];
GGML_ASSERT(rank <= GGML_MAX_DIMS && rank > 0);
switch (rank) {
case 4:
_dimensions[3] = (uint32_t)(dimensions[3] ? dimensions[3] : 1);
// fall through
case 3:
_dimensions[2] = (uint32_t)(dimensions[2] ? dimensions[2] : 1);
// fall through
case 2:
_dimensions[1] = (uint32_t)(dimensions[1] ? dimensions[1] : 1);
// fall through
case 1:
_dimensions[0] = (uint32_t)dimensions[0];
break;
}
QNN_TENSOR_SET_DATA_TYPE(_qnn_tensor, device_datatype_from_ggml_datatype(data_type));

// TODO: set the quantizeParams base on the tensor type
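The tensor.hpp hunk replaces an unconditional four-element copy with a rank-guarded switch that falls through from the highest requested dimension down, clamping any zero entry above the first dimension to 1. A standalone sketch of that logic (copy_dimensions is an illustrative name; no QNN calls involved):

#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr int MAX_DIMS = 4;  // mirrors the new static_assert(GGML_MAX_DIMS == 4)
using dimension_array_t = int64_t[MAX_DIMS];

// Copy the first `rank` dimensions, substituting 1 for zero-sized upper entries
// so a partially filled array still yields valid dimensions.
void copy_dimensions(const dimension_array_t &in, int rank, uint32_t out[MAX_DIMS]) {
    assert(rank > 0 && rank <= MAX_DIMS);
    switch (rank) {
        case 4:
            out[3] = (uint32_t)(in[3] ? in[3] : 1);
            // fall through
        case 3:
            out[2] = (uint32_t)(in[2] ? in[2] : 1);
            // fall through
        case 2:
            out[1] = (uint32_t)(in[1] ? in[1] : 1);
            // fall through
        case 1:
            out[0] = (uint32_t)in[0];
            break;
    }
}

int main() {
    dimension_array_t dims = {8, 0, 0, 0};
    uint32_t out[MAX_DIMS] = {1, 1, 1, 1};
    copy_dimensions(dims, /*rank=*/2, out);  // the zero at dims[1] becomes 1
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);  // 8 1 1 1
    return 0;
}

Entries at or above `rank` are left untouched, exactly as in the new update_params_from_ggml_tensor, so callers that reuse a dimensions buffer should initialize it first.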
