Skip to content

Commit

Permalink
fix MUL_MAT_256x16x10x1_256x1x10x1_16x1x10x1
Browse files Browse the repository at this point in the history
  • Loading branch information
chraac committed Oct 3, 2024
1 parent fc8b521 commit 8e55942
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
15 changes: 11 additions & 4 deletions ggml/src/ggml-qnn/op-config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@

namespace {

// Per-rank permutation tables for the QNN transpose op (used as the
// QNN_OP_TRANSPOSE_PARAM_PERM tensor parameter). Indexed by (tensor_rank - 1):
// each entry swaps the last two axes — the matrix dimensions of a matmul
// operand — while leaving any leading batch axes in place; the rank-1 entry is
// the identity permutation. Being file-scope constexpr data, these arrays have
// static storage and stay valid for the lifetime of any QNN graph that
// references them.
// NOTE(review): assumes QNN interprets these indices in the same axis order the
// surrounding code uses when building tensor dimensions — confirm against the
// QNN Transpose op definition.
constexpr const qnn::qnn_internal_dimension_array_t kTransposeParamData[GGML_MAX_DIMS] = {
{ 0 },
{ 1, 0 },
{ 0, 2, 1 },
{ 0, 1, 3, 2 },
};

int get_rank(const qnn::ggml_tensor_array_t &tensor_inputs, const qnn::ggml_tensor_array_t &tensor_outputs) {
int tensor_rank = 0;
// get the max tensor rank
Expand Down Expand Up @@ -312,13 +319,13 @@ bool ggml_qnn_matmul_op_config::create_tensors(QNNBackend device, Qnn_GraphHandl
// set transpose0 parameters
const ggml_dimension_array_t param_dims = { tensor_rank, 1, 1, 1 };
transpose0->add_tensor_param(QNN_OP_TRANSPOSE_PARAM_PERM, param_dims, 1,
reinterpret_cast<const uint8_t *>(_transpose_param_data.data()), GGML_TYPE_I32, device,
graph_handle);
reinterpret_cast<const uint8_t *>(kTransposeParamData[tensor_rank - 1].data()),
GGML_TYPE_I32, device, graph_handle);

// set transpose1 parameters
transpose1->add_tensor_param(QNN_OP_TRANSPOSE_PARAM_PERM, param_dims, 1,
reinterpret_cast<const uint8_t *>(_transpose_param_data.data()), GGML_TYPE_I32, device,
graph_handle);
reinterpret_cast<const uint8_t *>(kTransposeParamData[tensor_rank - 1].data()),
GGML_TYPE_I32, device, graph_handle);

// set tensor to transpose0
ggml_qnn_tensor_array_t tensors = { _tensor_inputs.back() };
Expand Down
1 change: 0 additions & 1 deletion ggml/src/ggml-qnn/op-config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ class ggml_qnn_matmul_op_config : public ggml_qnn_op_config {
std::shared_ptr<ggml_qnn_op_config> _mat_mul;
ggml_qnn_tensor_array_t _tensor_inputs;
std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
std::array<int32_t, GGML_MAX_DIMS> _transpose_param_data = { 1, 0, 2, 3 };

DISABLE_COPY(ggml_qnn_matmul_op_config);
DISABLE_MOVE(ggml_qnn_matmul_op_config);
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-qnn/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Qnn_DataType_t device_datatype_from_ggml_datatype(ggml_type ggml_type) {
case GGML_TYPE_F32:
return QNN_DATATYPE_FLOAT_32;
case GGML_TYPE_I32:
return QNN_DATATYPE_UINT_32;
return QNN_DATATYPE_UINT_32; // TODO: Should be INT_32
case GGML_TYPE_I16:
return QNN_DATATYPE_INT_16;
case GGML_TYPE_I8:
Expand Down

0 comments on commit 8e55942

Please sign in to comment.