[Qwen2] fix lora_demo
1. uint64_t header_size
2. fix load_and_infer_embedding
chuxiaoyi2023 committed Dec 19, 2024
1 parent b0f8c7a commit b0b6435
Showing 6 changed files with 12 additions and 11 deletions.
models/Qwen2/lora_demo/chat.cpp (6 additions, 6 deletions)
@@ -107,7 +107,7 @@ class Qwen {
   int stage_idx;
   bool enable_lora_embedding;
   bool make_in_tensors_flag;
-  int header_size;
+  uint64_t header_size;
 
   // model
   int hidden_bytes;
@@ -663,15 +663,15 @@ Qwen::load_and_infer_embedding(const std::vector<int> &tokens) {
   }
 
   file.seekg(0, std::ios::end);
-  long long file_size = static_cast<long long>(file.tellg());
-  if (file_size < header_size) {
+  uint64_t file_size_in_disk = file.tellg();
+  if (file_size_in_disk < header_size) {
     throw std::runtime_error("file is too small\n");
   }
 
-  std::vector<uint32_t> header(header_size / sizeof(uint32_t));
+  uint64_t file_size_in_header = 0;
   file.seekg(0, std::ios::beg);
-  file.read(reinterpret_cast<char *>(&header[0]), sizeof(uint32_t));
-  if (file_size != header[0]) {
+  file.read(reinterpret_cast<char *>(&file_size_in_header), sizeof(uint64_t));
+  if (file_size_in_disk != file_size_in_header) {
     throw std::runtime_error("Error: file size is not equal to file size in header\n");
   }

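With this change the loader treats the first 8 bytes of the LoRA weight file as a little-endian uint64 holding the total file size (header plus payload) and rejects the file if that value does not match the size on disk. A minimal Python sketch of the same check, assuming a hypothetical file name lora_weights.bin:

import os
import struct

HEADER_SIZE = 64  # default header_size used by make_header() in export_onnx.py

def check_lora_file(path="lora_weights.bin"):  # hypothetical path, for illustration only
    file_size_in_disk = os.path.getsize(path)
    if file_size_in_disk < HEADER_SIZE:
        raise RuntimeError("file is too small")
    with open(path, "rb") as f:
        # first 8 bytes: total file size written by make_header() as '<Q'
        (file_size_in_header,) = struct.unpack("<Q", f.read(8))
    if file_size_in_disk != file_size_in_header:
        raise RuntimeError("file size is not equal to file size in header")
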
models/Qwen2/lora_demo/export_onnx.py (6 additions, 5 deletions)
@@ -413,8 +413,9 @@ def convert_lora_embedding_to_bit(lora_model, lora_config, args):
     else:
         raise NotImplementedError
 
-    # In final.mlir, the weights are laid out in [lora_A, lora_B] order,
-    # so B has to be placed first
+    # In final.mlir, the weights are laid out in [lora_B, lora_A] order,
+    # but at load time they are consumed in operator-invocation order: lora_A runs and is loaded first, lora_B after,
+    # so A has to be placed first
     for a, b in zip(lora_A_weight_list, lora_B_weight_list):
         lora_weight_list.append(a)
         lora_weight_list.append(b)
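
A tiny illustration (with placeholder strings instead of the real numpy weight arrays) of the interleaving this loop produces, so that for every layer the lora_A weight precedes the lora_B weight:

# hypothetical 2-layer example; the real lists hold numpy weight arrays
lora_A_weight_list = ["A0", "A1"]
lora_B_weight_list = ["B0", "B1"]

lora_weight_list = []
for a, b in zip(lora_A_weight_list, lora_B_weight_list):
    lora_weight_list.append(a)
    lora_weight_list.append(b)

print(lora_weight_list)  # ['A0', 'B0', 'A1', 'B1'] -- A comes before B for each layer
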
@@ -435,11 +436,11 @@ def convert_lora_embedding_to_bit(lora_model, lora_config, args):
     return lora_weights_uint8
 
 def make_header(size, header_size = 64):
-    if header_size < 4:
+    if header_size < 8:
         raise ValueError("Header size must be at least 4 bytes to store the size.")
     header = np.zeros(header_size, dtype=np.uint8)
-    size_bytes = struct.pack('<I', header_size + size)
-    header[:4] = np.frombuffer(size_bytes, dtype=np.uint8)
+    size_bytes = struct.pack('<Q', header_size + size)
+    header[:8] = np.frombuffer(size_bytes, dtype=np.uint8)
     return header
 
 def convert_total_lora_to_bit(encrypt_path, origin_model, args):
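
A short usage sketch of the updated make_header, verifying that the leading 8 bytes decode back to header size plus payload size (1024 is an arbitrary example payload size, not a value from the commit):

import struct
import numpy as np

# trimmed copy of make_header from export_onnx.py: the first 8 bytes store
# the total file size (header + payload) as a little-endian uint64
def make_header(size, header_size=64):
    header = np.zeros(header_size, dtype=np.uint8)
    header[:8] = np.frombuffer(struct.pack('<Q', header_size + size), dtype=np.uint8)
    return header

header = make_header(1024)  # arbitrary example payload size
(total_size,) = struct.unpack('<Q', header[:8].tobytes())
assert total_size == 64 + 1024
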
Binary file modified models/Qwen2/support/lib_pcie/libbmlib.so.0
Binary file modified models/Qwen2/support/lib_pcie/libbmrt.so.1.0
Binary file modified models/Qwen2/support/lib_soc/libbmlib.so.0
Binary file modified models/Qwen2/support/lib_soc/libbmrt.so.1.0
