convert.py: add python logging instead of print()
mofosyne committed Apr 6, 2024
1 parent 75cd4c7 commit e4e4df5
Showing 1 changed file with 29 additions and 18 deletions.
convert.py: 47 changes (29 additions & 18 deletions)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import argparse
 import concurrent.futures
 import enum
@@ -637,7 +638,7 @@ def __repr__(self) -> str:
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
+    # logging.info( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -1026,12 +1027,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False)
 
     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logging.warning("Ignoring added_tokens.json since model matches vocab size without it.")
         return
 
     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logging.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
@@ -1159,7 +1160,7 @@ def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(
+            logging.info(
                 f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
             )
             self.gguf.write_tensor_data(ndarray)
@@ -1274,12 +1275,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
     # HF models permut or pack some of the tensors, so we need to undo that
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logging.debug(f"Permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logging.debug(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1292,15 +1293,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
         tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
         if name_new is None:
             if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logging.warning(f"Unexpected tensor name: {name} - skipping")
                 continue
             raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
         if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logging.debug(f"skipping tensor {name_new}")
             continue
 
-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logging.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
         out[name_new] = lazy_tensor
 
     return out
@@ -1365,7 +1366,7 @@ def load_some_model(path: Path) -> ModelPlus:
     paths = find_multifile_paths(path)
     models_plus: list[ModelPlus] = []
     for path in paths:
-        print(f"Loading model file {path}")
+        logging.info(f"Loading model file {path}")
         models_plus.append(lazy_load_file(path))
 
     model_plus = merge_multifile_models(models_plus)
@@ -1406,7 +1407,7 @@ def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
         else:
             raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
 
-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logging.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
         return vocab
 
     def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1466,8 +1467,18 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian",   action="store_true", help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab",    action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose",      action="store_true", help="increase output verbosity")
 
     args = parser.parse_args(args_in)
 
+    if args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.CRITICAL)
+    elif args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
 
@@ -1484,6 +1495,7 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+
     endianess = gguf.GGUFEndian.LITTLE
     if args.big_endian:
         endianess = gguf.GGUFEndian.BIG
@@ -1506,7 +1518,7 @@ def main(args_in: list[str] | None = None) -> None:
         "q8_0": GGMLFileType.MostlyQ8_0,
     }[args.outtype]
 
-    print(f"params = {params}")
+    logging.info(f"params = {params}")
 
     model_parent_path = model_plus.paths[0].parent
     vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1521,27 +1533,26 @@ def main(args_in: list[str] | None = None) -> None:
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                     endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logging.info(f"Wrote {outfile}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab
 
-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
-
+    logging.info(f"Vocab info: {vocab}")
+    logging.info(f"Special vocab info: {special_vocab}")
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
     outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
     params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logging.info(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                          concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logging.info(f"Wrote {outfile}")
 
 
 if __name__ == '__main__':
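A minimal standalone sketch of the verbosity selection the diff adds to main(). The --verbose flag and the three level choices mirror the diff; the single --dump flag here is a hypothetical stand-in for convert.py's --dump/--dump-single pair:

    #!/usr/bin/env python3
    import argparse
    import logging

    parser = argparse.ArgumentParser()
    parser.add_argument("--dump",    action="store_true", help="stand-in for convert.py's dump flags")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    args = parser.parse_args()

    # logging.basicConfig() configures the root logger once; later calls are
    # no-ops, so the level must be chosen up front, as main() does above.
    if args.dump:
        # Avoid printing anything besides the dump output
        logging.basicConfig(level=logging.CRITICAL)
    elif args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    logging.debug("shown only with --verbose")
    logging.info("shown by default, silenced by --dump")

Run without flags and only the info line appears; --verbose adds the debug line; --dump suppresses both, matching the "avoid printing anything besides the dump output" comment in the diff.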

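Two hedged design notes on the converted calls, not changes made by this commit. First, the diff keeps f-strings inside the logging calls, so messages are formatted even when the record is filtered out; stdlib logging's %-style arguments defer formatting until a record is actually emitted:

    import logging
    logging.basicConfig(level=logging.INFO)

    i = 7
    logging.debug(f"Permuting layer {i}")   # f-string is built even though DEBUG is filtered at INFO
    logging.debug("Permuting layer %d", i)  # formatted only if the record is emitted

Second, module-level logging.info() goes through the root logger; a named logger (e.g. logging.getLogger("convert")) would let downstream code raise or lower this script's verbosity without affecting other libraries.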