From a9f3ded69f7390d9d7e142f3ea46cb812eaea2f3 Mon Sep 17 00:00:00 2001
From: Andrew Green
Date: Fri, 31 Jan 2025 11:55:30 +0000
Subject: [PATCH 1/3] Don't use deprecated `llama_token_get_text`, but use
 `llama_vocab_get_text` instead

As suggested here:
https://github.com/ggerganov/llama.cpp/blob/5783575c9d99c4d9370495800663aa5397ceb0be/include/llama.h#L962

Requires getting the vocab out of the model first though
---
 guidance/models/llama_cpp/_llama_cpp.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/guidance/models/llama_cpp/_llama_cpp.py b/guidance/models/llama_cpp/_llama_cpp.py
index d243360f8..50c3a1740 100644
--- a/guidance/models/llama_cpp/_llama_cpp.py
+++ b/guidance/models/llama_cpp/_llama_cpp.py
@@ -66,6 +66,7 @@ def __init__(self, model_obj, chat_template=None):
             )
 
         tokenizer = llama_cpp.LlamaTokenizer(model_obj)
+        vocab = llama_cpp.llama_model_get_vocab(model_obj.model)
 
         if not hasattr(tokenizer, "llama"):
             tokenizer.llama = tokenizer._model
@@ -75,7 +76,7 @@ def __init__(self, model_obj, chat_template=None):
             tok = tokenizer.llama.detokenize([i])  # note that detokenize returns bytes directly
             if tok == b"":
                 # get text rep of special tokens
-                tok = llama_cpp.llama_token_get_text(model_obj.model, i)
+                tok = llama_cpp.llama_vocab_get_text(vocab, i)
             tokens.append(tok)
 
         # Chat Template logic

From 52923a49f8496d7a5e1c99ed5ae469e3466b65bb Mon Sep 17 00:00:00 2001
From: Paul Koch
Date: Tue, 4 Feb 2025 16:59:10 -0800
Subject: [PATCH 2/3] update llama-cpp-python version to 0.3.7

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 337b4025c..f1b5d78a6 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
 
 here = os.path.abspath(os.path.dirname(__file__))
 
-llamacpp_requires = ["llama-cpp-python==0.3.5"]
+llamacpp_requires = ["llama-cpp-python==0.3.7"]
 transformers_requires = ["transformers==4.48.2"]
 
 install_requires = [

From d14f2ed6d31f10d42ef0d278a61c86f5b607f3f6 Mon Sep 17 00:00:00 2001
From: Paul Koch
Date: Tue, 4 Feb 2025 20:38:41 -0800
Subject: [PATCH 3/3] check for invalid return from llama_model_get_vocab

---
 guidance/models/llama_cpp/_llama_cpp.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/guidance/models/llama_cpp/_llama_cpp.py b/guidance/models/llama_cpp/_llama_cpp.py
index 50c3a1740..9c5471b00 100644
--- a/guidance/models/llama_cpp/_llama_cpp.py
+++ b/guidance/models/llama_cpp/_llama_cpp.py
@@ -67,6 +67,9 @@ def __init__(self, model_obj, chat_template=None):
 
         tokenizer = llama_cpp.LlamaTokenizer(model_obj)
         vocab = llama_cpp.llama_model_get_vocab(model_obj.model)
+        if vocab is None:
+            raise Exception("call to llama_cpp.llama_model_get_vocab returned NULL.")
+
 
         if not hasattr(tokenizer, "llama"):
             tokenizer.llama = tokenizer._model
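
Taken together, the series settles on one usage pattern: fetch the vocab handle from the model once, check it for NULL, and query special-token text through it instead of the deprecated llama_token_get_text. A minimal standalone sketch of that pattern follows, assuming llama-cpp-python >= 0.3.7 (the version the series pins, which ships the llama_model_get_vocab / llama_vocab_get_text bindings used above); the model path and the vocab_only flag are illustrative placeholders, not part of the change.

import llama_cpp

# Placeholder model path; vocab_only=True skips loading weights to keep the sketch light.
model_obj = llama_cpp.Llama(model_path="path/to/model.gguf", vocab_only=True)

tokenizer = llama_cpp.LlamaTokenizer(model_obj)
if not hasattr(tokenizer, "llama"):
    tokenizer.llama = tokenizer._model

# The vocab handle now has to be pulled out of the model before querying token text.
vocab = llama_cpp.llama_model_get_vocab(model_obj.model)
if vocab is None:
    raise Exception("call to llama_cpp.llama_model_get_vocab returned NULL.")

tokens = []
for i in range(model_obj.n_vocab()):
    tok = tokenizer.llama.detokenize([i])  # detokenize returns bytes directly
    if tok == b"":
        # Special tokens detokenize to b""; fall back to their text representation
        # via the vocab-based API that replaces llama_token_get_text.
        tok = llama_cpp.llama_vocab_get_text(vocab, i)
    tokens.append(tok)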