llama : update llama_model API names #11063

Merged (2 commits) on Jan 6, 2025
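In short, this PR deprecates llama_load_model_from_file and llama_free_model in favor of llama_model_load_from_file and llama_model_free, and updates all in-tree callers. A minimal before/after sketch of the migration, using a placeholder model path (not taken from the diff):

    #include "llama.h"

    int main(void) {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();

        // old (still compiles, now emits a deprecation warning):
        //   llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        // new:
        llama_model * model = llama_model_load_from_file("model.gguf", mparams);
        if (model == NULL) {
            llama_backend_free();
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        // ... run inference ...

        llama_free(ctx);

        // old: llama_free_model(model);  new:
        llama_model_free(model);

        llama_backend_free();
        return 0;
    }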
14 changes: 7 additions & 7 deletions common/common.cpp
@@ -846,7 +846,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     } else if (!params.model_url.empty()) {
         model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
     } else {
-        model = llama_load_model_from_file(params.model.c_str(), mparams);
+        model = llama_model_load_from_file(params.model.c_str(), mparams);
     }

     if (model == NULL) {
@@ -873,7 +873,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     if (!ok) {
-        llama_free_model(model);
+        llama_model_free(model);

         return iparams;
     }
@@ -884,7 +884,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     llama_context * lctx = llama_new_context_with_model(model, cparams);
     if (lctx == NULL) {
         LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
-        llama_free_model(model);
+        llama_model_free(model);
         return iparams;
     }
@@ -900,7 +900,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     const auto cvec = common_control_vector_load(params.control_vectors);
     if (cvec.n_embd == -1) {
         llama_free(lctx);
-        llama_free_model(model);
+        llama_model_free(model);

         return iparams;
     }
@@ -913,7 +913,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         params.control_vector_layer_end);
     if (err) {
         llama_free(lctx);
-        llama_free_model(model);
+        llama_model_free(model);

         return iparams;
     }
@@ -926,7 +926,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     if (lora == nullptr) {
         LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
         llama_free(lctx);
-        llama_free_model(model);
+        llama_model_free(model);
         return iparams;
     }
@@ -1411,7 +1411,7 @@ struct llama_model * common_load_model_from_url(
         }
     }

-    return llama_load_model_from_file(local_path.c_str(), params);
+    return llama_model_load_from_file(local_path.c_str(), params);
 }

 struct llama_model * common_load_model_from_hf(
4 changes: 2 additions & 2 deletions examples/batched-bench/batched-bench.cpp
@@ -38,7 +38,7 @@ int main(int argc, char ** argv) {

     llama_model_params model_params = common_model_params_to_llama(params);

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);

     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
@@ -194,7 +194,7 @@ int main(int argc, char ** argv) {
     llama_batch_free(batch);

     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     llama_backend_free();
4 changes: 2 additions & 2 deletions examples/batched/batched.cpp
@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {

     llama_model_params model_params = common_model_params_to_llama(params);

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);

     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n" , __func__);
@@ -236,7 +236,7 @@ int main(int argc, char ** argv) {

     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     llama_backend_free();
4 changes: 2 additions & 2 deletions examples/gritlm/gritlm.cpp
@@ -165,7 +165,7 @@ int main(int argc, char * argv[]) {

     llama_backend_init();

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);

     // create generation context
     llama_context * ctx = llama_new_context_with_model(model, cparams);
@@ -219,7 +219,7 @@ int main(int argc, char * argv[]) {

     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
     llama_backend_free();

     return 0;
8 changes: 4 additions & 4 deletions examples/llama-bench/llama-bench.cpp
@@ -1526,10 +1526,10 @@ int main(int argc, char ** argv) {
     // keep the same model between tests when possible
     if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
         if (lmodel) {
-            llama_free_model(lmodel);
+            llama_model_free(lmodel);
         }

-        lmodel = llama_load_model_from_file(inst.model.c_str(), inst.to_llama_mparams());
+        lmodel = llama_model_load_from_file(inst.model.c_str(), inst.to_llama_mparams());
         if (lmodel == NULL) {
             fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
             return 1;
@@ -1540,7 +1540,7 @@ int main(int argc, char ** argv) {
     llama_context * ctx = llama_new_context_with_model(lmodel, inst.to_llama_cparams());
     if (ctx == NULL) {
         fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, inst.model.c_str());
-        llama_free_model(lmodel);
+        llama_model_free(lmodel);
         return 1;
     }
@@ -1626,7 +1626,7 @@ int main(int argc, char ** argv) {
         ggml_threadpool_free_fn(threadpool);
     }

-    llama_free_model(lmodel);
+    llama_model_free(lmodel);

     if (p) {
         p->print_footer();
6 changes: 3 additions & 3 deletions examples/llava/llava-cli.cpp
@@ -221,7 +221,7 @@ static struct llama_model * llava_init(common_params * params) {

     llama_model_params model_params = common_model_params_to_llama(*params);

-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -265,7 +265,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }

     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }

@@ -323,7 +323,7 @@ int main(int argc, char ** argv) {
         }
     }

-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
4 changes: 2 additions & 2 deletions examples/llava/minicpmv-cli.cpp
@@ -31,7 +31,7 @@ static struct llama_model * llava_init(common_params * params) {

     llama_model_params model_params = common_model_params_to_llama(*params);

-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -75,7 +75,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }

     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
6 changes: 3 additions & 3 deletions examples/llava/qwen2vl-cli.cpp
@@ -310,7 +310,7 @@ static struct llama_model * llava_init(common_params * params) {

     llama_model_params model_params = common_model_params_to_llama(*params);

-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -354,7 +354,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }

     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }

@@ -575,7 +575,7 @@ int main(int argc, char ** argv) {
         }
     }

-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
4 changes: 2 additions & 2 deletions examples/passkey/passkey.cpp
@@ -63,7 +63,7 @@ int main(int argc, char ** argv) {

     llama_model_params model_params = common_model_params_to_llama(params);

-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);

     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
@@ -266,7 +266,7 @@ int main(int argc, char ** argv) {
     llama_batch_free(batch);

     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     llama_backend_free();
8 changes: 4 additions & 4 deletions examples/quantize-stats/quantize-stats.cpp
@@ -309,7 +309,7 @@ int main(int argc, char ** argv) {
     auto mparams = llama_model_default_params();
     mparams.use_mlock = false;

-    model = llama_load_model_from_file(params.model.c_str(), mparams);
+    model = llama_model_load_from_file(params.model.c_str(), mparams);

     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
@@ -323,7 +323,7 @@ int main(int argc, char ** argv) {

     if (ctx == NULL) {
         fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
-        llama_free_model(model);
+        llama_model_free(model);
         return 1;
     }
 }
@@ -347,7 +347,7 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "%s: error: Quantization should be tested with a float model, "
             "this model contains already quantized layers (%s is type %d)\n", __func__, kv_tensor.first.c_str(), kv_tensor.second->type);
         llama_free(ctx);
-        llama_free_model(model);
+        llama_model_free(model);
         return 1;
     }
     included_layers++;
@@ -409,7 +409,7 @@ int main(int argc, char ** argv) {


     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
     // report timing
     {
         const int64_t t_main_end_us = ggml_time_us();
2 changes: 1 addition & 1 deletion examples/run/run.cpp
@@ -664,7 +664,7 @@ class LlamaData {
             "\r%*s"
             "\rLoading model",
             get_terminal_width(), " ");
-        llama_model_ptr model(llama_load_model_from_file(opt.model_.c_str(), opt.model_params));
+        llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params));
         if (!model) {
             printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str());
         }
4 changes: 2 additions & 2 deletions examples/simple-chat/simple-chat.cpp
@@ -69,7 +69,7 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;

-    llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
     if (!model) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
         return 1;
@@ -194,7 +194,7 @@ int main(int argc, char ** argv) {
     }
     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
4 changes: 2 additions & 2 deletions examples/simple/simple.cpp
@@ -83,7 +83,7 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;

-    llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);

     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
@@ -199,7 +199,7 @@ int main(int argc, char ** argv) {

     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
4 changes: 2 additions & 2 deletions examples/tokenize/tokenize.cpp
@@ -333,7 +333,7 @@ int main(int raw_argc, char ** raw_argv) {

     llama_model_params model_params = llama_model_default_params();
     model_params.vocab_only = true;
-    llama_model * model = llama_load_model_from_file(model_path, model_params);
+    llama_model * model = llama_model_load_from_file(model_path, model_params);
     if (!model) {
         fprintf(stderr, "Error: could not load model from file '%s'.\n", model_path);
         return 1;
@@ -398,7 +398,7 @@ int main(int raw_argc, char ** raw_argv) {
     }
     // silence valgrind
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);

     return 0;
 }
2 changes: 1 addition & 1 deletion include/llama-cpp.h
@@ -9,7 +9,7 @@
 #include "llama.h"

 struct llama_model_deleter {
-    void operator()(llama_model * model) { llama_free_model(model); }
+    void operator()(llama_model * model) { llama_model_free(model); }
 };

 struct llama_context_deleter {
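With the deleter rerouted, the llama_model_ptr smart-pointer alias from this header releases models through the new API automatically. A minimal RAII sketch, assuming the llama_model_ptr alias from llama-cpp.h and a placeholder model path:

    #include "llama-cpp.h"

    static void load_example() {
        llama_model_params mparams = llama_model_default_params();

        // llama_model_deleter invokes llama_model_free() when the pointer is destroyed
        llama_model_ptr model(llama_model_load_from_file("model.gguf", mparams));
        if (!model) {
            // handle load failure
        }
    }   // llama_model_free(model.get()) runs here via the deleter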
13 changes: 10 additions & 3 deletions include/llama.h
@@ -413,12 +413,19 @@ extern "C" {
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);

-    LLAMA_API struct llama_model * llama_load_model_from_file(
+    DEPRECATED(LLAMA_API struct llama_model * llama_load_model_from_file(
+            const char * path_model,
+            struct llama_model_params params),
+            "use llama_model_load_from_file instead");

+    LLAMA_API struct llama_model * llama_model_load_from_file(
+            const char * path_model,
+            struct llama_model_params params);

-    // TODO: rename to llama_model_free
-    LLAMA_API void llama_free_model(struct llama_model * model);
+    DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
+            "use llama_model_free instead");

+    LLAMA_API void llama_model_free(struct llama_model * model);

     // TODO: rename to llama_init_from_model
     LLAMA_API struct llama_context * llama_new_context_with_model(
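The DEPRECATED wrapper keeps the old declarations available, so existing callers still compile and link but now receive a compile-time diagnostic. A hedged sketch of the effect (the exact warning format varies by compiler; the message text comes from the declarations above):

    // calling the old name somewhere in user code:
    llama_model * model = llama_load_model_from_file(path, params);
    // gcc/clang would warn along the lines of:
    //   'llama_load_model_from_file' is deprecated: use llama_model_load_from_file instead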
4 changes: 4 additions & 0 deletions src/llama-model.cpp
@@ -1976,6 +1976,10 @@ struct llama_model_params llama_model_default_params() {
 }

 void llama_free_model(struct llama_model * model) {
+    llama_model_free(model);
+}
+
+void llama_model_free(struct llama_model * model) {
     delete model;
 }