From 8e12313c635ac921bfed75aabc15b9f93510f2a0 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Sat, 7 Sep 2024 00:03:01 +0300 Subject: [PATCH] llama-bench : log benchmark progress (#9287) * llama-bench : add optional progress messages --- examples/llama-bench/llama-bench.cpp | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 0c9bcb777a268..fe1802b51bdf6 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -249,6 +249,7 @@ struct cmd_params { ggml_sched_priority prio; int delay; bool verbose; + bool progress; output_formats output_format; output_formats output_format_stderr; }; @@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = { /* prio */ GGML_SCHED_PRIO_NORMAL, /* delay */ 0, /* verbose */ false, + /* progress */ false, /* output_format */ MARKDOWN, /* output_format_stderr */ NONE, }; @@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -o, --output (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); printf(" -oe, --output-err (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr)); printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); + printf(" --progress (default: %s)\n", cmd_params_defaults.progress ? "1" : "0"); printf("\n"); printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n"); } @@ -364,6 +367,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { params.numa = cmd_params_defaults.numa; params.prio = cmd_params_defaults.prio; params.delay = cmd_params_defaults.delay; + params.progress = cmd_params_defaults.progress; for (int i = 1; i < argc; i++) { arg = argv[i]; @@ -616,6 +620,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = !output_format_from_str(argv[i], params.output_format_stderr); } else if (arg == "-v" || arg == "--verbose") { params.verbose = true; + } else if (arg == "--progress") { + params.progress = true; } else { invalid_param = true; break; @@ -1523,7 +1529,13 @@ int main(int argc, char ** argv) { llama_model * lmodel = nullptr; const cmd_params_instance * prev_inst = nullptr; + int params_idx = 0; + auto params_count = params_instances.size(); for (const auto & inst : params_instances) { + params_idx ++; + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count); + } // keep the same model between tests when possible if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) { if (lmodel) { @@ -1556,7 +1568,7 @@ int main(int argc, char ** argv) { struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads); if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) { - LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str()); + fprintf(stderr, "%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str()); exit(1); } tpp.strict_cpu = t.cpu_strict; @@ -1565,7 +1577,7 @@ int main(int argc, char ** argv) { struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp); if (!threadpool) { - LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads); + fprintf(stderr, "%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads); exit(1); } @@ -1573,10 +1585,16 @@ int main(int argc, char ** argv) { // warmup run if (t.n_prompt > 0) { + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count); + } //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads); test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads); } if (t.n_gen > 0) { + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count); + } test_gen(ctx, 1, 0, t.n_threads); } @@ -1586,9 +1604,15 @@ int main(int argc, char ** argv) { uint64_t t_start = get_time_ns(); if (t.n_prompt > 0) { + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps); + } test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads); } if (t.n_gen > 0) { + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps); + } test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads); }