diff --git a/llamafile/chatbot.h b/llamafile/chatbot.h
index d61bbee356..a5cd8e9a8c 100644
--- a/llamafile/chatbot.h
+++ b/llamafile/chatbot.h
@@ -21,6 +21,11 @@
 #include <__fwd/vector.h>
 #include
 
+#define DEFAULT_SYSTEM_PROMPT \
+    "A chat between a curious human and an artificial intelligence assistant. " \
+    "The assistant gives helpful, detailed, and polite answers to the " \
+    "human's questions."
+
 struct bestlineCompletions;
 struct clip_ctx;
 struct gpt_params;
@@ -58,6 +63,7 @@ bool eval_string(std::string_view, bool, bool);
 bool eval_token(int);
 bool eval_tokens(std::vector<int>);
 bool handle_command(const char *);
+bool is_base_model();
 bool out_of_context(int);
 char *on_hint(const char *, const char **, const char **);
 const char *get_role_color(enum Role);
diff --git a/llamafile/chatbot_hint.cpp b/llamafile/chatbot_hint.cpp
index 88e29df0bb..5114455dcb 100644
--- a/llamafile/chatbot_hint.cpp
+++ b/llamafile/chatbot_hint.cpp
@@ -27,8 +27,13 @@ namespace chatbot {
 static const char *on_hint_impl(const char *line) {
     if (!*line && g_manual_mode)
         return get_role_name(g_role);
-    if (!*line && !g_manual_mode && !g_said_something)
-        return "say something (or type /help for help)";
+    if (!*line && !g_manual_mode && !g_said_something) {
+        if (is_base_model()) {
+            return "type text to be completed (or /help for help)";
+        } else {
+            return "say something (or type /help for help)";
+        }
+    }
     static const char *const kHints[] = {
         "/clear", //
         "/context", //
diff --git a/llamafile/chatbot_hist.cpp b/llamafile/chatbot_hist.cpp
index 7bca4d3d6f..7bc57c55be 100644
--- a/llamafile/chatbot_hist.cpp
+++ b/llamafile/chatbot_hist.cpp
@@ -219,6 +219,10 @@ void rewind(int pos) {
 }
 
 void on_manual(const std::vector<std::string> &args) {
+    if (is_base_model()) {
+        err("error: /manual mode not supported on base models");
+        return;
+    }
     if (args.size() == 1) {
         g_manual_mode = !g_manual_mode;
     } else if (args.size() == 2 && (args[1] == "on" || args[1] == "off")) {
diff --git a/llamafile/chatbot_main.cpp b/llamafile/chatbot_main.cpp
index 6ea0aa9f32..dfa3185449 100644
--- a/llamafile/chatbot_main.cpp
+++ b/llamafile/chatbot_main.cpp
@@ -95,6 +95,17 @@ const char *tip() {
     return " (use the --verbose flag for further details)";
 }
 
+bool is_base_model() {
+
+    // check if user explicitly passed --chat-template flag
+    if (!g_params.chat_template.empty())
+        return false;
+
+    // check if gguf metadata has chat template. this should always be
+    // present for "instruct" models, and never specified on base ones
+    return llama_model_meta_val_str(g_model, "tokenizer.chat_template", 0, 0) == -1;
+}
+
 int main(int argc, char **argv) {
 
     // print logo
@@ -107,9 +118,7 @@ int main(int argc, char **argv) {
     // override defaults for some flags
     g_params.n_batch = 256; // for better progress indication
     g_params.sparams.temp = 0; // don't believe in randomness by default
-    g_params.prompt = "A chat between a curious human and an artificial intelligence assistant. "
-                      "The assistant gives helpful, detailed, and polite answers to the "
-                      "human's questions.";
+    g_params.prompt = DEFAULT_SYSTEM_PROMPT;
 
     // parse flags (sadly initializes gpu support as side-effect)
     print_ephemeral("loading backend...");
@@ -158,6 +167,8 @@ int main(int argc, char **argv) {
     printf(BOLD "software" UNBOLD ": llamafile " LLAMAFILE_VERSION_STRING "\n" //
            BOLD "model" UNBOLD ": %s\n",
            basename(g_params.model).c_str());
+    if (is_base_model())
+        printf(BOLD "mode" UNBOLD ": RAW TEXT COMPLETION (base model)\n");
     printf(BOLD "compute" UNBOLD ": %s\n", describe_compute().c_str());
     if (want_server)
         printf(BOLD "server" UNBOLD ": %s\n", g_listen_url.c_str());
diff --git a/llamafile/chatbot_repl.cpp b/llamafile/chatbot_repl.cpp
index e5f4002063..a9aad32b51 100644
--- a/llamafile/chatbot_repl.cpp
+++ b/llamafile/chatbot_repl.cpp
@@ -104,12 +104,21 @@ void repl() {
     }
     record_undo();
 
+    // make base models have no system prompt by default
+    if (is_base_model() && g_params.prompt == DEFAULT_SYSTEM_PROMPT)
+        g_params.prompt = "";
+
     // setup system prompt
     if (!g_params.prompt.empty()) {
         print_ephemeral("loading system prompt...");
-        std::vector<llama_chat_msg> chat = {{"system", g_params.prompt}};
-        std::string msg =
-            llama_chat_apply_template(g_model, g_params.chat_template, chat, DONT_ADD_ASSISTANT);
+        std::string msg;
+        if (is_base_model()) {
+            msg = g_params.prompt;
+        } else {
+            std::vector<llama_chat_msg> chat = {{"system", g_params.prompt}};
+            msg = llama_chat_apply_template(g_model, g_params.chat_template, chat,
+                                            DONT_ADD_ASSISTANT);
+        }
         if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL))
             exit(6);
         llama_synchronize(g_ctx);
@@ -135,12 +144,13 @@ void repl() {
         write(1, get_role_color(g_role), strlen(get_role_color(g_role)));
         char *line = bestlineWithHistory(">>> ", "llamafile");
         write(1, UNFOREGROUND, strlen(UNFOREGROUND));
+        g_last_printed_char = '\n';
         if (!line) {
             if (g_got_sigint)
                 ensure_newline();
             break;
         }
-        if (is_empty(line)) {
+        if (!is_base_model() && is_empty(line)) {
             if (g_manual_mode) {
                 g_role = cycle_role(g_role);
                 write(1, "\033[F", 3);
@@ -155,9 +165,13 @@ void repl() {
         }
         bool add_assi = !g_manual_mode;
         int tokens_used_before = tokens_used();
-        std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
-        std::string msg =
-            llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
+        std::string msg;
+        if (is_base_model()) {
+            msg = line;
+        } else {
+            std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
+            msg = llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
+        }
         if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL)) {
             rewind(tokens_used_before);
             continue;