diff --git a/llama.cpp/common.cpp b/llama.cpp/common.cpp
index 6dfb13862c..8364426377 100644
--- a/llama.cpp/common.cpp
+++ b/llama.cpp/common.cpp
@@ -1213,6 +1213,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             return true;
         }
         params.chat_template = argv[i];
+        FLAG_chat_template = argv[i]; // [jart]
         return true;
     }
     if (arg == "--slot-prompt-similarity" || arg == "-sps") {
diff --git a/llamafile/flags.cpp b/llamafile/flags.cpp
index de1770d5c2..fd67dc8d1f 100644
--- a/llamafile/flags.cpp
+++ b/llamafile/flags.cpp
@@ -38,6 +38,7 @@
 
 bool FLAGS_READY = false;
 bool FLAG_ascii = false;
+bool FLAG_completion_mode = false;
 bool FLAG_fast = false;
 bool FLAG_iq = false;
 bool FLAG_log_disable = false;
@@ -51,6 +52,7 @@ bool FLAG_recompile = false;
 bool FLAG_tinyblas = false;
 bool FLAG_trace = false;
 bool FLAG_unsecure = false;
+const char *FLAG_chat_template = "";
 const char *FLAG_file = nullptr;
 const char *FLAG_ip_header = nullptr;
 const char *FLAG_listen = "127.0.0.1:8080";
@@ -123,6 +125,11 @@ static wontreturn void unknown(const char *flag) {
     exit(1);
 }
 
+static bool is_valid_chat_template(const char *tmpl) {
+    llama_chat_message chat[] = {{"user", "test"}};
+    return llama_chat_apply_template(nullptr, tmpl, chat, 1, true, nullptr, 0) >= 0;
+}
+
 void llamafile_get_flags(int argc, char **argv) {
     bool program_supports_gpu = FLAG_gpu != LLAMAFILE_GPU_DISABLE;
     for (int i = 1; i < argc;) {
@@ -157,6 +164,16 @@ void llamafile_get_flags(int argc, char **argv) {
             continue;
         }
 
+        if (!strcmp(flag, "--chatbot-mode")) {
+            FLAG_completion_mode = false;
+            continue;
+        }
+
+        if (!strcmp(flag, "--completion-mode")) {
+            FLAG_completion_mode = true;
+            continue;
+        }
+
         if (!strcmp(flag, "--no-display-prompt") || //
             !strcmp(flag, "--silent-prompt")) {
             FLAG_no_display_prompt = true;
@@ -345,6 +362,15 @@ void llamafile_get_flags(int argc, char **argv) {
             continue;
         }
 
+        if (!strcmp(flag, "--chat-template")) {
+            if (i == argc)
+                missing("--chat-template");
+            if (!is_valid_chat_template(argv[i]))
+                bad("--chat-template");
+            FLAG_chat_template = argv[i++];
+            continue;
+        }
+
         if (!strcmp(flag, "-s") || !strcmp(flag, "--slots")) {
             if (i == argc)
                 missing("--slots");
diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h
index 93e7aac8d6..4288ceb80a 100644
--- a/llamafile/llamafile.h
+++ b/llamafile/llamafile.h
@@ -8,6 +8,7 @@ extern "C" {
 
 extern bool FLAGS_READY;
 extern bool FLAG_ascii;
+extern bool FLAG_completion_mode;
 extern bool FLAG_fast;
 extern bool FLAG_iq;
 extern bool FLAG_log_disable;
@@ -22,6 +23,7 @@ extern bool FLAG_tinyblas;
 extern bool FLAG_trace;
 extern bool FLAG_trap;
 extern bool FLAG_unsecure;
+extern const char *FLAG_chat_template;
 extern const char *FLAG_file;
 extern const char *FLAG_ip_header;
 extern const char *FLAG_listen;
diff --git a/llamafile/server/flagz.cpp b/llamafile/server/flagz.cpp
index 2b60751b08..b3dfc9b0a0 100644
--- a/llamafile/server/flagz.cpp
+++ b/llamafile/server/flagz.cpp
@@ -24,6 +24,17 @@
 namespace lf {
 namespace server {
 
+static bool is_base_model(llama_model *model) {
+
+    // check if user explicitly passed --chat-template flag
+    if (*FLAG_chat_template)
+        return false;
+
+    // check if gguf metadata has chat template. this should always be
+    // present for "instruct" models, and never specified on base ones
+    return llama_model_meta_val_str(model, "tokenizer.chat_template", 0, 0) == -1;
+}
+
 bool
 Client::flagz()
 {
@@ -32,6 +43,8 @@ Client::flagz()
     json["prompt"] = FLAG_prompt;
     json["no_display_prompt"] = FLAG_no_display_prompt;
     json["nologo"] = FLAG_nologo;
+    json["completion_mode"] = FLAG_completion_mode;
+    json["is_base_model"] = is_base_model(model_);
     json["temperature"] = FLAG_temperature;
     json["top_p"] = FLAG_top_p;
     json["presence_penalty"] = FLAG_presence_penalty;
diff --git a/llamafile/server/v1_chat_completions.cpp b/llamafile/server/v1_chat_completions.cpp
index 910a074788..92b24ba126 100644
--- a/llamafile/server/v1_chat_completions.cpp
+++ b/llamafile/server/v1_chat_completions.cpp
@@ -462,7 +462,8 @@ Client::v1_chat_completions()
 
     // turn text into tokens
     state->prompt =
-        llama_chat_apply_template(model_, "", params->messages, ADD_ASSISTANT);
+        llama_chat_apply_template(
+            model_, FLAG_chat_template, params->messages, ADD_ASSISTANT);
     atomize(model_, &state->atoms, state->prompt, PARSE_SPECIAL);
 
     // find appropriate slot
diff --git a/llamafile/server/www/chatbot.css b/llamafile/server/www/chatbot.css
index 3ce8afd53c..34b46c4b96 100644
--- a/llamafile/server/www/chatbot.css
+++ b/llamafile/server/www/chatbot.css
@@ -37,6 +37,9 @@ p {
   background: #f8f9fa;
   border-bottom: 1px solid #e9ecef;
   border-radius: 12px 12px 0 0;
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
 }
 
 .chat-header h1 {
@@ -429,6 +432,140 @@ ul li:first-child {
   cursor: pointer;
 }
 
+/* Completions Interface */
+.completions-container {
+  max-width: 960px;
+  margin: 2rem auto;
+  background: white;
+  border-radius: 12px;
+  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+  display: flex;
+  flex-direction: column;
+  height: calc(100vh - 4rem);
+}
+
+.completions-content {
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  padding: 1rem;
+}
+
+#completions-input {
+  flex: 1;
+  padding: 1rem;
+  margin-bottom: 1rem;
+  border: 1px solid #dee2e6;
+  border-radius: 6px;
+  font-size: 1rem;
+  font-family: inherit;
+  resize: none;
+}
+
+.completions-controls {
+  display: flex;
+  gap: 0.5rem;
+}
+
+.complete-button {
+  padding: 0.75rem 1.5rem;
+  background: #0d6efd;
+  color: white;
+  border: none;
+  border-radius: 6px;
+  cursor: pointer;
+  font-size: 1rem;
+  transition: background-color 0.2s;
+}
+
+.complete-button:hover {
+  background: #0b5ed7;
+}
+
+.mode-dropdown {
+  display: none;
+}
+
+.mode-trigger {
+  display: none;
+}
+
+.mode-menu {
+  display: none;
+}
+
+.mode-item {
+  display: none;
+}
+
+.mode-switch {
+  display: none;
+}
+
+.menu-dropdown {
+  position: relative;
+  display: inline-block;
+}
+
+.menu-trigger {
+  padding: 0.5rem;
+  background: transparent;
+  border: none;
+  cursor: pointer;
+  font-size: 0.8rem;
+  color: #666;
+  transition: color 0.2s;
+}
+
+.menu-trigger:hover {
+  color: #000;
+}
+
+.menu {
+  position: absolute;
+  top: 100%;
+  right: 0;
+  background: white;
+  border: 1px solid #dee2e6;
+  border-radius: 6px;
+  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+  display: none;
+  z-index: 1000;
+  min-width: 180px;
+}
+
+.menu.show {
+  display: block;
+}
+
+.menu-item {
+  display: block;
+  width: 100%;
+  padding: 0.5rem 1rem;
+  border: none;
+  background: none;
+  text-align: left;
+  cursor: pointer;
+  white-space: nowrap;
+  font-size: 0.9rem;
+  color: #333;
+}
+
+.menu-item:hover {
+  background: #f8f9fa;
+}
+
+.menu-item.disabled {
+  opacity: 0.5;
+  cursor: not-allowed;
+  background: #f0f0f0;
+  color: #666;
+}
+
+.menu-item.disabled:hover {
+  background: #f0f0f0;
+}
+
 @media print {
 
   html,
diff --git a/llamafile/server/www/chatbot.js b/llamafile/server/www/chatbot.js
index 08eb266198..7662ec2d89 100644
--- a/llamafile/server/www/chatbot.js
+++ b/llamafile/server/www/chatbot.js
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-const API_ENDPOINT = "/v1/chat/completions";
 const API_KEY = "your-api-key-here";
 
 const DEFAULT_SYSTEM_PROMPT =
@@ -28,7 +27,9 @@ const DEFAULT_FLAGZ = {
   "presence_penalty": 0,
   "temperature": 0.8,
   "top_p": 0.95,
-  "seed": null
+  "seed": null,
+  "is_base_model": true,
+  "completion_mode": false,
 };
 
 const chatMessages = document.getElementById("chat-messages");
@@ -39,10 +40,17 @@ const settingsButton = document.getElementById("settings-button");
 const settingsModal = document.getElementById("settings-modal");
 const closeSettings = document.getElementById("close-settings");
 const redoButton = document.getElementById('redo-button');
+const chatInterface = document.getElementById("chat-interface");
+const completionsInterface = document.getElementById("completions-interface");
+const completionsInput = document.getElementById("completions-input");
+const completeButton = document.getElementById("complete-button");
+const completionsSettingsButton = document.getElementById("completions-settings-button");
+const completionsStopButton = document.getElementById("completions-stop-button");
 
 let abortController = null;
 let disableAutoScroll = false;
 let streamingMessageContent = [];
+let originalLength = 0;
 let uploadedFiles = [];
 let chatHistory = [];
 let flagz = null;
@@ -112,8 +120,8 @@ async function handleChatStream(response) {
         const line = lines[i].trim();
         if (line.startsWith("data: ")) {
           const data = line.slice(6);
-          if (data === "[DONE]") continue;
-
+          if (data === "[DONE]")
+            continue;
           try {
             const parsed = JSON.parse(data);
             const content = parsed.choices[0]?.delta?.content || "";
@@ -177,14 +185,14 @@ async function sendMessage() {
   const settings = loadSettings();
 
   try {
-    const response = await fetch(API_ENDPOINT, {
+    const response = await fetch("/v1/chat/completions", {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
         "Authorization": `Bearer ${API_KEY}`
       },
       body: JSON.stringify({
-        model: "gpt-3.5-turbo",
+        model: flagz.model || "gpt-3.5-turbo",
         messages: chatHistory,
         temperature: settings.temperature,
         top_p: settings.top_p,
@@ -356,7 +364,10 @@ function getSystemPrompt() {
 
 function updateModelInfo() {
   if (flagz.model) {
-    document.getElementById("model").textContent = flagz.model;
+    const modelName = flagz.model;
+    document.title = `${modelName} - llamafile`;
+    document.getElementById("model").textContent = modelName;
+    document.getElementById("model-completions").textContent = modelName;
   }
 }
 
@@ -455,6 +466,114 @@ function setupSettings() {
   });
 }
 
+function setupCompletions() {
+  completeButton.addEventListener("click", sendCompletion);
+  completionsStopButton.addEventListener("click", stopCompletion);
+  completionsSettingsButton.addEventListener("click", () => {
+    settingsModal.style.display = "flex";
+    updateSettingsDisplay(loadSettings());
+  });
+  completionsInput.addEventListener("keydown", (e) => {
+    if (e.key === "Enter" && !e.shiftKey && (e.ctrlKey || e.metaKey)) {
+      e.preventDefault();
+      sendCompletion();
+    }
+  });
+}
+
+function stopCompletion() {
+  if (abortController) {
+    abortController.abort();
+    cleanupAfterCompletion();
+  }
+}
+
+function cleanupAfterCompletion() {
+  completeButton.style.display = "inline-block";
+  completionsStopButton.style.display = "none";
+  completeButton.disabled = false;
+  abortController = null;
+
+  // select newly added text and restore focus
+  const textArea = completionsInput;
+  textArea.focus();
+  textArea.selectionStart = originalLength || 0;
+  textArea.selectionEnd = textArea.value.length;
+}
+
+async function sendCompletion() {
+  const text = completionsInput.value;
+  completeButton.style.display = "none";
+  completionsStopButton.style.display = "inline-block";
+  completeButton.disabled = true;
+  abortController = new AbortController();
+  originalLength = text.length;
+  completionsStopButton.focus();
+  const settings = loadSettings();
+  try {
+    const response = await fetch("/v1/completions", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "Authorization": `Bearer ${API_KEY}`
+      },
+      body: JSON.stringify({
+        model: flagz.model || "gpt-3.5-turbo",
+        prompt: text,
+        temperature: settings.temperature,
+        top_p: settings.top_p,
+        presence_penalty: settings.presence_penalty,
+        frequency_penalty: settings.frequency_penalty,
+        stream: true
+      }),
+      signal: abortController.signal
+    });
+    if (response.ok) {
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let buffer = "";
+      try {
+        while (true) {
+          const { done, value } = await reader.read();
+          if (done)
+            break;
+          buffer += decoder.decode(value, { stream: true });
+          const lines = buffer.split("\n");
+          for (let i = 0; i < lines.length - 1; i++) {
+            const line = lines[i].trim();
+            if (line.startsWith("data: ")) {
+              const data = line.slice(6);
+              if (data === "[DONE]")
+                continue;
+              try {
+                const parsed = JSON.parse(data);
+                const content = parsed.choices[0]?.text || "";
+                completionsInput.value += content;
+                completionsInput.scrollTop = completionsInput.scrollHeight;
+              } catch (e) {
+                console.error("Error parsing JSON:", e);
+              }
+            }
+          }
+          buffer = lines[lines.length - 1];
+        }
+      } catch (error) {
+        if (error.name !== "AbortError") {
+          console.error("Error reading stream:", error);
+        }
+      }
+    } else {
+      console.error("Completion failed:", response.statusText);
+    }
+  } catch (error) {
+    if (error.name !== "AbortError") {
+      console.error("Completion error:", error);
+    }
+  } finally {
+    cleanupAfterCompletion();
+  }
+}
+
 function removeLastDirectChild(element) {
   if (element.lastElementChild) {
     element.removeChild(element.lastElementChild);
@@ -475,11 +594,72 @@ function onRedo() {
   chatInput.dispatchEvent(new Event("input")); // adjust textarea height
 }
 
+function setupMenu() {
+  const triggers = document.querySelectorAll('.menu-trigger');
+  const menus = document.querySelectorAll('.menu');
+  const chatModeSwitch = document.getElementById('chat-mode-switch');
+  const completionsModeSwitch = document.getElementById('completions-mode-switch');
+  if (flagz.is_base_model) {
+    completionsModeSwitch.classList.add('disabled');
+    completionsModeSwitch.title = "Chatbot mode isn't possible because this is a base model that hasn't had instruction fine tuning; try passing --chat-template chatml or llama2 if this is really an instruct model.";
+    completionsModeSwitch.disabled = true;
+  }
+  triggers.forEach(trigger => {
+    trigger.addEventListener('click', (e) => {
+      e.stopPropagation();
+      const menu = trigger.nextElementSibling;
+      menus.forEach(m => {
+        if (m !== menu)
+          m.classList.remove('show');
+      });
+      menu.classList.toggle('show');
+    });
+  });
+  document.addEventListener('click', () => {
+    menus.forEach(menu => menu.classList.remove('show'));
+  });
+  document.addEventListener('keydown', (e) => {
+    if (e.key === 'Escape')
+      menus.forEach(menu => menu.classList.remove('show'));
+  });
+  chatModeSwitch.addEventListener('click', () => {
+    flagz.completion_mode = true;
+    setupCompletionsMode();
+    menus.forEach(menu => menu.classList.remove('show'));
+  });
+  completionsModeSwitch.addEventListener('click', () => {
+    if (!flagz.is_base_model) {
+      flagz.completion_mode = false;
+      setupChatCompletionsMode();
+      menus.forEach(menu => menu.classList.remove('show'));
+    }
+  });
+}
+
+function setupChatCompletionsMode() {
+  chatInterface.style.display = "flex";
+  completionsInterface.style.display = "none";
+  startChat([{ role: "system", content: getSystemPrompt() }]);
+  chatInput.focus();
+}
+
+function setupCompletionsMode() {
+  chatInterface.style.display = "none";
+  completionsInterface.style.display = "flex";
+  completionsInput.focus();
+}
+
 async function chatbot() {
   flagz = await fetchFlagz();
   updateModelInfo();
   setupSettings();
-  startChat([{ role: "system", content: getSystemPrompt() }]);
+  setupCompletions();
+  setupMenu();
+  if (flagz.is_base_model || flagz.completion_mode) {
+    setupCompletionsMode();
+  } else {
+    setupChatCompletionsMode();
+  }
   sendButton.addEventListener("click", sendMessage);
   stopButton.addEventListener("click", stopMessage);
   redoButton.addEventListener("click", onRedo);
@@ -492,7 +672,6 @@ async function chatbot() {
   document.addEventListener("drop", onDragEnd);
   document.addEventListener("drop", onDrop);
   document.addEventListener("paste", onPaste);
-  chatInput.focus();
 }
 
 chatbot();
diff --git a/llamafile/server/www/index.html b/llamafile/server/www/index.html
index 5c18dc95de..eec0250c16 100644
--- a/llamafile/server/www/index.html
+++ b/llamafile/server/www/index.html
@@ -6,12 +6,18 @@
-