Skip to content

Commit

Permalink
Introduce raw completions web ui
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Nov 27, 2024
1 parent e5c0921 commit 988c9ec
Show file tree
Hide file tree
Showing 8 changed files with 399 additions and 11 deletions.
1 change: 1 addition & 0 deletions llama.cpp/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true;
}
params.chat_template = argv[i];
FLAG_chat_template = argv[i]; // [jart]
return true;
}
if (arg == "--slot-prompt-similarity" || arg == "-sps") {
Expand Down
26 changes: 26 additions & 0 deletions llamafile/flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

bool FLAGS_READY = false;
bool FLAG_ascii = false;
bool FLAG_completion_mode = false;
bool FLAG_fast = false;
bool FLAG_iq = false;
bool FLAG_log_disable = false;
Expand All @@ -51,6 +52,7 @@ bool FLAG_recompile = false;
bool FLAG_tinyblas = false;
bool FLAG_trace = false;
bool FLAG_unsecure = false;
const char *FLAG_chat_template = "";
const char *FLAG_file = nullptr;
const char *FLAG_ip_header = nullptr;
const char *FLAG_listen = "127.0.0.1:8080";
Expand Down Expand Up @@ -123,6 +125,11 @@ static wontreturn void unknown(const char *flag) {
exit(1);
}

static bool is_valid_chat_template(const char *tmpl) {
llama_chat_message chat[] = {{"user", "test"}};
return llama_chat_apply_template(nullptr, tmpl, chat, 1, true, nullptr, 0) >= 0;
}

void llamafile_get_flags(int argc, char **argv) {
bool program_supports_gpu = FLAG_gpu != LLAMAFILE_GPU_DISABLE;
for (int i = 1; i < argc;) {
Expand Down Expand Up @@ -157,6 +164,16 @@ void llamafile_get_flags(int argc, char **argv) {
continue;
}

if (!strcmp(flag, "--chatbot-mode")) {
FLAG_completion_mode = false;
continue;
}

if (!strcmp(flag, "--completion-mode")) {
FLAG_completion_mode = true;
continue;
}

if (!strcmp(flag, "--no-display-prompt") || //
!strcmp(flag, "--silent-prompt")) {
FLAG_no_display_prompt = true;
Expand Down Expand Up @@ -345,6 +362,15 @@ void llamafile_get_flags(int argc, char **argv) {
continue;
}

if (!strcmp(flag, "--chat-template")) {
if (i == argc)
missing("--chat-template");
if (!is_valid_chat_template(argv[i]))
bad("--chat-template");
FLAG_chat_template = argv[i++];
continue;
}

if (!strcmp(flag, "-s") || !strcmp(flag, "--slots")) {
if (i == argc)
missing("--slots");
Expand Down
2 changes: 2 additions & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ extern "C" {

extern bool FLAGS_READY;
extern bool FLAG_ascii;
extern bool FLAG_completion_mode;
extern bool FLAG_fast;
extern bool FLAG_iq;
extern bool FLAG_log_disable;
Expand All @@ -22,6 +23,7 @@ extern bool FLAG_tinyblas;
extern bool FLAG_trace;
extern bool FLAG_trap;
extern bool FLAG_unsecure;
extern const char *FLAG_chat_template;
extern const char *FLAG_file;
extern const char *FLAG_ip_header;
extern const char *FLAG_listen;
Expand Down
13 changes: 13 additions & 0 deletions llamafile/server/flagz.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@
namespace lf {
namespace server {

// Returns true if the loaded model looks like a base (non-instruct) model.
//
// The model is treated as instruct-tuned when either (a) the user passed
// an explicit --chat-template flag, or (b) the GGUF metadata carries a
// "tokenizer.chat_template" key, which instruct models ship and base
// models do not.
static bool is_base_model(llama_model *model) {

    // check if user explicitly passed --chat-template flag
    if (*FLAG_chat_template)
        return false;

    // check if gguf metadata has chat template. this should always be
    // present for "instruct" models, and never specified on base ones.
    // [fix] pass nullptr (not 0) for the unused char* output buffer;
    // -1 signals the key is absent.
    return llama_model_meta_val_str(model, "tokenizer.chat_template", nullptr, 0) == -1;
}

bool
Client::flagz()
{
Expand All @@ -32,6 +43,8 @@ Client::flagz()
json["prompt"] = FLAG_prompt;
json["no_display_prompt"] = FLAG_no_display_prompt;
json["nologo"] = FLAG_nologo;
json["completion_mode"] = FLAG_completion_mode;
json["is_base_model"] = is_base_model(model_);
json["temperature"] = FLAG_temperature;
json["top_p"] = FLAG_top_p;
json["presence_penalty"] = FLAG_presence_penalty;
Expand Down
3 changes: 2 additions & 1 deletion llamafile/server/v1_chat_completions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,8 @@ Client::v1_chat_completions()

// turn text into tokens
state->prompt =
llama_chat_apply_template(model_, "", params->messages, ADD_ASSISTANT);
llama_chat_apply_template(
model_, FLAG_chat_template, params->messages, ADD_ASSISTANT);
atomize(model_, &state->atoms, state->prompt, PARSE_SPECIAL);

// find appropriate slot
Expand Down
137 changes: 137 additions & 0 deletions llamafile/server/www/chatbot.css
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ p {
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
border-radius: 12px 12px 0 0;
display: flex;
justify-content: space-between;
align-items: center;
}

.chat-header h1 {
Expand Down Expand Up @@ -429,6 +432,140 @@ ul li:first-child {
cursor: pointer;
}

/* Completions Interface
   Full-height card for the raw-completions view: a column flexbox whose
   textarea grows to fill all space above the control row. */
.completions-container {
max-width: 960px;
margin: 2rem auto;
background: white;
border-radius: 12px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
display: flex;
flex-direction: column;
/* fill the viewport minus the 2rem top + bottom margins */
height: calc(100vh - 4rem);
}

/* Inner column: textarea on top, button row below. */
.completions-content {
flex: 1;
display: flex;
flex-direction: column;
padding: 1rem;
}

/* The prompt/output textarea; flex:1 makes it absorb the leftover
   height, and resize is disabled since the container already sizes it. */
#completions-input {
flex: 1;
padding: 1rem;
margin-bottom: 1rem;
border: 1px solid #dee2e6;
border-radius: 6px;
font-size: 1rem;
font-family: inherit;
resize: none;
}

/* Horizontal row of action buttons under the textarea. */
.completions-controls {
display: flex;
gap: 0.5rem;
}

/* Primary "complete" action button (Bootstrap-blue). */
.complete-button {
padding: 0.75rem 1.5rem;
background: #0d6efd;
color: white;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 1rem;
transition: background-color 0.2s;
}

.complete-button:hover {
background: #0b5ed7;
}

/* Retired mode-switch widgets: keep the selectors so any stale markup
   stays hidden, but collapse five identical single-property rules into
   one grouped selector list (same specificity per selector, same
   effect, less repetition). */
.mode-dropdown,
.mode-trigger,
.mode-menu,
.mode-item,
.mode-switch {
  display: none;
}

/* Hamburger-style dropdown menu in the chat header.
   The wrapper is position:relative so the absolutely-positioned .menu
   anchors to it. */
.menu-dropdown {
position: relative;
display: inline-block;
}

/* Borderless icon button that opens the menu. */
.menu-trigger {
padding: 0.5rem;
background: transparent;
border: none;
cursor: pointer;
font-size: 0.8rem;
color: #666;
transition: color 0.2s;
}

.menu-trigger:hover {
color: #000;
}

/* Dropdown panel, hidden by default; JS toggles the .show class.
   Anchored below and right-aligned to the trigger. */
.menu {
position: absolute;
top: 100%;
right: 0;
background: white;
border: 1px solid #dee2e6;
border-radius: 6px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
display: none;
/* float above chat content */
z-index: 1000;
min-width: 180px;
}

.menu.show {
display: block;
}

/* Full-width button rows inside the menu. */
.menu-item {
display: block;
width: 100%;
padding: 0.5rem 1rem;
border: none;
background: none;
text-align: left;
cursor: pointer;
white-space: nowrap;
font-size: 0.9rem;
color: #333;
}

.menu-item:hover {
background: #f8f9fa;
}

/* Grayed-out, non-clickable entries. */
.menu-item.disabled {
opacity: 0.5;
cursor: not-allowed;
background: #f0f0f0;
color: #666;
}

/* Suppress the hover highlight for disabled entries. */
.menu-item.disabled:hover {
background: #f0f0f0;
}

@media print {

html,
Expand Down
Loading

0 comments on commit 988c9ec

Please sign in to comment.