From 08c5e350146a53bb7b6ff8f84a84e8b341c19f40 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 26 Apr 2024 02:01:58 -0700 Subject: [PATCH 01/13] main : don't print special tokens with --grammar The CLI interface was recently changed to print special control tokens like the stop message one. This token shouldn't be printed if the grammar flag was passed, unless the grammar specifies it, because that breaks shell-scriptability. --- examples/main/main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index a74d4d9c72364..3fb9d3eafc62c 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -520,6 +520,7 @@ int main(int argc, char ** argv) { } struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams); + bool should_show_special_tokens = sparams.grammar.empty(); while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict @@ -733,7 +734,8 @@ int main(int argc, char ** argv) { // display text if (input_echo && display) { for (auto id : embd) { - const std::string token_str = llama_token_to_piece(ctx, id); + const std::string token_str = + llama_token_to_piece(ctx, id, should_show_special_tokens); printf("%s", token_str.c_str()); if (embd.size() > 1) { @@ -899,7 +901,7 @@ int main(int argc, char ** argv) { for (size_t i = original_size; i < embd_inp.size(); ++i) { const llama_token token = embd_inp[i]; output_tokens.push_back(token); - output_ss << llama_token_to_piece(ctx, token); + output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens); } n_remain -= line_inp.size(); From bcd24f8974c9bfeacd35eaf244bc1ed4e6a0a36c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 20 May 2024 01:57:43 +1000 Subject: [PATCH 02/13] main: use separate stream for control characters --- examples/main/main.cpp | 36 ++++++++++++++++++++++++++++++++---- llama.cpp | 4 ++++ llama.h | 3 +++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git 
a/examples/main/main.cpp b/examples/main/main.cpp index 8153a71fb5791..5c453a57ed605 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -528,7 +528,20 @@ int main(int argc, char ** argv) { exit(1); } - bool should_show_special_tokens = sparams.grammar.empty(); + // Create the pipe for special token handling + int stok_pipe[2] = {0}; + if (pipe(stok_pipe) == -1) { + fprintf(stderr, "%s: failed to initialize special token output stream\n", __func__); + exit(1); + } + + close(stok_pipe[0]); // Read Special Token Not In Use + + FILE *special_token_stream_output_fd = fdopen(stok_pipe[1], "w"); + if (special_token_stream_output_fd == NULL) { + fprintf(stderr, "%s: failed to open special token output stream\n", __func__); + exit(1); + } while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict @@ -742,18 +755,31 @@ int main(int argc, char ** argv) { // display text if (input_echo && display) { for (auto id : embd) { - const std::string token_str = llama_token_to_piece(ctx, id, !params.conversation && should_show_special_tokens); - printf("%s", token_str.c_str()); + const std::string token_str = llama_token_to_piece(ctx, id); + // Console/Stream Output + if (llama_token_is_control_token(llama_get_model(ctx), id)) { + // Stream Output Token To Special Token Output + fprintf(special_token_stream_output_fd, "%s", token_str.c_str()); + } else { + // Stream Output Token To Standard Output + fprintf(stdout, "%s", token_str.c_str()); + } + + // Record Displayed Tokens To Log + // Note: Generated tokens are created one by one hence this check if (embd.size() > 1) { + // Incoming Requested Tokens input_tokens.push_back(id); } else { + // Outgoing Generated Tokens output_tokens.push_back(id); output_ss << token_str; } } fflush(stdout); } + // reset color to default if there is no pending user input if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); @@ -908,7 +934,7 @@ int main(int argc, char ** argv) { 
for (size_t i = original_size; i < embd_inp.size(); ++i) { const llama_token token = embd_inp[i]; output_tokens.push_back(token); - output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens); + output_ss << llama_token_to_piece(ctx, token); } n_remain -= line_inp.size(); @@ -957,6 +983,8 @@ int main(int argc, char ** argv) { llama_sampling_free(ctx_sampling); llama_backend_free(); + fclose(special_token_stream_output_fd); + #ifndef LOG_DISABLE_LOGS LOG_TEE("Log end\n"); #endif // LOG_DISABLE_LOGS diff --git a/llama.cpp b/llama.cpp index b752ddc6b401f..f41c6e5b68192 100644 --- a/llama.cpp +++ b/llama.cpp @@ -17634,6 +17634,10 @@ bool llama_token_is_eog(const struct llama_model * model, llama_token token) { ); } +bool llama_token_is_control_token(const struct llama_model * model, llama_token token) { + return llama_is_control_token(model->vocab, token); +} + llama_token llama_token_bos(const struct llama_model * model) { return model->vocab.special_bos_id; } diff --git a/llama.h b/llama.h index 612e32c4ea058..7cacb3d645a40 100644 --- a/llama.h +++ b/llama.h @@ -816,6 +816,9 @@ extern "C" { // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.) 
LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token); + // Identify if Token Id is a control token or a render-able token + LLAMA_API bool llama_token_is_control_token(const struct llama_model * model, llama_token token); + // Special tokens LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence From 9f445a793d93dc9d4cee5097e83c179d0cddc34b Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 20 May 2024 22:35:08 +1000 Subject: [PATCH 03/13] main: use dprintf and add --ctrl-token-no-out and --ctrl-token-fd-out --- common/common.cpp | 13 ++++++++++++- common/common.h | 2 ++ examples/main/main.cpp | 36 +++++++++++++++--------------------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index e624fc7f35352..4cc482a302904 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -905,6 +905,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.interactive_specials = true; return true; } + if (arg == "--ctrl-token-no-out") { + params.ctrl_token_no_out = true; + return true; + } + if (arg == "--ctrl-token-fd-out") { + params.ctrl_token_fd_out = true; + return true; + } if (arg == "--embedding") { params.embedding = true; return true; @@ -1433,7 +1441,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --version show version and build info\n"); printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); - printf(" --interactive-first run in interactive mode and wait for input right away\n"); + printf(" --ctrl-token-no-out control tokens output disabled\n"); +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + printf(" --ctrl-token-fd-out control tokens sent to file 
descriptor 3 out of band\n"); +#endif printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); diff --git a/common/common.h b/common/common.h index 566490e2f881a..20a776f6bf40b 100644 --- a/common/common.h +++ b/common/common.h @@ -142,6 +142,8 @@ struct gpt_params { bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode + bool ctrl_token_no_out = false; // disable control token output + bool ctrl_token_fd_out = false; // enable control token output and redirect it to file descriptor 3 bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix) bool chatml = false; // chatml mode (used for models trained on chatml syntax) bool prompt_cache_all = false; // save user input and generations to prompt cache diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 5c453a57ed605..8effd02a8bdc0 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -18,6 +18,7 @@ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include #include +#define CONTROL_TOKEN_FILE_DESCRIPTOR (3) #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -528,21 +529,6 @@ int main(int argc, char ** argv) { exit(1); } - // Create the pipe for special token handling - int stok_pipe[2] = {0}; - if (pipe(stok_pipe) == -1) { - fprintf(stderr, "%s: failed to initialize special token output stream\n", __func__); - exit(1); - } - - close(stok_pipe[0]); // Read Special Token Not In Use - - FILE *special_token_stream_output_fd = fdopen(stok_pipe[1], "w"); - if (special_token_stream_output_fd == NULL) { - fprintf(stderr, "%s: failed to 
open special token output stream\n", __func__); - exit(1); - } - while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (!embd.empty()) { @@ -758,12 +744,22 @@ int main(int argc, char ** argv) { const std::string token_str = llama_token_to_piece(ctx, id); // Console/Stream Output - if (llama_token_is_control_token(llama_get_model(ctx), id)) { - // Stream Output Token To Special Token Output - fprintf(special_token_stream_output_fd, "%s", token_str.c_str()); - } else { + if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); + } else if (!params.ctrl_token_no_out) { +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + if (params.ctrl_token_fd_out) { + // Stream Control Token To Special Token Output. Useful for debugging control token behaviour + dprintf(CONTROL_TOKEN_FILE_DESCRIPTOR, "%s", token_str.c_str()); + } + else +#endif + if (!params.conversation && sparams.grammar.empty()) + { + // Stream Control Token To Standard Output as long as we are not in a conversation or grammar output + fprintf(stdout, "%s", token_str.c_str()); + } } // Record Displayed Tokens To Log @@ -983,8 +979,6 @@ int main(int argc, char ** argv) { llama_sampling_free(ctx_sampling); llama_backend_free(); - fclose(special_token_stream_output_fd); - #ifndef LOG_DISABLE_LOGS LOG_TEE("Log end\n"); #endif // LOG_DISABLE_LOGS From ad4b6097c0014e94ff80fa8b5592c05f5964a832 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 00:37:12 +1000 Subject: [PATCH 04/13] main: dprintf isn't part of the IEEE POSIX standard. Just use write(). 
--- examples/main/main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 8effd02a8bdc0..4019485124029 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -748,10 +748,11 @@ int main(int argc, char ** argv) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); } else if (!params.ctrl_token_no_out) { -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#ifndef _MSC_VER if (params.ctrl_token_fd_out) { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - dprintf(CONTROL_TOKEN_FILE_DESCRIPTOR, "%s", token_str.c_str()); + ssize_t result = write(CONTROL_TOKEN_FILE_DESCRIPTOR, token_str.c_str(), token_str.length()); + (void) result; } else #endif From c9ea9df7fbc04747772fb7e765ba8c9a35a9f00c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 04:44:50 +1000 Subject: [PATCH 05/13] main: remove --ctrl-token-fd-out in favor of fcntl() based detection --- common/common.cpp | 7 ------- common/common.h | 1 - examples/main/main.cpp | 29 +++++++++++++++++++---------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 4cc482a302904..b256eef9e27cc 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -909,10 +909,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.ctrl_token_no_out = true; return true; } - if (arg == "--ctrl-token-fd-out") { - params.ctrl_token_fd_out = true; - return true; - } if (arg == "--embedding") { params.embedding = true; return true; @@ -1442,9 +1438,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); printf(" --ctrl-token-no-out control tokens output disabled\n"); -#if defined 
(__unix__) || (defined (__APPLE__) && defined (__MACH__)) - printf(" --ctrl-token-fd-out control tokens sent to file descriptor 3 out of band\n"); -#endif printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); diff --git a/common/common.h b/common/common.h index 20a776f6bf40b..f7556cfec16b8 100644 --- a/common/common.h +++ b/common/common.h @@ -143,7 +143,6 @@ struct gpt_params { bool interactive = false; // interactive mode bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode bool ctrl_token_no_out = false; // disable control token output - bool ctrl_token_fd_out = false; // enable control token output and redirect it to file descriptor 3 bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix) bool chatml = false; // chatml mode (used for models trained on chatml syntax) bool prompt_cache_all = false; // save user input and generations to prompt cache diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 4019485124029..aded5bbf1eb54 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -18,7 +18,8 @@ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include #include -#define CONTROL_TOKEN_FILE_DESCRIPTOR (3) +#include +#define CONTROL_TOKEN_FILENO (3) #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -529,6 +530,14 @@ int main(int argc, char ** argv) { exit(1); } +#ifndef _MSC_VER + if (fcntl(CONTROL_TOKEN_FILENO, F_GETFL) == -1) { + // Control Token File Descriptor has nothing attached to it + // make control token file descriptor be an alias of stdout + dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); + } +#endif + while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if 
(!embd.empty()) { @@ -746,23 +755,23 @@ int main(int argc, char ** argv) { // Console/Stream Output if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output + fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } else if (!params.ctrl_token_no_out) { + if (!params.conversation && sparams.grammar.empty()) + { + // Stream Control Token To Special Token Output. Useful for debugging control token behaviour + fflush(stdout); + fprintf(stdout, "%s", token_str.c_str()); + } #ifndef _MSC_VER - if (params.ctrl_token_fd_out) { + else { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - ssize_t result = write(CONTROL_TOKEN_FILE_DESCRIPTOR, token_str.c_str(), token_str.length()); + ssize_t result = write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); (void) result; } - else #endif - if (!params.conversation && sparams.grammar.empty()) - { - // Stream Control Token To Standard Output as long as we are not in a conversation or grammar output - fprintf(stdout, "%s", token_str.c_str()); - } } - // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check if (embd.size() > 1) { From 5032f18f204f0e8f645da3b4607e071bea585b07 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 04:47:49 +1000 Subject: [PATCH 06/13] common.cpp: accidentally removed --interactive-first --- common/common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/common.cpp b/common/common.cpp index b256eef9e27cc..2a6990f3474b2 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1437,6 +1437,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --version show version and build info\n"); printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); + printf(" --interactive-first run in interactive mode and wait 
for input right away\n"); printf(" --ctrl-token-no-out control tokens output disabled\n"); printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); From 90456a5717ca6ab20750eba147d692ee4cbfb2f0 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 04:57:26 +1000 Subject: [PATCH 07/13] main: only merge stdout and control token if not in conversation or grammar mode --- examples/main/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index aded5bbf1eb54..66f6695b7ab1f 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -531,9 +531,10 @@ int main(int argc, char ** argv) { } #ifndef _MSC_VER - if (fcntl(CONTROL_TOKEN_FILENO, F_GETFL) == -1) { - // Control Token File Descriptor has nothing attached to it - // make control token file descriptor be an alias of stdout + const bool control_token_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; + if (!control_token_descriptor_is_attached && !params.conversation && sparams.grammar.empty()) { + // Control Token File Descriptor has nothing attached to it so make control token file descriptor be an alias of stdout + // This is not done however if we are in conversation mode or grammar mode as that is typically discarded dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); } #endif From 50048f5b45ee57fc0c1d233ea6bc08e801ccbe13 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 11:20:00 +1000 Subject: [PATCH 08/13] main: rejig control token descriptor handling --- examples/main/main.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 66f6695b7ab1f..cbc65ba080b79 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -530,9 +530,11 @@ int main(int argc, char ** argv) { 
exit(1); } + const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); + #ifndef _MSC_VER const bool control_token_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; - if (!control_token_descriptor_is_attached && !params.conversation && sparams.grammar.empty()) { + if (control_token_allowed_on_standard_stream && !control_token_descriptor_is_attached) { // Control Token File Descriptor has nothing attached to it so make control token file descriptor be an alias of stdout // This is not done however if we are in conversation mode or grammar mode as that is typically discarded dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); @@ -759,19 +761,19 @@ int main(int argc, char ** argv) { fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } else if (!params.ctrl_token_no_out) { - if (!params.conversation && sparams.grammar.empty()) - { - // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - fflush(stdout); - fprintf(stdout, "%s", token_str.c_str()); - } #ifndef _MSC_VER - else { + if (control_token_descriptor_is_attached) { // Stream Control Token To Special Token Output. 
Useful for debugging control token behaviour ssize_t result = write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); (void) result; - } + } else #endif + if (control_token_allowed_on_standard_stream) + { + // Stream Control Token To Standard Output Stream + fflush(stdout); + fprintf(stdout, "%s", token_str.c_str()); + } } // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check From c1e8a6d1c03a44eaf90fb120a102f5d7ea1c6e98 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 14:58:34 +1000 Subject: [PATCH 09/13] main: must check pipe status on very top of program --- examples/main/main.cpp | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index cbc65ba080b79..d997c713e2649 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -120,6 +120,12 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v } int main(int argc, char ** argv) { +#ifndef _MSC_VER + // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. 
bash `3>/dev/null` ) + // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files + const bool control_token_file_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; +#endif + gpt_params params; g_params = &params; @@ -128,6 +134,16 @@ int main(int argc, char ** argv) { } llama_sampling_params & sparams = params.sparams; + const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); + +#ifndef _MSC_VER + // Merge normal token stream and control token streams together only if not in conversation or grammar mode + if (control_token_allowed_on_standard_stream && !control_token_file_descriptor_is_attached) { + // Duplicate stdout file descriptor to control token file descriptor to merge the two streams + dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); + } +#endif + #ifndef LOG_DISABLE_LOGS log_set_target(log_filename_generator("main", "log")); LOG_TEE("Log start\n"); @@ -530,17 +546,6 @@ int main(int argc, char ** argv) { exit(1); } - const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); - -#ifndef _MSC_VER - const bool control_token_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; - if (control_token_allowed_on_standard_stream && !control_token_descriptor_is_attached) { - // Control Token File Descriptor has nothing attached to it so make control token file descriptor be an alias of stdout - // This is not done however if we are in conversation mode or grammar mode as that is typically discarded - dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); - } -#endif - while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (!embd.empty()) { @@ -758,20 +763,18 @@ int main(int argc, char ** argv) { // Console/Stream Output if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output - fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } else if 
(!params.ctrl_token_no_out) { #ifndef _MSC_VER - if (control_token_descriptor_is_attached) { + if (control_token_file_descriptor_is_attached) { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - ssize_t result = write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); - (void) result; + fflush(stdout); // Ensure control token is always appended to stdout stream + (void)! write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); } else #endif if (control_token_allowed_on_standard_stream) { // Stream Control Token To Standard Output Stream - fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } } From 7d52482bacf2eed07fbe5c18c64001892aab8c00 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 16:00:59 +1000 Subject: [PATCH 10/13] main: renamed --no-special from --ctrl-token-no-out and other refactoring --- common/common.cpp | 4 ++-- examples/main/main.cpp | 21 ++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 2a6990f3474b2..a026fe50935b9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -905,7 +905,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.interactive_specials = true; return true; } - if (arg == "--ctrl-token-no-out") { + if (arg == "--no-special") { params.ctrl_token_no_out = true; return true; } @@ -1438,7 +1438,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); printf(" --interactive-first run in interactive mode and wait for input right away\n"); - printf(" --ctrl-token-no-out control tokens output disabled\n"); + printf(" --no-special control tokens output disabled\n"); printf(" -cnv, --conversation run in conversation mode (does not print special tokens and 
suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index d997c713e2649..d5288a5e62a65 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -19,7 +19,7 @@ #include #include #include -#define CONTROL_TOKEN_FILENO (3) +#define SPECIAL_FILENO 3 #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -123,7 +123,11 @@ int main(int argc, char ** argv) { #ifndef _MSC_VER // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. bash `3>/dev/null` ) // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files - const bool control_token_file_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; + const bool control_token_file_descriptor_is_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1; + if (!control_token_file_descriptor_is_attached) { + // Duplicate stdout file descriptor to control token file descriptor to merge the two streams + dup2(STDOUT_FILENO, SPECIAL_FILENO); + } #endif gpt_params params; @@ -136,14 +140,6 @@ int main(int argc, char ** argv) { const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); -#ifndef _MSC_VER - // Merge normal token stream and control token streams together only if not in conversation or grammar mode - if (control_token_allowed_on_standard_stream && !control_token_file_descriptor_is_attached) { - // Duplicate stdout file descriptor to control token file descriptor to merge the two streams - dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); - } -#endif - #ifndef LOG_DISABLE_LOGS log_set_target(log_filename_generator("main", "log")); LOG_TEE("Log start\n"); @@ -768,8 +764,7 @@ int main(int argc, char ** argv) { #ifndef _MSC_VER if (control_token_file_descriptor_is_attached) { // Stream Control 
Token To Special Token Output. Useful for debugging control token behaviour - fflush(stdout); // Ensure control token is always appended to stdout stream - (void)! write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); + (void)! write(SPECIAL_FILENO, token_str.c_str(), token_str.length()); } else #endif if (control_token_allowed_on_standard_stream) @@ -788,8 +783,8 @@ int main(int argc, char ** argv) { output_tokens.push_back(id); output_ss << token_str; } + fflush(stdout); } - fflush(stdout); } // reset color to default if there is no pending user input From 8f76ba54bac54a4fe8b33f570740dac61a08426f Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 16:03:18 +1000 Subject: [PATCH 11/13] main: refactor ctrl_token_no_out --> no_special --- common/common.cpp | 2 +- common/common.h | 2 +- examples/main/main.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index a026fe50935b9..ffc98137dbab8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -906,7 +906,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } if (arg == "--no-special") { - params.ctrl_token_no_out = true; + params.no_special = true; return true; } if (arg == "--embedding") { diff --git a/common/common.h b/common/common.h index f7556cfec16b8..87e771ca10a02 100644 --- a/common/common.h +++ b/common/common.h @@ -142,7 +142,7 @@ struct gpt_params { bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode - bool ctrl_token_no_out = false; // disable control token output + bool no_special = false; // disable control token output bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix) bool chatml = false; // chatml mode (used for models trained on chatml syntax) bool 
prompt_cache_all = false; // save user input and generations to prompt cache diff --git a/examples/main/main.cpp b/examples/main/main.cpp index d5288a5e62a65..fc54861811184 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -760,7 +760,7 @@ int main(int argc, char ** argv) { if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); - } else if (!params.ctrl_token_no_out) { + } else if (!params.no_special) { #ifndef _MSC_VER if (control_token_file_descriptor_is_attached) { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour From 12fcea5d0431f31340524e1dcfebbab4e416a737 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Wed, 22 May 2024 01:45:07 +1000 Subject: [PATCH 12/13] llama: rename llama_token_is_control_token() to llama_token_is_control() --- examples/main/main.cpp | 2 +- llama.cpp | 2 +- llama.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index fc54861811184..f11abe0072693 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -757,7 +757,7 @@ int main(int argc, char ** argv) { const std::string token_str = llama_token_to_piece(ctx, id); // Console/Stream Output - if (!llama_token_is_control_token(llama_get_model(ctx), id)) { + if (!llama_token_is_control(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); } else if (!params.no_special) { diff --git a/llama.cpp b/llama.cpp index e83857eba9b3d..477ccec02dccd 100644 --- a/llama.cpp +++ b/llama.cpp @@ -17347,7 +17347,7 @@ bool llama_token_is_eog(const struct llama_model * model, llama_token token) { ); } -bool llama_token_is_control_token(const struct llama_model * model, llama_token token) { +bool llama_token_is_control(const struct llama_model * model, llama_token token) { return llama_is_control_token(model->vocab, token); 
} diff --git a/llama.h b/llama.h index 9b59076d05346..393c951d5aee5 100644 --- a/llama.h +++ b/llama.h @@ -818,7 +818,7 @@ extern "C" { LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token); // Identify if Token Id is a control token or a render-able token - LLAMA_API bool llama_token_is_control_token(const struct llama_model * model, llama_token token); + LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token); // Special tokens LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence From e75c5ca4512cef4bdd7470e4e756bf3d0af60ff3 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 25 May 2024 17:04:31 +1000 Subject: [PATCH 13/13] main: remove special token file descriptor feature (#5) --- examples/main/main.cpp | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index f11abe0072693..f034316fd681b 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -18,8 +18,6 @@ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include #include -#include -#define SPECIAL_FILENO 3 #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -120,16 +118,6 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v } int main(int argc, char ** argv) { -#ifndef _MSC_VER - // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. 
bash `3>/dev/null` ) - // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files - const bool control_token_file_descriptor_is_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1; - if (!control_token_file_descriptor_is_attached) { - // Duplicate stdout file descriptor to control token file descriptor to merge the two streams - dup2(STDOUT_FILENO, SPECIAL_FILENO); - } -#endif - gpt_params params; g_params = &params; @@ -138,8 +126,6 @@ int main(int argc, char ** argv) { } llama_sampling_params & sparams = params.sparams; - const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); - #ifndef LOG_DISABLE_LOGS log_set_target(log_filename_generator("main", "log")); LOG_TEE("Log start\n"); @@ -760,19 +746,11 @@ int main(int argc, char ** argv) { if (!llama_token_is_control(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); - } else if (!params.no_special) { -#ifndef _MSC_VER - if (control_token_file_descriptor_is_attached) { - // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - (void)! write(SPECIAL_FILENO, token_str.c_str(), token_str.length()); - } else -#endif - if (control_token_allowed_on_standard_stream) - { - // Stream Control Token To Standard Output Stream - fprintf(stdout, "%s", token_str.c_str()); - } + } else if (!params.no_special && !params.conversation) { + // Stream Control Token To Standard Output Stream + fprintf(stdout, "%s", token_str.c_str()); } + // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check if (embd.size() > 1) { @@ -783,6 +761,7 @@ int main(int argc, char ** argv) { output_tokens.push_back(id); output_ss << token_str; } + fflush(stdout); } }