From 3221ab01ad34393b8ccd1a5f7de6068874fb0bf4 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 09:59:05 +0100 Subject: [PATCH 01/52] common: introduce llama_load_model_from_url to download model from hf url using libopenssl only --- common/CMakeLists.txt | 10 +++ common/common.cpp | 152 ++++++++++++++++++++++++++++++++++++- common/common.h | 10 +++ examples/server/server.cpp | 8 ++ 4 files changed, 179 insertions(+), 1 deletion(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 350bbdf7f7b1b..d275ef5a65a57 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -47,6 +47,16 @@ if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() +# Check for OpenSSL +find_package(OpenSSL QUIET) +if (OPENSSL_FOUND) + add_definitions(-DHAVE_OPENSSL) + include_directories(${OPENSSL_INCLUDE_DIR}) + link_libraries(${OPENSSL_LIBRARIES}) +else() + message(WARNING "OpenSSL not found. Building without model download support.") +endif () + set(TARGET common) diff --git a/common/common.cpp b/common/common.cpp index 4912237e0d0f1..baa2ad2f9d62f 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1376,10 +1376,160 @@ void llama_batch_add( batch.n_tokens++; } +struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, + struct llama_model_params params) { +#ifdef HAVE_OPENSSL + // Initialize OpenSSL + SSL_library_init(); + SSL_load_error_strings(); + OpenSSL_add_all_algorithms(); + + // Parse the URL to extract host, path, user, and password + char host[256]; + char path[256]; + char userpass[256]; + + if (sscanf(model_url, "https://%255[^/]/%255s", host, path) != 2) { + fprintf(stderr, "%s: invalid URL format: %s\n", __func__, model_url); + return nullptr; + } + + if (strstr(host, "@")) { + sscanf(host, "%[^@]@%s", userpass, host); + } + + // Create an SSL context + auto ctx = SSL_CTX_new(TLS_client_method()); + if (!ctx) { + fprintf(stderr, "%s: error creating SSL context\n", __func__); + return nullptr; + } + + // Set up certificate verification + SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, nullptr); + + // Load trusted CA certificates based on platform + const char* ca_cert_path = nullptr; +#ifdef _WIN32 + ca_cert_path = "C:\\path\\to\\ca-certificates.crt"; // Windows path (FIXME) +#elif __APPLE__ + ca_cert_path = "/etc/ssl/cert.pem"; // macOS path +#else + ca_cert_path = "/etc/ssl/certs/ca-certificates.crt"; // Linux path +#endif + + if (!SSL_CTX_load_verify_locations(ctx, ca_cert_path, nullptr)) { + fprintf(stderr, "%s: error loading CA certificates\n", __func__); + SSL_CTX_free(ctx); + return nullptr; + } + + // Create an SSL connection + auto bio = BIO_new_ssl_connect(ctx); + if (!bio) { + fprintf(stderr, "%s: error creating SSL connection\n", __func__); + SSL_CTX_free(ctx); + return nullptr; + } + + // Set the hostname + if (!BIO_set_conn_hostname(bio, host)) { + fprintf(stderr, "%s: unable to set connection hostname %s\n", __func__, host); + BIO_free_all(bio); + SSL_CTX_free(ctx); + return nullptr; + } + + // Construct the HTTP request + char request[1024]; + snprintf(request, sizeof(request), "GET /%s HTTP/1.1\r\nHost: %s\r\nAccept: */*\r\nUser-Agent: llama-client\r\nConnection: close\r\n", path, host); + + // Add Authorization header if user credentials are available + if (strlen(userpass) > 0) { + char auth_header[256]; + snprintf(auth_header, sizeof(auth_header), "Authorization: Basic %s\r\n", userpass); + strcat(request, auth_header); + } + + // End of headers + strcat(request, "\r\n"); + + // Send the request + fprintf(stdout, "%s: downloading model from https://%s/%s to %s ...\n", __func__, host, path, path_model); + if (!BIO_puts(bio, request)) { + fprintf(stderr, "%s: error sending HTTP request https://%s/%s\n", __func__, host, path); + BIO_free_all(bio); + SSL_CTX_free(ctx); + return nullptr; + } + + // Read the response status line + char status_line[256]; + if (BIO_gets(bio, status_line, sizeof(status_line)) <= 0) { + fprintf(stderr, "%s: error reading response status line\n", __func__); + BIO_free_all(bio); + SSL_CTX_free(ctx); + return nullptr; + } + + // Verify HTTP status code + if (strncmp(status_line, "HTTP/1.1 200", 12) != 0) { + fprintf(stderr, "%s: HTTP request failed: %s\n", __func__, status_line); + BIO_free_all(bio); + SSL_CTX_free(ctx); + return nullptr; + } + + // Skip response headers + char buffer[4096]; + int n_bytes_received; + while ((n_bytes_received = BIO_read(bio, buffer, sizeof(buffer))) > 0) { + // Look for the end of headers (empty line) + if (strstr(buffer, "\r\n\r\n")) { + break; + } + } + + // Read and save the file content + FILE* outfile = fopen(path_model, "wb"); + if (!outfile) { + fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); + BIO_free_all(bio); + SSL_CTX_free(ctx); + return nullptr; + } + + int n_bytes_received_total = 0; + while ((n_bytes_received = BIO_read(bio, buffer, sizeof(buffer))) > 0) { + fwrite(buffer, 1, n_bytes_received, outfile); + n_bytes_received_total += n_bytes_received; + if (n_bytes_received_total % (1024 * 1024) == 0) { + fprintf(stdout, "%s: model downloading %dGi %s ...\n", __func__, n_bytes_received_total / 1024 / 1024, path_model); + } + } + fclose(outfile); + + // Clean up + BIO_free_all(bio); + SSL_CTX_free(ctx); + fprintf(stdout, "%s: model downloaded from https://%s/%s to %s.\n", __func__, host, path, path_model); + + return llama_load_model_from_file(path_model, params); +#else + LLAMA_LOG_ERROR("llama.cpp built without SSL support, downloading from url not supported.\n", __func__); + return nullptr; +#endif +} + std::tuple llama_init_from_gpt_params(gpt_params & params) { auto mparams = llama_model_params_from_gpt_params(params); - llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams); + llama_model * model = nullptr; + if (!params.model_url.empty()) { + model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams); + } else { + model = llama_load_model_from_file(params.model.c_str(), mparams); + } if (model == NULL) { fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); return std::make_tuple(nullptr, nullptr); diff --git a/common/common.h b/common/common.h index 687f3425e8544..b9b59211254f2 100644 --- a/common/common.h +++ b/common/common.h @@ -17,6 +17,12 @@ #include #include +#ifdef HAVE_OPENSSL +#include +#include +#include +#endif + #ifdef _WIN32 #define DIRECTORY_SEPARATOR '\\' #else @@ -89,6 +95,7 @@ struct gpt_params { struct llama_sampling_params sparams; std::string model = "models/7B/ggml-model-f16.gguf"; // model path + std::string model_url = ""; // model path std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias std::string prompt = ""; @@ -191,6 +198,9 @@ std::tuple llama_init_from_gpt_par struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params); struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params); +struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, + struct llama_model_params params); + // Batch utils void llama_batch_clear(struct llama_batch & batch); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 895d608fdcc06..5e1020009cbf1 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2195,6 +2195,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co } printf(" -m FNAME, --model FNAME\n"); printf(" model path (default: %s)\n", params.model.c_str()); + printf(" -u MODEL_URL, --url MODEL_URL\n"); + printf(" model url (default: %s)\n", params.model_url.c_str()); printf(" -a ALIAS, --alias ALIAS\n"); printf(" set an alias for the model, will be added as `model` field in completion response\n"); printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n"); @@ -2317,6 +2319,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams, break; } params.model = argv[i]; + } else if (arg == "-u" || arg == "--model-url") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model_url = argv[i]; } else if (arg == "-a" || arg == "--alias") { if (++i >= argc) { invalid_param = true; From a0ebdfcc5d27d0438fe1555b35596d847a47691f Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 11:32:29 +0100 Subject: [PATCH 02/52] common: llama_load_model_from_url witch to libcurl dependency --- common/CMakeLists.txt | 14 +-- common/common.cpp | 173 +++++++++++-------------------------- examples/main/README.md | 1 + examples/server/README.md | 1 + examples/server/server.cpp | 6 +- 5 files changed, 64 insertions(+), 131 deletions(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index d275ef5a65a57..79c3abdfede8e 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -47,14 +47,14 @@ if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() -# Check for OpenSSL -find_package(OpenSSL QUIET) -if (OPENSSL_FOUND) - add_definitions(-DHAVE_OPENSSL) - include_directories(${OPENSSL_INCLUDE_DIR}) - link_libraries(${OPENSSL_LIBRARIES}) +# Check for curl +find_package(CURL QUIET) +if (CURL_FOUND) + add_definitions(-DHAVE_CURL) + include_directories(${CURL_INCLUDE_DIRS}) + link_libraries(${CURL_LIBRARIES}) else() - message(WARNING "OpenSSL not found. Building without model download support.") + message(INFO "libcurl not found. Building without model download support.") endif () diff --git a/common/common.cpp b/common/common.cpp index baa2ad2f9d62f..4f955df30a116 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -16,6 +16,9 @@ #include #include #include +#ifdef HAVE_CURL +#include +#endif #if defined(__APPLE__) && defined(__MACH__) #include @@ -531,6 +534,12 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } params.model = argv[i]; + } else if (arg == "-mu" || arg == "--model-url") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model_url = argv[i]; } else if (arg == "-md" || arg == "--model-draft") { if (++i >= argc) { invalid_param = true; @@ -1131,6 +1140,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" layer range to apply the control vector(s) to, start and end inclusive\n"); printf(" -m FNAME, --model FNAME\n"); printf(" model path (default: %s)\n", params.model.c_str()); + printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); + printf(" model download url (default: %s)\n", params.model_url.c_str()); printf(" -md FNAME, --model-draft FNAME\n"); printf(" draft model for speculative decoding\n"); printf(" -ld LOGDIR, --logdir LOGDIR\n"); @@ -1376,150 +1387,70 @@ void llama_batch_add( batch.n_tokens++; } +#ifdef HAVE_CURL struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, struct llama_model_params params) { -#ifdef HAVE_OPENSSL - // Initialize OpenSSL - SSL_library_init(); - SSL_load_error_strings(); - OpenSSL_add_all_algorithms(); - - // Parse the URL to extract host, path, user, and password - char host[256]; - char path[256]; - char userpass[256]; - - if (sscanf(model_url, "https://%255[^/]/%255s", host, path) != 2) { - fprintf(stderr, "%s: invalid URL format: %s\n", __func__, model_url); - return nullptr; - } - - if (strstr(host, "@")) { - sscanf(host, "%[^@]@%s", userpass, host); - } - - // Create an SSL context - auto ctx = SSL_CTX_new(TLS_client_method()); - if (!ctx) { - fprintf(stderr, "%s: error creating SSL context\n", __func__); - return nullptr; - } - - // Set up certificate verification - SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, nullptr); - - // Load trusted CA certificates based on platform - const char* ca_cert_path = nullptr; -#ifdef _WIN32 - ca_cert_path = "C:\\path\\to\\ca-certificates.crt"; // Windows path (FIXME) -#elif __APPLE__ - ca_cert_path = "/etc/ssl/cert.pem"; // macOS path -#else - ca_cert_path = "/etc/ssl/certs/ca-certificates.crt"; // Linux path -#endif + // Initialize libcurl + curl_global_init(CURL_GLOBAL_DEFAULT); + auto curl = curl_easy_init(); - if (!SSL_CTX_load_verify_locations(ctx, ca_cert_path, nullptr)) { - fprintf(stderr, "%s: error loading CA certificates\n", __func__); - SSL_CTX_free(ctx); - return nullptr; - } - - // Create an SSL connection - auto bio = BIO_new_ssl_connect(ctx); - if (!bio) { - fprintf(stderr, "%s: error creating SSL connection\n", __func__); - SSL_CTX_free(ctx); - return nullptr; - } - // Set the hostname - if (!BIO_set_conn_hostname(bio, host)) { - fprintf(stderr, "%s: unable to set connection hostname %s\n", __func__, host); - BIO_free_all(bio); - SSL_CTX_free(ctx); + if (!curl) { + curl_global_cleanup(); + fprintf(stderr, "%s: error initializing lib curl\n", __func__); return nullptr; } - // Construct the HTTP request - char request[1024]; - snprintf(request, sizeof(request), "GET /%s HTTP/1.1\r\nHost: %s\r\nAccept: */*\r\nUser-Agent: llama-client\r\nConnection: close\r\n", path, host); + // Set the URL + curl_easy_setopt(curl, CURLOPT_URL, model_url); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); - // Add Authorization header if user credentials are available - if (strlen(userpass) > 0) { - char auth_header[256]; - snprintf(auth_header, sizeof(auth_header), "Authorization: Basic %s\r\n", userpass); - strcat(request, auth_header); - } - - // End of headers - strcat(request, "\r\n"); - - // Send the request - fprintf(stdout, "%s: downloading model from https://%s/%s to %s ...\n", __func__, host, path, path_model); - if (!BIO_puts(bio, request)) { - fprintf(stderr, "%s: error sending HTTP request https://%s/%s\n", __func__, host, path); - BIO_free_all(bio); - SSL_CTX_free(ctx); + // Set the output file + auto outfile = fopen(path_model, "wb"); + if (!outfile) { + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); return nullptr; } + curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile); - // Read the response status line - char status_line[256]; - if (BIO_gets(bio, status_line, sizeof(status_line)) <= 0) { - fprintf(stderr, "%s: error reading response status line\n", __func__); - BIO_free_all(bio); - SSL_CTX_free(ctx); + // start the download + fprintf(stdout, "%s: downloading model from %s to %s ...\n", __func__, model_url, path_model); + auto res = curl_easy_perform(curl); + if (res != CURLE_OK) { + fclose(outfile); + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); return nullptr; } - // Verify HTTP status code - if (strncmp(status_line, "HTTP/1.1 200", 12) != 0) { - fprintf(stderr, "%s: HTTP request failed: %s\n", __func__, status_line); - BIO_free_all(bio); - SSL_CTX_free(ctx); + int http_code = 0; + curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + fclose(outfile); + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: invalid http status code failed: %d\n", __func__, http_code); return nullptr; } - // Skip response headers - char buffer[4096]; - int n_bytes_received; - while ((n_bytes_received = BIO_read(bio, buffer, sizeof(buffer))) > 0) { - // Look for the end of headers (empty line) - if (strstr(buffer, "\r\n\r\n")) { - break; - } - } - - // Read and save the file content - FILE* outfile = fopen(path_model, "wb"); - if (!outfile) { - fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); - BIO_free_all(bio); - SSL_CTX_free(ctx); - return nullptr; - } - - int n_bytes_received_total = 0; - while ((n_bytes_received = BIO_read(bio, buffer, sizeof(buffer))) > 0) { - fwrite(buffer, 1, n_bytes_received, outfile); - n_bytes_received_total += n_bytes_received; - if (n_bytes_received_total % (1024 * 1024) == 0) { - fprintf(stdout, "%s: model downloading %dGi %s ...\n", __func__, n_bytes_received_total / 1024 / 1024, path_model); - } - } - fclose(outfile); - // Clean up - BIO_free_all(bio); - SSL_CTX_free(ctx); - fprintf(stdout, "%s: model downloaded from https://%s/%s to %s.\n", __func__, host, path, path_model); + fclose(outfile); + curl_easy_cleanup(curl); + curl_global_cleanup(); return llama_load_model_from_file(path_model, params); +} #else - LLAMA_LOG_ERROR("llama.cpp built without SSL support, downloading from url not supported.\n", __func__); +struct llama_model * llama_load_model_from_url(const char *, const char *, + struct llama_model_params) { + fprintf(stderr, "%s: llama.cpp built without SSL support, downloading from url not supported.\n", __func__); return nullptr; -#endif } +#endif std::tuple llama_init_from_gpt_params(gpt_params & params) { auto mparams = llama_model_params_from_gpt_params(params); diff --git a/examples/main/README.md b/examples/main/README.md index 7f84e42623274..daaa807d55952 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -67,6 +67,7 @@ main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt In this section, we cover the most commonly used options for running the `main` program with the LLaMA models: - `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). +- `-mu MODEL_URL --model MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). - `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. - `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models. - `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text. diff --git a/examples/server/README.md b/examples/server/README.md index 8f8454affaecd..df1ccce9bebe0 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -20,6 +20,7 @@ The project is under active development, and we are [looking for feedback and co - `-tb N, --threads-batch N`: Set the number of threads to use during batch and prompt processing. If not specified, the number of threads will be set to the number of threads used for generation. - `--threads-http N`: number of threads in the http server pool to process requests (default: `max(std::thread::hardware_concurrency() - 1, --parallel N + 2)`) - `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`). +- `-mu MODEL_URL --model MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). - `-a ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses. - `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were build with a context of 4096. - `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance. diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 5e1020009cbf1..d2a8e541d3305 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2195,8 +2195,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co } printf(" -m FNAME, --model FNAME\n"); printf(" model path (default: %s)\n", params.model.c_str()); - printf(" -u MODEL_URL, --url MODEL_URL\n"); - printf(" model url (default: %s)\n", params.model_url.c_str()); + printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); + printf(" model download url (default: %s)\n", params.model_url.c_str()); printf(" -a ALIAS, --alias ALIAS\n"); printf(" set an alias for the model, will be added as `model` field in completion response\n"); printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n"); @@ -2319,7 +2319,7 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams, break; } params.model = argv[i]; - } else if (arg == "-u" || arg == "--model-url") { + } else if (arg == "-mu" || arg == "--model-url") { if (++i >= argc) { invalid_param = true; break; From 42b25dacab6ddb90fc91ef6d479f3926692e30ae Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 13:27:05 +0100 Subject: [PATCH 03/52] common: PR feedback, rename the definition to LLAMA_USE_CURL --- common/CMakeLists.txt | 2 +- common/common.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 79c3abdfede8e..9e85c2337f815 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -50,7 +50,7 @@ endif() # Check for curl find_package(CURL QUIET) if (CURL_FOUND) - add_definitions(-DHAVE_CURL) + add_definitions(-DLLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) link_libraries(${CURL_LIBRARIES}) else() diff --git a/common/common.cpp b/common/common.cpp index 4f955df30a116..1f57493dfda35 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -16,7 +16,7 @@ #include #include #include -#ifdef HAVE_CURL +#ifdef LLAMA_USE_CURL #include #endif @@ -1387,7 +1387,7 @@ void llama_batch_add( batch.n_tokens++; } -#ifdef HAVE_CURL +#ifdef LLAMA_USE_CURL struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, struct llama_model_params params) { // Initialize libcurl From 7e782856bd1416877960c2164a0f35908438c8f9 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 13:45:09 +0100 Subject: [PATCH 04/52] common: LLAMA_USE_CURL in make toolchain --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index c0f1250366a64..2ef626737745b 100644 --- a/Makefile +++ b/Makefile @@ -595,6 +595,11 @@ include scripts/get-flags.mk CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic endif +ifdef LLAMA_USE_CURL +override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL +override LDFLAGS := $(LDFLAGS) -lcurl +endif + # # Print build information # From df0d82289c14dc3d03e54c45132723a6dbcbc548 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 13:52:17 +0100 Subject: [PATCH 05/52] ci: compile the server with curl, add make option curl example in default cmake --- .github/workflows/build.yml | 1 + .github/workflows/server.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0da01d5ba6ead..386ab88f29c2d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,6 +39,7 @@ jobs: id: make_build env: LLAMA_FATAL_WARNINGS: 1 + LLAMA_USE_CURL: 1 run: | CC=gcc-8 make -j $(nproc) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 5e38b3547c659..51340662a277f 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -57,7 +57,8 @@ jobs: cmake \ python3-pip \ wget \ - language-pack-en + language-pack-en \ + libcurl4-openssl-dev - name: Build id: cmake_build From 80bec9890a57bc53d28c22669dbe9a6eed8ae1b9 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 14:08:21 +0100 Subject: [PATCH 06/52] llama_load_model_from_url: try to make the windows build passing --- common/common.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 1f57493dfda35..fc315e2fb4dc5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1394,7 +1394,6 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha curl_global_init(CURL_GLOBAL_DEFAULT); auto curl = curl_easy_init(); - if (!curl) { curl_global_cleanup(); fprintf(stderr, "%s: error initializing lib curl\n", __func__); @@ -1445,11 +1444,13 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha return llama_load_model_from_file(path_model, params); } #else -struct llama_model * llama_load_model_from_url(const char *, const char *, - struct llama_model_params) { - fprintf(stderr, "%s: llama.cpp built without SSL support, downloading from url not supported.\n", __func__); + +struct llama_model *llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/, + struct llama_model_params /*params*/) { + fprintf(stderr, "%s: llama.cpp built without curl support, downloading from an url not supported.\n", __func__); return nullptr; } + #endif std::tuple llama_init_from_gpt_params(gpt_params & params) { From 2c3a00e270bdcdde49cda0414eb4e4b848c96454 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 15:40:29 +0100 Subject: [PATCH 07/52] Update Makefile Co-authored-by: Georgi Gerganov --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2ef626737745b..838daf5c02acd 100644 --- a/Makefile +++ b/Makefile @@ -595,7 +595,7 @@ include scripts/get-flags.mk CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic endif -ifdef LLAMA_USE_CURL +ifdef LLAMA_CURL override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL override LDFLAGS := $(LDFLAGS) -lcurl endif From 4135d4a50564c9913b911c4d87458b50b09e4e6f Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 14:26:17 +0100 Subject: [PATCH 08/52] llama_load_model_from_url: typo --- common/common.cpp | 4 ++-- common/common.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index fc315e2fb4dc5..45187a7c65f91 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1390,7 +1390,7 @@ void llama_batch_add( #ifdef LLAMA_USE_CURL struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, struct llama_model_params params) { - // Initialize libcurl + // Initialize libcurl globally curl_global_init(CURL_GLOBAL_DEFAULT); auto curl = curl_easy_init(); @@ -1400,7 +1400,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha return nullptr; } - // Set the URL + // Set the URL, allow to follow http redirection and display download progress curl_easy_setopt(curl, CURLOPT_URL, model_url); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); diff --git a/common/common.h b/common/common.h index b9b59211254f2..3e5dd661c95aa 100644 --- a/common/common.h +++ b/common/common.h @@ -95,7 +95,7 @@ struct gpt_params { struct llama_sampling_params sparams; std::string model = "models/7B/ggml-model-f16.gguf"; // model path - std::string model_url = ""; // model path + std::string model_url = ""; // model url to download std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias std::string prompt = ""; From 5d99f3224f19c98c568a5bcd3023dfcb33a9f046 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 16:27:06 +0100 Subject: [PATCH 09/52] llama_load_model_from_url: download the file only if modified based on etag and last-modified http headers --- common/common.cpp | 152 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 124 insertions(+), 28 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 45187a7c65f91..8b256e7fb09f9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1388,61 +1388,157 @@ void llama_batch_add( } #ifdef LLAMA_USE_CURL + struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, - struct llama_model_params params) { + struct llama_model_params params) { // Initialize libcurl globally curl_global_init(CURL_GLOBAL_DEFAULT); - auto curl = curl_easy_init(); + CURL *curl = curl_easy_init(); if (!curl) { curl_global_cleanup(); fprintf(stderr, "%s: error initializing lib curl\n", __func__); - return nullptr; + return NULL; } - // Set the URL, allow to follow http redirection and display download progress + // Set the URL, allow to follow http redirection curl_easy_setopt(curl, CURLOPT_URL, model_url); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); + + // Check if the file already exists locally + struct stat buffer; + int file_exists = (stat(path_model, &buffer) == 0); + + // If the file exists, check for an ETag file or a lastModified file + char etag[256] = {0}; + char etag_path[256] = {0}; + strcpy(etag_path, path_model); + strcat(etag_path, ".etag"); + + char last_modified[256] = {0}; + char last_modified_path[256] = {0}; + strcpy(last_modified_path, path_model); + strcat(last_modified_path, ".lastModified"); + + if (file_exists) { + FILE *f_etag = fopen(etag_path, "r"); + if (f_etag) { + fgets(etag, sizeof(etag), f_etag); + fclose(f_etag); + fprintf(stderr, "%s: previous model .etag file found %s: %s\n", __func__, path_model, etag); + } - // Set the output file - auto outfile = fopen(path_model, "wb"); - if (!outfile) { - curl_easy_cleanup(curl); - curl_global_cleanup(); - fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); - return nullptr; + FILE *f_last_modified = fopen(last_modified_path, "r"); + if (f_last_modified) { + fgets(last_modified, sizeof(last_modified), f_last_modified); + fclose(f_etag); + fprintf(stderr, "%s: previous model .lastModified file found %s: %s\n", __func__, last_modified_path, last_modified); + } } - curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile); - // start the download - fprintf(stdout, "%s: downloading model from %s to %s ...\n", __func__, model_url, path_model); - auto res = curl_easy_perform(curl); + // Send a HEAD request to retrieve the ETag and Last-Modified headers + struct llama_load_model_from_url_headers { + char etag[256] = {0}; + char last_modified[256] = {0}; + }; + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); + auto header_callback = [](char *buffer, size_t /*size*/, size_t n_items, void *userdata) -> size_t { + llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers*) userdata; + + const char *etag_prefix = "etag: "; + if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { + strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix)- 2); // Remove LRLF + } + + const char *last_modified_prefix = "last-modified: "; + if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) { + strncpy(headers->last_modified, buffer + strlen(last_modified_prefix), n_items - strlen(last_modified_prefix) - 2); // Remove LRLF + } + return n_items; + }; + + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); + llama_load_model_from_url_headers headers; + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast(header_callback)); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers); + + CURLcode res = curl_easy_perform(curl); if (res != CURLE_OK) { - fclose(outfile); curl_easy_cleanup(curl); curl_global_cleanup(); fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); - return nullptr; - } + return NULL; + } + + // If only the ETag or the Last-Modified header are different, trigger a new download + if (strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { + // Set the output file + FILE *outfile = fopen(path_model, "wb"); + if (!outfile) { + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); + return NULL; + } + curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile); + + // display download progress + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); + + // start the download + fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, + model_url, path_model, headers.etag, headers.last_modified); + res = curl_easy_perform(curl); + if (res != CURLE_OK) { + fclose(outfile); + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); + return NULL; + } - int http_code = 0; - curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { + long http_code = 0; + curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + fclose(outfile); + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: invalid http status code failed: %ld\n", __func__, http_code); + return NULL; + } + + // Clean up fclose(outfile); - curl_easy_cleanup(curl); - curl_global_cleanup(); - fprintf(stderr, "%s: invalid http status code failed: %d\n", __func__, http_code); - return nullptr; + + // Write the new ETag to the .etag file + if (strlen( headers.etag) > 0) { + FILE *etag_file = fopen(etag_path, "w"); + if (etag_file) { + fputs( headers.etag, etag_file); + fclose(etag_file); + fprintf(stderr, "%s: model etag saved %s:%s\n", __func__, etag_path, etag); + } + } + + // Write the new lastModified to the .etag file + if (strlen( headers.last_modified) > 0) { + FILE *last_modified_file = fopen(last_modified_path, "w"); + if (last_modified_file) { + fputs(headers.last_modified, last_modified_file); + fclose(last_modified_file); + fprintf(stderr, "%s: model last modified saved %s:%s\n", __func__, last_modified_path, headers.last_modified); + } + } } - // Clean up - fclose(outfile); curl_easy_cleanup(curl); curl_global_cleanup(); return llama_load_model_from_file(path_model, params); } + #else struct llama_model *llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/, From 921e4af9302919a81513e8c1ba43004f81aa3c98 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 16:29:02 +0100 Subject: [PATCH 10/52] ci: build, fix the default build to use LLAMA_CURL --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 386ab88f29c2d..0977aa8ba93d4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: id: make_build env: LLAMA_FATAL_WARNINGS: 1 - LLAMA_USE_CURL: 1 + LLAMA_CURL: 1 run: | CC=gcc-8 make -j $(nproc) From 6633689fa5cd972bfa3de3c06477996fb554f79b Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 16:49:44 +0100 Subject: [PATCH 11/52] llama_load_model_from_url: cleanup code --- common/common.cpp | 130 +++++++++++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 53 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 8b256e7fb09f9..89b5ee50113e2 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -53,6 +53,19 @@ #define GGML_USE_CUBLAS_SYCL_VULKAN #endif +#ifdef LLAMA_USE_CURL +#ifdef __linux__ +#include +#elif defined(_WIN32) +#include +#define PATH_MAX MAX_PATH +#else +#include +#endif +#define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX +#define LLAMA_CURL_MAX_HEADER_LENGTH 256 +#endif // LLAMA_USE_CURL + int32_t get_num_physical_cores() { #ifdef __linux__ // enumerate the set of thread siblings, num entries is num cores @@ -1389,11 +1402,17 @@ void llama_batch_add( #ifdef LLAMA_USE_CURL -struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, - struct llama_model_params params) { +struct llama_model *llama_load_model_from_url(const char *model_url, const char *path_model, + struct llama_model_params params) { + // Basic validation of the model_url + if (!model_url || strlen(model_url) == 0) { + fprintf(stderr, "%s: invalid model_url\n", __func__); + return NULL; + } + // Initialize libcurl globally curl_global_init(CURL_GLOBAL_DEFAULT); - CURL *curl = curl_easy_init(); + auto curl = curl_easy_init(); if (!curl) { curl_global_cleanup(); @@ -1408,73 +1427,77 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha // Check if the file already exists locally struct stat buffer; - int file_exists = (stat(path_model, &buffer) == 0); + auto file_exists = (stat(path_model, &buffer) == 0); - // If the file exists, check for an ETag file or a lastModified file - char etag[256] = {0}; - char etag_path[256] = {0}; - strcpy(etag_path, path_model); - strcat(etag_path, ".etag"); + // If the file exists, check for ${model_path}.etag or ${model_path}.lastModified files + char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; + char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; + strncpy(etag_path, path_model, LLAMA_CURL_MAX_PATH_LENGTH - 6); // 6 is the length of ".etag\0" + strncat(etag_path, ".etag", 6); - char last_modified[256] = {0}; - char last_modified_path[256] = {0}; - strcpy(last_modified_path, path_model); - strcat(last_modified_path, ".lastModified"); + char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; + char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; + strncpy(last_modified_path, path_model, LLAMA_CURL_MAX_PATH_LENGTH - 15); // 15 is the length of ".lastModified\0" + strncat(last_modified_path, ".lastModified", 15); if (file_exists) { - FILE *f_etag = fopen(etag_path, "r"); + auto *f_etag = fopen(etag_path, "r"); if (f_etag) { fgets(etag, sizeof(etag), f_etag); fclose(f_etag); fprintf(stderr, "%s: previous model .etag file found %s: %s\n", __func__, path_model, etag); } - FILE *f_last_modified = fopen(last_modified_path, "r"); + auto *f_last_modified = fopen(last_modified_path, "r"); if (f_last_modified) { fgets(last_modified, sizeof(last_modified), f_last_modified); - fclose(f_etag); - fprintf(stderr, "%s: previous model .lastModified file found %s: %s\n", __func__, last_modified_path, last_modified); + fclose(f_last_modified); + fprintf(stderr, "%s: previous model .lastModified file found %s: %s\n", __func__, last_modified_path, + last_modified); } } - // Send a HEAD request to retrieve the ETag and Last-Modified headers + // Send a HEAD request to retrieve the etag and last-modified headers struct llama_load_model_from_url_headers { - char etag[256] = {0}; - char last_modified[256] = {0}; + char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; + char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; }; - typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); - auto header_callback = [](char *buffer, size_t /*size*/, size_t n_items, void *userdata) -> size_t { - llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers*) userdata; + llama_load_model_from_url_headers headers; + { + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); + auto header_callback = [](char *buffer, size_t /*size*/, size_t n_items, void *userdata) -> size_t { + llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata; - const char *etag_prefix = "etag: "; - if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { - strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix)- 2); // Remove LRLF - } + const char *etag_prefix = "etag: "; + if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { + strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove LRLF + } - const char *last_modified_prefix = "last-modified: "; - if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) { - strncpy(headers->last_modified, buffer + strlen(last_modified_prefix), n_items - strlen(last_modified_prefix) - 2); // Remove LRLF - } - return n_items; - }; + const char *last_modified_prefix = "last-modified: "; + if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) { + strncpy(headers->last_modified, buffer + strlen(last_modified_prefix), + n_items - strlen(last_modified_prefix) - 2); // Remove LRLF + } + return n_items; + }; - curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); - llama_load_model_from_url_headers headers; - curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast(header_callback)); - curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers); + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast(header_callback)); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { - curl_easy_cleanup(curl); - curl_global_cleanup(); - fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); - return NULL; + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + curl_easy_cleanup(curl); + curl_global_cleanup(); + fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); + return NULL; + } } // If only the ETag or the Last-Modified header are different, trigger a new download if (strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { // Set the output file - FILE *outfile = fopen(path_model, "wb"); + auto *outfile = fopen(path_model, "wb"); if (!outfile) { curl_easy_cleanup(curl); curl_global_cleanup(); @@ -1490,7 +1513,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha // start the download fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, model_url, path_model, headers.etag, headers.last_modified); - res = curl_easy_perform(curl); + auto res = curl_easy_perform(curl); if (res != CURLE_OK) { fclose(outfile); curl_easy_cleanup(curl); @@ -1513,22 +1536,23 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha fclose(outfile); // Write the new ETag to the .etag file - if (strlen( headers.etag) > 0) { - FILE *etag_file = fopen(etag_path, "w"); + if (strlen(headers.etag) > 0) { + auto *etag_file = fopen(etag_path, "w"); if (etag_file) { - fputs( headers.etag, etag_file); + fputs(headers.etag, etag_file); fclose(etag_file); - fprintf(stderr, "%s: model etag saved %s:%s\n", __func__, etag_path, etag); + fprintf(stderr, "%s: model etag saved %s:%s\n", __func__, etag_path, headers.etag); } } // Write the new lastModified to the .etag file - if (strlen( headers.last_modified) > 0) { - FILE *last_modified_file = fopen(last_modified_path, "w"); + if (strlen(headers.last_modified) > 0) { + auto *last_modified_file = fopen(last_modified_path, "w"); if (last_modified_file) { fputs(headers.last_modified, last_modified_file); fclose(last_modified_file); - fprintf(stderr, "%s: model last modified saved %s:%s\n", __func__, last_modified_path, headers.last_modified); + fprintf(stderr, "%s: model last modified saved %s:%s\n", __func__, last_modified_path, + headers.last_modified); } } } @@ -1547,7 +1571,7 @@ struct llama_model *llama_load_model_from_url(const char * /*model_url*/, const return nullptr; } -#endif +#endif // LLAMA_USE_CURL std::tuple llama_init_from_gpt_params(gpt_params & params) { auto mparams = llama_model_params_from_gpt_params(params); From e84206d13203111e642e3bdc94ca34921078c176 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:46:18 +0100 Subject: [PATCH 12/52] Update examples/server/README.md Co-authored-by: Georgi Gerganov --- examples/server/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/README.md b/examples/server/README.md index df1ccce9bebe0..755e1d5384f55 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -20,7 +20,7 @@ The project is under active development, and we are [looking for feedback and co - `-tb N, --threads-batch N`: Set the number of threads to use during batch and prompt processing. If not specified, the number of threads will be set to the number of threads used for generation. - `--threads-http N`: number of threads in the http server pool to process requests (default: `max(std::thread::hardware_concurrency() - 1, --parallel N + 2)`) - `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`). -- `-mu MODEL_URL --model MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). +- `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). - `-a ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses. - `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were build with a context of 4096. - `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance. From 4bc47b75caef8fc17e150621b3c3617fd79acc7e Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:46:34 +0100 Subject: [PATCH 13/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index f35296274270b..52b120dc2f725 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1803,7 +1803,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char #else -struct llama_model *llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/, +struct llama_model * llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/, struct llama_model_params /*params*/) { fprintf(stderr, "%s: llama.cpp built without curl support, downloading from an url not supported.\n", __func__); return nullptr; From 8751bd0c82d5768e73af17801e8e382c15ff47d8 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:46:46 +0100 Subject: [PATCH 14/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 52b120dc2f725..3c5fa79eb52ea 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1785,7 +1785,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char // Write the new lastModified to the .etag file if (strlen(headers.last_modified) > 0) { - auto *last_modified_file = fopen(last_modified_path, "w"); + auto * last_modified_file = fopen(last_modified_path, "w"); if (last_modified_file) { fputs(headers.last_modified, last_modified_file); fclose(last_modified_file); From f53bfd56afad0991ed140a7ccbbe5c2060bb06fc Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:46:53 +0100 Subject: [PATCH 15/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 3c5fa79eb52ea..d1654c59a1a07 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1735,7 +1735,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char // If only the ETag or the Last-Modified header are different, trigger a new download if (strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { // Set the output file - auto *outfile = fopen(path_model, "wb"); + auto * outfile = fopen(path_model, "wb"); if (!outfile) { curl_easy_cleanup(curl); curl_global_cleanup(); From b088122719a9409642134bb1d5fe0c865d7099b0 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:47:04 +0100 Subject: [PATCH 16/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index d1654c59a1a07..2720f93101ee3 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1775,7 +1775,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char // Write the new ETag to the .etag file if (strlen(headers.etag) > 0) { - auto *etag_file = fopen(etag_path, "w"); + auto * etag_file = fopen(etag_path, "w"); if (etag_file) { fputs(headers.etag, etag_file); fclose(etag_file); From f22456d8c33f50da15f7c98c74189dfba1855a0b Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:02 +0100 Subject: [PATCH 17/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 2720f93101ee3..b93ad05e3892d 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1711,7 +1711,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove LRLF } - const char *last_modified_prefix = "last-modified: "; + const char * last_modified_prefix = "last-modified: "; if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) { strncpy(headers->last_modified, buffer + strlen(last_modified_prefix), n_items - strlen(last_modified_prefix) - 2); // Remove LRLF From 9565ae31878b17e902cc83a02a903231485f58ac Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:10 +0100 Subject: [PATCH 18/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index b93ad05e3892d..2b968b82b758a 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1706,7 +1706,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char auto header_callback = [](char *buffer, size_t /*size*/, size_t n_items, void *userdata) -> size_t { llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata; - const char *etag_prefix = "etag: "; + const char * etag_prefix = "etag: "; if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove LRLF } From 330e28df084aeafdc77911eb3b2a3e3901a3eda6 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:20 +0100 Subject: [PATCH 19/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 2b968b82b758a..c49f0920f6a3b 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1703,7 +1703,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char llama_load_model_from_url_headers headers; { typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); - auto header_callback = [](char *buffer, size_t /*size*/, size_t n_items, void *userdata) -> size_t { + auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata; const char * etag_prefix = "etag: "; From 89ab37a261cd50b00647c22a389ca938e14a1db9 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:27 +0100 Subject: [PATCH 20/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index c49f0920f6a3b..7630e0fbd160f 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1686,7 +1686,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char fprintf(stderr, "%s: previous model .etag file found %s: %s\n", __func__, path_model, etag); } - auto *f_last_modified = fopen(last_modified_path, "r"); + auto * f_last_modified = fopen(last_modified_path, "r"); if (f_last_modified) { fgets(last_modified, sizeof(last_modified), f_last_modified); fclose(f_last_modified); From be561a7ffd3d4ee86be3d782b45c7a6491d64530 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:32 +0100 Subject: [PATCH 21/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 7630e0fbd160f..f07ab444849cc 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1679,7 +1679,7 @@ struct llama_model *llama_load_model_from_url(const char *model_url, const char strncat(last_modified_path, ".lastModified", 15); if (file_exists) { - auto *f_etag = fopen(etag_path, "r"); + auto * f_etag = fopen(etag_path, "r"); if (f_etag) { fgets(etag, sizeof(etag), f_etag); fclose(f_etag); From eb9e52a21832e2c062548c1ece6b849ce725d504 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:38 +0100 Subject: [PATCH 22/52] Update common/common.cpp Co-authored-by: Georgi Gerganov --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index f07ab444849cc..1bcf76ff6fe88 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1640,7 +1640,7 @@ void llama_batch_add( #ifdef LLAMA_USE_CURL -struct llama_model *llama_load_model_from_url(const char *model_url, const char *path_model, +struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, struct llama_model_params params) { // Basic validation of the model_url if (!model_url || strlen(model_url) == 0) { From b0b49e0bb8de8cc272c10b2503486c4ccd4edb4f Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sat, 16 Mar 2024 17:48:48 +0100 Subject: [PATCH 23/52] Update examples/main/README.md Co-authored-by: Georgi Gerganov --- examples/main/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/main/README.md b/examples/main/README.md index daaa807d55952..6a8d1e1c50cbb 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -67,7 +67,7 @@ main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt In this section, we cover the most commonly used options for running the `main` program with the LLaMA models: - `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). -- `-mu MODEL_URL --model MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). +- `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). - `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. - `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models. - `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text. From 545fef6e0ef24ea9663ae44b08c6a7096e090baa Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 18:01:55 +0100 Subject: [PATCH 24/52] llama_load_model_from_url: fix compilation warning, clearer logging --- common/common.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 1bcf76ff6fe88..269a3afd88ecf 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1681,17 +1681,23 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (file_exists) { auto * f_etag = fopen(etag_path, "r"); if (f_etag) { - fgets(etag, sizeof(etag), f_etag); + if (!fgets(etag, sizeof(etag), f_etag)) { + fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path); + } else { + fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag); + } fclose(f_etag); - fprintf(stderr, "%s: previous model .etag file found %s: %s\n", __func__, path_model, etag); } auto * f_last_modified = fopen(last_modified_path, "r"); if (f_last_modified) { - fgets(last_modified, sizeof(last_modified), f_last_modified); + if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) { + fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path); + } else { + fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path, + last_modified); + } fclose(f_last_modified); - fprintf(stderr, "%s: previous model .lastModified file found %s: %s\n", __func__, last_modified_path, - last_modified); } } From 4fadb072e93ed724c93353eeddd6e207eb245991 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 18:15:20 +0100 Subject: [PATCH 25/52] server: tests: add `--model-url` tests --- examples/server/tests/README.md | 2 +- .../server/tests/features/embeddings.feature | 3 ++- examples/server/tests/features/steps/steps.py | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md index 95a0353b6a9c5..feb2b1d6cf5de 100644 --- a/examples/server/tests/README.md +++ b/examples/server/tests/README.md @@ -57,7 +57,7 @@ Feature or Scenario must be annotated with `@llama.cpp` to be included in the de To run a scenario annotated with `@bug`, start: ```shell -DEBUG=ON ./tests.sh --no-skipped --tags bug +DEBUG=ON ./tests.sh --no-skipped --tags bug --stop ``` After changing logic in `steps.py`, ensure that `@bug` and `@wrong_usage` scenario are updated. diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature index 57359b267a668..fb821f802596d 100644 --- a/examples/server/tests/features/embeddings.feature +++ b/examples/server/tests/features/embeddings.feature @@ -4,7 +4,8 @@ Feature: llama.cpp server Background: Server startup Given a server listening on localhost:8080 - And a model file bert-bge-small/ggml-model-f16.gguf from HF repo ggml-org/models + And a model url https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf + And a model file /tmp/ggml-model-f16.gguf And a model alias bert-bge-small And 42 as server seed And 2 slots diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index a59a52d21748a..19d064dfd0304 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -32,6 +32,8 @@ def step_server_config(context, server_fqdn, server_port): context.base_url = f'http://{context.server_fqdn}:{context.server_port}' context.model_alias = None + context.model_file = None + context.model_url = None context.n_batch = None context.n_ubatch = None context.n_ctx = None @@ -65,6 +67,16 @@ def step_download_hf_model(context, hf_file, hf_repo): print(f"model file: {context.model_file}\n") +@step('a model file {model_file}') +def step_model_file(context, model_file): + context.model_file = model_file + + +@step('a model url {model_url}') +def step_model_url(context, model_url): + context.model_url = model_url + + @step('a model alias {model_alias}') def step_model_alias(context, model_alias): context.model_alias = model_alias @@ -1038,8 +1050,11 @@ def start_server_background(context): server_args = [ '--host', server_listen_addr, '--port', context.server_port, - '--model', context.model_file ] + if context.model_file: + server_args.extend(['--model', context.model_file]) + if context.model_url: + server_args.extend(['--model-url', context.model_url]) if context.n_batch: server_args.extend(['--batch-size', context.n_batch]) if context.n_ubatch: From 124c474bba8d339a3c3e9a555c6c2d46d9ff8b25 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 18:24:21 +0100 Subject: [PATCH 26/52] llama_load_model_from_url: coherent clearer logging --- common/common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 269a3afd88ecf..5775840dd27d5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1785,7 +1785,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (etag_file) { fputs(headers.etag, etag_file); fclose(etag_file); - fprintf(stderr, "%s: model etag saved %s:%s\n", __func__, etag_path, headers.etag); + fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag); } } @@ -1795,7 +1795,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (last_modified_file) { fputs(headers.last_modified, last_modified_file); fclose(last_modified_file); - fprintf(stderr, "%s: model last modified saved %s:%s\n", __func__, last_modified_path, + fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path, headers.last_modified); } } From 064dc076bb7751785bc7f52039091ba23ea1dfdc Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 18:34:36 +0100 Subject: [PATCH 27/52] common: CMakeLists.txt fix typo in logging when lib curl is not found --- common/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 9e85c2337f815..0331788fd88ca 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -54,7 +54,7 @@ if (CURL_FOUND) include_directories(${CURL_INCLUDE_DIRS}) link_libraries(${CURL_LIBRARIES}) else() - message(INFO "libcurl not found. Building without model download support.") + message(INFO " libcurl not found. Building without model download support.") endif () From 838178a1969ed02e8378003a742182d1add218e6 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 18:34:53 +0100 Subject: [PATCH 28/52] ci: tests: windows tests add libcurl --- .github/workflows/server.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 51340662a277f..519e0313fbd8f 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -102,6 +102,25 @@ jobs: with: fetch-depth: 0 + - name: Download libCURL + id: get_libcurl + env: + CURL_VERSION: 8.6.0 + run: | + curl.exe -o $env:RUNNER_TEMP/libcurl.tar.gz -L "https://github.com/curl/curl/releases/download/v${env:CURL_VERSION}/curl-v${env:CURL_VERSION}.tar.gz" + mkdir $env:RUNNER_TEMP/libcurl + tar.exe -xvf $env:RUNNER_TEMP/libcurl.tar.gz --strip-components=1 -C $env:RUNNER_TEMP/libcurl + + - name: Install libcurl + id: install_libcurl + run: | + cd $env:RUNNER_TEMP/libcurl + mkdir build + cd build + cmake .. -DCMAKE_BUILD_TYPE=Release ; + cmake --build . --config Release + make install + - name: Build id: cmake_build run: | From 176f039a91788b3b1e20527573eaf3fb866de73a Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 19:51:44 +0100 Subject: [PATCH 29/52] ci: tests: windows tests add libcurl --- .github/workflows/server.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 519e0313fbd8f..5530e9bd80a23 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -105,28 +105,28 @@ jobs: - name: Download libCURL id: get_libcurl env: + CURL_TAG: 8_6_0 CURL_VERSION: 8.6.0 run: | - curl.exe -o $env:RUNNER_TEMP/libcurl.tar.gz -L "https://github.com/curl/curl/releases/download/v${env:CURL_VERSION}/curl-v${env:CURL_VERSION}.tar.gz" + curl.exe -o $env:RUNNER_TEMP/libcurl.tar.gz -L "https://github.com/curl/curl/releases/download/curl-${env:CURL_TAG}/curl-${env:CURL_VERSION}.tar.gz" mkdir $env:RUNNER_TEMP/libcurl tar.exe -xvf $env:RUNNER_TEMP/libcurl.tar.gz --strip-components=1 -C $env:RUNNER_TEMP/libcurl - - name: Install libcurl - id: install_libcurl + - name: Build libcurl + id: build_libcurl run: | cd $env:RUNNER_TEMP/libcurl mkdir build cd build - cmake .. -DCMAKE_BUILD_TYPE=Release ; + cmake .. -DCMAKE_BUILD_TYPE=Release cmake --build . --config Release - make install - name: Build id: cmake_build run: | mkdir build cd build - cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ; + cmake .. -DCURL_LIBRARY="${env:RUNNER_TEMP}/libcurl/lib/Release/libcurl_imp.lib" -DCURL_INCLUDE_DIR="${env:RUNNER_TEMP}/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server - name: Python setup From 5df5605b0267360b0d6f9bc219496eb34a2a79df Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 19:52:11 +0100 Subject: [PATCH 30/52] ci: build: add libcurl in default make toolchain step --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0977aa8ba93d4..375625beb36f6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install build-essential gcc-8 + sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev - name: Build id: make_build From 78812c6d638c3dba75a521d39bfeca5c30af6310 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 20:02:34 +0100 Subject: [PATCH 31/52] llama_load_model_from_url: PR feedback, use snprintf instead of strncp and strncat --- common/common.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 5775840dd27d5..90902542a6971 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1667,16 +1667,14 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha struct stat buffer; auto file_exists = (stat(path_model, &buffer) == 0); - // If the file exists, check for ${model_path}.etag or ${model_path}.lastModified files + // If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; - strncpy(etag_path, path_model, LLAMA_CURL_MAX_PATH_LENGTH - 6); // 6 is the length of ".etag\0" - strncat(etag_path, ".etag", 6); + snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model); char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; - strncpy(last_modified_path, path_model, LLAMA_CURL_MAX_PATH_LENGTH - 15); // 15 is the length of ".lastModified\0" - strncat(last_modified_path, ".lastModified", 15); + snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model); if (file_exists) { auto * f_etag = fopen(etag_path, "r"); From 1ad5a45210d573d65e2243882f4d84e0b9d17c49 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 20:06:18 +0100 Subject: [PATCH 32/52] ci: build: add libcurl in default make toolchain step for tests --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 375625beb36f6..b36fad09da0a4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -45,6 +45,8 @@ jobs: - name: Test id: make_test + env: + LLAMA_CURL: 1 run: | CC=gcc-8 make tests -j $(nproc) make test -j $(nproc) From 22b3bb3ceb4bd94a8028d4b4cdb3c4c3f790a71f Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 21:50:37 +0100 Subject: [PATCH 33/52] common: fix windows build caused by double windows.h import --- common/common.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 90902542a6971..3cbd4dbae6f22 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -57,7 +57,6 @@ #ifdef __linux__ #include #elif defined(_WIN32) -#include #define PATH_MAX MAX_PATH #else #include From e6848ab0e699533579e60bbeb23c900e2e625a8c Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 21:53:07 +0100 Subject: [PATCH 34/52] build: move the make build with env LLAMA_CURL to a dedicated place --- .github/workflows/build.yml | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b36fad09da0a4..8130197461c8c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,18 +39,37 @@ jobs: id: make_build env: LLAMA_FATAL_WARNINGS: 1 - LLAMA_CURL: 1 run: | CC=gcc-8 make -j $(nproc) - name: Test id: make_test - env: - LLAMA_CURL: 1 run: | CC=gcc-8 make tests -j $(nproc) make test -j $(nproc) + ubuntu-focal-make-curl: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v3 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev + + - name: Build + id: make_build + env: + LLAMA_FATAL_WARNINGS: 1 + LLAMA_CURL: 1 + run: | + CC=gcc-8 make -j $(nproc) + ubuntu-latest-cmake: runs-on: ubuntu-latest From d81acb68476d7fa05e443d63511d0da91ec39fb9 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 21:59:53 +0100 Subject: [PATCH 35/52] build: introduce cmake option LLAMA_CURL to trigger libcurl linking to be coherent with the make toolchain --- .github/workflows/server.yml | 3 ++- CMakeLists.txt | 1 + common/CMakeLists.txt | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 5530e9bd80a23..8abe6f49619ef 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -68,6 +68,7 @@ jobs: cmake .. \ -DLLAMA_NATIVE=OFF \ -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CURL=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server @@ -126,7 +127,7 @@ jobs: run: | mkdir build cd build - cmake .. -DCURL_LIBRARY="${env:RUNNER_TEMP}/libcurl/lib/Release/libcurl_imp.lib" -DCURL_INCLUDE_DIR="${env:RUNNER_TEMP}/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release + cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="${env:RUNNER_TEMP}/libcurl/lib/Release/libcurl_imp.lib" -DCURL_INCLUDE_DIR="${env:RUNNER_TEMP}/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server - name: Python setup diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ac2804a6881a..fc4cff28f44ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,7 @@ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "llama: max. batch size for using peer access") +option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) option(LLAMA_CLBLAST "llama: use CLBlast" OFF) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 0331788fd88ca..c8a21a9c2b6e7 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -47,14 +47,13 @@ if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() + # Check for curl -find_package(CURL QUIET) -if (CURL_FOUND) +if (LLAMA_CURL) + find_package(CURL) add_definitions(-DLLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) link_libraries(${CURL_LIBRARIES}) -else() - message(INFO " libcurl not found. Building without model download support.") endif () From dbd969142e22997e801032b45f8ce145031742aa Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 22:01:19 +0100 Subject: [PATCH 36/52] build: move the make build with env LLAMA_CURL to a dedicated place --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8130197461c8c..ded19606284d0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev + sudo apt-get install build-essential gcc-8 - name: Build id: make_build From 9da4eec082fd3d7339485e453cde96763566fc84 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 22:13:46 +0100 Subject: [PATCH 37/52] llama_load_model_from_url: minor spacing and log message changes --- common/common.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 3cbd4dbae6f22..007424fd91a4b 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1391,7 +1391,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -m FNAME, --model FNAME\n"); printf(" model path (default: %s)\n", params.model.c_str()); printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); - printf(" model download url (default: %s)\n", params.model_url.c_str()); + printf(" model download url (default: %s)\n", params.model_url.c_str()); printf(" -md FNAME, --model-draft FNAME\n"); printf(" draft model for speculative decoding\n"); printf(" -ld LOGDIR, --logdir LOGDIR\n"); @@ -1653,18 +1653,17 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (!curl) { curl_global_cleanup(); - fprintf(stderr, "%s: error initializing lib curl\n", __func__); + fprintf(stderr, "%s: error initializing libcurl\n", __func__); return NULL; } // Set the URL, allow to follow http redirection curl_easy_setopt(curl, CURLOPT_URL, model_url); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // Check if the file already exists locally - struct stat buffer; - auto file_exists = (stat(path_model, &buffer) == 0); + struct stat model_file_info; + auto file_exists = (stat(path_model, &model_file_info) == 0); // If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; @@ -1722,7 +1721,8 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha return n_items; }; - curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast(header_callback)); curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers); @@ -1735,7 +1735,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha } } - // If only the ETag or the Last-Modified header are different, trigger a new download + // If the ETag or the Last-Modified headers are different: trigger a new download if (strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { // Set the output file auto * outfile = fopen(path_model, "wb"); @@ -1769,7 +1769,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha fclose(outfile); curl_easy_cleanup(curl); curl_global_cleanup(); - fprintf(stderr, "%s: invalid http status code failed: %ld\n", __func__, http_code); + fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code); return NULL; } @@ -1808,7 +1808,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha struct llama_model * llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/, struct llama_model_params /*params*/) { - fprintf(stderr, "%s: llama.cpp built without curl support, downloading from an url not supported.\n", __func__); + fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__); return nullptr; } From 89d3483860ea10ab7c49cbc910aa9a455969c279 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 22:27:02 +0100 Subject: [PATCH 38/52] ci: build: fix ubuntu-focal-make-curl --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ded19606284d0..945df42f886a6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,7 +49,7 @@ jobs: make test -j $(nproc) ubuntu-focal-make-curl: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Clone From 13d8817ce260dceeed4a776aec89c9c19cba31b8 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 22:34:01 +0100 Subject: [PATCH 39/52] ci: build: try to fix the windows build --- .github/workflows/server.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 8abe6f49619ef..d0458629ad957 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -127,7 +127,7 @@ jobs: run: | mkdir build cd build - cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="${env:RUNNER_TEMP}/libcurl/lib/Release/libcurl_imp.lib" -DCURL_INCLUDE_DIR="${env:RUNNER_TEMP}/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release + cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="${env:RUNNER_TEMP}/libcurl/lib/Release/libcurl_imp.lib" -DCURL_INCLUDE_DIR="${env:RUNNER_TEMP}/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server - name: Python setup From 1ddaf7109acd3eef882929e755e48f30e1aaec8c Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sat, 16 Mar 2024 22:43:05 +0100 Subject: [PATCH 40/52] common: remove old dependency to openssl --- common/common.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/common/common.h b/common/common.h index 3e5dd661c95aa..8dd8a3edc9c94 100644 --- a/common/common.h +++ b/common/common.h @@ -17,12 +17,6 @@ #include #include -#ifdef HAVE_OPENSSL -#include -#include -#include -#endif - #ifdef _WIN32 #define DIRECTORY_SEPARATOR '\\' #else From 73b4b44785d803e3c74e97bdea59d230a94f3ed1 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 00:43:35 +0100 Subject: [PATCH 41/52] common: fix build --- .github/workflows/server.yml | 20 +++++--------------- common/CMakeLists.txt | 17 ++++++++--------- common/common.cpp | 4 ++-- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index d0458629ad957..bb321aa1c1ece 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -103,31 +103,21 @@ jobs: with: fetch-depth: 0 - - name: Download libCURL + - name: libCURL id: get_libcurl env: - CURL_TAG: 8_6_0 - CURL_VERSION: 8.6.0 + CURL_VERSION: 8.6.0_6 run: | - curl.exe -o $env:RUNNER_TEMP/libcurl.tar.gz -L "https://github.com/curl/curl/releases/download/curl-${env:CURL_TAG}/curl-${env:CURL_VERSION}.tar.gz" + curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" mkdir $env:RUNNER_TEMP/libcurl - tar.exe -xvf $env:RUNNER_TEMP/libcurl.tar.gz --strip-components=1 -C $env:RUNNER_TEMP/libcurl - - - name: Build libcurl - id: build_libcurl - run: | - cd $env:RUNNER_TEMP/libcurl - mkdir build - cd build - cmake .. -DCMAKE_BUILD_TYPE=Release - cmake --build . --config Release + tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl - name: Build id: cmake_build run: | mkdir build cd build - cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="${env:RUNNER_TEMP}/libcurl/lib/Release/libcurl_imp.lib" -DCURL_INCLUDE_DIR="${env:RUNNER_TEMP}/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release + cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server - name: Python setup diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index c8a21a9c2b6e7..3beda6d25caec 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -48,15 +48,6 @@ if (BUILD_SHARED_LIBS) endif() -# Check for curl -if (LLAMA_CURL) - find_package(CURL) - add_definitions(-DLLAMA_USE_CURL) - include_directories(${CURL_INCLUDE_DIRS}) - link_libraries(${CURL_LIBRARIES}) -endif () - - set(TARGET common) add_library(${TARGET} STATIC @@ -80,3 +71,11 @@ endif() target_include_directories(${TARGET} PUBLIC .) target_compile_features(${TARGET} PUBLIC cxx_std_11) target_link_libraries(${TARGET} PRIVATE build_info PUBLIC llama) + +# Use curl to download model url +if (LLAMA_CURL) + find_package(CURL) + add_definitions(-DLLAMA_USE_CURL) + target_include_directories(${TARGET} ${CURL_INCLUDE_DIRS}) + target_link_libraries(${TARGET} PRIVATE curl) +endif () diff --git a/common/common.cpp b/common/common.cpp index 007424fd91a4b..77b8f1d7c594d 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1710,13 +1710,13 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha const char * etag_prefix = "etag: "; if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { - strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove LRLF + strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF } const char * last_modified_prefix = "last-modified: "; if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) { strncpy(headers->last_modified, buffer + strlen(last_modified_prefix), - n_items - strlen(last_modified_prefix) - 2); // Remove LRLF + n_items - strlen(last_modified_prefix) - 2); // Remove CRLF } return n_items; }; From a3ed3d48d30af5c096a0b301d0bef384a0b2c22d Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 01:17:58 +0100 Subject: [PATCH 42/52] common: fix windows build --- common/CMakeLists.txt | 10 +++++----- common/common.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 3beda6d25caec..cb4e538698337 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -68,14 +68,14 @@ if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() -target_include_directories(${TARGET} PUBLIC .) -target_compile_features(${TARGET} PUBLIC cxx_std_11) -target_link_libraries(${TARGET} PRIVATE build_info PUBLIC llama) - # Use curl to download model url if (LLAMA_CURL) find_package(CURL) add_definitions(-DLLAMA_USE_CURL) - target_include_directories(${TARGET} ${CURL_INCLUDE_DIRS}) + include_directories(${CURL_INCLUDE_DIRS}) target_link_libraries(${TARGET} PRIVATE curl) endif () + +target_include_directories(${TARGET} PUBLIC .) +target_compile_features(${TARGET} PUBLIC cxx_std_11) +target_link_libraries(${TARGET} PRIVATE build_info PUBLIC llama) diff --git a/common/common.cpp b/common/common.cpp index 77b8f1d7c594d..fd4ee9f1efa2e 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -16,9 +16,6 @@ #include #include #include -#ifdef LLAMA_USE_CURL -#include -#endif #if defined(__APPLE__) && defined(__MACH__) #include @@ -40,6 +37,9 @@ #include #include #endif +#if defined(LLAMA_USE_CURL) +#include +#endif #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data @@ -53,7 +53,7 @@ #define GGML_USE_CUBLAS_SYCL_VULKAN #endif -#ifdef LLAMA_USE_CURL +#if defined(LLAMA_USE_CURL) #ifdef __linux__ #include #elif defined(_WIN32) From 5e66ec80b33451ee9949e308f5ecf8637613af90 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 02:07:06 +0100 Subject: [PATCH 43/52] common: fix windows tests --- .github/workflows/server.yml | 4 +++- common/CMakeLists.txt | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index bb321aa1c1ece..92268fe9dc4b0 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -117,7 +117,7 @@ jobs: run: | mkdir build cd build - cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON + cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server - name: Python setup @@ -136,6 +136,7 @@ jobs: if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | cd examples/server/tests + $env:PATH += ";$env:RUNNER_TEMP/libcurl/bin" behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp - name: Slow tests @@ -143,4 +144,5 @@ jobs: if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} run: | cd examples/server/tests + $env:PATH += ";$env:RUNNER_TEMP/libcurl/bin" behave.exe --stop --no-skipped --no-capture --tags slow diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index cb4e538698337..af2629a460b93 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -68,14 +68,17 @@ if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() +set(LLAMA_COMMON_EXTRA_LIBS build_info) + # Use curl to download model url if (LLAMA_CURL) - find_package(CURL) + find_package(CURL REQUIRED) add_definitions(-DLLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) - target_link_libraries(${TARGET} PRIVATE curl) + find_library(CURL_LIBRARY curl REQUIRED) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) endif () target_include_directories(${TARGET} PUBLIC .) target_compile_features(${TARGET} PUBLIC cxx_std_11) -target_link_libraries(${TARGET} PRIVATE build_info PUBLIC llama) +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama) From 9ca4acc5fb4b77ce3369c52b9e5fa5c7bb52da1b Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 02:30:20 +0100 Subject: [PATCH 44/52] common: fix windows tests --- .github/workflows/server.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 92268fe9dc4b0..e27daf0c3b1aa 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -135,8 +135,8 @@ jobs: id: server_integration_tests if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | + cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl.dll cd examples/server/tests - $env:PATH += ";$env:RUNNER_TEMP/libcurl/bin" behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp - name: Slow tests @@ -144,5 +144,4 @@ jobs: if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} run: | cd examples/server/tests - $env:PATH += ";$env:RUNNER_TEMP/libcurl/bin" behave.exe --stop --no-skipped --no-capture --tags slow From c1b002e06772fe39136651b192cd5cea1c2cc553 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 09:35:19 +0100 Subject: [PATCH 45/52] common: llama_load_model_from_url windows set CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA --- .github/workflows/server.yml | 6 +++++- common/common.cpp | 5 +++++ examples/server/tests/features/embeddings.feature | 2 +- examples/server/tests/features/environment.py | 10 ++++++++++ examples/server/tests/features/steps/steps.py | 2 ++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index e27daf0c3b1aa..79807f8971d41 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -131,11 +131,15 @@ jobs: run: | pip install -r examples/server/tests/requirements.txt + - name: Copy Libcurl + id: prepare_libcurl + run: | + cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll + - name: Tests id: server_integration_tests if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | - cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl.dll cd examples/server/tests behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp diff --git a/common/common.cpp b/common/common.cpp index fd4ee9f1efa2e..789466fdaf0f9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1660,6 +1660,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha // Set the URL, allow to follow http redirection curl_easy_setopt(curl, CURLOPT_URL, model_url); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); +#if defined(_WIN32) + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. Currently implemented under MS-Windows. + curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif // Check if the file already exists locally struct stat model_file_info; diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature index fb821f802596d..dcf1434f97124 100644 --- a/examples/server/tests/features/embeddings.feature +++ b/examples/server/tests/features/embeddings.feature @@ -5,7 +5,7 @@ Feature: llama.cpp server Background: Server startup Given a server listening on localhost:8080 And a model url https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf - And a model file /tmp/ggml-model-f16.gguf + And a model file ggml-model-f16.gguf And a model alias bert-bge-small And 42 as server seed And 2 slots diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py index 8ad987e1bb618..3b45de6bafc82 100644 --- a/examples/server/tests/features/environment.py +++ b/examples/server/tests/features/environment.py @@ -33,6 +33,16 @@ def after_scenario(context, scenario): print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n") if not pid_exists(context.server_process.pid): + print("Trying to find server logs:") + out, err = context.server_process.communicate() + if out: + print("Server stdout:\n") + print(out) + print("\n") + if err: + print("Server stderr:\n") + print(err) + print("\n") assert False, f"Server not running pid={context.server_process.pid} ..." server_graceful_shutdown(context) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 19d064dfd0304..9b25b1aebe587 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1094,6 +1094,8 @@ def start_server_background(context): pkwargs = { 'creationflags': flags, + 'stderr': subprocess.PIPE, + 'stdout': subprocess.PIPE } context.server_process = subprocess.Popen( [str(arg) for arg in [context.server_path, *server_args]], From cff7faaccbebdd64275fa801bd65f514b5d14699 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 16:28:01 +0100 Subject: [PATCH 46/52] ci: tests: print server logs in case of scenario failure --- examples/server/tests/features/environment.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py index 3b45de6bafc82..9ae3954e9aa62 100644 --- a/examples/server/tests/features/environment.py +++ b/examples/server/tests/features/environment.py @@ -33,16 +33,7 @@ def after_scenario(context, scenario): print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n") if not pid_exists(context.server_process.pid): - print("Trying to find server logs:") - out, err = context.server_process.communicate() - if out: - print("Server stdout:\n") - print(out) - print("\n") - if err: - print("Server stderr:\n") - print(err) - print("\n") + print_server_logs(context) assert False, f"Server not running pid={context.server_process.pid} ..." server_graceful_shutdown(context) @@ -58,6 +49,9 @@ def after_scenario(context, scenario): if attempts > 5: server_kill_hard(context) + if scenario.status == "failed" or context.debug: + print_server_logs(context) + def server_graceful_shutdown(context): print(f"shutting down server pid={context.server_process.pid} ...\n") @@ -108,3 +102,17 @@ def pid_exists(pid): return e.errno == errno.EPERM else: return True + + +def print_server_logs(context): + print("Trying to find server logs:") + out, err = context.server_process.communicate() + if out: + print("Server stdout:\n") + print(out.decode("utf-8")) + print("\n") + if err: + print("Server stderr:\n") + print(err.decode("utf-8")) + print("\n") + From 4fe431d429e5f887a62f07b0906e32c32717749a Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 16:31:34 +0100 Subject: [PATCH 47/52] common: llama_load_model_from_url: make it working on windows: disable global curl function, use a write callback. --- common/common.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 789466fdaf0f9..25d1ff4fa81bf 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1648,11 +1648,9 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha } // Initialize libcurl globally - curl_global_init(CURL_GLOBAL_DEFAULT); auto curl = curl_easy_init(); if (!curl) { - curl_global_cleanup(); fprintf(stderr, "%s: error initializing libcurl\n", __func__); return NULL; } @@ -1734,23 +1732,36 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha CURLcode res = curl_easy_perform(curl); if (res != CURLE_OK) { curl_easy_cleanup(curl); - curl_global_cleanup(); fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); return NULL; } + + long http_code = 0; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); + if (http_code != 200) { + // HEAD not supported, we don't know if the file has changed + // force trigger downloading + file_exists = false; + fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + } } // If the ETag or the Last-Modified headers are different: trigger a new download - if (strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { + if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { // Set the output file auto * outfile = fopen(path_model, "wb"); if (!outfile) { curl_easy_cleanup(curl); - curl_global_cleanup(); fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); return NULL; } + + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { + return fwrite(data, size, nmemb, (FILE *)fd);; + }; curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, static_cast(write_callback)); curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile); // display download progress @@ -1763,7 +1774,6 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (res != CURLE_OK) { fclose(outfile); curl_easy_cleanup(curl); - curl_global_cleanup(); fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); return NULL; } @@ -1773,7 +1783,6 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (http_code < 200 || http_code >= 400) { fclose(outfile); curl_easy_cleanup(curl); - curl_global_cleanup(); fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code); return NULL; } @@ -1804,7 +1813,6 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha } curl_easy_cleanup(curl); - curl_global_cleanup(); return llama_load_model_from_file(path_model, params); } From 47a9e5d76c1cb30b71f6f79eb29bfe1cc58cda50 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 16:37:40 +0100 Subject: [PATCH 48/52] ci: tests: increase timeout for windows --- examples/server/tests/features/steps/steps.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 9b25b1aebe587..93845244ad1b2 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -153,7 +153,8 @@ def step_start_server(context): async def step_wait_for_the_server_to_be_started(context, expecting_status): match expecting_status: case 'healthy': - await wait_for_health_status(context, context.base_url, 200, 'ok') + await wait_for_health_status(context, context.base_url, 200, 'ok', + timeout=30) case 'ready' | 'idle': await wait_for_health_status(context, context.base_url, 200, 'ok', From 31272c635a46722f0ec46813f63b038686b9652f Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 16:46:53 +0100 Subject: [PATCH 49/52] common: fix typo --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index 25d1ff4fa81bf..de05018550f65 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1758,7 +1758,7 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { - return fwrite(data, size, nmemb, (FILE *)fd);; + return fwrite(data, size, nmemb, (FILE *)fd); }; curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, static_cast(write_callback)); From f902ab6de2b99abb6569f06642963bacfd0fc81d Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 16:37:02 +0100 Subject: [PATCH 50/52] common: llama_load_model_from_url use a temporary file for downloading --- .github/workflows/server.yml | 2 +- common/common.cpp | 11 ++++++++++- examples/server/tests/features/environment.py | 4 ++-- examples/server/tests/features/server.feature | 3 ++- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 79807f8971d41..4ea09115a3c44 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -117,7 +117,7 @@ jobs: run: | mkdir build cd build - cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON + cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server - name: Python setup diff --git a/common/common.cpp b/common/common.cpp index de05018550f65..3ecd4e5cdbd11 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha // If the ETag or the Last-Modified headers are different: trigger a new download if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { + char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; + snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model); + // Set the output file - auto * outfile = fopen(path_model, "wb"); + auto * outfile = fopen(path_model_temporary, "wb"); if (!outfile) { curl_easy_cleanup(curl); fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); @@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha headers.last_modified); } } + + if (rename(path_model_temporary, path_model) != 0) { + curl_easy_cleanup(curl); + fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model); + return NULL; + } } curl_easy_cleanup(curl); diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py index 9ae3954e9aa62..96751d71364d2 100644 --- a/examples/server/tests/features/environment.py +++ b/examples/server/tests/features/environment.py @@ -109,10 +109,10 @@ def print_server_logs(context): out, err = context.server_process.communicate() if out: print("Server stdout:\n") - print(out.decode("utf-8")) + print(out.decode('utf-8')) print("\n") if err: print("Server stderr:\n") - print(err.decode("utf-8")) + print(err.decode('utf-8')) print("\n") diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index 5014f326dc050..7448986e75a49 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -4,7 +4,8 @@ Feature: llama.cpp server Background: Server startup Given a server listening on localhost:8080 - And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models + And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf + And a model file stories260K.gguf And a model alias tinyllama-2 And 42 as server seed # KV Cache corresponds to the total amount of tokens From b24f30fdad741cf0178d29f008519b5349f52e9a Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 16:52:38 +0100 Subject: [PATCH 51/52] common: llama_load_model_from_url delete previous file before downloading --- common/common.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/common/common.cpp b/common/common.cpp index 3ecd4e5cdbd11..2f5d965d6511c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1750,6 +1750,14 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model); + if (file_exists) { + fprintf(stderr, "%s: deleting previous downloaded model file: %s\n", __func__, path_model); + if (remove(path_model) != 0) { + curl_easy_cleanup(curl); + fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model); + return NULL; + } + } // Set the output file auto * outfile = fopen(path_model_temporary, "wb"); From fcf327f0e64002dfd9e5146f6eb74a3069fda38f Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Sun, 17 Mar 2024 17:45:09 +0100 Subject: [PATCH 52/52] ci: tests: fix behavior on windows --- examples/server/tests/features/environment.py | 113 ++++++++---------- examples/server/tests/features/steps/steps.py | 19 ++- examples/server/tests/requirements.txt | 1 + 3 files changed, 66 insertions(+), 67 deletions(-) diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py index 96751d71364d2..82104e9202e5e 100644 --- a/examples/server/tests/features/environment.py +++ b/examples/server/tests/features/environment.py @@ -1,10 +1,12 @@ -import errno import os +import signal import socket -import subprocess +import sys import time +import traceback from contextlib import closing -import signal + +import psutil def before_scenario(context, scenario): @@ -20,37 +22,40 @@ def before_scenario(context, scenario): def after_scenario(context, scenario): - if context.server_process is None: - return - if scenario.status == "failed": - if 'GITHUB_ACTIONS' in os.environ: - print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n\n") - if os.path.isfile('llama.log'): - with closing(open('llama.log', 'r')) as f: - for line in f: - print(line) - if not is_server_listening(context.server_fqdn, context.server_port): - print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n") - - if not pid_exists(context.server_process.pid): - print_server_logs(context) - assert False, f"Server not running pid={context.server_process.pid} ..." - - server_graceful_shutdown(context) - - # Wait few for socket to free up - time.sleep(0.05) - - attempts = 0 - while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port): - server_kill(context) - time.sleep(0.1) - attempts += 1 - if attempts > 5: - server_kill_hard(context) - - if scenario.status == "failed" or context.debug: - print_server_logs(context) + try: + if 'server_process' not in context or context.server_process is None: + return + if scenario.status == "failed": + if 'GITHUB_ACTIONS' in os.environ: + print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n\n") + if os.path.isfile('llama.log'): + with closing(open('llama.log', 'r')) as f: + for line in f: + print(line) + if not is_server_listening(context.server_fqdn, context.server_port): + print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n") + + if not pid_exists(context.server_process.pid): + assert False, f"Server not running pid={context.server_process.pid} ..." + + server_graceful_shutdown(context) + + # Wait few for socket to free up + time.sleep(0.05) + + attempts = 0 + while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port): + server_kill(context) + time.sleep(0.1) + attempts += 1 + if attempts > 5: + server_kill_hard(context) + except: + exc = sys.exception() + print("error in after scenario: \n") + print(exc) + print("*** print_tb: \n") + traceback.print_tb(exc.__traceback__, file=sys.stdout) def server_graceful_shutdown(context): @@ -71,11 +76,11 @@ def server_kill_hard(context): path = context.server_path print(f"Server dangling exits, hard killing force {pid}={path}...\n") - if os.name == 'nt': - process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode() - print(process) - else: - os.kill(-pid, signal.SIGKILL) + try: + psutil.Process(pid).kill() + except psutil.NoSuchProcess: + return False + return True def is_server_listening(server_fqdn, server_port): @@ -88,31 +93,9 @@ def is_server_listening(server_fqdn, server_port): def pid_exists(pid): - """Check whether pid exists in the current process table.""" - if pid < 0: + try: + psutil.Process(pid) + except psutil.NoSuchProcess: return False - if os.name == 'nt': - output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode() - print(output) - return "No tasks are running" not in output - else: - try: - os.kill(pid, 0) - except OSError as e: - return e.errno == errno.EPERM - else: - return True - - -def print_server_logs(context): - print("Trying to find server logs:") - out, err = context.server_process.communicate() - if out: - print("Server stdout:\n") - print(out.decode('utf-8')) - print("\n") - if err: - print("Server stderr:\n") - print(err.decode('utf-8')) - print("\n") + return True diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 93845244ad1b2..9e348d5fc4c37 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -5,6 +5,8 @@ import re import socket import subprocess +import sys +import threading import time from contextlib import closing from re import RegexFlag @@ -1095,10 +1097,23 @@ def start_server_background(context): pkwargs = { 'creationflags': flags, - 'stderr': subprocess.PIPE, - 'stdout': subprocess.PIPE + 'stdout': subprocess.PIPE, + 'stderr': subprocess.PIPE } context.server_process = subprocess.Popen( [str(arg) for arg in [context.server_path, *server_args]], **pkwargs) + + def log_stdout(process): + for line in iter(process.stdout.readline, b''): + print(line.decode('utf-8'), end='') + thread_stdout = threading.Thread(target=log_stdout, args=(context.server_process,)) + thread_stdout.start() + + def log_stderr(process): + for line in iter(process.stderr.readline, b''): + print(line.decode('utf-8'), end='', file=sys.stderr) + thread_stderr = threading.Thread(target=log_stderr, args=(context.server_process,)) + thread_stderr.start() + print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") diff --git a/examples/server/tests/requirements.txt b/examples/server/tests/requirements.txt index 2e4f42ad28c23..c2c960102b523 100644 --- a/examples/server/tests/requirements.txt +++ b/examples/server/tests/requirements.txt @@ -3,4 +3,5 @@ behave~=1.2.6 huggingface_hub~=0.20.3 numpy~=1.24.4 openai~=0.25.0 +psutil~=5.9.8 prometheus-client~=0.20.0