From cc712a6184a059e6469910a6597fdb720d3f6bbc Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 7 Feb 2025 09:15:22 +0100 Subject: [PATCH] common : add default embeddings presets (#11677) * common : add default embeddings presets This commit adds default embeddings presets for the following models: - bge-small-en-v1.5 - e5-small-v2 - gte-small These can be used with llama-embedding and llama-server. For example, with llama-embedding: ```console ./build/bin/llama-embedding --embd-gte-small-default -p "Hello, how are you?" ``` And with llama-server: ```console ./build/bin/llama-server --embd-gte-small-default ``` And the embeddings endpoint can then be called with a POST request: ```console curl --request POST \ --url http://localhost:8080/embeddings \ --header "Content-Type: application/json" \ --data '{"input": "Hello, how are you?"}' ``` I'm not sure if these are the most common embedding models but hopefully this can be a good starting point for discussion and further improvements. Refs: https://github.com/ggerganov/llama.cpp/issues/10932 --- common/arg.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/common/arg.cpp b/common/arg.cpp index 76b8988819cda..152f671ab738e 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2324,5 +2324,47 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_examples({LLAMA_EXAMPLE_TTS})); + add_opt(common_arg( + {"--embd-bge-small-en-default"}, + string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF"; + params.hf_file = "bge-small-en-v1.5-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-e5-small-en-default"}, + string_format("use default e5-small-v2 model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF"; + params.hf_file = "e5-small-v2-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-gte-small-default"}, + string_format("use default gte-small model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF"; + params.hf_file = "gte-small-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + return ctx_arg; }