Feat/954 llama cpp #1000

Merged
merged 34 commits into from
Jan 9, 2025
Changes from 12 commits
Commits
34 commits
c9ed5fd
Support embeddings generation using llama_cpp
bikash119 Sep 24, 2024
c3464bc
Added llama-cpp-python as optional dependency
bikash119 Sep 24, 2024
582ca40
- Added normalize_embeddings argument to allow user to pass if the em…
bikash119 Sep 25, 2024
fba8ada
Update pyproject.toml
bikash119 Sep 26, 2024
e288b31
- Updated test to allow developer to define test model location.
bikash119 Sep 26, 2024
d6d4352
Merge remote-tracking branch 'upstream/develop' into feat/954_llama-cpp
bikash119 Sep 26, 2024
a936a39
- Made the test session scope
bikash119 Sep 26, 2024
316afa0
- Reverted the changes made to model_path
bikash119 Sep 26, 2024
7137883
- Implement test_encode_batch to verify various batch sizes
bikash119 Sep 26, 2024
2d0aa76
- Included LlamaCppEmbeddings in __init__.py
bikash119 Sep 26, 2024
778532f
- Use HF_TOKEN to download model from hub to generate embeddings.
bikash119 Sep 30, 2024
55c3a0d
- Download from hub is now available through mixin
bikash119 Oct 2, 2024
935cdb8
Revert "- Download from hub is now available through mixin"
bikash119 Oct 3, 2024
29a8d56
Revert "- Use HF_TOKEN to download model from hub to generate embeddi…
bikash119 Oct 3, 2024
b40b0d2
- Removed mixin implementation to download the model
bikash119 Oct 3, 2024
b08f3ae
- Additional example added for private / public model
bikash119 Oct 4, 2024
a49363c
- The tests can now be configured to use cpu or gpu based on paramete…
bikash119 Oct 4, 2024
575f48e
- repo_id or model_path : one of the parameters is mandatory
bikash119 Oct 4, 2024
48dce7b
Added description to attribute: model
bikash119 Oct 4, 2024
0e1fb8e
- Fixed examples
bikash119 Oct 4, 2024
f72ef30
Updated examples
bikash119 Oct 4, 2024
8218242
Update src/distilabel/embeddings/llamacpp.py
bikash119 Oct 14, 2024
db00482
Update src/distilabel/embeddings/llamacpp.py
bikash119 Oct 14, 2024
0fb7f15
Update src/distilabel/embeddings/llamacpp.py
bikash119 Oct 14, 2024
155feb2
Updated test to set disable_cuda_device_placement=True when testing f…
bikash119 Oct 14, 2024
b218b44
Merge branch 'develop' into feat/954_llama-cpp
bikash119 Oct 14, 2024
58aa996
Merge branch 'develop' into feat/954_llama-cpp
bikash119 Oct 16, 2024
3659400
Test case will by default load the model to CPU
bikash119 Oct 16, 2024
92481b0
Merge branch 'feat/954_llama-cpp' of github.com:bikash119/distilabel …
bikash119 Oct 16, 2024
ef98d63
Merge branch 'develop' into feat/954_llama-cpp
bikash119 Oct 19, 2024
2258190
Updated import statements to align with new folder structure
bikash119 Oct 26, 2024
da92cc9
example code updated
bikash119 Oct 26, 2024
09dd551
examples fixed
bikash119 Oct 26, 2024
b9c5305
Merge branch 'develop' into feat/954_llama-cpp
bikash119 Dec 2, 2024
4 changes: 3 additions & 1 deletion .gitignore
@@ -77,4 +77,6 @@ venv.bak/
# Other
*.log
*.swp
.DS_Store
.DS_Store
#models
tests/model
2 changes: 2 additions & 0 deletions src/distilabel/embeddings/__init__.py
@@ -13,11 +13,13 @@
# limitations under the License.

from distilabel.embeddings.base import Embeddings
from distilabel.embeddings.llamacpp import LlamaCppEmbeddings
from distilabel.embeddings.sentence_transformers import SentenceTransformerEmbeddings
from distilabel.embeddings.vllm import vLLMEmbeddings

__all__ = [
"Embeddings",
"SentenceTransformerEmbeddings",
"vLLMEmbeddings",
"LlamaCppEmbeddings",
]
153 changes: 153 additions & 0 deletions src/distilabel/embeddings/llamacpp.py
@@ -0,0 +1,153 @@
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Any, Dict, List, Union

from pydantic import Field, PrivateAttr

from distilabel.embeddings.base import Embeddings
from distilabel.llms.mixins.cuda_device_placement import CudaDevicePlacementMixin
from distilabel.mixins.hub_downloader import HuggingFaceModelLoaderMixin
from distilabel.mixins.runtime_parameters import RuntimeParameter

if TYPE_CHECKING:
from llama_cpp import Llama as _LlamaCpp


class LlamaCppEmbeddings(
Embeddings, CudaDevicePlacementMixin, HuggingFaceModelLoaderMixin
):
"""`LlamaCpp` library implementation for embedding generation.

Attributes:
model_file: the name of (or path to) the GGUF quantized model file, compatible with the
installed version of the `llama.cpp` Python bindings.
repo_id: the Hugging Face Hub repository id.
hf_token: Hugging Face token for accessing gated models.
verbose: whether to print verbose output. Defaults to `False`.
n_gpu_layers: number of layers to run on the GPU. Defaults to `0` (CPU only); set to `-1` to offload all layers to the GPU when available.
disable_cuda_device_placement: whether to disable CUDA device placement. Defaults to `True`.
normalize_embeddings: whether to normalize the embeddings. Defaults to `False`.
seed: RNG seed, `-1` for a random seed. Defaults to `4294967295`.
n_ctx: text context size, `0` to infer from the model. Defaults to `512`.
n_batch: prompt processing maximum batch size. Defaults to `512`.
extra_kwargs: additional dictionary of keyword arguments that will be passed to the
`Llama` class of `llama_cpp` library. Defaults to `{}`.
_model: the `Llama` model instance. This attribute is meant to be used internally
and should not be accessed directly. It will be set in the `load` method.

References:
- [Offline inference embeddings](https://llama-cpp-python.readthedocs.io/en/stable/#embeddings)

Examples:
Generating sentence embeddings:

```python
from distilabel.embeddings import LlamaCppEmbeddings

embeddings = LlamaCppEmbeddings(model_file="/path/to/model.gguf")

# Or download the model from the Hugging Face Hub:
# embeddings = LlamaCppEmbeddings(repo_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF", model_file="all-MiniLM-L6-v2-Q2_K.gguf")

embeddings.load()

results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"])
# [
# [-0.05447685346007347, -0.01623094454407692, ...],
# [4.4889533455716446e-05, 0.044016145169734955, ...],
# ]
```
"""

model_file: str
n_gpu_layers: RuntimeParameter[int] = Field(default=0, description="The number of layers that will be loaded in the GPU.")
disable_cuda_device_placement: RuntimeParameter[bool] = Field(
default=True,
description="Whether to disable CUDA device placement.",
)
verbose: RuntimeParameter[bool] = Field(
default=False,
description="Whether to print verbose output from llama.cpp library.",
)
normalize_embeddings: RuntimeParameter[bool] = Field(
default=False,
description="Whether to normalize the embeddings.",
)
seed: int = 4294967295
n_ctx: int = 512
n_batch: int = 512
extra_kwargs: RuntimeParameter[Dict[str, Any]] = Field(
default={},
description="Additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library.",
)
_model: Union["_LlamaCpp", None] = PrivateAttr(None)

def load(self) -> None:
"""
Loads the `gguf` model using either the path or the Hugging Face Hub repository id.
If using Hugging Face Hub, the model will be downloaded to a local directory
specified by the DISTILABEL_MODEL_DIR environment variable or to a temporary directory.
"""
super().load()

CudaDevicePlacementMixin.load(self)

try:
from llama_cpp import Llama as _LlamaCpp
except ImportError as ie:
raise ImportError(
"`llama-cpp-python` package is not installed. Please install it using"
" `pip install llama-cpp-python`."
) from ie

model_path = self.download_model()
try:
self._logger.info(f"Attempting to load model from: {self.model_file}")
self._model = _LlamaCpp(
model_path=model_path,
seed=self.seed,
n_gpu_layers=self.n_gpu_layers,
n_ctx=self.n_ctx,
n_batch=self.n_batch,
verbose=self.verbose,
embedding=True,
**self.extra_kwargs,
)
self._logger.info("Model loaded successfully")
except Exception as e:
self._logger.error(f"Failed to load model: {str(e)}")
raise

def unload(self) -> None:
"""Unloads the `gguf` model."""
CudaDevicePlacementMixin.unload(self)
super().unload()

@property
def model_name(self) -> str:
"""Returns the name of the model."""
return self.model_file

def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]:
"""Generates embeddings for the provided inputs.

Args:
inputs: a list of texts for which an embedding has to be generated.

Returns:
The generated embeddings.
"""
return self._model.embed(inputs, normalize=self.normalize_embeddings)
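
For quick reference, a minimal usage sketch of the class added above, assuming the Hub download path works as implemented (`repo_id` plus `model_file`); the repository id and file name mirror the docstring example, and the cosine-similarity computation is illustrative only, not part of the diff:

```python
from distilabel.embeddings import LlamaCppEmbeddings

# Download the GGUF file from the Hugging Face Hub and generate normalized embeddings.
embeddings = LlamaCppEmbeddings(
    repo_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF",
    model_file="all-MiniLM-L6-v2-Q2_K.gguf",
    normalize_embeddings=True,
)
embeddings.load()

vectors = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"])

# With normalized embeddings, the dot product of two vectors is their cosine similarity.
similarity = sum(a * b for a, b in zip(vectors[0], vectors[1]))
print(f"cosine similarity: {similarity:.4f}")

embeddings.unload()
```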
89 changes: 89 additions & 0 deletions src/distilabel/mixins/hub_downloader.py
@@ -0,0 +1,89 @@
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
from typing import Optional

from pydantic import BaseModel, Field


class HuggingFaceModelLoaderMixin(BaseModel):
"""
A mixin for downloading models from the Hugging Face Hub.

Attributes:
repo_id (Optional[str]): The Hugging Face Hub repository id.
model_file (str): The name of the model file to download.
hf_token (Optional[str]): Hugging Face token for accessing gated models.
"""

repo_id: Optional[str] = Field(
default=None,
description="The Hugging Face Hub repository id.",
)
model_file: str = Field(
description="The name of the model file to download.",
)
hf_token: Optional[str] = Field(
default=None,
description="Hugging Face token for accessing gated models.",
)

def download_model(self) -> str:
"""
Downloads the model from Hugging Face Hub if repo_id is provided.

Returns:
str: The path to the downloaded or local model file.

Raises:
ImportError: If huggingface_hub is not installed.
ValueError: If repo_id is not provided or invalid.
Exception: If there's an error downloading or loading the model.
"""
if self.repo_id is None:
return self.model_file

try:
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import validate_repo_id
except ImportError as ie:
raise ImportError(
"huggingface_hub package is not installed. "
"You can install it with `pip install huggingface_hub`."
) from ie

try:
validate_repo_id(self.repo_id)
except ValueError as ve:
raise ValueError(f"Invalid repo_id: {self.repo_id}") from ve

# Determine the download directory
download_dir = os.environ.get("DISTILABEL_MODEL_DIR")
if download_dir is None:
download_dir = tempfile.gettempdir()

try:
model_path = hf_hub_download(
repo_id=self.repo_id,
filename=self.model_file,
token=self.hf_token,
local_dir=download_dir,
)
return model_path
except Exception as e:
raise Exception(
f"Failed to download model from Hugging Face Hub: {str(e)}"
) from e
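
A short sketch of how a class is expected to compose this mixin, following the same pattern `LlamaCppEmbeddings` uses above; the `GGUFModelLoader` class and the directory override are illustrative assumptions, not part of the diff:

```python
import os

from distilabel.mixins.hub_downloader import HuggingFaceModelLoaderMixin


class GGUFModelLoader(HuggingFaceModelLoaderMixin):
    """Hypothetical consumer of the mixin, shown for illustration only."""

    def load(self) -> None:
        # `download_model()` returns `model_file` unchanged when `repo_id` is None;
        # otherwise it fetches the file from the Hub into DISTILABEL_MODEL_DIR
        # (or the system temporary directory) and returns the local path.
        model_path = self.download_model()
        print(f"Model available at: {model_path}")


# Optionally pin the download directory before calling `download_model()`.
os.environ["DISTILABEL_MODEL_DIR"] = "/tmp/distilabel-models"

loader = GGUFModelLoader(
    repo_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF",
    model_file="all-MiniLM-L6-v2-Q2_K.gguf",
)
loader.load()
```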
65 changes: 65 additions & 0 deletions tests/unit/conftest.py
@@ -12,7 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import atexit
import os
from typing import TYPE_CHECKING, Any, Dict, List, Union
from urllib.request import urlretrieve

import pytest

@@ -102,3 +105,65 @@ class DummyTaskOfflineBatchGeneration(DummyTask):
@pytest.fixture
def dummy_llm() -> AsyncLLM:
return DummyAsyncLLM()


@pytest.fixture(scope="session")
def local_llamacpp_model_path(tmp_path_factory):
"""
Session-scoped fixture that provides the local model path for LlamaCpp testing.

The model path can be set using the LLAMACPP_TEST_MODEL_PATH environment variable.
If not set, it downloads a small test model to a temporary directory.
The model is downloaded once per test session and cleaned up after all tests.

To use a custom model:
1. Set the LLAMACPP_TEST_MODEL_PATH environment variable to the path of your model file.
2. Ensure the model file exists at the specified path.

Example:
export LLAMACPP_TEST_MODEL_PATH="/path/to/your/model.gguf"

Args:
tmp_path_factory: Pytest fixture providing a temporary directory factory.

Returns:
str: The path to the local LlamaCpp model file.
"""
print("\nLlamaCpp model path information:")

# Check for environment variable first
env_path = os.environ.get("LLAMACPP_TEST_MODEL_PATH")
if env_path:
print(f"Using custom model path from LLAMACPP_TEST_MODEL_PATH: {env_path}")
if not os.path.exists(env_path):
raise FileNotFoundError(
f"Custom model file not found at {env_path}. Please ensure the file exists."
)
return env_path

print("LLAMACPP_TEST_MODEL_PATH not set. Using default test model.")
print(
"To use a custom model, set the LLAMACPP_TEST_MODEL_PATH environment variable to the path of your model file."
)

# If env var not set, use a small test model
model_name = "all-MiniLM-L6-v2-Q2_K.gguf"
model_url = f"https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/{model_name}"
tmp_path = tmp_path_factory.getbasetemp()
model_path = tmp_path / model_name

if not model_path.exists():
print(f"Downloading test model to {model_path}...")
urlretrieve(model_url, model_path)
print("Download complete.")

def cleanup():
if model_path.exists():
print(f"Cleaning up downloaded model at {model_path}...")
os.remove(model_path)
print("Cleanup complete.")

# Register the cleanup function to be called at exit
atexit.register(cleanup)

return str(model_path)
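
A sketch of a unit test that consumes this fixture together with the new `LlamaCppEmbeddings` class; the test name, the CPU-only setting, and the 384-dimension assertion (the output size of all-MiniLM-L6-v2) are illustrative, not taken from the PR:

```python
from distilabel.embeddings import LlamaCppEmbeddings


def test_encode_with_local_model(local_llamacpp_model_path: str) -> None:
    # The fixture returns either LLAMACPP_TEST_MODEL_PATH or the path of the
    # all-MiniLM-L6-v2-Q2_K.gguf file downloaded into the session temp directory.
    embeddings = LlamaCppEmbeddings(
        model_file=local_llamacpp_model_path,
        n_gpu_layers=0,  # keep the test on CPU by default
    )
    embeddings.load()

    results = embeddings.encode(inputs=["Hello, how are you?", "What a nice day!"])

    assert len(results) == 2
    assert all(len(vector) == 384 for vector in results)

    embeddings.unload()
```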