diff --git a/src/distilabel/models/embeddings/llamacpp.py b/src/distilabel/models/embeddings/llamacpp.py index 6596bb45ea..47f1f9720e 100644 --- a/src/distilabel/models/embeddings/llamacpp.py +++ b/src/distilabel/models/embeddings/llamacpp.py @@ -181,7 +181,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "`llama-cpp-python` package is not installed. Please install it using" - " `pip install llama-cpp-python`." + " `pip install 'distilabel[llama-cpp]'`." ) from ie if self.repo_id is not None: diff --git a/src/distilabel/models/embeddings/sentence_transformers.py b/src/distilabel/models/embeddings/sentence_transformers.py index 8c6e015027..a96b40a7ed 100644 --- a/src/distilabel/models/embeddings/sentence_transformers.py +++ b/src/distilabel/models/embeddings/sentence_transformers.py @@ -110,7 +110,7 @@ def load(self) -> None: except ImportError as e: raise ImportError( "`sentence-transformers` package is not installed. Please install it using" - " `pip install sentence-transformers`." + " `pip install 'distilabel[sentence-transformers]'`." ) from e self._model = SentenceTransformer( diff --git a/src/distilabel/models/embeddings/vllm.py b/src/distilabel/models/embeddings/vllm.py index 8ddaccd7bb..28ba10a12b 100644 --- a/src/distilabel/models/embeddings/vllm.py +++ b/src/distilabel/models/embeddings/vllm.py @@ -93,7 +93,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( - "vLLM is not installed. Please install it using `pip install vllm`." + "vLLM is not installed. Please install it using `pip install 'distilabel[vllm]'`." ) from ie self._model = _vLLM( diff --git a/src/distilabel/models/llms/anthropic.py b/src/distilabel/models/llms/anthropic.py index 0eefc092dc..ab364bad58 100644 --- a/src/distilabel/models/llms/anthropic.py +++ b/src/distilabel/models/llms/anthropic.py @@ -176,7 +176,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "Anthropic Python client is not installed. Please install it using" - " `pip install anthropic`." + " `pip install 'distilabel[anthropic]'`." ) from ie if self.api_key is None: diff --git a/src/distilabel/models/llms/azure.py b/src/distilabel/models/llms/azure.py index 964612f372..b9132991a2 100644 --- a/src/distilabel/models/llms/azure.py +++ b/src/distilabel/models/llms/azure.py @@ -131,7 +131,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "OpenAI Python client is not installed. Please install it using" - " `pip install openai`." + " `pip install 'distilabel[openai]'`." ) from ie if self.api_key is None: diff --git a/src/distilabel/models/llms/groq.py b/src/distilabel/models/llms/groq.py index 8000211936..fec511bbee 100644 --- a/src/distilabel/models/llms/groq.py +++ b/src/distilabel/models/llms/groq.py @@ -144,7 +144,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "Groq Python client is not installed. Please install it using" - ' `pip install groq` or from the extras as `pip install "distilabel[groq]"`.' + ' `pip install "distilabel[groq]"`.' ) from ie if self.api_key is None: diff --git a/src/distilabel/models/llms/huggingface/inference_endpoints.py b/src/distilabel/models/llms/huggingface/inference_endpoints.py index d4e53f1ed2..6f97c5814a 100644 --- a/src/distilabel/models/llms/huggingface/inference_endpoints.py +++ b/src/distilabel/models/llms/huggingface/inference_endpoints.py @@ -262,7 +262,7 @@ def load(self) -> None: # noqa: C901 except ImportError as ie: raise ImportError( "Hugging Face Hub Python client is not installed. Please install it using" - " `pip install huggingface-hub`." + " `pip install 'distilabel[hf-inference-endpoints]'`." ) from ie if self.api_key is None: @@ -311,7 +311,7 @@ def load(self) -> None: # noqa: C901 except ImportError as ie: raise ImportError( "Transformers Python client is not installed. Please install it using" - " `pip install transformers`." + " `pip install 'distilabel[hf-inference-endpoints]'`." ) from ie self._tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_id) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index a4f9de95ab..69f3d02a2e 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -122,7 +122,7 @@ def load(self) -> None: from transformers import pipeline except ImportError as ie: raise ImportError( - "Transformers is not installed. Please install it using `pip install transformers`." + "Transformers is not installed. Please install it using `pip install 'distilabel[hf-transformers]'`." ) from ie token = self.token.get_secret_value() if self.token is not None else self.token diff --git a/src/distilabel/models/llms/litellm.py b/src/distilabel/models/llms/litellm.py index d2471f2991..9b52ad8c71 100644 --- a/src/distilabel/models/llms/litellm.py +++ b/src/distilabel/models/llms/litellm.py @@ -104,7 +104,7 @@ def load(self) -> None: except ImportError as e: raise ImportError( "LiteLLM Python client is not installed. Please install it using" - " `pip install litellm`." + " `pip install 'distilabel[litellm]'`." ) from e self._aclient = litellm.acompletion diff --git a/src/distilabel/models/llms/mistral.py b/src/distilabel/models/llms/mistral.py index 9fe9f357da..4147edaf03 100644 --- a/src/distilabel/models/llms/mistral.py +++ b/src/distilabel/models/llms/mistral.py @@ -140,7 +140,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "MistralAI Python client is not installed. Please install it using" - " `pip install mistralai`." + " `pip install 'distilabel[mistralai]'`." ) from ie if self.api_key is None: diff --git a/src/distilabel/models/llms/ollama.py b/src/distilabel/models/llms/ollama.py index ff0779d881..a930399114 100644 --- a/src/distilabel/models/llms/ollama.py +++ b/src/distilabel/models/llms/ollama.py @@ -163,7 +163,7 @@ def load(self) -> None: except ImportError as e: raise ImportError( "Ollama Python client is not installed. Please install it using" - " `pip install ollama`." + " `pip install 'distilabel[ollama]'`." ) from e if self.tokenizer_id: diff --git a/src/distilabel/models/llms/openai.py b/src/distilabel/models/llms/openai.py index a6dd9dbb5b..91f24a3336 100644 --- a/src/distilabel/models/llms/openai.py +++ b/src/distilabel/models/llms/openai.py @@ -54,6 +54,7 @@ class OpenAILLM(AsyncLLM): api_key: the API key to authenticate the requests to the OpenAI API. Defaults to `None` which means that the value set for the environment variable `OPENAI_API_KEY` will be used, or `None` if not set. + default_headers: the default headers to use for the OpenAI API requests. max_retries: the maximum number of times to retry the request to the API before failing. Defaults to `6`. timeout: the maximum time in seconds to wait for a response from the API. Defaults @@ -186,7 +187,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "OpenAI Python client is not installed. Please install it using" - " `pip install openai`." + " `pip install 'distilabel[openai]'`." ) from ie if self.api_key is None: diff --git a/src/distilabel/models/llms/vertexai.py b/src/distilabel/models/llms/vertexai.py index 62235dd321..7c1b3e6bb4 100644 --- a/src/distilabel/models/llms/vertexai.py +++ b/src/distilabel/models/llms/vertexai.py @@ -89,7 +89,7 @@ def load(self) -> None: except ImportError as e: raise ImportError( "vertexai is not installed. Please install it using" - " `pip install google-cloud-aiplatform`." + " `pip install 'distilabel[vertexai]'`." ) from e if _is_gemini_model(self.model): diff --git a/src/distilabel/models/llms/vllm.py b/src/distilabel/models/llms/vllm.py index 401bc66d09..ceab8e3e30 100644 --- a/src/distilabel/models/llms/vllm.py +++ b/src/distilabel/models/llms/vllm.py @@ -189,7 +189,7 @@ def load(self) -> None: from vllm import LLM as _vLLM except ImportError as ie: raise ImportError( - "vLLM is not installed. Please install it using `pip install vllm`." + "vLLM is not installed. Please install it using `pip install 'distilabel[vllm]'`." ) from ie self._model = _vLLM( @@ -585,7 +585,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "OpenAI Python client is not installed. Please install it using" - " `pip install openai`." + " `pip install 'distilabel[openai]'`." ) from ie self._client = OpenAI( @@ -602,7 +602,7 @@ def load(self) -> None: except ImportError as ie: raise ImportError( "To use `ClientvLLM` you need to install `transformers`." - "Please install it using `pip install transformers`." + "Please install it using `pip install 'distilabel[hf-transformers]'`." ) from ie self._tokenizer = AutoTokenizer.from_pretrained( diff --git a/src/distilabel/pipeline/ray.py b/src/distilabel/pipeline/ray.py index 30d2e5a47e..c2e85afd86 100644 --- a/src/distilabel/pipeline/ray.py +++ b/src/distilabel/pipeline/ray.py @@ -204,7 +204,7 @@ def _init_ray(self) -> None: import ray except ImportError as ie: raise ImportError( - "ray is not installed. Please install it using `pip install ray[default]`." + "ray is not installed. Please install it using `pip install 'distilabel[ray]'`." ) from ie if self._ray_head_node_url: diff --git a/src/distilabel/steps/argilla/base.py b/src/distilabel/steps/argilla/base.py index ea491e07a5..06db05e05b 100644 --- a/src/distilabel/steps/argilla/base.py +++ b/src/distilabel/steps/argilla/base.py @@ -94,8 +94,7 @@ def model_post_init(self, __context: Any) -> None: if importlib.util.find_spec("argilla") is None: raise ImportError( - "Argilla is not installed. Please install it using `pip install argilla" - " --upgrade`." + "Argilla is not installed. Please install it using `pip install 'distilabel[argilla]'`." ) def _client_init(self) -> None: diff --git a/src/distilabel/steps/clustering/dbscan.py b/src/distilabel/steps/clustering/dbscan.py index 03ac5dcb3e..2124d787c1 100644 --- a/src/distilabel/steps/clustering/dbscan.py +++ b/src/distilabel/steps/clustering/dbscan.py @@ -124,7 +124,7 @@ def load(self) -> None: super().load() if importlib.util.find_spec("sklearn") is None: raise ImportError( - "`sklearn` package is not installed. Please install it using `pip install scikit-learn`." + "`sklearn` package is not installed. Please install it using `pip install 'distilabel[text-clustering]'`." ) from sklearn.cluster import DBSCAN as _DBSCAN diff --git a/src/distilabel/steps/clustering/umap.py b/src/distilabel/steps/clustering/umap.py index daeb37486d..9bf71c68e3 100644 --- a/src/distilabel/steps/clustering/umap.py +++ b/src/distilabel/steps/clustering/umap.py @@ -112,7 +112,7 @@ def load(self) -> None: super().load() if importlib.util.find_spec("umap") is None: raise ImportError( - "`umap` package is not installed. Please install it using `pip install umap-learn`." + "`umap` package is not installed. Please install it using `pip install 'distilabel[text-clustering]'`." ) from umap import UMAP as _UMAP diff --git a/src/distilabel/steps/embeddings/nearest_neighbour.py b/src/distilabel/steps/embeddings/nearest_neighbour.py index df5f48f8fa..a962ca3b14 100644 --- a/src/distilabel/steps/embeddings/nearest_neighbour.py +++ b/src/distilabel/steps/embeddings/nearest_neighbour.py @@ -163,7 +163,7 @@ def load(self) -> None: if importlib.util.find_spec("faiss") is None: raise ImportError( "`faiss` package is not installed. Please install it using `pip install" - " faiss-cpu` or `pip install faiss-gpu`." + " 'distilabel[faiss-cpu]' or 'distilabel[faiss-gpu]'`." ) @property diff --git a/src/distilabel/steps/filtering/_datasketch.py b/src/distilabel/steps/filtering/_datasketch.py index 5e21940499..d3d0db74ef 100644 --- a/src/distilabel/steps/filtering/_datasketch.py +++ b/src/distilabel/steps/filtering/_datasketch.py @@ -43,7 +43,7 @@ def __init__(self, config, name) -> None: except ImportError as e: raise ImportError( "`diskcache` is required for disk storage using `MinHashDedup`. " - "Please install it using `pip install diskcache`." + "Please install it using `pip install 'distilabel[minhash]'`." ) from e # Start with a clean file on each pipeline diff --git a/src/distilabel/steps/filtering/minhash.py b/src/distilabel/steps/filtering/minhash.py index e6bb8038a3..7e86d30543 100644 --- a/src/distilabel/steps/filtering/minhash.py +++ b/src/distilabel/steps/filtering/minhash.py @@ -176,7 +176,7 @@ def load(self) -> None: if not importlib.import_module("datasketch"): raise ImportError( "`datasketch` is needed to deduplicate with MinHash, but is not installed. " - "Please install it using `pip install datasketch`." + "Please install it using `pip install 'distilabel[minhash]'`." ) from datasketch import MinHash @@ -193,7 +193,7 @@ def load(self) -> None: if not importlib.import_module("nltk"): raise ImportError( "`nltk` is needed to tokenize based on words, but is not installed. " - "Please install it using `pip install nltk`. Then run `nltk.download('punkt_tab')`." + "Please install it using `pip install 'distilabel[minhash]'`. Then run `nltk.download('punkt_tab')`." ) self._tokenizer = tokenized_on_words else: diff --git a/src/distilabel/steps/reward_model.py b/src/distilabel/steps/reward_model.py index fcb5b27371..0af5d5cfdd 100644 --- a/src/distilabel/steps/reward_model.py +++ b/src/distilabel/steps/reward_model.py @@ -156,7 +156,7 @@ def load(self) -> None: from transformers import AutoModelForSequenceClassification, AutoTokenizer except ImportError as e: raise ImportError( - "`transformers` is not installed. Please install it using `pip install transformers`." + "`transformers` is not installed. Please install it using `pip install 'distilabel[hf-transformers]'`." ) from e token = self.token.get_secret_value() if self.token is not None else self.token diff --git a/src/distilabel/steps/tasks/structured_outputs/instructor.py b/src/distilabel/steps/tasks/structured_outputs/instructor.py index 93b90d9916..184c9be7b6 100644 --- a/src/distilabel/steps/tasks/structured_outputs/instructor.py +++ b/src/distilabel/steps/tasks/structured_outputs/instructor.py @@ -109,7 +109,7 @@ def prepare_instructor( """ if not importlib.util.find_spec("instructor"): raise ImportError( - "`instructor` is not installed. Please install it using `pip install instructor`." + "`instructor` is not installed. Please install it using `pip install 'distilabel[instructor]'`." ) import instructor diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index fe561d11af..b8ac03641a 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -99,7 +99,7 @@ def prepare_guided_output( """ if not importlib.util.find_spec("outlines"): raise ImportError( - "Outlines is not installed. Please install it using `pip install outlines`." + "Outlines is not installed. Please install it using `pip install 'distilabel[outlines]'`." ) json_processor, regex_processor = _get_logits_processor(framework) diff --git a/src/distilabel/steps/truncate.py b/src/distilabel/steps/truncate.py index 6e68af6630..a2240d716b 100644 --- a/src/distilabel/steps/truncate.py +++ b/src/distilabel/steps/truncate.py @@ -108,7 +108,7 @@ def load(self): if not importlib.util.find_spec("transformers"): raise ImportError( "`transformers` is needed to tokenize, but is not installed. " - "Please install it using `pip install transformers`." + "Please install it using `pip install 'distilabel[hf-transformers]'`." ) from transformers import AutoTokenizer diff --git a/tests/unit/models/llms/test_anyscale.py b/tests/unit/models/llms/test_anyscale.py index d12dbebd02..6a31d60809 100644 --- a/tests/unit/models/llms/test_anyscale.py +++ b/tests/unit/models/llms/test_anyscale.py @@ -46,6 +46,7 @@ def test_serialization(self) -> None: "model": self.model_id, "generation_kwargs": {}, "max_retries": 6, + "default_headers": None, "base_url": "https://api.endpoints.anyscale.com/v1", "timeout": 120, "structured_output": None, diff --git a/tests/unit/models/llms/test_azure.py b/tests/unit/models/llms/test_azure.py index a2122b611f..1e874c5f9b 100644 --- a/tests/unit/models/llms/test_azure.py +++ b/tests/unit/models/llms/test_azure.py @@ -71,6 +71,7 @@ def test_azure_openai_llm_env_vars(self) -> None: "api_version": "preview", "generation_kwargs": {}, "max_retries": 6, + "default_headers": None, "base_url": "https://example-resource.azure.openai.com/", "timeout": 120, "structured_output": None, @@ -95,6 +96,7 @@ def test_azure_openai_llm_env_vars(self) -> None: "generation_kwargs": {}, "max_retries": 6, "base_url": "https://example-resource.azure.openai.com/", + "default_headers": None, "timeout": 120, "structured_output": { "schema": DummyUserDetail.model_json_schema(), diff --git a/tests/unit/models/llms/test_together.py b/tests/unit/models/llms/test_together.py index 88208bf6c6..b7a045fbbb 100644 --- a/tests/unit/models/llms/test_together.py +++ b/tests/unit/models/llms/test_together.py @@ -46,6 +46,7 @@ def test_serialization(self) -> None: "model": self.model_id, "generation_kwargs": {}, "max_retries": 6, + "default_headers": None, "base_url": "https://api.together.xyz/v1", "timeout": 120, "structured_output": None,