From 329b645d3e02eebdac7f7de0d508c6f951130fa2 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 07:58:42 +0100 Subject: [PATCH 01/37] add outlines 0.1.0 support --- .../models/llms/huggingface/transformers.py | 24 +++++-- .../tasks/structured_outputs/outlines.py | 70 ++++++++++++++----- 2 files changed, 69 insertions(+), 25 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index a4f9de95ab..2fb99f1b45 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -27,7 +27,7 @@ from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR if TYPE_CHECKING: - from transformers import Pipeline + from transformers import LogitsProcessorList, Pipeline from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils import PreTrainedTokenizer @@ -111,6 +111,7 @@ class TransformersLLM(LLM, MagpieChatTemplateMixin, CudaDevicePlacementMixin): _pipeline: Optional["Pipeline"] = PrivateAttr(...) _prefix_allowed_tokens_fn: Union[Callable, None] = PrivateAttr(default=None) + _logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None) def load(self) -> None: """Loads the model and tokenizer and creates the text generation pipeline. In addition, @@ -119,7 +120,7 @@ def load(self) -> None: CudaDevicePlacementMixin.load(self) try: - from transformers import pipeline + from transformers import LogitsProcessorList, pipeline except ImportError as ie: raise ImportError( "Transformers is not installed. Please install it using `pip install transformers`." @@ -149,10 +150,20 @@ def load(self) -> None: self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token # type: ignore if self.structured_output: - self._prefix_allowed_tokens_fn = self._prepare_structured_output( - self.structured_output + from distilabel.steps.tasks.structured_outputs.outlines import ( + outlines_below_0_1_0, ) + if outlines_below_0_1_0: + self._prefix_allowed_tokens_fn = self._prepare_structured_output( + self.structured_output + ) + else: + logits_processor = self._prepare_structured_output( + self.structured_output + ) + self._logits_processor = LogitsProcessorList([logits_processor]) + super().load() def unload(self) -> None: @@ -222,7 +233,7 @@ def generate( # type: ignore """ prepared_inputs = [self.prepare_input(input=input) for input in inputs] - outputs: List[List[Dict[str, str]]] = self._pipeline( # type: ignore + outputs: List[List[Dict[str, str]]] = self._pipeline( prepared_inputs, max_new_tokens=max_new_tokens, temperature=temperature, @@ -232,7 +243,8 @@ def generate( # type: ignore do_sample=do_sample, num_return_sequences=num_generations, prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, - pad_token_id=self._pipeline.tokenizer.eos_token_id, # type: ignore + logits_processor=self._logits_processor, + pad_token_id=self._pipeline.tokenizer.eos_token_id, ) llm_output = [ [generation["generated_text"] for generation in output] diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index fe561d11af..bda692f442 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -27,6 +27,7 @@ get_args, ) +import pkg_resources from pydantic import BaseModel from distilabel.errors import DistilabelUserError @@ -36,7 +37,11 @@ from 
distilabel.steps.tasks.typing import OutlinesStructuredOutputType Frameworks = Literal["transformers", "llamacpp", "vllm"] -"""Available frameworks for the structured output configuration. """ +# Available frameworks for the structured output configuration. +_outlines_version = pkg_resources.get_distribution("outlines").version +outlines_below_0_1_0 = pkg_resources.parse_version( + _outlines_version +) < pkg_resources.parse_version("0.1.0") def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]: @@ -46,31 +51,56 @@ def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]: def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: """Helper function to return the appropriate logits processor for the given framework.""" - if framework == "transformers": - from outlines.integrations.transformers import ( - JSONPrefixAllowedTokens, - RegexPrefixAllowedTokens, + if framework not in Frameworks.__args__: + raise DistilabelUserError( + f"Invalid framework '{framework}'. Must be one of {get_args(Frameworks)}", + page="sections/how_to_guides/advanced/structured_generation/", ) - return JSONPrefixAllowedTokens, RegexPrefixAllowedTokens + if outlines_below_0_1_0: + if framework == "transformers": + from outlines.integrations.transformers import ( + JSONPrefixAllowedTokens, + RegexPrefixAllowedTokens, + ) - if framework == "llamacpp": - from outlines.integrations.llamacpp import ( - JSONLogitsProcessor, - RegexLogitsProcessor, - ) + return JSONPrefixAllowedTokens, RegexPrefixAllowedTokens - return JSONLogitsProcessor, RegexLogitsProcessor + if framework == "llamacpp": + from outlines.integrations.llamacpp import ( + JSONLogitsProcessor, + RegexLogitsProcessor, + ) + + return JSONLogitsProcessor, RegexLogitsProcessor - if framework == "vllm": - from outlines.integrations.vllm import JSONLogitsProcessor, RegexLogitsProcessor + if framework == "vllm": + from outlines.integrations.vllm import ( + JSONLogitsProcessor, + RegexLogitsProcessor, + ) + + return JSONLogitsProcessor, RegexLogitsProcessor + else: + from outlines.processors import JSONLogitsProcessor, RegexLogitsProcessor return JSONLogitsProcessor, RegexLogitsProcessor - raise DistilabelUserError( - f"Invalid framework '{framework}'. 
Must be one of {get_args(Frameworks)}", - page="sections/how_to_guides/advanced/structured_generation/", - ) + +def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callable: + if not outlines_below_0_1_0: + if framework == "llamacpp": + from outlines.models.llamacpp import LlamaCppTokenizer + + return LlamaCppTokenizer(llm) + elif framework == "transformers": + from outlines.models.transformers import TransformerTokenizer + + return TransformerTokenizer(llm.tokenizer) + elif framework == "vllm": + return llm.get_tokenizer() + else: + return llm def prepare_guided_output( @@ -104,6 +134,8 @@ def prepare_guided_output( json_processor, regex_processor = _get_logits_processor(framework) + tokenizer_or_model = _get_outlines_tokenizer_or_model(llm, framework) + format = structured_output.get("format") schema = structured_output.get("schema") @@ -120,7 +152,7 @@ def prepare_guided_output( return { "processor": json_processor( schema, - llm, + tokenizer_or_model, whitespace_pattern=structured_output.get("whitespace_pattern"), ), "schema": schema_as_dict(schema), From 9dd4be972c220917c7769c5a1bf75c767540e41e Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 07:58:50 +0100 Subject: [PATCH 02/37] update tests --- tests/unit/steps/tasks/structured_outputs/test_outlines.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/steps/tasks/structured_outputs/test_outlines.py b/tests/unit/steps/tasks/structured_outputs/test_outlines.py index e4eb2025c8..fc6f9a2f7c 100644 --- a/tests/unit/steps/tasks/structured_outputs/test_outlines.py +++ b/tests/unit/steps/tasks/structured_outputs/test_outlines.py @@ -100,9 +100,6 @@ class DummyUserTest(BaseModel): } -@pytest.mark.skip( - reason="won't work until we update our code to work with `outlines>0.1.0`" -) class TestOutlinesIntegration: @pytest.mark.parametrize( "format, schema, prompt", From 3ce1ff3b0a22824a2e3c54f228b36e153020e5f9 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 09:07:50 +0100 Subject: [PATCH 03/37] fix passing tokenizer to regex processor as well --- .../steps/tasks/structured_outputs/outlines.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index bda692f442..52707dc720 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -88,7 +88,9 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callable: - if not outlines_below_0_1_0: + if outlines_below_0_1_0: + return llm + else: if framework == "llamacpp": from outlines.models.llamacpp import LlamaCppTokenizer @@ -99,8 +101,6 @@ def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callabl return TransformerTokenizer(llm.tokenizer) elif framework == "vllm": return llm.get_tokenizer() - else: - return llm def prepare_guided_output( @@ -127,6 +127,7 @@ def prepare_guided_output( case of "json" will also include the schema as a dict, to simplify serialization and deserialization. """ + if not importlib.util.find_spec("outlines"): raise ImportError( "Outlines is not installed. Please install it using `pip install outlines`." 
@@ -159,7 +160,7 @@ def prepare_guided_output( } if format == "regex": - return {"processor": regex_processor(schema, llm)} + return {"processor": regex_processor(schema, tokenizer_or_model)} raise DistilabelUserError( f"Invalid format '{format}'. Must be either 'json' or 'regex'.", From d8d7b35ea9e39f572757d25ee9b7a555fe08d7bd Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 09:08:08 +0100 Subject: [PATCH 04/37] fix test by specifically passing None as token to transformersllm --- .../unit/steps/tasks/structured_outputs/test_outlines.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unit/steps/tasks/structured_outputs/test_outlines.py b/tests/unit/steps/tasks/structured_outputs/test_outlines.py index fc6f9a2f7c..236c0954ba 100644 --- a/tests/unit/steps/tasks/structured_outputs/test_outlines.py +++ b/tests/unit/steps/tasks/structured_outputs/test_outlines.py @@ -20,6 +20,7 @@ from distilabel.models.llms.huggingface.transformers import TransformersLLM from distilabel.steps.tasks.structured_outputs.outlines import ( model_to_schema, + outlines_below_0_1_0, ) from distilabel.steps.tasks.typing import OutlinesStructuredOutputType @@ -171,6 +172,7 @@ def test_serialization( structured_output=OutlinesStructuredOutputType( format=format, schema=schema ), + token=None, ) llm.load() assert llm.dump() == dump @@ -179,4 +181,9 @@ def test_load_from_dict(self) -> None: llm = TransformersLLM.from_dict(DUMP_JSON) assert isinstance(llm, TransformersLLM) llm.load() - assert llm._prefix_allowed_tokens_fn is not None + if outlines_below_0_1_0: + assert llm._prefix_allowed_tokens_fn is not None + assert llm._logits_processor is None + else: + assert llm._prefix_allowed_tokens_fn is None + assert llm._logits_processor is not None From 2e0b42cade251f221d548336d28876d6a73d6bb5 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 09:21:54 +0100 Subject: [PATCH 05/37] fix tests by increeasing the temperature to avoid exploding beam search logic --- tests/unit/steps/tasks/structured_outputs/test_outlines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/steps/tasks/structured_outputs/test_outlines.py b/tests/unit/steps/tasks/structured_outputs/test_outlines.py index 236c0954ba..446967a2d5 100644 --- a/tests/unit/steps/tasks/structured_outputs/test_outlines.py +++ b/tests/unit/steps/tasks/structured_outputs/test_outlines.py @@ -136,7 +136,7 @@ def test_generation( prompt = [ [{"role": "system", "content": ""}, {"role": "user", "content": prompt}] ] - result = llm.generate(prompt, max_new_tokens=30) + result = llm.generate(prompt, max_new_tokens=30, temperature=0.7) assert isinstance(result, list) assert isinstance(result[0], dict) assert "generations" in result[0] and "statistics" in result[0] From 5ee7dce349a5b9abc91b814807721c23004e5873 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 13:17:37 +0100 Subject: [PATCH 06/37] fix logit processor assignment during generation --- src/distilabel/models/llms/llamacpp.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 822e5cea77..71d29aecb4 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -194,9 +194,7 @@ def load(self) -> None: ) if self.structured_output: - self._logits_processor = self._prepare_structured_output( - self.structured_output - ) + 
self._set_logits_processor(self.structured_output) if self.use_magpie_template or self.magpie_pre_query_template: if not self.tokenizer_id: @@ -223,6 +221,19 @@ def load(self) -> None: # out of the model name, which won't be available until the `Llama` instance is created. super().load() + def _set_logits_processor( + self, structured_output: Optional[OutlinesStructuredOutputType] = None + ) -> None: + from distilabel.steps.tasks.structured_outputs.outlines import ( + outlines_below_0_1_0, + ) + + processor = self._prepare_structured_output(structured_output) + if outlines_below_0_1_0: + self._logits_processor = processor + else: + self._logits_processor = [processor] + @property def model_name(self) -> str: """Returns the model name used for the LLM.""" @@ -341,9 +352,8 @@ def generate( # type: ignore # after each generation, so subsequent calls yield nothing. This is a workaround # until is fixed in the `llama_cpp` or `outlines` libraries. if structured_output: - self._logits_processor = self._prepare_structured_output( - structured_output - ) + self._set_logits_processor(structured_output) + if self.tokenizer_id is None: completion = self._generate_chat_completion( input, From 0d26a1e4b48d0a58578e94a4beece52ca77581d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:30:08 +0000 Subject: [PATCH 07/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/distilabel/models/embeddings/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/distilabel/models/embeddings/__init__.py b/src/distilabel/models/embeddings/__init__.py index 9177298748..8d4dce0f7d 100644 --- a/src/distilabel/models/embeddings/__init__.py +++ b/src/distilabel/models/embeddings/__init__.py @@ -20,6 +20,7 @@ __all__ = [ "Embeddings", + "LlamaCppEmbeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", ] From 47e38dc0b36cfaf63e5e062fdca8946c2f2e7df1 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 17:42:17 +0100 Subject: [PATCH 08/37] add support transformers --- .../models/llms/huggingface/transformers.py | 151 ++++++++++++------ .../tasks/structured_outputs/outlines.py | 6 +- 2 files changed, 110 insertions(+), 47 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index 2fb99f1b45..faa29cb310 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -23,11 +23,15 @@ from distilabel.models.llms.utils import compute_tokens, prepare_output from distilabel.models.mixins.cuda_device_placement import CudaDevicePlacementMixin from distilabel.models.mixins.magpie import MagpieChatTemplateMixin +from distilabel.steps.tasks.structured_outputs.outlines import ( + outlines_below_0_1_0, +) from distilabel.steps.tasks.typing import OutlinesStructuredOutputType, StandardInput from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR if TYPE_CHECKING: - from transformers import LogitsProcessorList, Pipeline + from outlines.models.transformers import Transformers + from transformers import LogitsProcessor, LogitsProcessorList, Pipeline from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils import PreTrainedTokenizer @@ -109,24 +113,38 @@ class TransformersLLM(LLM, MagpieChatTemplateMixin, CudaDevicePlacementMixin): description="The structured output format to use 
across all the generations.", ) - _pipeline: Optional["Pipeline"] = PrivateAttr(...) + _pipeline: Optional[Union["Pipeline", "Transformers"]] = PrivateAttr(...) _prefix_allowed_tokens_fn: Union[Callable, None] = PrivateAttr(default=None) - _logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None) - - def load(self) -> None: - """Loads the model and tokenizer and creates the text generation pipeline. In addition, - it will configure the tokenizer chat template.""" - if self.device == "cuda": - CudaDevicePlacementMixin.load(self) + _logits_processor: Optional[Union["LogitsProcessor", "LogitsProcessorList"]] = ( + PrivateAttr(default=None) + ) - try: - from transformers import LogitsProcessorList, pipeline - except ImportError as ie: - raise ImportError( - "Transformers is not installed. Please install it using `pip install transformers`." - ) from ie + def _set_outlines_pipeline(self): + from outlines.models.transformers import Transformers + from transformers import AutoModelForCausalLM, AutoTokenizer token = self.token.get_secret_value() if self.token is not None else self.token + model = AutoModelForCausalLM.from_pretrained( + self.model, + output_attentions=True, + token=token, + revision=self.revision, + torch_dtype=self.torch_dtype, + trust_remote_code=self.trust_remote_code, + device_map=self.device_map, + **(self.model_kwargs or {}), + ).to(self.device) + tokenizer = AutoTokenizer.from_pretrained( + self.tokenizer or self.model, + token=token, + use_fast=self.use_fast, + revision=self.revision, + trust_remote_code=self.trust_remote_code, + ) + self._pipeline = Transformers(model, tokenizer) + + def _set_native_tf_pipeline(self): + from transformers import pipeline self._pipeline = pipeline( "text-generation", @@ -139,30 +157,44 @@ def load(self) -> None: use_fast=self.use_fast, device=self.device, device_map=self.device_map, - token=token, + token=self.token.get_secret_value() + if self.token is not None + else self.token, return_full_text=False, ) if self.chat_template is not None: - self._pipeline.tokenizer.chat_template = self.chat_template # type: ignore + self._pipeline.tokenizer.chat_template = self.chat_template - if self._pipeline.tokenizer.pad_token is None: # type: ignore - self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token # type: ignore + if self._pipeline.tokenizer.pad_token is None: + self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token - if self.structured_output: - from distilabel.steps.tasks.structured_outputs.outlines import ( - outlines_below_0_1_0, - ) + def load(self) -> None: + """Loads the model and tokenizer and creates the text generation pipeline. In addition, + it will configure the tokenizer chat template.""" + if self.device == "cuda": + CudaDevicePlacementMixin.load(self) + try: + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # noqa + except ImportError as ie: + raise ImportError( + "Transformers is not installed. Please install it using `pip install transformers`." 
+ ) from ie + + if self.structured_output: if outlines_below_0_1_0: + self._set_native_tf_pipeline() self._prefix_allowed_tokens_fn = self._prepare_structured_output( self.structured_output ) else: - logits_processor = self._prepare_structured_output( + self._set_outlines_pipeline() + self._logits_processor = self._prepare_structured_output( self.structured_output ) - self._logits_processor = LogitsProcessorList([logits_processor]) + else: + self._set_native_tf_pipeline() super().load() @@ -186,12 +218,9 @@ def prepare_input(self, input: "StandardInput") -> str: Returns: The prompt to send to the LLM. """ - if self._pipeline.tokenizer.chat_template is None: # type: ignore - return input[0]["content"] - prompt: str = ( - self._pipeline.tokenizer.apply_chat_template( # type: ignore - input, # type: ignore + self._pipeline.tokenizer.tokenizer.apply_chat_template( + input, tokenize=False, add_generation_prompt=True, ) @@ -201,10 +230,10 @@ def prepare_input(self, input: "StandardInput") -> str: return super().apply_magpie_pre_query_template(prompt, input) @validate_call - def generate( # type: ignore + def generate( self, inputs: List[StandardInput], - num_generations: int = 1, + num_generations: int = 2, max_new_tokens: int = 128, temperature: float = 0.1, repetition_penalty: float = 1.1, @@ -231,21 +260,51 @@ def generate( # type: ignore Returns: A list of lists of strings containing the generated responses for each input. """ + prepared_inputs = [self.prepare_input(input=input) for input in inputs] - outputs: List[List[Dict[str, str]]] = self._pipeline( - prepared_inputs, - max_new_tokens=max_new_tokens, - temperature=temperature, - repetition_penalty=repetition_penalty, - top_p=top_p, - top_k=top_k, - do_sample=do_sample, - num_return_sequences=num_generations, - prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, - logits_processor=self._logits_processor, - pad_token_id=self._pipeline.tokenizer.eos_token_id, - ) + if self.structured_output and not outlines_below_0_1_0: + from outlines.models.transformers import ( + GenerationParameters, + SamplingParameters, + ) + + outputs = [ + [[] for _ in range(num_generations)] + for _ in range(len(prepared_inputs)) + ] + for idx_generation in range(num_generations): + generations = self._pipeline.generate( + prepared_inputs, + generation_parameters=GenerationParameters( + max_tokens=max_new_tokens, + stop_at=None, + seed=None, + ), + logits_processor=self._logits_processor, + sampling_parameters=SamplingParameters( + sampler="multinomial", + top_p=top_p, + top_k=top_k, + temperature=temperature, + ), + ) + for idx_sample, generation in enumerate(generations): + outputs[idx_sample][idx_generation] = {"generated_text": generation} + else: + outputs: List[List[Dict[str, str]]] = self._pipeline( + prepared_inputs, + max_new_tokens=max_new_tokens, + temperature=temperature, + repetition_penalty=repetition_penalty, + top_p=top_p, + top_k=top_k, + do_sample=do_sample, + num_return_sequences=num_generations, + prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, + pad_token_id=self._pipeline.tokenizer.eos_token_id, + ) + llm_output = [ [generation["generated_text"] for generation in output] for output in outputs @@ -295,7 +354,7 @@ def get_last_hidden_states( last_hidden_states = model(**input_ids)["last_hidden_state"] return [ - seq_last_hidden_state[attention_mask.bool(), :].detach().cpu().numpy() + seq_last_hidden_state[attention_mask.bool(), :].detach().cpu().numpy() # type: ignore for seq_last_hidden_state, attention_mask in zip( 
last_hidden_states, input_ids["attention_mask"], # type: ignore diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 52707dc720..1772761e4c 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -96,8 +96,12 @@ def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callabl return LlamaCppTokenizer(llm) elif framework == "transformers": - from outlines.models.transformers import TransformerTokenizer + from outlines.models.transformers import Transformers, TransformerTokenizer + if isinstance(llm, Transformers): + return llm.tokenizer + else: + return TransformerTokenizer(llm.tokenizer) return TransformerTokenizer(llm.tokenizer) elif framework == "vllm": return llm.get_tokenizer() From 66ac934bcd449b26dcbabff18f32f3656cd61733 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:42:33 +0000 Subject: [PATCH 09/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/distilabel/models/embeddings/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/distilabel/models/embeddings/__init__.py b/src/distilabel/models/embeddings/__init__.py index 8d4dce0f7d..a37b3d0985 100644 --- a/src/distilabel/models/embeddings/__init__.py +++ b/src/distilabel/models/embeddings/__init__.py @@ -21,6 +21,7 @@ __all__ = [ "Embeddings", "LlamaCppEmbeddings", + "LlamaCppEmbeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", ] From 61c353864692e92d2332bc1107905ea94903c201 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 17:49:21 +0100 Subject: [PATCH 10/37] remove duplicate import --- src/distilabel/models/embeddings/__init__.py | 1 - vllm | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) create mode 160000 vllm diff --git a/src/distilabel/models/embeddings/__init__.py b/src/distilabel/models/embeddings/__init__.py index a37b3d0985..8d4dce0f7d 100644 --- a/src/distilabel/models/embeddings/__init__.py +++ b/src/distilabel/models/embeddings/__init__.py @@ -21,7 +21,6 @@ __all__ = [ "Embeddings", "LlamaCppEmbeddings", - "LlamaCppEmbeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", ] diff --git a/vllm b/vllm new file mode 160000 index 0000000000..65097ca0af --- /dev/null +++ b/vllm @@ -0,0 +1 @@ +Subproject commit 65097ca0af5c1d7caa3d9d8224fa8b4790a5f7bc From 0738b27093cd0d2cffdc48064f61d45e3c76ddcb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:50:27 +0000 Subject: [PATCH 11/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/distilabel/models/embeddings/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/distilabel/models/embeddings/__init__.py b/src/distilabel/models/embeddings/__init__.py index 8d4dce0f7d..a37b3d0985 100644 --- a/src/distilabel/models/embeddings/__init__.py +++ b/src/distilabel/models/embeddings/__init__.py @@ -21,6 +21,7 @@ __all__ = [ "Embeddings", "LlamaCppEmbeddings", + "LlamaCppEmbeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", ] From 8e6613b825a31e57410e395a4d1dc8b0b4ec689d Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 17:50:38 +0100 Subject: [PATCH 12/37] remove duplicate --- src/distilabel/models/embeddings/__init__.py | 1 - 1 
file changed, 1 deletion(-) diff --git a/src/distilabel/models/embeddings/__init__.py b/src/distilabel/models/embeddings/__init__.py index 56e994390a..65eb00c469 100644 --- a/src/distilabel/models/embeddings/__init__.py +++ b/src/distilabel/models/embeddings/__init__.py @@ -22,7 +22,6 @@ __all__ = [ "Embeddings", "LlamaCppEmbeddings", - "LlamaCppEmbeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", ] From cb4c2ce42b25bdded64ab01340da160109670419 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 17:51:00 +0100 Subject: [PATCH 13/37] remove duplicate import --- src/distilabel/models/embeddings/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/distilabel/models/embeddings/__init__.py b/src/distilabel/models/embeddings/__init__.py index 56e994390a..65eb00c469 100644 --- a/src/distilabel/models/embeddings/__init__.py +++ b/src/distilabel/models/embeddings/__init__.py @@ -22,7 +22,6 @@ __all__ = [ "Embeddings", "LlamaCppEmbeddings", - "LlamaCppEmbeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", ] From 7f20d9fcb3dd77db4a57c1b9d4df8f8504cab3e9 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 18:36:02 +0100 Subject: [PATCH 14/37] return content when nog chat template is present --- .../models/llms/huggingface/transformers.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index faa29cb310..16da2a1b9f 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -143,7 +143,7 @@ def _set_outlines_pipeline(self): ) self._pipeline = Transformers(model, tokenizer) - def _set_native_tf_pipeline(self): + def _set_transformers_pipeline(self): from transformers import pipeline self._pipeline = pipeline( @@ -182,9 +182,9 @@ def load(self) -> None: "Transformers is not installed. Please install it using `pip install transformers`." ) from ie - if self.structured_output: + if self.structured_output is not None: if outlines_below_0_1_0: - self._set_native_tf_pipeline() + self._set_transformers_pipeline() self._prefix_allowed_tokens_fn = self._prepare_structured_output( self.structured_output ) @@ -194,7 +194,7 @@ def load(self) -> None: self.structured_output ) else: - self._set_native_tf_pipeline() + self._set_transformers_pipeline() super().load() @@ -218,8 +218,16 @@ def prepare_input(self, input: "StandardInput") -> str: Returns: The prompt to send to the LLM. 
""" + if self._pipeline.tokenizer.chat_template is None: # type: ignore + return input[0]["content"] + + if self.structured_output and not outlines_below_0_1_0: + tokenizer = self._pipeline.tokenizer.tokenizer + else: + tokenizer = self._pipeline.tokenizer + prompt: str = ( - self._pipeline.tokenizer.tokenizer.apply_chat_template( + tokenizer.apply_chat_template( input, tokenize=False, add_generation_prompt=True, @@ -233,7 +241,7 @@ def prepare_input(self, input: "StandardInput") -> str: def generate( self, inputs: List[StandardInput], - num_generations: int = 2, + num_generations: int = 1, max_new_tokens: int = 128, temperature: float = 0.1, repetition_penalty: float = 1.1, @@ -263,7 +271,7 @@ def generate( prepared_inputs = [self.prepare_input(input=input) for input in inputs] - if self.structured_output and not outlines_below_0_1_0: + if self.structured_output is not None and not outlines_below_0_1_0: from outlines.models.transformers import ( GenerationParameters, SamplingParameters, From 61aa597fc251da43ec56f24dfe7f4b4f621691cf Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 18:39:52 +0100 Subject: [PATCH 15/37] refactor clean code --- src/distilabel/models/llms/huggingface/transformers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index 16da2a1b9f..a3bc6bd456 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -142,6 +142,8 @@ def _set_outlines_pipeline(self): trust_remote_code=self.trust_remote_code, ) self._pipeline = Transformers(model, tokenizer) + self._pipeline.tokenizer.chat_template = tokenizer.chat_template + self._pipeline.tokenizer.apply_chat_template = tokenizer.apply_chat_template def _set_transformers_pipeline(self): from transformers import pipeline @@ -221,13 +223,8 @@ def prepare_input(self, input: "StandardInput") -> str: if self._pipeline.tokenizer.chat_template is None: # type: ignore return input[0]["content"] - if self.structured_output and not outlines_below_0_1_0: - tokenizer = self._pipeline.tokenizer.tokenizer - else: - tokenizer = self._pipeline.tokenizer - prompt: str = ( - tokenizer.apply_chat_template( + self._pipeline.tokenizer.apply_chat_template( input, tokenize=False, add_generation_prompt=True, From b994f064eeba40bfa00b4a7dc540c44cafa1554a Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 18:41:36 +0100 Subject: [PATCH 16/37] chore refactor --- .../models/llms/huggingface/transformers.py | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index a3bc6bd456..b2a3c8a706 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -119,6 +119,35 @@ class TransformersLLM(LLM, MagpieChatTemplateMixin, CudaDevicePlacementMixin): PrivateAttr(default=None) ) + def load(self) -> None: + """Loads the model and tokenizer and creates the text generation pipeline. In addition, + it will configure the tokenizer chat template.""" + if self.device == "cuda": + CudaDevicePlacementMixin.load(self) + + try: + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # noqa + except ImportError as ie: + raise ImportError( + "Transformers is not installed. 
Please install it using `pip install transformers`." + ) from ie + + if self.structured_output is not None: + if outlines_below_0_1_0: + self._set_transformers_pipeline() + self._prefix_allowed_tokens_fn = self._prepare_structured_output( + self.structured_output + ) + else: + self._set_outlines_pipeline() + self._logits_processor = self._prepare_structured_output( + self.structured_output + ) + else: + self._set_transformers_pipeline() + + super().load() + def _set_outlines_pipeline(self): from outlines.models.transformers import Transformers from transformers import AutoModelForCausalLM, AutoTokenizer @@ -171,35 +200,6 @@ def _set_transformers_pipeline(self): if self._pipeline.tokenizer.pad_token is None: self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token - def load(self) -> None: - """Loads the model and tokenizer and creates the text generation pipeline. In addition, - it will configure the tokenizer chat template.""" - if self.device == "cuda": - CudaDevicePlacementMixin.load(self) - - try: - from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # noqa - except ImportError as ie: - raise ImportError( - "Transformers is not installed. Please install it using `pip install transformers`." - ) from ie - - if self.structured_output is not None: - if outlines_below_0_1_0: - self._set_transformers_pipeline() - self._prefix_allowed_tokens_fn = self._prepare_structured_output( - self.structured_output - ) - else: - self._set_outlines_pipeline() - self._logits_processor = self._prepare_structured_output( - self.structured_output - ) - else: - self._set_transformers_pipeline() - - super().load() - def unload(self) -> None: """Unloads the `vLLM` model.""" CudaDevicePlacementMixin.unload(self) From a47963d5460e69f292eb4b5d0060ff30c032afb0 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 18:45:06 +0100 Subject: [PATCH 17/37] refactor logic if else statement --- .../models/llms/huggingface/transformers.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index b2a3c8a706..d35a531c1b 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -268,7 +268,22 @@ def generate( prepared_inputs = [self.prepare_input(input=input) for input in inputs] - if self.structured_output is not None and not outlines_below_0_1_0: + if self.structured_output is None or ( + self.structured_output and outlines_below_0_1_0 + ): + outputs: List[List[Dict[str, str]]] = self._pipeline( + prepared_inputs, + max_new_tokens=max_new_tokens, + temperature=temperature, + repetition_penalty=repetition_penalty, + top_p=top_p, + top_k=top_k, + do_sample=do_sample, + num_return_sequences=num_generations, + prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, + pad_token_id=self._pipeline.tokenizer.eos_token_id, + ) + else: from outlines.models.transformers import ( GenerationParameters, SamplingParameters, @@ -296,19 +311,6 @@ def generate( ) for idx_sample, generation in enumerate(generations): outputs[idx_sample][idx_generation] = {"generated_text": generation} - else: - outputs: List[List[Dict[str, str]]] = self._pipeline( - prepared_inputs, - max_new_tokens=max_new_tokens, - temperature=temperature, - repetition_penalty=repetition_penalty, - top_p=top_p, - top_k=top_k, - do_sample=do_sample, - num_return_sequences=num_generations, - 
prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, - pad_token_id=self._pipeline.tokenizer.eos_token_id, - ) llm_output = [ [generation["generated_text"] for generation in output] From a0f8acd0e87dc08de66d70a27b6878bbd1965931 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 18:58:43 +0100 Subject: [PATCH 18/37] fix import when outlines is not present --- .../steps/tasks/structured_outputs/outlines.py | 12 +++++++----- vllm | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 1772761e4c..7a48f226e4 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -37,11 +37,13 @@ from distilabel.steps.tasks.typing import OutlinesStructuredOutputType Frameworks = Literal["transformers", "llamacpp", "vllm"] -# Available frameworks for the structured output configuration. -_outlines_version = pkg_resources.get_distribution("outlines").version -outlines_below_0_1_0 = pkg_resources.parse_version( - _outlines_version -) < pkg_resources.parse_version("0.1.0") + +if importlib.util.find_spec("outlines"): + outlines_below_0_1_0 = pkg_resources.parse_version( + pkg_resources.get_distribution("outlines").version + ) < pkg_resources.parse_version("0.1.0") +else: + outlines_below_0_1_0 = True def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]: diff --git a/vllm b/vllm index 65097ca0af..9a228348d2 160000 --- a/vllm +++ b/vllm @@ -1 +1 @@ -Subproject commit 65097ca0af5c1d7caa3d9d8224fa8b4790a5f7bc +Subproject commit 9a228348d2f9a2c85dfc67d6b9fe883bf10a4680 From b41d6f07e8fea9f25b3d9f87999a9050b629bad3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 19:46:34 +0100 Subject: [PATCH 19/37] chore pin transformers version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b203f7edf5..8b1d950c33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ argilla = ["argilla >= 2.0.0", "ipython"] cohere = ["cohere >= 5.2.0"] groq = ["groq >= 0.4.1"] hf-inference-endpoints = ["huggingface_hub >= 0.22.0"] -hf-transformers = ["transformers >= 4.34.1", "torch >= 2.0.0"] +hf-transformers = ["transformers >= 4.34.1, < 4.45.0", "torch >= 2.0.0"] instructor = ["instructor >= 1.2.3"] litellm = ["litellm >= 1.30.0"] llama-cpp = ["llama-cpp-python >= 0.2.0"] From d2fdd4c6eecb6861480e723ea13a145a35187bb2 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 19:47:17 +0100 Subject: [PATCH 20/37] chore add context w.r.t. 
logit processor --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8b1d950c33..1a4e48cb30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ argilla = ["argilla >= 2.0.0", "ipython"] cohere = ["cohere >= 5.2.0"] groq = ["groq >= 0.4.1"] hf-inference-endpoints = ["huggingface_hub >= 0.22.0"] +# logit processor breaks in transformers 4.45.0 hf-transformers = ["transformers >= 4.34.1, < 4.45.0", "torch >= 2.0.0"] instructor = ["instructor >= 1.2.3"] litellm = ["litellm >= 1.30.0"] From 2b8f634ee9e80c5742e77e175c2db4d2b34203f6 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 19:48:59 +0100 Subject: [PATCH 21/37] chore bump version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1a4e48cb30..2757a461e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,8 +78,8 @@ argilla = ["argilla >= 2.0.0", "ipython"] cohere = ["cohere >= 5.2.0"] groq = ["groq >= 0.4.1"] hf-inference-endpoints = ["huggingface_hub >= 0.22.0"] -# logit processor breaks in transformers 4.45.0 -hf-transformers = ["transformers >= 4.34.1, < 4.45.0", "torch >= 2.0.0"] +# logit processor breaks in transformers 4.47.0 +hf-transformers = ["transformers >= 4.34.1, < 4.47.0", "torch >= 2.0.0"] instructor = ["instructor >= 1.2.3"] litellm = ["litellm >= 1.30.0"] llama-cpp = ["llama-cpp-python >= 0.2.0"] From ed5f00f17012847f34aa5332bea0c317f79ec43a Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Thu, 9 Jan 2025 19:58:21 +0100 Subject: [PATCH 22/37] add simplification of transformers implementation --- .../models/llms/huggingface/transformers.py | 143 +++++------------- .../tasks/structured_outputs/outlines.py | 6 +- 2 files changed, 36 insertions(+), 113 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index d35a531c1b..ec178796ee 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -23,15 +23,12 @@ from distilabel.models.llms.utils import compute_tokens, prepare_output from distilabel.models.mixins.cuda_device_placement import CudaDevicePlacementMixin from distilabel.models.mixins.magpie import MagpieChatTemplateMixin -from distilabel.steps.tasks.structured_outputs.outlines import ( - outlines_below_0_1_0, -) +from distilabel.steps.tasks.structured_outputs.outlines import outlines_below_0_1_0 from distilabel.steps.tasks.typing import OutlinesStructuredOutputType, StandardInput from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR if TYPE_CHECKING: - from outlines.models.transformers import Transformers - from transformers import LogitsProcessor, LogitsProcessorList, Pipeline + from transformers import Pipeline from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils import PreTrainedTokenizer @@ -113,11 +110,9 @@ class TransformersLLM(LLM, MagpieChatTemplateMixin, CudaDevicePlacementMixin): description="The structured output format to use across all the generations.", ) - _pipeline: Optional[Union["Pipeline", "Transformers"]] = PrivateAttr(...) + _pipeline: Optional["Pipeline"] = PrivateAttr(...) 
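    # Reviewer annotation, not part of the patch: `load()` populates exactly one of the two
    # structured-output hooks below: `_prefix_allowed_tokens_fn` when the installed outlines is
    # < 0.1.0, or `_logits_processor` when it is >= 0.1.0. Both are forwarded to the
    # `transformers` pipeline call, and the unused one stays `None`.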
_prefix_allowed_tokens_fn: Union[Callable, None] = PrivateAttr(default=None) - _logits_processor: Optional[Union["LogitsProcessor", "LogitsProcessorList"]] = ( - PrivateAttr(default=None) - ) + _logits_processor: Union[Callable, None] = PrivateAttr(default=None) def load(self) -> None: """Loads the model and tokenizer and creates the text generation pipeline. In addition, @@ -126,56 +121,13 @@ def load(self) -> None: CudaDevicePlacementMixin.load(self) try: - from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # noqa + from transformers import pipeline except ImportError as ie: raise ImportError( "Transformers is not installed. Please install it using `pip install transformers`." ) from ie - if self.structured_output is not None: - if outlines_below_0_1_0: - self._set_transformers_pipeline() - self._prefix_allowed_tokens_fn = self._prepare_structured_output( - self.structured_output - ) - else: - self._set_outlines_pipeline() - self._logits_processor = self._prepare_structured_output( - self.structured_output - ) - else: - self._set_transformers_pipeline() - - super().load() - - def _set_outlines_pipeline(self): - from outlines.models.transformers import Transformers - from transformers import AutoModelForCausalLM, AutoTokenizer - token = self.token.get_secret_value() if self.token is not None else self.token - model = AutoModelForCausalLM.from_pretrained( - self.model, - output_attentions=True, - token=token, - revision=self.revision, - torch_dtype=self.torch_dtype, - trust_remote_code=self.trust_remote_code, - device_map=self.device_map, - **(self.model_kwargs or {}), - ).to(self.device) - tokenizer = AutoTokenizer.from_pretrained( - self.tokenizer or self.model, - token=token, - use_fast=self.use_fast, - revision=self.revision, - trust_remote_code=self.trust_remote_code, - ) - self._pipeline = Transformers(model, tokenizer) - self._pipeline.tokenizer.chat_template = tokenizer.chat_template - self._pipeline.tokenizer.apply_chat_template = tokenizer.apply_chat_template - - def _set_transformers_pipeline(self): - from transformers import pipeline self._pipeline = pipeline( "text-generation", @@ -188,17 +140,24 @@ def _set_transformers_pipeline(self): use_fast=self.use_fast, device=self.device, device_map=self.device_map, - token=self.token.get_secret_value() - if self.token is not None - else self.token, + token=token, return_full_text=False, ) if self.chat_template is not None: - self._pipeline.tokenizer.chat_template = self.chat_template + self._pipeline.tokenizer.chat_template = self.chat_template # type: ignore + + if self._pipeline.tokenizer.pad_token is None: # type: ignore + self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token # type: ignore + + if self.structured_output: + processor = self._prepare_structured_output(self.structured_output) + if outlines_below_0_1_0: + self._prefix_allowed_tokens_fn = processor + else: + self._logits_processor = [processor] - if self._pipeline.tokenizer.pad_token is None: - self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token + super().load() def unload(self) -> None: """Unloads the `vLLM` model.""" @@ -224,8 +183,8 @@ def prepare_input(self, input: "StandardInput") -> str: return input[0]["content"] prompt: str = ( - self._pipeline.tokenizer.apply_chat_template( - input, + self._pipeline.tokenizer.apply_chat_template( # type: ignore + input, # type: ignore tokenize=False, add_generation_prompt=True, ) @@ -235,7 +194,7 @@ def prepare_input(self, input: "StandardInput") -> str: return 
super().apply_magpie_pre_query_template(prompt, input) @validate_call - def generate( + def generate( # type: ignore self, inputs: List[StandardInput], num_generations: int = 1, @@ -265,53 +224,21 @@ def generate( Returns: A list of lists of strings containing the generated responses for each input. """ - prepared_inputs = [self.prepare_input(input=input) for input in inputs] - if self.structured_output is None or ( - self.structured_output and outlines_below_0_1_0 - ): - outputs: List[List[Dict[str, str]]] = self._pipeline( - prepared_inputs, - max_new_tokens=max_new_tokens, - temperature=temperature, - repetition_penalty=repetition_penalty, - top_p=top_p, - top_k=top_k, - do_sample=do_sample, - num_return_sequences=num_generations, - prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, - pad_token_id=self._pipeline.tokenizer.eos_token_id, - ) - else: - from outlines.models.transformers import ( - GenerationParameters, - SamplingParameters, - ) - - outputs = [ - [[] for _ in range(num_generations)] - for _ in range(len(prepared_inputs)) - ] - for idx_generation in range(num_generations): - generations = self._pipeline.generate( - prepared_inputs, - generation_parameters=GenerationParameters( - max_tokens=max_new_tokens, - stop_at=None, - seed=None, - ), - logits_processor=self._logits_processor, - sampling_parameters=SamplingParameters( - sampler="multinomial", - top_p=top_p, - top_k=top_k, - temperature=temperature, - ), - ) - for idx_sample, generation in enumerate(generations): - outputs[idx_sample][idx_generation] = {"generated_text": generation} - + outputs: List[List[Dict[str, str]]] = self._pipeline( # type: ignore + prepared_inputs, + max_new_tokens=max_new_tokens, + temperature=temperature, + repetition_penalty=repetition_penalty, + top_p=top_p, + top_k=top_k, + do_sample=do_sample, + num_return_sequences=num_generations, + prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn, + pad_token_id=self._pipeline.tokenizer.eos_token_id, + logits_processor=self._logits_processor, + ) llm_output = [ [generation["generated_text"] for generation in output] for output in outputs @@ -361,7 +288,7 @@ def get_last_hidden_states( last_hidden_states = model(**input_ids)["last_hidden_state"] return [ - seq_last_hidden_state[attention_mask.bool(), :].detach().cpu().numpy() # type: ignore + seq_last_hidden_state[attention_mask.bool(), :].detach().cpu().numpy() for seq_last_hidden_state, attention_mask in zip( last_hidden_states, input_ids["attention_mask"], # type: ignore diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 7a48f226e4..b0466766b2 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -98,12 +98,8 @@ def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callabl return LlamaCppTokenizer(llm) elif framework == "transformers": - from outlines.models.transformers import Transformers, TransformerTokenizer + from outlines.models.transformers import TransformerTokenizer - if isinstance(llm, Transformers): - return llm.tokenizer - else: - return TransformerTokenizer(llm.tokenizer) return TransformerTokenizer(llm.tokenizer) elif framework == "vllm": return llm.get_tokenizer() From 473de031a98bf66db34e24b16fea0bedb55ff993 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 10:12:01 +0100 Subject: [PATCH 23/37] Update .gitignore to exclude .DS_Store files and remove vllm subproject; 
delete unnecessary .DS_Store files from unit tests --- .gitignore | 1 + tests/unit/.DS_Store | Bin 6148 -> 0 bytes tests/unit/pipeline/.DS_Store | Bin 6148 -> 0 bytes vllm | 1 - 4 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 tests/unit/.DS_Store delete mode 100644 tests/unit/pipeline/.DS_Store delete mode 160000 vllm diff --git a/.gitignore b/.gitignore index d8337200af..1aab313fb9 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,4 @@ venv.bak/ # Other *.log *.swp +.DS_Store diff --git a/tests/unit/.DS_Store b/tests/unit/.DS_Store deleted file mode 100644 index 213c7078d0b2be0d3b5775898e4f42658e5c59de..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKy-vh147TAwm2SC_F=Auv%o0vzWa27-YfU;uZvNHsSM9Sj5m!N8sYIUf?5V0Ii0_2_`o5&$UAXcgE}OGr*~%#NcW zJP@{2prx{x7;NbnPac;YM?*^|_Tqzm<&WZpb#=_2G@Ljah7JaTfhhxr)?LW`e~DkF zSmaMrq8AJV1OJQxo;7W=#7Ftv`sMTFu1#pyXd)6fiUEN=c?6&%=g5gV&7Z_ZTy`7{ UWfmFNbYMILl#o!tz%MZH4qEmw6aWAK diff --git a/tests/unit/pipeline/.DS_Store b/tests/unit/pipeline/.DS_Store deleted file mode 100644 index 37a1397976ae5b949966572accd4de98bb796f20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOHKnZ47J-XM(UXWziS@}?$DjOid0{^xb5eKWY#4no5DXkLFt*`L z>i;QzrNK-7VMvUEfneaDF~F00SugNXURyuBp48fec7Y}$enk`rbS=d|EyWnfK60d! cHlIYte0D5`vWnO>92f@yB_z6F;1?Kp24W;LTmS$7 diff --git a/vllm b/vllm deleted file mode 160000 index 9a228348d2..0000000000 --- a/vllm +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9a228348d2f9a2c85dfc67d6b9fe883bf10a4680 From 995e4d41b3cf8c050d0a7c3d119f0c22c517a59f Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 10:23:24 +0100 Subject: [PATCH 24/37] Refactor outlines version check and logits processor handling - Introduced a helper function to check if the 'outlines' package is installed and its version. - Updated the logic in `_get_logits_processor` to use the new version check, simplifying the processor selection based on the outlines version. - Adjusted the handling of tokenizers in `_get_tokenizer_from_model` to streamline the integration with different frameworks. - Modified `prepare_guided_output` to differentiate processing based on the outlines version, ensuring compatibility with both pre-0.1.0 and post-0.1.0 versions of the outlines package. --- .../tasks/structured_outputs/outlines.py | 162 ++++++++++-------- 1 file changed, 95 insertions(+), 67 deletions(-) diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index b0466766b2..9575b82ba1 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -38,12 +38,19 @@ Frameworks = Literal["transformers", "llamacpp", "vllm"] -if importlib.util.find_spec("outlines"): - outlines_below_0_1_0 = pkg_resources.parse_version( - pkg_resources.get_distribution("outlines").version - ) < pkg_resources.parse_version("0.1.0") -else: - outlines_below_0_1_0 = True + +def _outlines_version_below_0_1_0() -> bool: + """Helper function to check outlines availability and version. + + Returns: + bool: True if outlines is not installed or version is below 0.1.0 + """ + if not importlib.util.find_spec("outlines"): + raise ImportError( + "Outlines is not installed. Please install it using `pip install outlines`." 
+ ) + version = pkg_resources.get_distribution("outlines").version + return pkg_resources.parse_version(version) < pkg_resources.parse_version("0.1.0") def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]: @@ -52,57 +59,66 @@ def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]: def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: - """Helper function to return the appropriate logits processor for the given framework.""" - if framework not in Frameworks.__args__: + """Helper function to return the appropriate logits processors for the given framework.""" + if _outlines_version_below_0_1_0(): + processors = { + "transformers": ( + "outlines.integrations.transformers", + "JSONPrefixAllowedTokens", + "RegexPrefixAllowedTokens", + ), + "llamacpp": ( + "outlines.integrations.llamacpp", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), + "vllm": ( + "outlines.integrations.vllm", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), + } + else: + processors = { + "transformers": ( + "outlines.processors", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), + "llamacpp": ( + "outlines.processors", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), + "vllm": ( + "outlines.processors", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), + } + + if framework not in processors: raise DistilabelUserError( f"Invalid framework '{framework}'. Must be one of {get_args(Frameworks)}", page="sections/how_to_guides/advanced/structured_generation/", ) - if outlines_below_0_1_0: - if framework == "transformers": - from outlines.integrations.transformers import ( - JSONPrefixAllowedTokens, - RegexPrefixAllowedTokens, - ) + module_path, json_cls, regex_cls = processors[framework] + module = importlib.import_module(module_path) + return getattr(module, json_cls), getattr(module, regex_cls) - return JSONPrefixAllowedTokens, RegexPrefixAllowedTokens - if framework == "llamacpp": - from outlines.integrations.llamacpp import ( - JSONLogitsProcessor, - RegexLogitsProcessor, - ) +def _get_tokenizer_from_model(llm: Any, framework: Frameworks) -> Callable: + if framework == "llamacpp": + from outlines.models.llamacpp import LlamaCppTokenizer - return JSONLogitsProcessor, RegexLogitsProcessor + return LlamaCppTokenizer(llm) + elif framework == "transformers": + from outlines.models.transformers import TransformerTokenizer - if framework == "vllm": - from outlines.integrations.vllm import ( - JSONLogitsProcessor, - RegexLogitsProcessor, - ) - - return JSONLogitsProcessor, RegexLogitsProcessor - else: - from outlines.processors import JSONLogitsProcessor, RegexLogitsProcessor - - return JSONLogitsProcessor, RegexLogitsProcessor - - -def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callable: - if outlines_below_0_1_0: - return llm - else: - if framework == "llamacpp": - from outlines.models.llamacpp import LlamaCppTokenizer - - return LlamaCppTokenizer(llm) - elif framework == "transformers": - from outlines.models.transformers import TransformerTokenizer - - return TransformerTokenizer(llm.tokenizer) - elif framework == "vllm": - return llm.get_tokenizer() + return TransformerTokenizer(llm.tokenizer) + elif framework == "vllm": + return llm.get_tokenizer() def prepare_guided_output( @@ -130,15 +146,8 @@ def prepare_guided_output( and deserialization. """ - if not importlib.util.find_spec("outlines"): - raise ImportError( - "Outlines is not installed. Please install it using `pip install outlines`." 
- ) - json_processor, regex_processor = _get_logits_processor(framework) - tokenizer_or_model = _get_outlines_tokenizer_or_model(llm, framework) - format = structured_output.get("format") schema = structured_output.get("schema") @@ -151,18 +160,37 @@ def prepare_guided_output( elif isinstance(schema, str): format = "regex" - if format == "json": - return { - "processor": json_processor( - schema, - tokenizer_or_model, - whitespace_pattern=structured_output.get("whitespace_pattern"), - ), - "schema": schema_as_dict(schema), - } - - if format == "regex": - return {"processor": regex_processor(schema, tokenizer_or_model)} + if _outlines_version_below_0_1_0(): + # use the model/llm, processor is NOT a list + if format == "json": + return { + "processor": json_processor( + schema, + llm, + whitespace_pattern=structured_output.get("whitespace_pattern"), + ), + "schema": schema_as_dict(schema), + } + + if format == "regex": + return {"processor": regex_processor(schema, llm)} + else: + # use tokenizer, processor is a list + tokenizer = _get_tokenizer_from_model(llm, framework) + if format == "json": + return { + "processor": [ + json_processor( + schema, + tokenizer, + whitespace_pattern=structured_output.get("whitespace_pattern"), + ) + ], + "schema": schema_as_dict(schema), + } + + if format == "regex": + return {"processor": [regex_processor(schema, tokenizer)]} raise DistilabelUserError( f"Invalid format '{format}'. Must be either 'json' or 'regex'.", From 59604413fc16d4d8992752571dbba2e7ce5493bd Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 10:23:33 +0100 Subject: [PATCH 25/37] Refactor logits processor handling in LlamaCppLLM - Replaced the `_set_logits_processor` method with direct assignment of `_logits_processor` using `_prepare_structured_output`. - Simplified the logic for setting the logits processor in both the `load` and generation methods, enhancing code clarity and maintainability. --- src/distilabel/models/llms/llamacpp.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 71d29aecb4..822e5cea77 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -194,7 +194,9 @@ def load(self) -> None: ) if self.structured_output: - self._set_logits_processor(self.structured_output) + self._logits_processor = self._prepare_structured_output( + self.structured_output + ) if self.use_magpie_template or self.magpie_pre_query_template: if not self.tokenizer_id: @@ -221,19 +223,6 @@ def load(self) -> None: # out of the model name, which won't be available until the `Llama` instance is created. super().load() - def _set_logits_processor( - self, structured_output: Optional[OutlinesStructuredOutputType] = None - ) -> None: - from distilabel.steps.tasks.structured_outputs.outlines import ( - outlines_below_0_1_0, - ) - - processor = self._prepare_structured_output(structured_output) - if outlines_below_0_1_0: - self._logits_processor = processor - else: - self._logits_processor = [processor] - @property def model_name(self) -> str: """Returns the model name used for the LLM.""" @@ -352,8 +341,9 @@ def generate( # type: ignore # after each generation, so subsequent calls yield nothing. This is a workaround # until is fixed in the `llama_cpp` or `outlines` libraries. 
if structured_output: - self._set_logits_processor(structured_output) - + self._logits_processor = self._prepare_structured_output( + structured_output + ) if self.tokenizer_id is None: completion = self._generate_chat_completion( input, From cfac5743ded71d5f73e1f6ca598bec2c5c4f3c76 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 10:25:38 +0100 Subject: [PATCH 26/37] Refactor outlines import and logits processor handling in TransformersLLM - Updated the import statement for outlines to use the new helper function `_outlines_version_below_0_1_0`. - Simplified the logic for setting the `_logits_processor` based on the outlines version check, enhancing code clarity and maintainability. --- src/distilabel/models/llms/huggingface/transformers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index ec178796ee..976534dbc7 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -23,7 +23,9 @@ from distilabel.models.llms.utils import compute_tokens, prepare_output from distilabel.models.mixins.cuda_device_placement import CudaDevicePlacementMixin from distilabel.models.mixins.magpie import MagpieChatTemplateMixin -from distilabel.steps.tasks.structured_outputs.outlines import outlines_below_0_1_0 +from distilabel.steps.tasks.structured_outputs.outlines import ( + _outlines_version_below_0_1_0, +) from distilabel.steps.tasks.typing import OutlinesStructuredOutputType, StandardInput from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR @@ -152,10 +154,10 @@ def load(self) -> None: if self.structured_output: processor = self._prepare_structured_output(self.structured_output) - if outlines_below_0_1_0: + if _outlines_version_below_0_1_0(): self._prefix_allowed_tokens_fn = processor else: - self._logits_processor = [processor] + self._logits_processor = processor super().load() From 337876928449fe777b9027d94e9174f794eae4e9 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 10:29:10 +0100 Subject: [PATCH 27/37] Refactor outlines version check and update function naming - Renamed the helper function from `_outlines_version_below_0_1_0` to `_is_outlines_version_below_0_1_0` for clarity. - Updated all references to the renamed function across the codebase, ensuring consistent usage in the `TransformersLLM` class and related functions. - Enhanced code readability and maintainability by standardizing function naming conventions. 
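For quick reference, a minimal, self-contained sketch of the check being renamed here; it assumes `outlines` is already installed, while the shipped helper also reports availability, as its docstring in the diff below states:

import pkg_resources

def _is_outlines_version_below_0_1_0() -> bool:
    # True when the installed outlines predates the 0.1.0 API reorganisation.
    version = pkg_resources.get_distribution("outlines").version
    return pkg_resources.parse_version(version) < pkg_resources.parse_version("0.1.0")

Callers branch on this flag to pick between a `prefix_allowed_tokens_fn` (outlines < 0.1.0) and a logits-processor list (outlines >= 0.1.0), as the transformers diff below shows.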
--- src/distilabel/models/llms/huggingface/transformers.py | 4 ++-- src/distilabel/steps/tasks/structured_outputs/outlines.py | 6 +++--- tests/unit/steps/tasks/structured_outputs/test_outlines.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index 976534dbc7..61e3d09906 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -24,7 +24,7 @@ from distilabel.models.mixins.cuda_device_placement import CudaDevicePlacementMixin from distilabel.models.mixins.magpie import MagpieChatTemplateMixin from distilabel.steps.tasks.structured_outputs.outlines import ( - _outlines_version_below_0_1_0, + _is_outlines_version_below_0_1_0, ) from distilabel.steps.tasks.typing import OutlinesStructuredOutputType, StandardInput from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR @@ -154,7 +154,7 @@ def load(self) -> None: if self.structured_output: processor = self._prepare_structured_output(self.structured_output) - if _outlines_version_below_0_1_0(): + if _is_outlines_version_below_0_1_0(): self._prefix_allowed_tokens_fn = processor else: self._logits_processor = processor diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 9575b82ba1..432717140e 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -39,7 +39,7 @@ Frameworks = Literal["transformers", "llamacpp", "vllm"] -def _outlines_version_below_0_1_0() -> bool: +def _is_outlines_version_below_0_1_0() -> bool: """Helper function to check outlines availability and version. 
Returns: @@ -60,7 +60,7 @@ def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]: def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: """Helper function to return the appropriate logits processors for the given framework.""" - if _outlines_version_below_0_1_0(): + if _is_outlines_version_below_0_1_0(): processors = { "transformers": ( "outlines.integrations.transformers", @@ -160,7 +160,7 @@ def prepare_guided_output( elif isinstance(schema, str): format = "regex" - if _outlines_version_below_0_1_0(): + if _is_outlines_version_below_0_1_0(): # use the model/llm, processor is NOT a list if format == "json": return { diff --git a/tests/unit/steps/tasks/structured_outputs/test_outlines.py b/tests/unit/steps/tasks/structured_outputs/test_outlines.py index 446967a2d5..2812c2e48b 100644 --- a/tests/unit/steps/tasks/structured_outputs/test_outlines.py +++ b/tests/unit/steps/tasks/structured_outputs/test_outlines.py @@ -19,8 +19,8 @@ from distilabel.models.llms.huggingface.transformers import TransformersLLM from distilabel.steps.tasks.structured_outputs.outlines import ( + _is_outlines_version_below_0_1_0, model_to_schema, - outlines_below_0_1_0, ) from distilabel.steps.tasks.typing import OutlinesStructuredOutputType @@ -181,7 +181,7 @@ def test_load_from_dict(self) -> None: llm = TransformersLLM.from_dict(DUMP_JSON) assert isinstance(llm, TransformersLLM) llm.load() - if outlines_below_0_1_0: + if _is_outlines_version_below_0_1_0(): assert llm._prefix_allowed_tokens_fn is not None assert llm._logits_processor is None else: From d56b6bcc7d66de4c188797c107a5c6c08d6df416 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 11:02:55 +0100 Subject: [PATCH 28/37] Refactor processor handling in LlamaCppLLM and TransformersLLM based on outlines version - Introduced version check for outlines in both LlamaCppLLM and TransformersLLM to determine processor return type. - Updated `prepare_guided_output` to handle processor initialization differently for outlines versions below and above 0.1.0. - Enhanced tokenizer handling in `_get_tokenizer_from_model` to support multiple frameworks, ensuring compatibility and improved functionality. --- .../models/llms/huggingface/transformers.py | 6 +- src/distilabel/models/llms/llamacpp.py | 6 +- .../tasks/structured_outputs/outlines.py | 59 +++++++++---------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index 61e3d09906..ad79a0d936 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -309,6 +309,7 @@ def _prepare_structured_output( The callable that will be used to guide the generation of the model. 
""" from distilabel.steps.tasks.structured_outputs.outlines import ( + _is_outlines_version_below_0_1_0, prepare_guided_output, ) @@ -317,4 +318,7 @@ def _prepare_structured_output( ) if schema := result.get("schema"): self.structured_output["schema"] = schema - return result["processor"] + if _is_outlines_version_below_0_1_0(): + return result["processor"] + else: + return [result["processor"]] diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 822e5cea77..19715836f1 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -393,10 +393,14 @@ def _prepare_structured_output( The callable that will be used to guide the generation of the model. """ from distilabel.steps.tasks.structured_outputs.outlines import ( + _is_outlines_version_below_0_1_0, prepare_guided_output, ) result = prepare_guided_output(structured_output, "llamacpp", self._model) if (schema := result.get("schema")) and self.structured_output: self.structured_output["schema"] = schema - return result["processor"] + if _is_outlines_version_below_0_1_0(): + return result["processor"] + else: + return [result["processor"]] diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 432717140e..52139a1e80 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -24,6 +24,7 @@ Literal, Tuple, Type, + Union, get_args, ) @@ -34,6 +35,10 @@ from distilabel.steps.tasks.structured_outputs.utils import schema_as_dict if TYPE_CHECKING: + from llama_cpp import Llama + from transformers import Pipeline + from vllm import LLM + from distilabel.steps.tasks.typing import OutlinesStructuredOutputType Frameworks = Literal["transformers", "llamacpp", "vllm"] @@ -108,7 +113,9 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: return getattr(module, json_cls), getattr(module, regex_cls) -def _get_tokenizer_from_model(llm: Any, framework: Frameworks) -> Callable: +def _get_tokenizer_from_model( + llm: Union["LLM", "Pipeline", "Llama"], framework: Frameworks +) -> Callable: if framework == "llamacpp": from outlines.models.llamacpp import LlamaCppTokenizer @@ -118,7 +125,9 @@ def _get_tokenizer_from_model(llm: Any, framework: Frameworks) -> Callable: return TransformerTokenizer(llm.tokenizer) elif framework == "vllm": - return llm.get_tokenizer() + from outlines.models.vllm import adapt_tokenizer + + return adapt_tokenizer(llm.get_tokenizer()) def prepare_guided_output( @@ -161,36 +170,26 @@ def prepare_guided_output( format = "regex" if _is_outlines_version_below_0_1_0(): - # use the model/llm, processor is NOT a list - if format == "json": - return { - "processor": json_processor( - schema, - llm, - whitespace_pattern=structured_output.get("whitespace_pattern"), - ), - "schema": schema_as_dict(schema), - } - - if format == "regex": - return {"processor": regex_processor(schema, llm)} + # use the llm for processor initialization + model = llm + tokenizer = None else: - # use tokenizer, processor is a list + # use the tokenizer for processor initialization + model = None tokenizer = _get_tokenizer_from_model(llm, framework) - if format == "json": - return { - "processor": [ - json_processor( - schema, - tokenizer, - whitespace_pattern=structured_output.get("whitespace_pattern"), - ) - ], - "schema": schema_as_dict(schema), - } - - if format == "regex": - return {"processor": 
[regex_processor(schema, tokenizer)]} + + if format == "json": + return { + "processor": json_processor( + schema, + model or tokenizer, + whitespace_pattern=structured_output.get("whitespace_pattern"), + ), + "schema": schema_as_dict(schema), + } + + if format == "regex": + return {"processor": regex_processor(schema, llm)} raise DistilabelUserError( f"Invalid format '{format}'. Must be either 'json' or 'regex'.", From 4056f0898efa184a3588e837f888aa10bf8edf62 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 12:16:10 +0100 Subject: [PATCH 29/37] Refactor structured output return types in LlamaCppLLM, MlxLLM, and TransformersLLM - Updated return types of `_prepare_structured_output` methods to reflect changes in processor handling. - Changed return type in LlamaCppLLM from `Union["LogitsProcessorList", None]` to `Union["LogitsProcessorList", "LogitsProcessor"]`. - Modified MlxLLM and TransformersLLM to return `Union[List[Callable], Callable>` instead of `Union[Callable, None]`, ensuring consistency across implementations. - Enhanced code clarity and maintainability by standardizing output handling in structured output preparation. --- src/distilabel/models/llms/huggingface/transformers.py | 2 +- src/distilabel/models/llms/llamacpp.py | 9 +++++++-- src/distilabel/models/llms/mlx.py | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index ad79a0d936..d84290fd3b 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -299,7 +299,7 @@ def get_last_hidden_states( def _prepare_structured_output( self, structured_output: Optional[OutlinesStructuredOutputType] = None - ) -> Union[Callable, None]: + ) -> Union[Callable, List[Callable]]: """Creates the appropriate function to filter tokens to generate structured outputs. Args: diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 19715836f1..8d90502b93 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -24,7 +24,12 @@ from distilabel.steps.tasks.typing import FormattedInput, OutlinesStructuredOutputType if TYPE_CHECKING: - from llama_cpp import CreateChatCompletionResponse, Llama, LogitsProcessorList + from llama_cpp import ( + CreateChatCompletionResponse, + Llama, + LogitsProcessor, + LogitsProcessorList, + ) from distilabel.steps.tasks.typing import FormattedInput, StandardInput @@ -383,7 +388,7 @@ def generate( # type: ignore def _prepare_structured_output( self, structured_output: Optional[OutlinesStructuredOutputType] = None - ) -> Union["LogitsProcessorList", None]: + ) -> Union["LogitsProcessorList", "LogitsProcessor"]: """Creates the appropriate function to filter tokens to generate structured outputs. Args: diff --git a/src/distilabel/models/llms/mlx.py b/src/distilabel/models/llms/mlx.py index 4ffcceddab..c754f40d5f 100644 --- a/src/distilabel/models/llms/mlx.py +++ b/src/distilabel/models/llms/mlx.py @@ -267,7 +267,7 @@ def generate( def _prepare_structured_output( self, structured_output: Optional[OutlinesStructuredOutputType] = None - ) -> Union[Callable, None]: + ) -> Union[List[Callable], Callable]: """Creates the appropriate function to filter tokens to generate structured outputs. 
Args: @@ -285,4 +285,4 @@ def _prepare_structured_output( ) if schema := result.get("schema"): self.structured_output["schema"] = schema - return result["processor"] + return [result["processor"]] From 11a7957c81a489588e2c0403ff46c18026a58b04 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 13:07:02 +0100 Subject: [PATCH 30/37] Enhance MlxLLM integration and expand framework support - Added support for the 'mlx' framework in the outlines processing logic. - Updated the `prepare_guided_output` function to utilize `TransformerTokenizer` for 'mlx' framework. - Modified the `_get_logits_processor` and `_get_tokenizer_from_model` functions to include 'mlx' as a valid framework option, ensuring consistent handling across different frameworks. - Improved code clarity and maintainability by standardizing framework handling in the structured output preparation process. --- src/distilabel/models/llms/mlx.py | 4 +++- .../tasks/structured_outputs/outlines.py | 24 +++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/distilabel/models/llms/mlx.py b/src/distilabel/models/llms/mlx.py index c754f40d5f..d78ce0b5c8 100644 --- a/src/distilabel/models/llms/mlx.py +++ b/src/distilabel/models/llms/mlx.py @@ -276,12 +276,14 @@ def _prepare_structured_output( Returns: The callable that will be used to guide the generation of the model. """ + from outlines.models.mlxlm import TransformerTokenizer + from distilabel.steps.tasks.structured_outputs.outlines import ( prepare_guided_output, ) result = prepare_guided_output( - structured_output, "transformers", self._pipeline + structured_output, "mlx", TransformerTokenizer(self._tokenizer._tokenizer) ) if schema := result.get("schema"): self.structured_output["schema"] = schema diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 52139a1e80..e78f1f13e8 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -36,12 +36,13 @@ if TYPE_CHECKING: from llama_cpp import Llama + from outlines.models.mlxlm import TransformerTokenizer from transformers import Pipeline from vllm import LLM from distilabel.steps.tasks.typing import OutlinesStructuredOutputType -Frameworks = Literal["transformers", "llamacpp", "vllm"] +Frameworks = Literal["transformers", "llamacpp", "vllm", "mlx"] def _is_outlines_version_below_0_1_0() -> bool: @@ -82,6 +83,11 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: "JSONLogitsProcessor", "RegexLogitsProcessor", ), + "mlx": ( + "outlines.processors.mlxlm", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), } else: processors = { @@ -100,6 +106,11 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: "JSONLogitsProcessor", "RegexLogitsProcessor", ), + "mlx": ( + "outlines.processors", + "JSONLogitsProcessor", + "RegexLogitsProcessor", + ), } if framework not in processors: @@ -114,26 +125,29 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: def _get_tokenizer_from_model( - llm: Union["LLM", "Pipeline", "Llama"], framework: Frameworks + llm: Union["LLM", "Pipeline", "Llama", "TransformerTokenizer"], + framework: Frameworks, ) -> Callable: if framework == "llamacpp": from outlines.models.llamacpp import LlamaCppTokenizer return LlamaCppTokenizer(llm) - elif framework == "transformers": + if framework == "transformers": from 
outlines.models.transformers import TransformerTokenizer return TransformerTokenizer(llm.tokenizer) - elif framework == "vllm": + if framework == "vllm": from outlines.models.vllm import adapt_tokenizer return adapt_tokenizer(llm.get_tokenizer()) + if framework == "mlx": + return llm def prepare_guided_output( structured_output: "OutlinesStructuredOutputType", framework: Frameworks, - llm: Any, + llm: Union["LLM", "Pipeline", "Llama", "TransformerTokenizer"], ) -> Dict[str, Any]: """Prepares the `LLM` to generate guided output using `outlines`. From e9fefc4c0553e441890757085a246ad9195c1195 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 15:10:40 +0100 Subject: [PATCH 31/37] Refactor structured output handling in LlamaCppLLM and MlxLLM - Simplified return types in LlamaCppLLM and MlxLLM by removing version checks and directly returning the processor. - Enhanced code clarity and maintainability by standardizing the output structure across both classes. - Updated `prepare_guided_output` usage to ensure consistent handling of structured outputs. --- src/distilabel/models/llms/llamacpp.py | 6 +----- src/distilabel/models/llms/mlx.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/distilabel/models/llms/llamacpp.py b/src/distilabel/models/llms/llamacpp.py index 8d90502b93..a754f6b84f 100644 --- a/src/distilabel/models/llms/llamacpp.py +++ b/src/distilabel/models/llms/llamacpp.py @@ -398,14 +398,10 @@ def _prepare_structured_output( The callable that will be used to guide the generation of the model. """ from distilabel.steps.tasks.structured_outputs.outlines import ( - _is_outlines_version_below_0_1_0, prepare_guided_output, ) result = prepare_guided_output(structured_output, "llamacpp", self._model) if (schema := result.get("schema")) and self.structured_output: self.structured_output["schema"] = schema - if _is_outlines_version_below_0_1_0(): - return result["processor"] - else: - return [result["processor"]] + return [result["processor"]] diff --git a/src/distilabel/models/llms/mlx.py b/src/distilabel/models/llms/mlx.py index d78ce0b5c8..5d510771d7 100644 --- a/src/distilabel/models/llms/mlx.py +++ b/src/distilabel/models/llms/mlx.py @@ -287,4 +287,4 @@ def _prepare_structured_output( ) if schema := result.get("schema"): self.structured_output["schema"] = schema - return [result["processor"]] + return result["processor"] From df24685d2332f8fe17a71d452703a2a08b0dad9b Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 15:52:49 +0100 Subject: [PATCH 32/37] Refactor MlxLLM structured output handling and remove unused components - Removed the `structured_output` attribute and related processing logic from MlxLLM to simplify the class structure. - Updated the `load` and generation methods to eliminate references to structured output, enhancing clarity and maintainability. - Adjusted imports and type hints in `outlines.py` to reflect the removal of 'mlx' framework support, streamlining the framework handling. - Improved code readability by cleaning up unnecessary complexity in structured output preparation. 
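For orientation, with the 'mlx' branch from PATCH 30 removed again, the tokenizer adapter in outlines.py reads roughly as follows (reconstructed from the context lines of the diff below, type hints omitted; not itself part of this patch):

def _get_tokenizer_from_model(llm, framework):
    # Adapt each supported backend to the tokenizer interface expected by outlines >= 0.1.0.
    if framework == "llamacpp":
        from outlines.models.llamacpp import LlamaCppTokenizer

        return LlamaCppTokenizer(llm)
    if framework == "transformers":
        from outlines.models.transformers import TransformerTokenizer

        return TransformerTokenizer(llm.tokenizer)
    if framework == "vllm":
        from outlines.models.vllm import adapt_tokenizer

        return adapt_tokenizer(llm.get_tokenizer())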
--- src/distilabel/models/llms/mlx.py | 54 +------------------ .../tasks/structured_outputs/outlines.py | 19 ++----- 2 files changed, 5 insertions(+), 68 deletions(-) diff --git a/src/distilabel/models/llms/mlx.py b/src/distilabel/models/llms/mlx.py index 5d510771d7..1f8c9b8c65 100644 --- a/src/distilabel/models/llms/mlx.py +++ b/src/distilabel/models/llms/mlx.py @@ -19,22 +19,18 @@ Dict, List, Optional, - Union, ) from pydantic import ( - Field, PrivateAttr, validate_call, ) -from distilabel.mixins.runtime_parameters import RuntimeParameter from distilabel.models.llms.base import LLM from distilabel.models.llms.typing import GenerateOutput from distilabel.models.llms.utils import compute_tokens, prepare_output from distilabel.models.mixins.magpie import MagpieChatTemplateMixin from distilabel.steps.tasks.typing import ( - OutlinesStructuredOutputType, StandardInput, ) @@ -51,8 +47,6 @@ class MlxLLM(LLM, MagpieChatTemplateMixin): tokenizer_config: the tokenizer configuration. model_config: the model configuration. adapter_path: the path to the adapter. - structured_output: a dictionary containing the structured output configuration or if more - fine-grained control is needed, an instance of `OutlinesStructuredOutput`. Defaults to None. use_magpie_template: a flag used to enable/disable applying the Magpie pre-query template. Defaults to `False`. magpie_pre_query_template: the pre-query template to be applied to the prompt or @@ -82,17 +76,10 @@ class MlxLLM(LLM, MagpieChatTemplateMixin): tokenizer_config: Dict[str, Any] = {} model_config: Dict[str, Any] = {} adapter_path: Optional[str] = None - structured_output: Optional[RuntimeParameter[OutlinesStructuredOutputType]] = Field( - default=None, - description="The structured output format to use across all the generations.", - ) _mlx_generate: Optional[Callable] = PrivateAttr(default=None) _model: Optional["nn.Module"] = PrivateAttr(...) _tokenizer: Optional["TokenizerWrapper"] = PrivateAttr(...) - _structured_output_logits_processor: Union[Callable, None] = PrivateAttr( - default=None - ) def load(self) -> None: """Loads the model and tokenizer and creates the text generation pipeline. In addition, @@ -112,11 +99,6 @@ def load(self) -> None: adapter_path=self.adapter_path, ) - if self.structured_output: - self._structured_output_logits_processor = self._prepare_structured_output( - self.structured_output - ) - if self._tokenizer.pad_token is None: self._tokenizer.pad_token = self._tokenizer.eos_token @@ -207,10 +189,6 @@ def generate( Returns: A list of lists of strings containing the generated responses for each input. 
""" - logits_processors = [] - if self._structured_output_logits_processor: - logits_processors.append(self._structured_output_logits_processor) - structured_output = None result = [] for input in inputs: @@ -219,13 +197,9 @@ def generate( output: List[str] = [] for _ in range(num_generations): - if structured_output: - additional_logits_processors = self._prepare_structured_output( - structured_output - ) - logits_processors.append(additional_logits_processors) + if structured_output: # will raise a NotImplementedError + self._prepare_structured_output(structured_output) prompt = self.prepare_input(input) - generation = self._mlx_generate( prompt=prompt, model=self._model, @@ -264,27 +238,3 @@ def generate( ) ) return result - - def _prepare_structured_output( - self, structured_output: Optional[OutlinesStructuredOutputType] = None - ) -> Union[List[Callable], Callable]: - """Creates the appropriate function to filter tokens to generate structured outputs. - - Args: - structured_output: the configuration dict to prepare the structured output. - - Returns: - The callable that will be used to guide the generation of the model. - """ - from outlines.models.mlxlm import TransformerTokenizer - - from distilabel.steps.tasks.structured_outputs.outlines import ( - prepare_guided_output, - ) - - result = prepare_guided_output( - structured_output, "mlx", TransformerTokenizer(self._tokenizer._tokenizer) - ) - if schema := result.get("schema"): - self.structured_output["schema"] = schema - return result["processor"] diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index e78f1f13e8..71d88a41cf 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -36,13 +36,12 @@ if TYPE_CHECKING: from llama_cpp import Llama - from outlines.models.mlxlm import TransformerTokenizer from transformers import Pipeline from vllm import LLM from distilabel.steps.tasks.typing import OutlinesStructuredOutputType -Frameworks = Literal["transformers", "llamacpp", "vllm", "mlx"] +Frameworks = Literal["transformers", "llamacpp", "vllm"] def _is_outlines_version_below_0_1_0() -> bool: @@ -83,11 +82,6 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: "JSONLogitsProcessor", "RegexLogitsProcessor", ), - "mlx": ( - "outlines.processors.mlxlm", - "JSONLogitsProcessor", - "RegexLogitsProcessor", - ), } else: processors = { @@ -106,11 +100,6 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: "JSONLogitsProcessor", "RegexLogitsProcessor", ), - "mlx": ( - "outlines.processors", - "JSONLogitsProcessor", - "RegexLogitsProcessor", - ), } if framework not in processors: @@ -125,7 +114,7 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: def _get_tokenizer_from_model( - llm: Union["LLM", "Pipeline", "Llama", "TransformerTokenizer"], + llm: Union["LLM", "Pipeline", "Llama"], framework: Frameworks, ) -> Callable: if framework == "llamacpp": @@ -140,14 +129,12 @@ def _get_tokenizer_from_model( from outlines.models.vllm import adapt_tokenizer return adapt_tokenizer(llm.get_tokenizer()) - if framework == "mlx": - return llm def prepare_guided_output( structured_output: "OutlinesStructuredOutputType", framework: Frameworks, - llm: Union["LLM", "Pipeline", "Llama", "TransformerTokenizer"], + llm: Union["LLM", "Pipeline", "Llama"], ) -> Dict[str, Any]: """Prepares the `LLM` to generate guided 
output using `outlines`. From 65272bd894591ff79d807fd9b65a917beb3cff17 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 16:02:43 +0100 Subject: [PATCH 33/37] Refactor logits processor handling in TransformersLLM - Changed the assignment of `_logits_processor` to always use a list, ensuring consistent handling across different outlines versions. - Removed the version check for outlines in the `load` method, simplifying the logic and enhancing maintainability. - Updated the return type in the structured output preparation to directly return the processor, improving code clarity. --- src/distilabel/models/llms/huggingface/transformers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/distilabel/models/llms/huggingface/transformers.py b/src/distilabel/models/llms/huggingface/transformers.py index d84290fd3b..f4475dc1b4 100644 --- a/src/distilabel/models/llms/huggingface/transformers.py +++ b/src/distilabel/models/llms/huggingface/transformers.py @@ -157,7 +157,7 @@ def load(self) -> None: if _is_outlines_version_below_0_1_0(): self._prefix_allowed_tokens_fn = processor else: - self._logits_processor = processor + self._logits_processor = [processor] super().load() @@ -309,7 +309,6 @@ def _prepare_structured_output( The callable that will be used to guide the generation of the model. """ from distilabel.steps.tasks.structured_outputs.outlines import ( - _is_outlines_version_below_0_1_0, prepare_guided_output, ) @@ -318,7 +317,4 @@ def _prepare_structured_output( ) if schema := result.get("schema"): self.structured_output["schema"] = schema - if _is_outlines_version_below_0_1_0(): - return result["processor"] - else: - return [result["processor"]] + return result["processor"] From d2eda4ee8853d6a98e6b884969df5d3b62c8bfe3 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 16:23:10 +0100 Subject: [PATCH 34/37] Refactor type hints in outlines.py for improved clarity - Updated type hints for the `llm` parameter in `_get_tokenizer_from_model` and `prepare_guided_output` functions to use `_vLLM` instead of `LLM`, enhancing code readability. - Adjusted imports to reflect the new alias for `LLM`, streamlining the code structure. --- src/distilabel/steps/tasks/structured_outputs/outlines.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 71d88a41cf..5bc16ae9df 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -37,7 +37,7 @@ if TYPE_CHECKING: from llama_cpp import Llama from transformers import Pipeline - from vllm import LLM + from vllm import LLM as _vLLM from distilabel.steps.tasks.typing import OutlinesStructuredOutputType @@ -114,7 +114,7 @@ def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]: def _get_tokenizer_from_model( - llm: Union["LLM", "Pipeline", "Llama"], + llm: Union["_vLLM", "Pipeline", "Llama"], framework: Frameworks, ) -> Callable: if framework == "llamacpp": @@ -134,7 +134,7 @@ def _get_tokenizer_from_model( def prepare_guided_output( structured_output: "OutlinesStructuredOutputType", framework: Frameworks, - llm: Union["LLM", "Pipeline", "Llama"], + llm: Union["_vLLM", "Pipeline", "Llama"], ) -> Dict[str, Any]: """Prepares the `LLM` to generate guided output using `outlines`. 
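Before the final cleanups, an illustrative end-to-end call into this helper may be useful; the pipeline, model id, and schema below are placeholders and not taken from the patches:

from pydantic import BaseModel
from transformers import pipeline
from distilabel.steps.tasks.structured_outputs.outlines import prepare_guided_output

class User(BaseModel):
    name: str
    age: int

# Placeholder model; any text-generation pipeline with a tokenizer works the same way.
pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")

result = prepare_guided_output({"format": "json", "schema": User}, "transformers", pipe)
# result["schema"] is the JSON schema dict; result["processor"] is a prefix-allowed-tokens
# callable on outlines < 0.1.0 and a logits processor on outlines >= 0.1.0, which
# TransformersLLM wraps in a list before handing it to the pipeline.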
From 85494c46d2997a40d7fafbdc857e2256cd1ae665 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 16:29:22 +0100 Subject: [PATCH 35/37] Refactor type hint imports in outlines.py for improved clarity - Updated type hint imports to include `# noqa` comments, enhancing code readability and maintaining consistency with type checking. - No functional changes were made; this commit focuses on code structure and clarity. --- .../steps/tasks/structured_outputs/outlines.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index 5bc16ae9df..ab62d63f56 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -34,12 +34,12 @@ from distilabel.errors import DistilabelUserError from distilabel.steps.tasks.structured_outputs.utils import schema_as_dict -if TYPE_CHECKING: - from llama_cpp import Llama - from transformers import Pipeline - from vllm import LLM as _vLLM +if TYPE_CHECKING: # noqa + from llama_cpp import Llama # noqa + from transformers import Pipeline # noqa + from vllm import LLM as _vLLM # noqa - from distilabel.steps.tasks.typing import OutlinesStructuredOutputType + from distilabel.steps.tasks.typing import OutlinesStructuredOutputType # noqa Frameworks = Literal["transformers", "llamacpp", "vllm"] From 01ea5f1d99e57a6b7d34267c2bd3c260c2d27d17 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 16:54:35 +0100 Subject: [PATCH 36/37] Refactor regex processor handling in prepare_guided_output function - Updated the return statement in the `prepare_guided_output` function to use `model or tokenizer` instead of `llm`, improving clarity and consistency in processor assignment. - This change enhances the function's flexibility in handling different input types while maintaining existing functionality. --- src/distilabel/steps/tasks/structured_outputs/outlines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distilabel/steps/tasks/structured_outputs/outlines.py b/src/distilabel/steps/tasks/structured_outputs/outlines.py index ab62d63f56..a5aceacb3b 100644 --- a/src/distilabel/steps/tasks/structured_outputs/outlines.py +++ b/src/distilabel/steps/tasks/structured_outputs/outlines.py @@ -190,7 +190,7 @@ def prepare_guided_output( } if format == "regex": - return {"processor": regex_processor(schema, llm)} + return {"processor": regex_processor(schema, model or tokenizer)} raise DistilabelUserError( f"Invalid format '{format}'. Must be either 'json' or 'regex'.", From 399154e690738386f84ebfea50a37a3ef5c9847f Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Fri, 10 Jan 2025 17:58:31 +0100 Subject: [PATCH 37/37] Update transformer dependency constraints in pyproject.toml - Removed the upper version limit for the `transformers` package, allowing for updates beyond version 4.47.0. 
--- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 48aded2d78..3123d56b55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,8 +78,7 @@ argilla = ["argilla >= 2.0.0", "ipython"] cohere = ["cohere >= 5.2.0"] groq = ["groq >= 0.4.1"] hf-inference-endpoints = ["huggingface_hub >= 0.22.0"] -# logit processor breaks in transformers 4.47.0 -hf-transformers = ["transformers >= 4.34.1, < 4.47.0", "torch >= 2.0.0"] +hf-transformers = ["transformers >= 4.34.1", "torch >= 2.0.0"] instructor = ["instructor >= 1.2.3"] litellm = ["litellm >= 1.30.0"] llama-cpp = ["llama-cpp-python >= 0.2.0"]
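With the upper bound removed, the structured-output path can be exercised against current transformers releases on either side of the outlines 0.1.0 split. A rough end-to-end sketch; the model id and schema are illustrative and not taken from the patches:

from pydantic import BaseModel
from distilabel.models.llms.huggingface.transformers import TransformersLLM

class Answer(BaseModel):
    reasoning: str
    answer: str

llm = TransformersLLM(
    model="HuggingFaceTB/SmolLM2-135M-Instruct",  # placeholder model id
    structured_output={"format": "json", "schema": Answer},
)
# load() selects prefix_allowed_tokens_fn or a logits-processor list depending on the
# installed outlines version, as implemented in the patches above.
llm.load()
outputs = llm.generate(inputs=[[{"role": "user", "content": "Who are you? Answer in JSON."}]])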