Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to outlines010 #1092

Merged
merged 42 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
329b645
add outlines 0.1.0 support
davidberenstein1957 Jan 9, 2025
9dd4be9
update tests
davidberenstein1957 Jan 9, 2025
3ce1ff3
fix passing tokenizer to regex processor as well
davidberenstein1957 Jan 9, 2025
d8d7b35
fix test by specifically passing None as token to transformersllm
davidberenstein1957 Jan 9, 2025
2e0b42c
fix tests by increasing the temperature to avoid exploding beam sear…
davidberenstein1957 Jan 9, 2025
5ee7dce
fix logit processor assignment during generation
davidberenstein1957 Jan 9, 2025
0d26a1e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 9, 2025
47e38dc
add support transformers
davidberenstein1957 Jan 9, 2025
66ac934
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 9, 2025
61c3538
remove duplicate import
davidberenstein1957 Jan 9, 2025
a3b4f9c
Merge branch 'develop' into feat/1081-feature-update-to-outlines010
davidberenstein1957 Jan 9, 2025
0738b27
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 9, 2025
8e6613b
remove duplicate
davidberenstein1957 Jan 9, 2025
7db1b0b
Merge branch 'feat/1081-feature-update-to-outlines010' of https://git…
davidberenstein1957 Jan 9, 2025
cb4c2ce
remove duplicate import
davidberenstein1957 Jan 9, 2025
7f20d9f
return content when no chat template is present
davidberenstein1957 Jan 9, 2025
61aa597
refactor clean code
davidberenstein1957 Jan 9, 2025
b994f06
chore refactor
davidberenstein1957 Jan 9, 2025
a47963d
refactor logic if else statement
davidberenstein1957 Jan 9, 2025
a0f8acd
fix import when outlines is not present
davidberenstein1957 Jan 9, 2025
b41d6f0
chore pin transformers version
davidberenstein1957 Jan 9, 2025
d2fdd4c
chore add context w.r.t. logit processor
davidberenstein1957 Jan 9, 2025
2b8f634
chore bump version
davidberenstein1957 Jan 9, 2025
ed5f00f
add simplification of transformers implementation
davidberenstein1957 Jan 9, 2025
473de03
Update .gitignore to exclude .DS_Store files and remove vllm subproje…
davidberenstein1957 Jan 10, 2025
995e4d4
Refactor outlines version check and logits processor handling
davidberenstein1957 Jan 10, 2025
5960441
Refactor logits processor handling in LlamaCppLLM
davidberenstein1957 Jan 10, 2025
cfac574
Refactor outlines import and logits processor handling in Transformer…
davidberenstein1957 Jan 10, 2025
3378769
Refactor outlines version check and update function naming
davidberenstein1957 Jan 10, 2025
d56b6bc
Refactor processor handling in LlamaCppLLM and TransformersLLM based …
davidberenstein1957 Jan 10, 2025
110ecaf
Merge branch 'develop' into feat/1081-feature-update-to-outlines010
davidberenstein1957 Jan 10, 2025
4056f08
Refactor structured output return types in LlamaCppLLM, MlxLLM, and T…
davidberenstein1957 Jan 10, 2025
11a7957
Enhance MlxLLM integration and expand framework support
davidberenstein1957 Jan 10, 2025
e9fefc4
Refactor structured output handling in LlamaCppLLM and MlxLLM
davidberenstein1957 Jan 10, 2025
df24685
Refactor MlxLLM structured output handling and remove unused components
davidberenstein1957 Jan 10, 2025
65272bd
Refactor logits processor handling in TransformersLLM
davidberenstein1957 Jan 10, 2025
7fc1762
Merge branch 'develop' into feat/1081-feature-update-to-outlines010
davidberenstein1957 Jan 10, 2025
d2eda4e
Refactor type hints in outlines.py for improved clarity
davidberenstein1957 Jan 10, 2025
85494c4
Refactor type hint imports in outlines.py for improved clarity
davidberenstein1957 Jan 10, 2025
f6a50f0
Merge branch 'develop' into feat/1081-feature-update-to-outlines010
davidberenstein1957 Jan 10, 2025
01ea5f1
Refactor regex processor handling in prepare_guided_output function
davidberenstein1957 Jan 10, 2025
399154e
Update transformer dependency constraints in pyproject.toml
davidberenstein1957 Jan 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions src/distilabel/models/llms/huggingface/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from distilabel.utils.huggingface import HF_TOKEN_ENV_VAR

if TYPE_CHECKING:
from transformers import Pipeline
from transformers import LogitsProcessorList, Pipeline
from transformers.modeling_utils import PreTrainedModel
from transformers.tokenization_utils import PreTrainedTokenizer

Expand Down Expand Up @@ -111,6 +111,7 @@ class TransformersLLM(LLM, MagpieChatTemplateMixin, CudaDevicePlacementMixin):

_pipeline: Optional["Pipeline"] = PrivateAttr(...)
_prefix_allowed_tokens_fn: Union[Callable, None] = PrivateAttr(default=None)
_logits_processor: Optional["LogitsProcessorList"] = PrivateAttr(default=None)

def load(self) -> None:
"""Loads the model and tokenizer and creates the text generation pipeline. In addition,
Expand All @@ -119,7 +120,7 @@ def load(self) -> None:
CudaDevicePlacementMixin.load(self)

try:
from transformers import pipeline
from transformers import LogitsProcessorList, pipeline
except ImportError as ie:
raise ImportError(
"Transformers is not installed. Please install it using `pip install transformers`."
Expand Down Expand Up @@ -149,10 +150,20 @@ def load(self) -> None:
self._pipeline.tokenizer.pad_token = self._pipeline.tokenizer.eos_token # type: ignore

if self.structured_output:
self._prefix_allowed_tokens_fn = self._prepare_structured_output(
self.structured_output
from distilabel.steps.tasks.structured_outputs.outlines import (
outlines_below_0_1_0,
)

if outlines_below_0_1_0:
self._prefix_allowed_tokens_fn = self._prepare_structured_output(
self.structured_output
)
else:
logits_processor = self._prepare_structured_output(
self.structured_output
)
self._logits_processor = LogitsProcessorList([logits_processor])

super().load()

def unload(self) -> None:
Expand Down Expand Up @@ -222,7 +233,7 @@ def generate( # type: ignore
"""
prepared_inputs = [self.prepare_input(input=input) for input in inputs]

outputs: List[List[Dict[str, str]]] = self._pipeline( # type: ignore
outputs: List[List[Dict[str, str]]] = self._pipeline(
prepared_inputs,
max_new_tokens=max_new_tokens,
temperature=temperature,
Expand All @@ -232,7 +243,8 @@ def generate( # type: ignore
do_sample=do_sample,
num_return_sequences=num_generations,
prefix_allowed_tokens_fn=self._prefix_allowed_tokens_fn,
pad_token_id=self._pipeline.tokenizer.eos_token_id, # type: ignore
logits_processor=self._logits_processor,
pad_token_id=self._pipeline.tokenizer.eos_token_id,
)
llm_output = [
[generation["generated_text"] for generation in output]
Expand Down
73 changes: 53 additions & 20 deletions src/distilabel/steps/tasks/structured_outputs/outlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
get_args,
)

import pkg_resources
from pydantic import BaseModel

from distilabel.errors import DistilabelUserError
Expand All @@ -36,7 +37,11 @@
from distilabel.steps.tasks.typing import OutlinesStructuredOutputType

Frameworks = Literal["transformers", "llamacpp", "vllm"]
"""Available frameworks for the structured output configuration. """
# Available frameworks for the structured output configuration.
_outlines_version = pkg_resources.get_distribution("outlines").version
outlines_below_0_1_0 = pkg_resources.parse_version(
_outlines_version
) < pkg_resources.parse_version("0.1.0")


def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]:
Expand All @@ -46,31 +51,56 @@ def model_to_schema(schema: Type[BaseModel]) -> Dict[str, Any]:

def _get_logits_processor(framework: Frameworks) -> Tuple[Callable, Callable]:
"""Helper function to return the appropriate logits processor for the given framework."""
if framework == "transformers":
from outlines.integrations.transformers import (
JSONPrefixAllowedTokens,
RegexPrefixAllowedTokens,
if framework not in Frameworks.__args__:
davidberenstein1957 marked this conversation as resolved.
Show resolved Hide resolved
raise DistilabelUserError(
f"Invalid framework '{framework}'. Must be one of {get_args(Frameworks)}",
page="sections/how_to_guides/advanced/structured_generation/",
)

return JSONPrefixAllowedTokens, RegexPrefixAllowedTokens
if outlines_below_0_1_0:
if framework == "transformers":
from outlines.integrations.transformers import (
JSONPrefixAllowedTokens,
RegexPrefixAllowedTokens,
)

if framework == "llamacpp":
from outlines.integrations.llamacpp import (
JSONLogitsProcessor,
RegexLogitsProcessor,
)
return JSONPrefixAllowedTokens, RegexPrefixAllowedTokens

return JSONLogitsProcessor, RegexLogitsProcessor
if framework == "llamacpp":
from outlines.integrations.llamacpp import (
JSONLogitsProcessor,
RegexLogitsProcessor,
)

return JSONLogitsProcessor, RegexLogitsProcessor

if framework == "vllm":
from outlines.integrations.vllm import JSONLogitsProcessor, RegexLogitsProcessor
if framework == "vllm":
from outlines.integrations.vllm import (
JSONLogitsProcessor,
RegexLogitsProcessor,
)

return JSONLogitsProcessor, RegexLogitsProcessor
else:
from outlines.processors import JSONLogitsProcessor, RegexLogitsProcessor

return JSONLogitsProcessor, RegexLogitsProcessor

raise DistilabelUserError(
f"Invalid framework '{framework}'. Must be one of {get_args(Frameworks)}",
page="sections/how_to_guides/advanced/structured_generation/",
)

def _get_outlines_tokenizer_or_model(llm: Any, framework: Frameworks) -> Callable:
davidberenstein1957 marked this conversation as resolved.
Show resolved Hide resolved
if outlines_below_0_1_0:
return llm
else:
if framework == "llamacpp":
davidberenstein1957 marked this conversation as resolved.
Show resolved Hide resolved
from outlines.models.llamacpp import LlamaCppTokenizer

return LlamaCppTokenizer(llm)
elif framework == "transformers":
from outlines.models.transformers import TransformerTokenizer

return TransformerTokenizer(llm.tokenizer)
elif framework == "vllm":
return llm.get_tokenizer()


def prepare_guided_output(
Expand All @@ -97,13 +127,16 @@ def prepare_guided_output(
case of "json" will also include the schema as a dict, to simplify serialization
and deserialization.
"""

if not importlib.util.find_spec("outlines"):
davidberenstein1957 marked this conversation as resolved.
Show resolved Hide resolved
raise ImportError(
"Outlines is not installed. Please install it using `pip install outlines`."
)

json_processor, regex_processor = _get_logits_processor(framework)

tokenizer_or_model = _get_outlines_tokenizer_or_model(llm, framework)

format = structured_output.get("format")
schema = structured_output.get("schema")

Expand All @@ -120,14 +153,14 @@ def prepare_guided_output(
return {
"processor": json_processor(
schema,
llm,
tokenizer_or_model,
whitespace_pattern=structured_output.get("whitespace_pattern"),
),
"schema": schema_as_dict(schema),
}

if format == "regex":
return {"processor": regex_processor(schema, llm)}
return {"processor": regex_processor(schema, tokenizer_or_model)}

raise DistilabelUserError(
f"Invalid format '{format}'. Must be either 'json' or 'regex'.",
Expand Down
14 changes: 9 additions & 5 deletions tests/unit/steps/tasks/structured_outputs/test_outlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from distilabel.models.llms.huggingface.transformers import TransformersLLM
from distilabel.steps.tasks.structured_outputs.outlines import (
model_to_schema,
outlines_below_0_1_0,
)
from distilabel.steps.tasks.typing import OutlinesStructuredOutputType

Expand Down Expand Up @@ -100,9 +101,6 @@ class DummyUserTest(BaseModel):
}


@pytest.mark.skip(
reason="won't work until we update our code to work with `outlines>0.1.0`"
)
class TestOutlinesIntegration:
@pytest.mark.parametrize(
"format, schema, prompt",
Expand Down Expand Up @@ -138,7 +136,7 @@ def test_generation(
prompt = [
[{"role": "system", "content": ""}, {"role": "user", "content": prompt}]
]
result = llm.generate(prompt, max_new_tokens=30)
result = llm.generate(prompt, max_new_tokens=30, temperature=0.7)
assert isinstance(result, list)
assert isinstance(result[0], dict)
assert "generations" in result[0] and "statistics" in result[0]
Expand Down Expand Up @@ -174,6 +172,7 @@ def test_serialization(
structured_output=OutlinesStructuredOutputType(
format=format, schema=schema
),
token=None,
)
llm.load()
assert llm.dump() == dump
Expand All @@ -182,4 +181,9 @@ def test_load_from_dict(self) -> None:
llm = TransformersLLM.from_dict(DUMP_JSON)
assert isinstance(llm, TransformersLLM)
llm.load()
assert llm._prefix_allowed_tokens_fn is not None
if outlines_below_0_1_0:
assert llm._prefix_allowed_tokens_fn is not None
assert llm._logits_processor is None
else:
assert llm._prefix_allowed_tokens_fn is None
assert llm._logits_processor is not None
Loading