Local tokenizer and processor for more consistent CI #16

Merged: 14 commits, Jun 10, 2024
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
+*.json filter=lfs diff=lfs merge=lfs -text
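This attributes rule is exactly what `git lfs track "*.json"` writes: every tracked *.json file goes through the LFS clean/smudge filters and is stored as a small pointer rather than a full blob, which is why the JSON assets below render as "Git LFS file not shown" instead of a diff.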
3 changes: 3 additions & 0 deletions ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/wav2vec2-base-960h/vocab.json
Git LFS file not shown
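The asset directories above were presumably generated once by saving the Hub copies locally and committing the resulting JSON files; a minimal sketch of such a one-time export, assuming transformers' standard save_pretrained flow and a repo-root working directory (the script itself is not part of this PR):

import transformers

# One-time export (hypothetical): fetch the tokenizer/processor configs from
# the Hub and write their small JSON files into the repo, where the new
# .gitattributes rule places them under Git LFS. No model weights are saved.
tok = transformers.AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
tok.save_pretrained("ultravox/assets/hf/Meta-Llama-3-8B-Instruct")

proc = transformers.AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
proc.save_pretrained("ultravox/assets/hf/wav2vec2-base-960h")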
21 changes: 7 additions & 14 deletions ultravox/inference/infer_test.py
@@ -1,5 +1,3 @@
-import logging
-import os
 from unittest import mock
 
 import numpy as np
@@ -12,23 +10,21 @@
 from ultravox.inference import infer
 from ultravox.model import ultravox_processing
 
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
 
+# We cache these files in our repo to make CI faster and also
+# work properly for external contributions (since Llama 3 is gated).
 @pytest.fixture(scope="module")
 def tokenizer():
-    logging.info("Loading tokenizer")
-    yield transformers.AutoTokenizer.from_pretrained(
-        "meta-llama/Meta-Llama-3-8B-Instruct"
+    return transformers.AutoTokenizer.from_pretrained(
+        "./assets/hf/Meta-Llama-3-8B-Instruct", local_files_only=True
     )
-    logging.info("Tearing down tokenizer")
 
 
 @pytest.fixture(scope="module")
 def audio_processor():
-    logging.info("Loading audio processor")
-    yield transformers.AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
-    logging.info("Tearing down audio processor")
+    return transformers.AutoProcessor.from_pretrained(
+        "./assets/hf/wav2vec2-base-960h", local_files_only=True
+    )
 
 
 class FakeInference(infer.LocalInference):
@@ -50,9 +46,6 @@ def __init__(
         self.model.device = "cpu"
         self.model.generate = mock.MagicMock(return_value=[range(25)])
 
-    def __del__(self):
-        logging.info("Tearing down inference")
-
 
 EXPECTED_TOKEN_IDS_START = [128000, 128006, 882, 128007]
 EXPECTED_TOKEN_IDS_END = [128009, 128006, 78191, 128007, 271]
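For reference, pytest injects these module-scoped fixtures into tests by parameter name. A minimal sketch of a consuming test (hypothetical, not part of the diff; only the fixture and token id 128000, Llama 3's begin-of-text token per EXPECTED_TOKEN_IDS_START above, come from this PR):

def test_tokenizer_loads_from_local_assets(tokenizer):
    # Runs fully offline: the fixture loaded its files from ./assets/hf with
    # local_files_only=True, so the gated Hub repo is never contacted.
    ids = tokenizer("hello world").input_ids
    # The Llama 3 tokenizer prepends <|begin_of_text|> (id 128000) by default.
    assert ids[0] == 128000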