Byebye pytorch 1.9 #24080

Merged · 7 commits · Jun 16, 2023
2 changes: 1 addition & 1 deletion .github/workflows/build-past-ci-docker-images.yml
@@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
version: ["1.13", "1.12", "1.11", "1.10", "1.9"]
version: ["1.13", "1.12", "1.11", "1.10"]
runs-on: ubuntu-latest
steps:
-
13 changes: 1 addition & 12 deletions .github/workflows/self-nightly-past-ci-caller.yml
@@ -67,21 +67,10 @@ jobs:
sha: ${{ github.sha }}
secrets: inherit

- run_past_ci_pytorch_1-9:
- name: PyTorch 1.9
- if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
- needs: [run_past_ci_pytorch_1-10]
- uses: ./.github/workflows/self-past.yml
- with:
- framework: pytorch
- version: "1.9"
- sha: ${{ github.sha }}
- secrets: inherit
-
run_past_ci_tensorflow_2-11:
name: TensorFlow 2.11
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
- needs: [run_past_ci_pytorch_1-9]
+ needs: [run_past_ci_pytorch_1-10]
uses: ./.github/workflows/self-past.yml
with:
framework: tensorflow
2 changes: 1 addition & 1 deletion docker/transformers-past-gpu/Dockerfile
@@ -24,7 +24,7 @@ ARG FRAMEWORK
ARG VERSION

# Control `setuptools` version to avoid some issues
RUN [ "$VERSION" != "1.9" -a "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
RUN [ "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"

# Remove all frameworks
RUN python3 -m pip uninstall -y torch torchvision torchaudio tensorflow jax flax
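The `RUN` line above uses the shell `test && A || B` idiom to pick a `setuptools` constraint per image; with 1.9 gone, only the 1.10 image keeps the old pin. A hedged Python paraphrase of that selection (the helper name is hypothetical, not part of the PR):

```python
def pick_setuptools_spec(version: str) -> str:
    """Hypothetical paraphrase of the Dockerfile one-liner above."""
    # After this PR only the torch 1.10 image needs the legacy pin;
    # every newer image installs the latest setuptools.
    return "setuptools<=59.5" if version == "1.10" else "setuptools"

assert pick_setuptools_spec("1.10") == "setuptools<=59.5"
assert pick_setuptools_spec("1.13") == "setuptools"
```

Note that `A && B || C` also falls through to `C` when `B` itself fails, unlike a true if/else; here that is harmless, since the fallback merely installs the pinned version.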
8 changes: 1 addition & 7 deletions src/transformers/models/bridgetower/modeling_bridgetower.py
@@ -33,19 +33,13 @@
SequenceClassifierOutput,
)
from ...modeling_utils import PreTrainedModel, apply_chunking_to_forward
- from ...pytorch_utils import find_pruneable_heads_and_indices, is_torch_greater_or_equal_than_1_10, prune_linear_layer
+ from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from .configuration_bridgetower import BridgeTowerConfig, BridgeTowerTextConfig, BridgeTowerVisionConfig


logger = logging.get_logger(__name__)

- if not is_torch_greater_or_equal_than_1_10:
- logger.warning(
- f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
- "BridgeTowerModel. Please upgrade torch."
- )
-
_CONFIG_FOR_DOC = "BridgeTowerConfig"
_CHECKPOINT_FOR_DOC = "BridgeTower/bridgetower-base"
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
7 changes: 0 additions & 7 deletions src/transformers/models/vilt/modeling_vilt.py
@@ -36,7 +36,6 @@
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import (
find_pruneable_heads_and_indices,
- is_torch_greater_or_equal_than_1_10,
meshgrid,
prune_linear_layer,
)
@@ -46,12 +45,6 @@

logger = logging.get_logger(__name__)

- if not is_torch_greater_or_equal_than_1_10:
- logger.warning(
- f"You are using torch=={torch.__version__}, but torch>=1.10.0 is required to use "
- "ViltModel. Please upgrade torch."
- )
-
_CONFIG_FOR_DOC = "ViltConfig"
_CHECKPOINT_FOR_DOC = "dandelin/vilt-b32-mlm"

8 changes: 1 addition & 7 deletions src/transformers/pytorch_utils.py
@@ -31,7 +31,6 @@
is_torch_greater_or_equal_than_2_0 = parsed_torch_version_base >= version.parse("2.0")
is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
is_torch_greater_or_equal_than_1_11 = parsed_torch_version_base >= version.parse("1.11")
- is_torch_greater_or_equal_than_1_10 = parsed_torch_version_base >= version.parse("1.10")
is_torch_less_than_1_11 = parsed_torch_version_base < version.parse("1.11")


@@ -275,12 +274,7 @@ def meshgrid(

Reference: https://pytorch.org/docs/1.13/generated/torch.meshgrid.html
"""
- if is_torch_greater_or_equal_than_1_10:
- return torch.meshgrid(*tensors, indexing=indexing)
- else:
- if indexing != "ij":
- raise ValueError('torch.meshgrid only supports `indexing="ij"` for torch<1.10.')
- return torch.meshgrid(*tensors)
+ return torch.meshgrid(*tensors, indexing=indexing)


def id_tensor_storage(tensor: torch.Tensor) -> Tuple[torch.device, int, int]:
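With 1.9 support dropped, `meshgrid` forwards `indexing` to `torch.meshgrid` unconditionally. A minimal sketch of the resulting behavior, assuming torch >= 1.10 (tensor names are illustrative):

```python
import torch

x = torch.arange(3)
y = torch.arange(5)

# "ij" (matrix) indexing: output shapes follow the input order, here (3, 5).
gx, gy = torch.meshgrid(x, y, indexing="ij")
assert gx.shape == (3, 5)

# "xy" (Cartesian) indexing: the first two dimensions are swapped, here (5, 3).
# The removed torch < 1.10 fallback raised a ValueError for this case.
gx, gy = torch.meshgrid(x, y, indexing="xy")
assert gx.shape == (5, 3)
```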
21 changes: 7 additions & 14 deletions src/transformers/trainer.py
@@ -70,7 +70,7 @@
from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
from .optimization import Adafactor, get_scheduler
- from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_10
+ from .pytorch_utils import ALL_LAYERNORM_LAYERS
from .tokenization_utils_base import PreTrainedTokenizerBase
from .trainer_callback import (
CallbackHandler,
@@ -155,8 +155,6 @@
from .utils.generic import ContextManagers


- _is_native_cpu_amp_available = is_torch_greater_or_equal_than_1_10
-
DEFAULT_CALLBACKS = [DefaultFlowCallback]
DEFAULT_PROGRESS_CALLBACK = ProgressCallback

@@ -621,10 +619,8 @@ def __init__(
if args.device == torch.device("cpu"):
if args.fp16:
raise ValueError("Tried to use `fp16` but it is not supported on cpu")
- elif _is_native_cpu_amp_available:
- args.half_precision_backend = "cpu_amp"
else:
- raise ValueError("Tried to use cpu amp but native cpu amp is not available")
+ args.half_precision_backend = "cpu_amp"
else:
args.half_precision_backend = "cuda_amp"

@@ -2595,14 +2591,11 @@ def autocast_smart_context_manager(self, cache_enabled: Optional[bool] = True):
arguments, depending on the situation.
"""
if self.use_cuda_amp or self.use_cpu_amp:
- if is_torch_greater_or_equal_than_1_10:
- ctx_manager = (
- torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
- if self.use_cpu_amp
- else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
- )
- else:
- ctx_manager = torch.cuda.amp.autocast()
+ ctx_manager = (
+ torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
+ if self.use_cpu_amp
+ else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)
+ )
else:
ctx_manager = contextlib.nullcontext() if sys.version_info >= (3, 7) else contextlib.suppress()

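Both `Trainer` hunks above collapse a version branch into the torch >= 1.10 path, where CPU and CUDA autocast share the same keyword API. A minimal sketch of the simplified selection, assuming torch >= 1.10 (the standalone helper is hypothetical; `Trainer` keeps this logic inline):

```python
import torch

def make_amp_context(use_cpu_amp: bool, amp_dtype: torch.dtype, cache_enabled: bool = True):
    # With torch 1.9 gone, dtype and cache_enabled can be passed on both backends.
    if use_cpu_amp:
        return torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=amp_dtype)
    return torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=amp_dtype)

# e.g. bf16 mixed precision on CPU:
with make_amp_context(use_cpu_amp=True, amp_dtype=torch.bfloat16):
    out = torch.nn.Linear(4, 4)(torch.randn(2, 4))
```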
17 changes: 4 additions & 13 deletions src/transformers/utils/import_utils.py
@@ -258,16 +258,12 @@ def is_torch_bf16_gpu_available():
# since currently no utility function is available we build our own.
# some bits come from https://github.com/pytorch/pytorch/blob/2289a12f21c54da93bf5d696e3f9aea83dd9c10d/torch/testing/_internal/common_cuda.py#L51
# with additional check for torch version
- # to succeed:
- # 1. torch >= 1.10 (1.9 should be enough for AMP API has changed in 1.10, so using 1.10 as minimal)
- # 2. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
- # 3. if using gpu, CUDA >= 11
- # 4. torch.autocast exists
+ # to succeed: (torch is required to be >= 1.10 anyway)
+ # 1. the hardware needs to support bf16 (GPU arch >= Ampere, or CPU)
+ # 2. if using gpu, CUDA >= 11
+ # 3. torch.autocast exists
# XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's
# really only correct for the 0th gpu (or currently set default device if different from 0)
- if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
- return False
-
if torch.cuda.is_available() and torch.version.cuda is not None:
if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
return False
@@ -287,9 +283,6 @@ def is_torch_bf16_cpu_available():

import torch

- if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.10"):
- return False
-
try:
# multiple levels of AttributeError depending on the pytorch version so do them all in one check
_ = torch.cpu.amp.autocast
@@ -526,8 +519,6 @@ def is_optimum_neuron_available():


def is_safetensors_available():
- if is_torch_available() and version.parse(_torch_version) < version.parse("1.10"):
- return False
return _safetensors_available


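The remaining bf16 checks reduce to hardware and API probes once the torch-version gate is gone. A hedged sketch of the conditions listed in the comment above (the probe function is illustrative; the real `is_torch_bf16_gpu_available` may differ in detail):

```python
import torch

def bf16_gpu_probe() -> bool:
    # Illustrative reconstruction of the documented conditions, assuming
    # torch >= 1.10 is already guaranteed by transformers itself.
    if not (torch.cuda.is_available() and torch.version.cuda is not None):
        return False
    if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
        return False  # bf16 needs GPU arch >= Ampere
    if int(torch.version.cuda.split(".")[0]) < 11:
        return False  # bf16 on GPU needs CUDA >= 11
    return hasattr(torch, "autocast")  # torch.autocast exists on torch >= 1.10
```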
7 changes: 0 additions & 7 deletions tests/models/bloom/test_modeling_bloom.py
@@ -38,9 +38,6 @@
BloomModel,
BloomTokenizerFast,
)
- from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
- else:
- is_torch_greater_or_equal_than_1_10 = False


@require_torch
@@ -518,10 +515,6 @@ def setUp(self):
super().setUp()
self.path_bigscience_model = "bigscience/bigscience-small-testing"

- @unittest.skipIf(
- not is_torch_greater_or_equal_than_1_10,
- "Test failed with torch < 1.10 (`LayerNormKernelImpl` not implemented for `BFloat16`)",
- )
@require_torch
def test_embeddings(self):
# The config in this checkpoint has `bfloat16` as `torch_dtype` -> model in `bfloat16`
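The decorator removed above guarded against the missing bf16 `LayerNorm` CPU kernel on torch < 1.10. A minimal repro of why the skip existed, assuming torch >= 1.10, where this now passes:

```python
import torch

# On torch < 1.10 this failed with
# "LayerNormKernelImpl" not implemented for 'BFloat16';
# with the new torch >= 1.10 floor the skip is unnecessary.
ln = torch.nn.LayerNorm(8).to(torch.bfloat16)
out = ln(torch.randn(2, 8).to(torch.bfloat16))
assert out.dtype == torch.bfloat16
```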
6 changes: 0 additions & 6 deletions tests/models/bridgetower/test_modeling_bridgetower.py
@@ -50,9 +50,6 @@
BridgeTowerModel,
)
from transformers.models.bridgetower.modeling_bridgetower import BRIDGETOWER_PRETRAINED_MODEL_ARCHIVE_LIST
- from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
- else:
- is_torch_greater_or_equal_than_1_10 = False

if is_vision_available():
from PIL import Image
@@ -298,7 +295,6 @@ def prepare_config_and_inputs_for_common(self):


@require_torch
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
class BridgeTowerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
@@ -516,7 +512,6 @@ def prepare_img():

@require_torch
@require_vision
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
class BridgeTowerModelIntegrationTest(unittest.TestCase):
@cached_property
def default_processor(self):
@@ -601,7 +596,6 @@ def test_constrastive_learning(self):

@slow
@require_torch
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "BridgeTower is only available in torch v1.10+")
class BridgeTowerModelTrainingTest(unittest.TestCase):
all_training_supported_model_classes = (
(BridgeTowerForImageAndTextRetrieval, BridgeTowerForMaskedLM, BridgeTowerForContrastiveLearning)
6 changes: 0 additions & 6 deletions tests/models/levit/test_modeling_levit.py
@@ -20,8 +20,6 @@
import warnings
from math import ceil, floor

- from packaging import version
-
from transformers import LevitConfig
from transformers.file_utils import cached_property, is_torch_available, is_vision_available
from transformers.models.auto import get_values
@@ -346,10 +344,6 @@ def test_training_gradient_checkpointing(self):
loss.backward()

def test_problem_types(self):
- parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
- if parsed_torch_version_base.base_version.startswith("1.9"):
- self.skipTest(reason="This test fails with PyTorch 1.9.x: some CUDA issue")
-
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

problem_types = [
4 changes: 0 additions & 4 deletions tests/models/tvlt/test_modeling_tvlt.py
@@ -42,9 +42,6 @@

from transformers import TvltForAudioVisualClassification, TvltForPreTraining, TvltModel
from transformers.models.tvlt.modeling_tvlt import TVLT_PRETRAINED_MODEL_ARCHIVE_LIST
- from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
- else:
- is_torch_greater_or_equal_than_1_10 = False


if is_datasets_available():
@@ -322,7 +319,6 @@ def prepare_audio_values(self):


@require_torch
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "TVLT is only available in torch v1.10+")
class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(TvltModel, TvltForPreTraining, TvltForAudioVisualClassification) if is_torch_available() else ()
6 changes: 0 additions & 6 deletions tests/models/vilt/test_modeling_vilt.py
@@ -42,9 +42,6 @@
ViltModel,
)
from transformers.models.vilt.modeling_vilt import VILT_PRETRAINED_MODEL_ARCHIVE_LIST
- from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_10
- else:
- is_torch_greater_or_equal_than_1_10 = False

if is_vision_available():
import PIL
@@ -218,7 +215,6 @@ def prepare_pixel_values(self):


@require_torch
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
@@ -520,7 +516,6 @@ def test_model_from_pretrained(self):


@require_torch
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
class ViltForImagesAndTextClassificationModelTest(ViltModelTest, unittest.TestCase):
all_model_classes = (ViltForImagesAndTextClassification,) if is_torch_available() else ()

@@ -545,7 +540,6 @@ def prepare_img():

@require_torch
@require_vision
- @unittest.skipIf(not is_torch_greater_or_equal_than_1_10, "Vilt is only available in torch v1.10+")
class ViltModelIntegrationTest(unittest.TestCase):
@cached_property
def default_processor(self):