From e4da724e4a244c98b6bb13f77b16a8d8a80a6029 Mon Sep 17 00:00:00 2001
From: Akiki Liang <asq@google.com>
Date: Thu, 30 Jan 2025 23:17:06 +0000
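Subject: [PATCH] Update NeMo Framework examples to 24.12

Bump the default NeMo Framework container version from 24.07 to 24.12
in the Dockerfile, README.md and setup_nemo.sh of the a3-highgpu-8g,
a3-megagpu-8g and a3-ultragpu-8g examples.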
---
 .../a3-highgpu-8g/nemo-framework/Dockerfile                 | 2 +-
 .../machine-learning/a3-highgpu-8g/nemo-framework/README.md | 4 ++--
 .../a3-highgpu-8g/nemo-framework/setup_nemo.sh              | 2 +-
 .../a3-megagpu-8g/nemo-framework/Dockerfile                 | 2 +-
 .../machine-learning/a3-megagpu-8g/nemo-framework/README.md | 6 +++---
 .../a3-megagpu-8g/nemo-framework/setup_nemo.sh              | 2 +-
 .../a3-ultragpu-8g/nemo-framework/Dockerfile                | 2 +-
 .../a3-ultragpu-8g/nemo-framework/README.md                 | 6 +++---
 .../a3-ultragpu-8g/nemo-framework/setup_nemo.sh             | 2 +-
 9 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/examples/machine-learning/a3-highgpu-8g/nemo-framework/Dockerfile b/examples/machine-learning/a3-highgpu-8g/nemo-framework/Dockerfile
index a4264709bc..3bfa23198b 100644
--- a/examples/machine-learning/a3-highgpu-8g/nemo-framework/Dockerfile
+++ b/examples/machine-learning/a3-highgpu-8g/nemo-framework/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG NEMOFW_VERSION=24.07
+ARG NEMOFW_VERSION=24.12
 FROM nvcr.io/nvidia/nemo:${NEMOFW_VERSION}
 
 ENV USE_TCPX=yes
diff --git a/examples/machine-learning/a3-highgpu-8g/nemo-framework/README.md b/examples/machine-learning/a3-highgpu-8g/nemo-framework/README.md
index 9eb9252106..6850852623 100644
--- a/examples/machine-learning/a3-highgpu-8g/nemo-framework/README.md
+++ b/examples/machine-learning/a3-highgpu-8g/nemo-framework/README.md
@@ -3,7 +3,7 @@ README
 
 1. Set up NeMo Framework Container
 
-   This makes a few environment variable modifications to the [nvcr.io/nvidia/nemo:24.07](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo)
+   This makes a few environment variable modifications to the [nvcr.io/nvidia/nemo:24.12](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo)
    container, and submits a Slurm job to copy the framework launcher scripts and a
    few other auxiliary files into your working directory.
 
@@ -45,7 +45,7 @@ README
        launcher_scripts_path=${PWD} \
        stages=[training] \
        env_vars.TRANSFORMERS_OFFLINE=0 \
-       container=../nemofw+tcpx-24.07.sqsh \
+       container=../nemofw+tcpx-24.12.sqsh \
        container_mounts='['${HOME}/.cache',"/var/lib/tcpx/lib64","/run/tcpx-\${SLURM_JOB_ID}:/run/tcpx"]' \
        cluster.srun_args=["--container-writable"] \
        training.model.data.data_impl=mock \
diff --git a/examples/machine-learning/a3-highgpu-8g/nemo-framework/setup_nemo.sh b/examples/machine-learning/a3-highgpu-8g/nemo-framework/setup_nemo.sh
index 5692b0342b..19c6212c6a 100644
--- a/examples/machine-learning/a3-highgpu-8g/nemo-framework/setup_nemo.sh
+++ b/examples/machine-learning/a3-highgpu-8g/nemo-framework/setup_nemo.sh
@@ -18,7 +18,7 @@
 #SBATCH --partition=a3
 #SBATCH --exclusive
 
-: "${NEMOFW_VERSION:=24.07}"
+: "${NEMOFW_VERSION:=24.12}"
 
 srun docker build --build-arg="NEMOFW_VERSION=${NEMOFW_VERSION}" -t nemofw:tcpx-"${NEMOFW_VERSION}" .
 srun rm -f nemofw+tcpx-"${NEMOFW_VERSION}".sqsh
diff --git a/examples/machine-learning/a3-megagpu-8g/nemo-framework/Dockerfile b/examples/machine-learning/a3-megagpu-8g/nemo-framework/Dockerfile
index 381591b9e0..4c0d449889 100644
--- a/examples/machine-learning/a3-megagpu-8g/nemo-framework/Dockerfile
+++ b/examples/machine-learning/a3-megagpu-8g/nemo-framework/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG NEMOFW_VERSION=24.07
+ARG NEMOFW_VERSION=24.12
 FROM nvcr.io/nvidia/nemo:$NEMOFW_VERSION
 
 ENV NCCL_FASTRAK_CTRL_DEV=enp0s12
diff --git a/examples/machine-learning/a3-megagpu-8g/nemo-framework/README.md b/examples/machine-learning/a3-megagpu-8g/nemo-framework/README.md
index e1f16377bb..6e66a68adf 100644
--- a/examples/machine-learning/a3-megagpu-8g/nemo-framework/README.md
+++ b/examples/machine-learning/a3-megagpu-8g/nemo-framework/README.md
@@ -3,7 +3,7 @@ README
 
 1. Set up NeMo Framework Container
 
-   This makes a few environment variable modifications to the [nvcr.io/nvidia/nemo:24.07](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo)
+   This makes a few environment variable modifications to the [nvcr.io/nvidia/nemo:24.12](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo)
    container, and submits a Slurm job to copy the framework launcher scripts and a
    few other auxiliary files into your working directory.
 
@@ -21,7 +21,7 @@ README
    python3 -m venv env
    source env/bin/activate
    pip install -r requirements.txt # Copied from the NeMo Framework Container earlier
-   # This is needed to use 24.07 and python3.11, which is what is present on
+   # This is needed to use 24.12 and python3.11, which is what is present on
    # Debian 12
    pip install -U hydra-core
    ```
@@ -53,7 +53,7 @@ README
        stages=[training] \
        training=gpt3/5b \
        env_vars.TRANSFORMERS_OFFLINE=0 \
-       container=../nemofw+tcpxo-24.07.sqsh \
+       container=../nemofw+tcpxo-24.12.sqsh \
        container_mounts=[${HOME}/.cache,/var/lib/tcpxo/lib64] \
        cluster.srun_args=["--container-writable"] \
        training.model.data.data_impl=mock \
diff --git a/examples/machine-learning/a3-megagpu-8g/nemo-framework/setup_nemo.sh b/examples/machine-learning/a3-megagpu-8g/nemo-framework/setup_nemo.sh
index e20e58d6aa..eac2fbfb23 100644
--- a/examples/machine-learning/a3-megagpu-8g/nemo-framework/setup_nemo.sh
+++ b/examples/machine-learning/a3-megagpu-8g/nemo-framework/setup_nemo.sh
@@ -18,7 +18,7 @@
 #SBATCH --partition=a3mega
 #SBATCH --exclusive
 
-: "${NEMOFW_VERSION:=24.07}"
+: "${NEMOFW_VERSION:=24.12}"
 
 srun docker build --build-arg="NEMOFW_VERSION=${NEMOFW_VERSION}" -t nemofw:tcpxo-"${NEMOFW_VERSION}" .
 srun rm -f nemofw+tcpxo-"${NEMOFW_VERSION}".sqsh
diff --git a/examples/machine-learning/a3-ultragpu-8g/nemo-framework/Dockerfile b/examples/machine-learning/a3-ultragpu-8g/nemo-framework/Dockerfile
index 6b36f9a52d..ce1c9fecda 100644
--- a/examples/machine-learning/a3-ultragpu-8g/nemo-framework/Dockerfile
+++ b/examples/machine-learning/a3-ultragpu-8g/nemo-framework/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG NEMOFW_VERSION=24.07
+ARG NEMOFW_VERSION=24.12
 FROM nvcr.io/nvidia/nemo:$NEMOFW_VERSION
 
 ENV NCCL_DEBUG=INFO,WARN
diff --git a/examples/machine-learning/a3-ultragpu-8g/nemo-framework/README.md b/examples/machine-learning/a3-ultragpu-8g/nemo-framework/README.md
index e895f43765..9c4c9b395d 100644
--- a/examples/machine-learning/a3-ultragpu-8g/nemo-framework/README.md
+++ b/examples/machine-learning/a3-ultragpu-8g/nemo-framework/README.md
@@ -3,7 +3,7 @@ README
 
 1. Set up NeMo Framework Container
 
-   This makes a few environment variable modifications to the [nvcr.io/nvidia/nemo:24.07](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo)
+   This makes a few environment variable modifications to the [nvcr.io/nvidia/nemo:24.12](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo)
    container, and submits a Slurm job to copy the framework launcher scripts and a
    few other auxiliary files into your working directory.
 
@@ -21,7 +21,7 @@ README
    python3 -m venv env
    source env/bin/activate
    pip install -r requirements.txt # Copied from the NeMo Framework Container earlier
-   # This is needed to use 24.07 and python3.11, which is what is present on
+   # This is needed to use 24.12 and python3.11, which is what is present on
    # Debian 12
    pip install -U hydra-core
    ```
@@ -53,7 +53,7 @@ README
        stages=[training] \
        training=gpt3/5b \
        env_vars.TRANSFORMERS_OFFLINE=0 \
-       container=../nemo-24.07.sqsh \
+       container=../nemo-24.12.sqsh \
        container_mounts=[${HOME}/.cache,/usr/local/gib] \
        cluster.srun_args=["--container-writable"] \
        training.model.data.data_impl=mock \
diff --git a/examples/machine-learning/a3-ultragpu-8g/nemo-framework/setup_nemo.sh b/examples/machine-learning/a3-ultragpu-8g/nemo-framework/setup_nemo.sh
index 9fafa83e91..53ac15486b 100644
--- a/examples/machine-learning/a3-ultragpu-8g/nemo-framework/setup_nemo.sh
+++ b/examples/machine-learning/a3-ultragpu-8g/nemo-framework/setup_nemo.sh
@@ -18,7 +18,7 @@
 #SBATCH --partition=a3ultra
 #SBATCH --exclusive
 
-: "${NEMOFW_VERSION:=24.07}"
+: "${NEMOFW_VERSION:=24.12}"
 
 srun docker build --build-arg="NEMOFW_VERSION=${NEMOFW_VERSION}" -t nemo-"${NEMOFW_VERSION}" .
 srun rm -f nemo-"${NEMOFW_VERSION}".sqsh