chore: sync main to dev #467

Merged: 20 commits, Jan 28, 2025
Commits
537beb3
docs: fix typo PRETRAINED_ACTOR_NEMO_FILE -> ACTOR_NEMO_FILE (#449)
terrykong Dec 13, 2024
4ee496c
docs: fix minor typo (#452)
ashors1 Dec 17, 2024
b9844bd
ci: Bump release workflow (#445)
ko3n1g Dec 17, 2024
af91e52
ci: Add final status check (#455)
ko3n1g Dec 18, 2024
9be1c37
docs: add llama download command (#460)
ashors1 Dec 18, 2024
1c6e501
docs: fix reinforce and ppo to specify mpirun requirement (#462)
terrykong Dec 18, 2024
92233b0
ci: Use `github.sha` for build (#463)
ko3n1g Dec 19, 2024
7f33afc
ci: Run on `main` (#464)
ko3n1g Dec 19, 2024
e667085
ci: Bump release workflow (#466)
ko3n1g Dec 20, 2024
35fcfd9
ci: Bump release workflow (#469)
ko3n1g Dec 20, 2024
d0e1fa4
ci: Update release.yaml (#471)
ko3n1g Jan 3, 2025
de10571
fix: Generate version number (#474)
ko3n1g Jan 3, 2025
968ba12
ci: Update release.yaml (#477)
ko3n1g Jan 7, 2025
735c415
fix: dev segment in version specifier joined properly (#482)
terrykong Jan 16, 2025
9512ee8
fix: session start fixture correctly handles being launched by mpirun…
terrykong Jan 21, 2025
5f4f6d6
fix: pynvml is pinned when using TRTLLM v13 due to breaking change in…
terrykong Jan 21, 2025
502ebde
feat: introducing the `experimental` package and refactoring test str…
terrykong Jan 22, 2025
1f58260
fix: Add triton downgrade as long as we're on pytorch 24.07 (#493)
terrykong Jan 26, 2025
82f0215
docs: fix linking of reinforce from index and add it to support table…
terrykong Jan 27, 2025
b7d8af1
ci: Properly enable merge-queue (#468)
ko3n1g Jan 27, 2025
69 changes: 64 additions & 5 deletions .github/workflows/cicd-main.yml
@@ -20,6 +20,8 @@ on:
- 'r**'
- 'dev'
types: [labeled]
merge_group:
types: [checks_requested]
workflow_dispatch:
inputs:
test_to_run:
@@ -41,6 +43,7 @@ jobs:
outputs:
test_to_run: ${{ steps.test_to_run.outputs.main }}
all: ${{ steps.all.outputs.main }}
run_ci: ${{ steps.evaluate.outputs.run_ci }}
steps:
- name: Parse test_to_run
id: test_to_run
@@ -51,23 +54,58 @@
id: all
run: |
echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"


- name: Get changed files
id: changed-files
if: github.event_name == 'pull_request'
uses: tj-actions/changed-files@v44
with:
files_yaml: |
doc:
- '**.md'
- docs/**
src:
- '!**.md'
- '!docs/**'

- name: Evaluate conditions
id: evaluate
env:
DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }}
CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }}
CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }}
IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }}
LABEL: ${{ github.event.label.name == 'Run CICD' }}
MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
run: |
# Some output that's helpful for debugging
echo "Docs changed: $CHANGED_DOCS"
echo "Src changed: $CHANGED_SRC"

echo "DOCS_ONLY: $DOCS_ONLY"
echo "LABEL: $LABEL"
echo "IS_PULLREQUEST: $IS_PULLREQUEST"

# Run CI only (on main or if label is attached) and if it's not only docs
echo run_ci=$([[ ("$LABEL" = "true" || "$IS_PULLREQUEST" = "false" || "$MERGE_GROUP" = "true") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT"

build-container:
if: ${{ github.event.label.name == 'Run CICD' || github.ref == 'refs/heads/main' }}
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
needs: [pre-flight]
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
with:
image-name: nemo_aligner_container
dockerfile: Dockerfile
image-label: nemo-aligner
build-args: |
MAX_JOBS=32
ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }}
ALIGNER_COMMIT=${{ github.sha }}

Unit_Tests:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'unit') || needs.pre-flight.outputs.all == 'true'
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
strategy:
matrix:
test_case:
@@ -85,7 +123,7 @@ jobs:
name: ${{ matrix.test_case }}
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'functional') || needs.pre-flight.outputs.all == 'true'
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
strategy:
matrix:
test_case:
@@ -102,3 +140,24 @@ jobs:
TIMEOUT: 8
SCRIPT: |
bash /opt/NeMo-Aligner/tests/functional/test_cases/${{ matrix.test_case }}

CI_QA_Gate:
name: CI quality check
if: always()
runs-on: ubuntu-latest
needs:
- Unit_Tests
- Functional_Tests
steps:
- name: main
env:
JOB_RESULTS: ${{ toJSON(needs) }}
ALL_SUCCESS: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }}
CI_SKIP: ${{ github.event.label.name == 'Skip CICD' }}
run: |

SUMMARY=$(echo $JOB_RESULTS | jq 'to_entries[] | .key + ": " + .value.result' | tr -d '"')
echo '🤖: CICD Result' >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY

test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true"
17 changes: 11 additions & 6 deletions .github/workflows/release-freeze.yml
@@ -3,20 +3,25 @@ name: "Code freeze"
on:
workflow_dispatch:
inputs:
type_of_release:
release-type:
type: choice
description: Type of release
options:
- major
- minor

freeze-commit:
type: string
description: Commit SHA to use for cut-off
required: false
default: main
jobs:
code-freeze:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.8.0
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.17.3
with:
name_of_library: NeMo-Aligner
type_of_release: ${{ inputs.type_of_release }}
python_package: nemo_aligner
library-name: NeMo-Aligner
python-package: nemo_aligner
release-type: ${{ inputs.release-type }}
freeze-commit: ${{ inputs.freeze-commit }}
secrets:
SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
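
With the inputs renamed above (release-type, freeze-commit), a manual dispatch would look roughly like this. The invocation below is an illustrative GitHub CLI example, not something added by the PR, and the chosen values are placeholders.

    # Hypothetical manual dispatch of the updated code-freeze workflow
    gh workflow run release-freeze.yml \
      -f release-type=minor \
      -f freeze-commit=main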
11 changes: 8 additions & 3 deletions .github/workflows/release.yaml
@@ -25,10 +25,14 @@ on:
required: true
default: true
type: boolean

version-bump-branch:
description: Branch for version bump
required: true
type: string

jobs:
release:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.15.0
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.18.4
with:
release-ref: ${{ inputs.release-ref }}
image-name: nemo_aligner_container
@@ -42,9 +46,10 @@ jobs:
container-workdir: /opt/NeMo-Aligner
library-name: NeMo-Aligner
dry-run: ${{ inputs.dry-run }}
version-bump-branch: ${{ inputs.version-bump-branch }}
secrets:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
PAT: ${{ secrets.PAT }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
PAT: ${{ secrets.PAT }}
11 changes: 10 additions & 1 deletion Dockerfile
@@ -38,7 +38,7 @@ git pull --rebase || true
pip install --no-cache-dir --no-deps -e .
EOF

FROM ${BASE_IMAGE} as final
FROM ${BASE_IMAGE} AS final
LABEL "nemo.library"="nemo-aligner"
WORKDIR /opt
# needed in case git complains that it can't detect a valid email, this email is fake but works
@@ -70,6 +70,10 @@ RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
pip install -e .
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/

# TODO: This pinning of pynvml is only needed while on TRTLLM v13 since pynvml>=11.5.0 but pynvml==12.0.0 contains a
# breaking change. The last known working version is 11.5.3
RUN pip install pynvml==11.5.3

# install TransformerEngine
ARG MAX_JOBS
ARG TE_TAG
@@ -103,6 +107,11 @@ RUN git clone https://github.com/NVIDIA/NeMo.git && \
pip install -e ".[nlp]" && \
cd nemo/collections/nlp/data/language_modeling/megatron && make

# TODO: While we are on Pytorch 24.07, we need to downgrade triton since 3.2.0 introduced a breaking change
# This un-pinned requirement comes from mamba-ssm, and this pin can be removed once Pytorch base image is
# updated.
RUN pip install triton==3.1.0

# MLM
ARG MLM_TAG
RUN pip uninstall -y megatron-core && \
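
As a quick, informal check that the two pins introduced above (pynvml==11.5.3 and triton==3.1.0) survive the image build, something like the following can be run inside the built container; the expected output is only what the pins imply, not a guarantee.

    pip show pynvml triton | grep -E '^(Name|Version):'
    # Name: pynvml
    # Version: 11.5.3
    # Name: triton
    # Version: 3.1.0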
8 changes: 5 additions & 3 deletions tests/conftest.py → conftest.py
@@ -22,8 +22,8 @@
from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
from nemo_aligner.models.nlp.gpt.megatron_gpt_ppo_actor import MegatronGPTActorModel
from nemo_aligner.testing.utils import Utils
from nemo_aligner.utils.train_script_utils import init_distributed, resolve_and_create_trainer
from tests.test_mcore_utilities import Utils

dir_path = os.path.dirname(os.path.abspath(__file__))
# TODO: This file exists because in cases where TRTLLM MPI communicators are involved,
Expand Down Expand Up @@ -67,7 +67,7 @@ def run_only_on_device_fixture(request, device):

@pytest.fixture
def init_model_parallel():
from tests.test_mcore_utilities import Utils
from nemo_aligner.testing.utils import Utils

def initialize(*args, **kwargs):
Utils.initialize_model_parallel(*args, **kwargs)
@@ -401,7 +401,9 @@ def pytest_collection_modifyitems(config, items):

def pytest_sessionstart(session):
# Remove the file at the start of the session, if it exists
if os.path.exists(SUCCESS_FILE) and os.environ["LOCAL_RANK"] == "0":
if os.path.exists(SUCCESS_FILE) and (
os.environ.get("LOCAL_RANK", None) == "0" or os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK", None) == "0"
):
os.remove(SUCCESS_FILE)


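The fixture change above boils down to accepting either launcher's rank variable, since torchrun exports LOCAL_RANK while mpirun (Open MPI) exports OMPI_COMM_WORLD_LOCAL_RANK. A minimal standalone sketch of the same check, with a hypothetical helper name that is not part of the PR:

    import os

    def is_local_rank_zero() -> bool:
        # Accept either launcher's variable when deciding which process
        # may remove the shared success file.
        return (
            os.environ.get("LOCAL_RANK") == "0"
            or os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK") == "0"
        )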
5 changes: 5 additions & 0 deletions docs/user-guide-experimental/README.md
@@ -0,0 +1,5 @@
# Experimental Docs

This directory contains documentation for features that are still experimental or under development and not yet ready for general use.

More context can be found in the [experimental/README.md](../../nemo_aligner/experimental/README.md) file.
8 changes: 6 additions & 2 deletions docs/user-guide/dpo.rst
@@ -46,10 +46,14 @@ To start, we must first get a pretrained model to align. There are two models we
--in-folder ./model_checkpoint \
--out-file ./mcore_gpt.nemo

.. tab-item:: LLaMa3 7B
.. tab-item:: LLaMa3 8B
:sync: key2

#. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder.
#. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder. You can use the Hugging Face CLI for this:
.. code-block:: bash

huggingface-cli download meta-llama/Meta-Llama-3-8B --local-dir /path/to/llama

#. Convert the LLaMa3 LLM into ``.nemo`` format.
.. code-block:: bash

12 changes: 12 additions & 0 deletions docs/user-guide/index.rst
@@ -7,6 +7,7 @@

sft.rst
knowledge-distillation.rst
reinforce.rst
dpo.rst
rlhf.rst
steerlm.rst
@@ -25,6 +26,9 @@
:ref:`Supervised Fine-Tuning (SFT) with Knowledge Distillation <nemo-aligner-knowledge-distillation>`
In this section, we walk through a variation of SFT using Knowledge Distillation where we train a smaller "student" model using a larger "teacher" model.

:ref:`Model Alignment by REINFORCE <nemo-aligner-reinforce>`
In this tutorial, we will guide you through the process of aligning a NeMo Framework model using REINFORCE. This method can be applied to various models, including LLaMa2 and Mistral, with our scripts functioning consistently across different models.

:ref:`Model Alignment by DPO, RPO and IPO <nemo-aligner-dpo>`
DPO, RPO, and IPO are simpler alignment methods compared to RLHF. DPO introduces a novel parameterization of the reward model in RLHF, which allows us to extract the corresponding optimal policy. Similarly, RPO and IPO provide alternative parameterizations or optimization strategies, each contributing unique approaches to refining model alignment.

@@ -75,6 +79,14 @@
- Yes
- Yes
-
* - :ref:`REINFORCE <nemo-aligner-reinforce>`
- Yes
- Yes
- Yes
- Yes (✓)
- Yes
- Yes
-
* - :ref:`DPO <nemo-aligner-dpo>`
-
- Yes (✓)
20 changes: 10 additions & 10 deletions docs/user-guide/reinforce.rst
@@ -1,16 +1,16 @@
.. include:: /content/nemo.rsts

.. _model-aligner-reinforce:
.. _nemo-aligner-reinforce:

Model Alignment by REINFORCE
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@

In this tutorial, we will guide you through the process of aligning a NeMo Framework model using REINFORCE. This method can be applied to various models, including LLaMa2 and Mistral, with our scripts functioning consistently across different models.

REINFORCE is usually preceded by a Supervised Fine-Tuning (SFT). We should first follow the :ref:`Prerequisite guide <prerequisite>` and the :ref:`SFT guide <sft>`. After obtaining the SFT model, we will also need to train a reward model as in :ref:`PPO guide <ppo>`. We will use the REINFORCE algorithm on the `Anthropic-HH-RLHF <https://huggingface.co/datasets/Anthropic/hh-rlhf>`__ dataset.

REINFORCE Training
############
##################

After you have fine-tuned a GPT model using Supervised Fine-Tuning (SFT), and trained a reward model as explained in the preceding section, you can start aligning the policy using REINFORCE.

@@ -48,7 +48,7 @@ To launch the server:
The above example launches the reward model server on eight GPUs and one node. Make sure to change trainer.devices, trainer.num_nodes depending on your model size and scale. Aligner will work on any scale. Also, make sure to tune the trainer.reinforce.inference_micro_batch_size argument. This argument sets the size of the batch the REINFORCE actor is allowed to send to the reward per DP rank.

Launch the Initial Policy and REINFORCE Actor Training
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The REINFORCE Actor training job contains the master controller that makes the HTTP calls to all servers when needed. To launch the REINFORCE Actor and Initial Policy server:

Expand All @@ -58,7 +58,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
TRAIN_DATA_PATH="/path/to/train_prompts.jsonl"
VALID_DATA_PATH="/path/to/test_prompts.jsonl"

PRETRAINED_ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
RESULTS_DIR="/path/to/actor_results_dir"

USE_FLASK=False
@@ -73,7 +73,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
cd ${GPFS}
export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
&& export HYDRA_FULL_ERROR=1 \
&& python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
&& mpirun -n 8 --allow-run-as-root python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
"model.data.data_prefix={train: [${TRAIN_DATA_PATH}], validation: [${VALID_DATA_PATH}], test: [${VALID_DATA_PATH}]}" \
pretrained_checkpoint.restore_from_path=\"${ACTOR_NEMO_FILE}\" \
exp_manager.checkpoint_callback_params.save_top_k=1 \
@@ -114,7 +114,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
The above command launches the initial and actor server on one node with eight GPUs.

Launching Both Servers for REINFORCE training
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

You can use slurm to launch the two jobs and get them to coordinate together in a full REINFORCE job through the following:

@@ -239,7 +239,7 @@ You can use slurm to launch the two jobs and get them to coordinate together in
trainer.reinforce.rollout_batch_seq_length=4096
EOF

srun --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
srun --mpi=pmix --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &

wait

@@ -251,6 +251,6 @@ It is important to launch all jobs with ``&`` after the srun command to ensure t
Make sure to change the reward model arg ``trainer.reinforce.inference_micro_batch_size`` such that ``trainer.reinforce.inference_micro_batch_size * DP size <= model.reinforce.rollout_micro_batch_size``.

REINFORCE Results
%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%

After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
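
For the batch-size note above (trainer.reinforce.inference_micro_batch_size * DP size <= model.reinforce.rollout_micro_batch_size), a quick worked example with arbitrary numbers: with a data-parallel size of 8 and rollout_micro_batch_size of 16, inference_micro_batch_size can be at most 2, since 2 * 8 = 16. A throwaway shell check of the same arithmetic:

    # Illustrative only; the values are placeholders, not recommendations.
    DP_SIZE=8
    ROLLOUT_MICRO_BATCH_SIZE=16
    INFERENCE_MICRO_BATCH_SIZE=2
    if (( INFERENCE_MICRO_BATCH_SIZE * DP_SIZE <= ROLLOUT_MICRO_BATCH_SIZE )); then
      echo "constraint satisfied"
    else
      echo "inference_micro_batch_size too large for this rollout batch size"
    fi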