diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 3f11fa876..728f3b6bf 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -20,6 +20,8 @@ on:
       - 'r**'
       - 'dev'
     types: [labeled]
+  merge_group:
+    types: [checks_requested]
   workflow_dispatch:
     inputs:
       test_to_run:
@@ -41,6 +43,7 @@ jobs:
     outputs:
       test_to_run: ${{ steps.test_to_run.outputs.main }}
       all: ${{ steps.all.outputs.main }}
+      run_ci: ${{ steps.evaluate.outputs.run_ci }}
     steps:
       - name: Parse test_to_run
         id: test_to_run
@@ -51,9 +54,44 @@ jobs:
         id: all
         run: |
           echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"
-  
+
+      - name: Get changed files
+        id: changed-files
+        if: github.event_name == 'pull_request'  # step is skipped for every other event
+        uses: tj-actions/changed-files@v44
+        with:  # two pattern groups: 'doc' (Markdown and docs/) and 'src' (patterns negating those)
+          files_yaml: |
+            doc:
+              - '**.md'
+              - docs/**
+            src:
+              - '!**.md'
+              - '!docs/**'
+
+      - name: Evaluate conditions
+        id: evaluate
+        env:
+          DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }}
+          CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }}
+          CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }}
+          IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }}
+          LABEL: ${{ github.event.label.name == 'Run CICD' }}
+          MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
+        run: |
+          # Some output that's helpful for debugging
+          echo "Docs changed: $CHANGED_DOCS"
+          echo "Src changed: $CHANGED_SRC"
+
+          echo "DOCS_ONLY: $DOCS_ONLY"
+          echo "LABEL: $LABEL"
+          echo "IS_PULLREQUEST: $IS_PULLREQUEST"
+          echo "MERGE_GROUP: $MERGE_GROUP"
+          # Run CI for non-pull_request events (including merge_group) or when the 'Run CICD' label triggered the run, but never for docs-only changes
+          echo "run_ci=$([[ ("$LABEL" = "true" || "$IS_PULLREQUEST" = "false" || "$MERGE_GROUP" = "true") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false")" | tee -a "$GITHUB_OUTPUT"
+
   build-container:
-    if: ${{ github.event.label.name == 'Run CICD' || github.ref == 'refs/heads/main' }}
+    if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
+    needs: [pre-flight]
     uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.1.0
     with:
       image-name: nemo_aligner_container
@@ -61,13 +99,13 @@ jobs:
       image-label: nemo-aligner
       build-args: |
         MAX_JOBS=32
-        ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }}
+        ALIGNER_COMMIT=${{ github.sha }}
   
   Unit_Tests:
     name: ${{ matrix.test_case }}
     needs: [build-container, pre-flight]
     uses: ./.github/workflows/_run_test.yml
-    if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'unit') || needs.pre-flight.outputs.all == 'true'
+    if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
     strategy:
       matrix:
         test_case:
@@ -85,7 +123,7 @@ jobs:
     name: ${{ matrix.test_case }}
     needs: [build-container, pre-flight]
     uses: ./.github/workflows/_run_test.yml
-    if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'functional') || needs.pre-flight.outputs.all == 'true'
+    if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
     strategy:
       matrix:
         test_case:
@@ -102,3 +140,24 @@ jobs:
       TIMEOUT: 8
       SCRIPT: |
         bash /opt/NeMo-Aligner/tests/functional/test_cases/${{ matrix.test_case }}
+
+  CI_QA_Gate:
+    name: CI quality check
+    if: always()  # evaluate the gate even when the needed jobs failed or were skipped
+    runs-on: ubuntu-latest
+    needs:
+      - Unit_Tests
+      - Functional_Tests
+    steps:
+      - name: main
+        env:
+          JOB_RESULTS: ${{ toJSON(needs) }}
+          ALL_SUCCESS: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }}
+          CI_SKIP: ${{ github.event.label.name == 'Skip CICD' }}
+        run: |
+          # Write one "<job>: <result>" line per needed job into the step summary
+          SUMMARY=$(echo "$JOB_RESULTS" | jq -r 'to_entries[] | .key + ": " + .value.result')
+          echo '🤖: CICD Result' >> "$GITHUB_STEP_SUMMARY"
+          echo "$SUMMARY" >> "$GITHUB_STEP_SUMMARY"
+          # NOTE(review): 'skipped' counts as not-successful, so a docs-only PR (run_ci=false) passes only when the triggering label is 'Skip CICD' - confirm this is intended
+          test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true"
\ No newline at end of file
diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml
index 2513928c9..10ae3386e 100644
--- a/.github/workflows/release-freeze.yml
+++ b/.github/workflows/release-freeze.yml
@@ -3,20 +3,25 @@ name: "Code freeze"
 on:
   workflow_dispatch:
     inputs:
-      type_of_release:
+      release-type:
         type: choice
         description: Type of release
         options: 
         - major
         - minor
-
+      freeze-commit:
+        type: string
+        description: Commit SHA to use for cut-off
+        required: false
+        default: main
 jobs:
   code-freeze:
-    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.8.0
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.17.3
     with:
-      name_of_library: NeMo-Aligner
-      type_of_release: ${{ inputs.type_of_release }}
-      python_package: nemo_aligner
+      library-name: NeMo-Aligner
+      python-package: nemo_aligner
+      release-type: ${{ inputs.release-type }}
+      freeze-commit: ${{ inputs.freeze-commit }}
     secrets:
       SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
       SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 6991a5cfb..9678d8db5 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -25,10 +25,14 @@ on:
         required: true
         default: true
         type: boolean
-    
+      version-bump-branch:
+        description: Branch for version bump
+        required: true
+        type: string
+          
 jobs:
   release:
-    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.15.0
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.18.4
     with:
       release-ref: ${{ inputs.release-ref }}
       image-name: nemo_aligner_container
@@ -42,9 +46,10 @@ jobs:
       container-workdir: /opt/NeMo-Aligner
       library-name: NeMo-Aligner
       dry-run: ${{ inputs.dry-run }}
+      version-bump-branch: ${{ inputs.version-bump-branch }}
     secrets:
       TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
       TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
       SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
-      PAT: ${{ secrets.PAT }}
       SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+      PAT: ${{ secrets.PAT }}
diff --git a/Dockerfile b/Dockerfile
index 44a9f8651..6eb6aad40 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,7 +38,7 @@ git pull --rebase || true
 pip install --no-cache-dir --no-deps -e .
 EOF
 
-FROM ${BASE_IMAGE} as final
+FROM ${BASE_IMAGE} AS final
 LABEL "nemo.library"="nemo-aligner"
 WORKDIR /opt
 # needed in case git complains that it can't detect a valid email, this email is fake but works
@@ -70,6 +70,10 @@ RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
     pip install -e .
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/
 
+# TODO: This pinning of pynvml is only needed while on TRTLLM v0.13.0, since its requirement is pynvml>=11.5.0 but
+#   pynvml==12.0.0 contains a breaking change. The last known working version is 11.5.3
+RUN pip install pynvml==11.5.3
+
 # install TransformerEngine
 ARG MAX_JOBS
 ARG TE_TAG
@@ -103,6 +107,11 @@ RUN git clone https://github.com/NVIDIA/NeMo.git && \
     pip install -e ".[nlp]" && \
     cd nemo/collections/nlp/data/language_modeling/megatron && make
 
+# TODO: While we are on Pytorch 24.07, we need to downgrade triton since 3.2.0 introduced a breaking change
+#   This un-pinned requirement comes from mamba-ssm, and this pin can be removed once Pytorch base image is
+#   updated.
+RUN pip install triton==3.1.0
+
 # MLM
 ARG MLM_TAG
 RUN pip uninstall -y megatron-core && \
diff --git a/tests/conftest.py b/conftest.py
similarity index 98%
rename from tests/conftest.py
rename to conftest.py
index 8ac1c2af7..c57b01f43 100644
--- a/tests/conftest.py
+++ b/conftest.py
@@ -22,8 +22,8 @@
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
 from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
 from nemo_aligner.models.nlp.gpt.megatron_gpt_ppo_actor import MegatronGPTActorModel
+from nemo_aligner.testing.utils import Utils
 from nemo_aligner.utils.train_script_utils import init_distributed, resolve_and_create_trainer
-from tests.test_mcore_utilities import Utils
 
 dir_path = os.path.dirname(os.path.abspath(__file__))
 # TODO: This file exists because in cases where TRTLLM MPI communicators are involved,
@@ -67,7 +67,7 @@ def run_only_on_device_fixture(request, device):
 
 @pytest.fixture
 def init_model_parallel():
-    from tests.test_mcore_utilities import Utils
+    from nemo_aligner.testing.utils import Utils
 
     def initialize(*args, **kwargs):
         Utils.initialize_model_parallel(*args, **kwargs)
@@ -401,7 +401,9 @@ def pytest_collection_modifyitems(config, items):
 
 def pytest_sessionstart(session):
     # Remove the file at the start of the session, if it exists
-    if os.path.exists(SUCCESS_FILE) and os.environ["LOCAL_RANK"] == "0":
+    if os.path.exists(SUCCESS_FILE) and (
+        os.environ.get("LOCAL_RANK", None) == "0" or os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK", None) == "0"
+    ):  # local rank 0 only; either rank variable may be set (presumably torchrun vs. Open MPI launch - confirm)
         os.remove(SUCCESS_FILE)
 
 
diff --git a/docs/user-guide-experimental/README.md b/docs/user-guide-experimental/README.md
new file mode 100644
index 000000000..4801e725c
--- /dev/null
+++ b/docs/user-guide-experimental/README.md
@@ -0,0 +1,5 @@
+# Experimental Docs
+
+This directory contains documentation for features that are still experimental or under development and not yet ready for general use.
+
+More context can be found in the [experimental/README.md](../../nemo_aligner/experimental/README.md) file.
\ No newline at end of file
diff --git a/docs/user-guide/dpo.rst b/docs/user-guide/dpo.rst
index fa75941d0..d227df6f5 100644
--- a/docs/user-guide/dpo.rst
+++ b/docs/user-guide/dpo.rst
@@ -46,10 +46,14 @@ To start, we must first get a pretrained model to align. There are two models we
                   --in-folder ./model_checkpoint \
                   --out-file ./mcore_gpt.nemo
 
-    .. tab-item:: LLaMa3 7B
+    .. tab-item:: LLaMa3 8B
         :sync: key2
 
-        #. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder.
+        #. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder. You can use the Hugging Face CLI for this:
+            .. code-block:: bash
+
+               huggingface-cli download meta-llama/Meta-Llama-3-8B --local-dir /path/to/llama
+
         #. Convert the LLaMa3 LLM into ``.nemo`` format.
             .. code-block:: bash 
 
diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst
index bf80fb618..e467c335c 100644
--- a/docs/user-guide/index.rst
+++ b/docs/user-guide/index.rst
@@ -7,6 +7,7 @@
 
    sft.rst
    knowledge-distillation.rst
+   reinforce.rst
    dpo.rst
    rlhf.rst
    steerlm.rst
@@ -25,6 +26,9 @@
 :ref:`Supervised Fine-Tuning (SFT) with Knowledge Distillation <nemo-aligner-knowledge-distillation>`
    In this section, we walk through a variation of SFT using Knowledge Distillation where we train a smaller "student" model using a larger "teacher" model.
 
+:ref:`Model Alignment by REINFORCE <nemo-aligner-reinforce>`
+   In this tutorial, we will guide you through the process of aligning a NeMo Framework model using REINFORCE. This method can be applied to various models, including LLaMa2 and Mistral, with our scripts functioning consistently across different models.
+
 :ref:`Model Alignment by DPO, RPO and IPO <nemo-aligner-dpo>`
    DPO, RPO, and IPO are simpler alignment methods compared to RLHF. DPO introduces a novel parameterization of the reward model in RLHF, which allows us to extract the corresponding optimal policy. Similarly, RPO and IPO provide alternative parameterizations or optimization strategies, each contributing unique approaches to refining model alignment.
 
@@ -75,6 +79,14 @@
      - Yes
      - Yes
      - 
+   * - :ref:`REINFORCE <nemo-aligner-reinforce>`
+     - Yes
+     - Yes
+     - Yes
+     - Yes (✓)
+     - Yes
+     - Yes
+     - 
    * - :ref:`DPO <nemo-aligner-dpo>`
      - 
      - Yes (✓)
diff --git a/docs/user-guide/reinforce.rst b/docs/user-guide/reinforce.rst
index cc3005db1..6d7897281 100644
--- a/docs/user-guide/reinforce.rst
+++ b/docs/user-guide/reinforce.rst
@@ -1,16 +1,16 @@
 .. include:: /content/nemo.rsts
 
-.. _model-aligner-reinforce:
+.. _nemo-aligner-reinforce:
 
 Model Alignment by REINFORCE
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 
 In this tutorial, we will guide you through the process of aligning a NeMo Framework model using REINFORCE. This method can be applied to various models, including LLaMa2 and Mistral, with our scripts functioning consistently across different models.
 
 REINFORCE is usually preceded by a Supervised Fine-Tuning (SFT). We should first follow the :ref:`Prerequisite guide <prerequisite>` and the :ref:`SFT guide <sft>`. After obtaining the SFT model, we will also need to train a reward model as in :ref:`PPO guide <ppo>`. We will use the REINFORCE algorithm on the `Anthropic-HH-RLHF <https://huggingface.co/datasets/Anthropic/hh-rlhf>`__ dataset.
 
 REINFORCE Training
-############
+##################
 
 After you have fine-tuned a GPT model using Supervised Fine-Tuning (SFT), and trained a reward model as explained in the preceding section, you can start aligning the policy using REINFORCE.
 
@@ -48,7 +48,7 @@ To launch the server:
 The above example launches the reward model server on eight GPUs and one node. Make sure to change trainer.devices, trainer.num_nodes depending on your model size and scale. Aligner will work on any scale. Also, make sure to tune the trainer.reinforce.inference_micro_batch_size argument. This argument sets the size of the batch the REINFORCE actor is allowed to send to the reward per DP rank.
 
 Launch the Initial Policy and REINFORCE Actor Training
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 The REINFORCE Actor training job contains the master controller that makes the HTTP calls to all servers when needed. To launch the REINFORCE Actor and Initial Policy server:
 
@@ -58,7 +58,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
    TRAIN_DATA_PATH="/path/to/train_prompts.jsonl"
    VALID_DATA_PATH="/path/to/test_prompts.jsonl"
 
-   PRETRAINED_ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
+   ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
    RESULTS_DIR="/path/to/actor_results_dir"
 
    USE_FLASK=False
@@ -73,7 +73,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
    cd ${GPFS}
    export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
    && export HYDRA_FULL_ERROR=1 \
-   && python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
+   && mpirun -n 8 --allow-run-as-root python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
       "model.data.data_prefix={train: [${TRAIN_DATA_PATH}], validation: [${VALID_DATA_PATH}], test: [${VALID_DATA_PATH}]}" \
       pretrained_checkpoint.restore_from_path=\"${ACTOR_NEMO_FILE}\" \
       exp_manager.checkpoint_callback_params.save_top_k=1 \
@@ -114,7 +114,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
 The above command launches the initial and actor server on one node with eight GPUs.
 
 Launching Both Servers for REINFORCE training
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 You can use slurm to launch the two jobs and get them to coordinate together in a full REINFORCE job through the following:
 
@@ -239,7 +239,7 @@ You can use slurm to launch the two jobs and get them to coordinate together in
       trainer.reinforce.rollout_batch_seq_length=4096
    EOF
 
-   srun --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
+   srun --mpi=pmix --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
 
    wait
 
@@ -251,6 +251,6 @@ It is important to launch all jobs with ``&`` after the srun command to ensure t
    Make sure to change the reward model arg ``trainer.reinforce.inference_micro_batch_size`` such that ``trainer.reinforce.inference_micro_batch_size * DP size <= model.reinforce.rollout_micro_batch_size``.
 
 REINFORCE Results
-%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%
 
-After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
\ No newline at end of file
+After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
diff --git a/docs/user-guide/rlhf.rst b/docs/user-guide/rlhf.rst
index 5c68edb60..3e98e7fe2 100644
--- a/docs/user-guide/rlhf.rst
+++ b/docs/user-guide/rlhf.rst
@@ -383,6 +383,30 @@ NeMo-Aligner has support for accelerating RLHF with `TensorRT-LLM <https://githu
 
 For more information please see the NeMo-Aligner `paper <https://arxiv.org/abs/2405.01481>`__.
 
+.. note::
+    If you are running ``train_gpt_ppo_actor.py`` interactively (outside of SLURM) with TensorRT-LLM acceleration,
+    you must prepend ``mpirun -n 8 --allow-run-as-root`` to the python run command:
+
+    .. code-block:: bash
+
+        mpirun -n 8 --allow-run-as-root python -u ${GPFS}/examples/nlp/gpt/train_gpt_ppo_actor.py ...
+
+    If you are using SLURM, you do not need to prepend ``mpirun`` since this will be handled automatically
+    if you run ``srun`` with ``--mpi=pmix``:
+
+    .. code-block:: bash
+
+        read -r -d '' cmd_ppo <<EOF
+        cd ${GPFS} \
+        && export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
+        && export HYDRA_FULL_ERROR=1 \
+        && python -u ${GPFS}/examples/nlp/gpt/train_gpt_ppo_actor.py \
+            ...
+        EOF
+
+        srun --mpi=pmix ... bash -c "${cmd_ppo}"
+
+
 PPO Results with TensorRT-LLM
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
diff --git a/nemo_aligner/data/nlp/tests/__init__.py b/nemo_aligner/data/nlp/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_cai_utils.py b/nemo_aligner/data/nlp/tests/cai_utils_test.py
similarity index 100%
rename from tests/test_cai_utils.py
rename to nemo_aligner/data/nlp/tests/cai_utils_test.py
diff --git a/tests/test_datasets.py b/nemo_aligner/data/nlp/tests/datasets_test.py
similarity index 100%
rename from tests/test_datasets.py
rename to nemo_aligner/data/nlp/tests/datasets_test.py
diff --git a/nemo_aligner/experimental/README.md b/nemo_aligner/experimental/README.md
new file mode 100644
index 000000000..c33352547
--- /dev/null
+++ b/nemo_aligner/experimental/README.md
@@ -0,0 +1,50 @@
+# Experimental Package
+
+The `experimental` sub-package contains projects that are under active development and may not be fully stable.
+
+## Experimental Project Directory Structure:
+
+```
+NeMo-Aligner/
+├── docs/
+│   ├── user-guide/
+│   │   └── ppo.html
+│   └── user-guide-experimental/    <----- experimental docs
+│       └── new-thing.html
+├── nemo_aligner/
+│   ├── algorithms/
+│   ├── data/
+│   │   ├── datasets.py
+│   │   └── tests/
+│   │       └── datasets_test.py
+│   └── experimental/               <----- experimental sub-package
+│       └── <proj-name>/
+│           ├── dataset.py          <----- experimental dataset
+│           ├── new_algo.py         <----- experimental algo
+│           ├── model.py            <----- experimental model
+│           └── tests/
+│               └── model_test.py   <----- experimental model test
+└── tests/
+    ├── functional/
+    │   ├── dpo.sh
+    │   └── test_cases/
+    │       └── dpo-llama3
+    └── functional_experimental/    <----- experimental functional tests (mirrors functional/ structure)
+        ├── new_algo.sh
+        └── test_cases/
+            └── new_algo-llama3
+```
+
+The directories below exist to organize experimental projects (source code), tests, and documentation.
+
+- [nemo_aligner/experimental/](../../nemo_aligner/experimental/): Main experimental sub-package containing projects under development
+- [tests/functional_experimental/](../../tests/functional_experimental/): Functional tests for experimental projects
+- [docs/user-guide-experimental/](../../docs/user-guide-experimental/): Documentation directory for experimental features and algorithms
+
+The `experimental` sub-package follows a modular structure where each project has its own directory (sub-package) containing implementation and tests.
+
+## Guidelines for "experimental/" Projects
+
+- **Scope**: Projects can include new model definitions, training loops, utilities, or unit tests.
+- **Independence**: Projects should ideally be independent. Dependence on other projects signals it might benefit from being added to core with tests (and documentation if applicable).
+- **Testing**: Projects must include at least one functional test (see this [example](../../tests/functional/test_cases/dpo-llama3)).
diff --git a/nemo_aligner/experimental/__init__.py b/nemo_aligner/experimental/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nemo_aligner/package_info.py b/nemo_aligner/package_info.py
index 98a8850de..e219e397d 100644
--- a/nemo_aligner/package_info.py
+++ b/nemo_aligner/package_info.py
@@ -23,7 +23,13 @@
 VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE, DEV)
 
 __shortversion__ = ".".join(map(str, VERSION[:3]))
-__version__ = __shortversion__ + VERSION[3] + "." + ".".join(VERSION[4:])
+__version__ = __shortversion__
+
+if VERSION[3] != "":
+    __version__ = __version__ + VERSION[3]
+
+if VERSION[4] != "":
+    __version__ = __version__ + "." + ".".join(VERSION[4:])
 
 __package_name__ = "nemo_aligner"
 __contact_names__ = "NVIDIA"
diff --git a/tests/test_mcore_utilities.py b/nemo_aligner/testing/utils.py
similarity index 100%
rename from tests/test_mcore_utilities.py
rename to nemo_aligner/testing/utils.py
diff --git a/nemo_aligner/utils/tests/__init__.py b/nemo_aligner/utils/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_distributed.py b/nemo_aligner/utils/tests/distributed_test.py
similarity index 100%
rename from tests/test_distributed.py
rename to nemo_aligner/utils/tests/distributed_test.py
diff --git a/tests/test_ppo_utils.py b/nemo_aligner/utils/tests/ppo_utils_test.py
similarity index 100%
rename from tests/test_ppo_utils.py
rename to nemo_aligner/utils/tests/ppo_utils_test.py
diff --git a/tests/test_text_generation_utils.py b/nemo_aligner/utils/tests/text_generation_utils_test.py
similarity index 100%
rename from tests/test_text_generation_utils.py
rename to nemo_aligner/utils/tests/text_generation_utils_test.py
diff --git a/tests/test_trainer_utils.py b/nemo_aligner/utils/tests/trainer_utils_test.py
similarity index 100%
rename from tests/test_trainer_utils.py
rename to nemo_aligner/utils/tests/trainer_utils_test.py
diff --git a/tests/test_trt_llm.py b/nemo_aligner/utils/tests/trt_llm_test.py
similarity index 100%
rename from tests/test_trt_llm.py
rename to nemo_aligner/utils/tests/trt_llm_test.py
diff --git a/tests/test_utils.py b/nemo_aligner/utils/tests/utils_test.py
similarity index 100%
rename from tests/test_utils.py
rename to nemo_aligner/utils/tests/utils_test.py
diff --git a/setup/requirements.txt b/setup/requirements.txt
index d074f3672..4aa22afa1 100644
--- a/setup/requirements.txt
+++ b/setup/requirements.txt
@@ -3,3 +3,6 @@ jsonlines
 megatron_core>=0.8
 nemo_toolkit[nlp]
 nvidia-pytriton
+# pynvml pin is needed for TRTLLM v0.13.0 since 12.0.0 contains a breaking change.
+pynvml==11.5.3
+tensorrt-llm==0.13.0
diff --git a/tests/functional_experimental/README.md b/tests/functional_experimental/README.md
new file mode 100644
index 000000000..69694a82d
--- /dev/null
+++ b/tests/functional_experimental/README.md
@@ -0,0 +1,3 @@
+# Experimental Functional Tests
+
+More context can be found in the [experimental/README.md](../../nemo_aligner/experimental/README.md) file.
\ No newline at end of file
diff --git a/tests/functional_experimental/test_cases/.gitkeep b/tests/functional_experimental/test_cases/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/functional_experimental/test_data/.gitkeep b/tests/functional_experimental/test_data/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/run_mpi_unit.sh b/tests/run_mpi_unit.sh
index e11e5cf10..d905d09cd 100755
--- a/tests/run_mpi_unit.sh
+++ b/tests/run_mpi_unit.sh
@@ -24,9 +24,9 @@ if [[ $NUM_GPUS_AVAILABLE -lt 2 ]]; then
 fi
 
 export PYTHONPATH=$(realpath ..):${PYTHONPATH:-}
-CUDA_VISIBLE_DEVICES=0,1 mpirun -np 2 --allow-run-as-root pytest .. -rA -s -x -vv --mpi $@ || true
+CUDA_VISIBLE_DEVICES=0,1 mpirun -np 2 --allow-run-as-root pytest ../nemo_aligner -rA -s -x -vv --mpi "$@" || true
 
-if [[ -f PYTEST_SUCCESS ]]; then
+if [[ -f ../PYTEST_SUCCESS ]]; then
     echo SUCCESS
 else
     echo FAILURE
diff --git a/tests/run_unit.sh b/tests/run_unit.sh
index 41216da52..619379dcc 100755
--- a/tests/run_unit.sh
+++ b/tests/run_unit.sh
@@ -24,9 +24,9 @@ if [[ $NUM_GPUS_AVAILABLE -lt 2 ]]; then
 fi
 
 export PYTHONPATH=$(realpath ..):${PYTHONPATH:-}
-CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 -m pytest .. -rA -s -x -vv $@ || true
+CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 -m pytest ../nemo_aligner -rA -s -x -vv "$@" || true
 
-if [[ -f PYTEST_SUCCESS ]]; then
+if [[ -f ../PYTEST_SUCCESS ]]; then
     echo SUCCESS
 else
     echo FAILURE