Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AL2023 Isolated build fix removing nvidia open drivers, install nvidia ctk from repo #2086

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 2 additions & 18 deletions templates/al2023/provisioners/install-nvidia-driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,6 @@ function rpm_install() {
done
}

################################################################################
# Download the NVIDIA container toolkit RPMs from the binary S3 bucket into
# WORKING_DIR and install each one with rpm, in dependency order.
# Globals (read): BINARY_BUCKET_NAME, BINARY_BUCKET_REGION, WORKING_DIR
# Outputs:        progress messages on stdout
# Returns:        0 if every RPM was downloaded and installed; otherwise the
#                 exit status of the first download/install that failed
################################################################################
function install-nvidia-container-toolkit() {
  # Function-scoped so generic names do not clobber the caller's variables.
  local -r version="1.17.2-1"
  # The order of these RPMs is important, as they have dependencies on each other
  local -ra rpms=(
    "libnvidia-container1-${version}.x86_64.rpm"
    "nvidia-container-toolkit-base-${version}.x86_64.rpm"
    "libnvidia-container-tools-${version}.x86_64.rpm"
    "nvidia-container-toolkit-${version}.x86_64.rpm"
  )
  local rpm
  for rpm in "${rpms[@]}"; do
    echo "pulling and installing rpms: (${rpm}) from s3 bucket: (${BINARY_BUCKET_NAME}) in region: (${BINARY_BUCKET_REGION})"
    # Quote every expansion so paths/bucket names are passed as single words.
    # Stop at the first failure: later RPMs depend on the earlier ones.
    aws s3 cp --region "${BINARY_BUCKET_REGION}" \
      "s3://${BINARY_BUCKET_NAME}/rpms/${rpm}" "${WORKING_DIR}/${rpm}" || return
    echo "installing rpm: ${WORKING_DIR}/${rpm}"
    sudo rpm -ivh "${WORKING_DIR}/${rpm}" || return
  done
}

echo "Installing NVIDIA ${NVIDIA_DRIVER_MAJOR_VERSION} drivers..."

################################################################################
Expand Down Expand Up @@ -101,6 +84,7 @@ function archive-open-kmods() {

if is-isolated-partition; then
sudo dnf -y remove --all nvidia-driver
sudo dnf -y remove --all "kmod-nvidia-open*"
else
sudo dnf -y module remove --all nvidia-driver
sudo dnf -y module reset nvidia-driver
Expand Down Expand Up @@ -138,7 +122,7 @@ sudo dnf -y install nvidia-fabric-manager

# NVIDIA Container toolkit needs to be locally installed for isolated partitions, also install NVIDIA-Persistenced
if is-isolated-partition; then
install-nvidia-container-toolkit
sudo dnf -y install nvidia-container-toolkit
sudo dnf -y install "nvidia-persistenced-${NVIDIA_DRIVER_MAJOR_VERSION}.*"
else
sudo dnf -y install nvidia-container-toolkit
whoix marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
Loading