Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEBUG {2023.06}[foss/2023a] TensorFlow v2.15.1 w/ CUDA 12.1.1 #808

Open
wants to merge 5 commits into
base: 2023.06-software.eessi.io
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
easyconfigs:
- CUDA-12.1.1.eb
- cuDNN-8.9.2.26-CUDA-12.1.1.eb
- TensorFlow-2.15.1-foss-2023a-CUDA-12.1.1.eb:
options:
# use the updated TensorFlow easyblock to work around an ImportError caused by
# libnccl.so.2 not being found
# see https://github.com/easybuilders/easybuild-easyblocks/pull/3497
include-easyblocks-from-commit: 68d89b954b9a67ac157e8030f2ad670ff9374964
24 changes: 24 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,29 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix):
raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!")


def parse_hook_tensorflow_replace_bazel(ec, eprefix):
    """
    Replace Bazel/6.1.0 build dependency in TensorFlow/2.15.1 easyconfigs with
    Bazel/6.3.1.

    TensorFlow easyconfigs of any other version are left untouched.

    :param ec: easyconfig being parsed; its 'builddependencies' list is modified in place
    :param eprefix: not used by this hook (kept so the signature matches the other parse hooks)
    :raises EasyBuildError: if the easyconfig is not a TensorFlow easyconfig
    """
    if ec.name != 'TensorFlow':
        raise EasyBuildError("TensorFlow-specific parse_hook triggered for non-TensorFlow easyconfig?!")

    # The hook is registered under the software name only, so TensorFlow
    # easyconfigs of other versions reach this point as well; they need no
    # changes and must not be reported as "non-TensorFlow".
    if ec.version not in ['2.15.1']:
        return

    # This is an attempt to work around some build errors. TensorFlow/2.13.0 was
    # built with Bazel/6.3.1, hence using that version may just work.
    print_msg("Trying to replace Bazel/6.1.0 build dependency with Bazel/6.3.1 for TensorFlow/2.15.1 easyconfig")
    bazel_name, old_version, new_version = 'Bazel', '6.1.0', '6.3.1'
    build_deps = ec['builddependencies']
    for idx, build_dep in enumerate(build_deps):
        if build_dep[0] == bazel_name and build_dep[1] == old_version:
            # only the first matching entry is replaced, in place
            build_deps[idx] = (bazel_name, new_version)
            print_msg(f"Replaced build dependency {idx} with ({build_deps[idx][0]}, {build_deps[idx][1]})")
            break


def parse_hook_ucx_eprefix(ec, eprefix):
"""Make UCX aware of compatibility layer via additional configuration options."""
if ec.name == 'UCX':
Expand Down Expand Up @@ -973,6 +996,7 @@ def inject_gpu_property(ec):
'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors,
'pybind11': parse_hook_pybind11_replace_catch2,
'Qt5': parse_hook_qt5_check_qtwebengine_disable,
'TensorFlow': parse_hook_tensorflow_replace_bazel,
'UCX': parse_hook_ucx_eprefix,
}

Expand Down
Loading