Skip to content

Commit

Permalink
Update the test checking for cooperative kernels in conditional nodes.
Browse files Browse the repository at this point in the history
The test is now marked xfail only when the installed CUDA driver version is
older than 12.5. CUDA 12.5 fixes this issue; before it, cooperative
kernels could not be used within the body of a conditional node.

Signed-off-by: Daniel Galvez <[email protected]>
  • Loading branch information
galv committed Jul 11, 2024
1 parent d4f1d3c commit ad6127a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
22 changes: 22 additions & 0 deletions nemo/core/utils/cuda_python_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,28 @@ def cu_call(f_call_out):
return tuple(others)


def cuda_python_conditional_node_cooperative_kernels_supported():
    """
    Report whether cooperative kernels can run inside cuda graph conditional nodes.

    Returns True only if
    ``check_cuda_python_cuda_graphs_conditional_nodes_supported()`` completes
    without raising and the installed CUDA driver is version 12.5 or newer.
    Before this CUDA driver version, cooperative kernels could not run within
    cuda graph conditional nodes.

    Returns:
        bool: False if the conditional-node support check raised; otherwise
        whether the driver version is at least 12.5.

    Raises:
        ImportError: if ``cuDriverGetVersion()`` does not return
            ``CUDA_SUCCESS``.
    """
    try:
        check_cuda_python_cuda_graphs_conditional_nodes_supported()
    except Exception:
        # Any ordinary failure of the prerequisite check means "unsupported".
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still propagate instead of being reported as False.
        return False

    # Imported lazily: only reached once the check above has passed.
    from cuda import cuda

    error, driver_version = cuda.cuDriverGetVersion()
    if error != cuda.CUresult.CUDA_SUCCESS:
        raise ImportError(f"cuDriverGetVersion() returned {cuda.cuGetErrorString(error)}")

    # The integer version is decoded as 1000 * major + 10 * minor.
    major = driver_version // 1000
    minor = (driver_version % 1000) // 10
    return (major, minor) >= (12, 5)


@contextlib.contextmanager
def with_conditional_node(while_loop_kernel, while_loop_args, while_loop_conditional_handle, device):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
from omegaconf import open_dict

from nemo.collections.asr.models import ASRModel
from nemo.core.utils.cuda_python_utils import skip_cuda_python_test_if_cuda_graphs_conditional_nodes_not_supported
from nemo.core.utils.cuda_python_utils import (
skip_cuda_python_test_if_cuda_graphs_conditional_nodes_not_supported,
cuda_python_conditional_node_cooperative_kernels_supported
)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -53,6 +56,7 @@ def stt_en_fastconformer_transducer_large():
8,
True,
marks=pytest.mark.xfail(
not cuda_python_conditional_node_cooperative_kernels_supported(),
reason="""Cannot instantiate the
body cuda graph of a conditional node with a persistent kernel (in this case,
a persistent LSTM), which is triggered in cudnn by using a batch size of 8."""
Expand Down

0 comments on commit ad6127a

Please sign in to comment.