Skip to content

Commit

Permalink
Ensure device context before launching kernel (#3731)
Browse files Browse the repository at this point in the history
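If a kernel is launched on a thread that has not initialized a CUDA
context (as can happen in the linked issue), the launch fails with an
error. A simple fix is to establish a device context before launching,
much as calling `cudaFree(0)` would in the runtime API: when no context
is current, the launcher retains the primary context of device 0 and
makes it current on the calling thread.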

Fixes #3729
  • Loading branch information
bertmaher committed Jan 21, 2025
1 parent 6771065 commit 7ab0c15
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions third_party/nvidia/backend/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,15 @@ def format_of(ty):
static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int clusterDimX, int clusterDimY, int clusterDimZ, int shared_memory, CUstream stream, CUfunction function{', ' + arg_decls if len(arg_decls) > 0 else ''}) {{
void *params[] = {{ {', '.join(f"&arg{i}" for i in params)} }};
if (gridX*gridY*gridZ > 0) {{
CUcontext pctx;
CUDA_CHECK(cuCtxGetCurrent(&pctx));
if (!pctx) {{
// Ensure device context.
CUdevice device;
CUDA_CHECK(cuDeviceGet(&device, 0));
CUDA_CHECK(cuDevicePrimaryCtxRetain(&pctx, device));
CUDA_CHECK(cuCtxSetCurrent(pctx));
}}
if (num_ctas == 1) {{
CUDA_CHECK(cuLaunchKernel(function, gridX, gridY, gridZ, 32*num_warps, 1, 1, shared_memory, stream, params, 0));
}} else {{
Expand Down

0 comments on commit 7ab0c15

Please sign in to comment.