Skip to content

Commit

Permalink
shared memory macro
Browse files Browse the repository at this point in the history
  • Loading branch information
monofuel committed Jul 21, 2024
1 parent 5e9078a commit a13e089
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 79 deletions.
34 changes: 0 additions & 34 deletions src/cuda.nim
Original file line number Diff line number Diff line change
Expand Up @@ -104,37 +104,3 @@ let
blockIdx* {.importc, inject, header: "cuda_runtime.h".}: BlockIdx
gridDim* {.importc, inject, header: "cuda_runtime.h".}: GridDim
threadIdx* {.importc, inject, header: "cuda_runtime.h".}: ThreadIdx

macro hippoGlobal*(fn: untyped): untyped =
  ## Annotates the routine `fn` with `exportc` and a `codegenDecl` that puts
  ## `__global__` in front of the generated declaration, then emits it with
  ## stack traces and runtime checks pushed off around the definition.
  let kernelPragmas = quote:
    {.exportc, codegenDecl: "__global__ $# $#$#".}
  # Copy every entry of the quoted pragma list onto the annotated routine.
  for entry in kernelPragmas:
    fn.addPragma(entry)
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}

macro hippoDevice*(fn: untyped): untyped =
  ## Annotates the routine `fn` with `exportc` and a `codegenDecl` that puts
  ## `__device__` in front of the generated declaration, then emits it with
  ## stack traces and runtime checks pushed off around the definition.
  let devicePragmas = quote:
    {.exportc, codegenDecl: "__device__ $# $#$#".}
  # Copy every entry of the quoted pragma list onto the annotated routine.
  for entry in devicePragmas:
    fn.addPragma(entry)
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}


macro hippoHost*(fn: untyped): untyped =
  ## Annotates the routine `fn` with `exportc` and a `codegenDecl` that puts
  ## `__host__` in front of the generated declaration, then emits it with
  ## stack traces and runtime checks pushed off around the definition.
  let hostPragmas = quote:
    {.exportc, codegenDecl: "__host__ $# $#$#".}
  # Copy every entry of the quoted pragma list onto the annotated routine.
  for entry in hostPragmas:
    fn.addPragma(entry)
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}
34 changes: 0 additions & 34 deletions src/hip.nim
Original file line number Diff line number Diff line change
Expand Up @@ -100,37 +100,3 @@ let
blockIdx* {.importc, inject, header: "hip/hip_runtime.h".}: BlockIdx
gridDim* {.importc, inject, header: "hip/hip_runtime.h".}: GridDim
threadIdx* {.importc, inject, header: "hip/hip_runtime.h".}: ThreadIdx

macro hippoGlobal*(fn: untyped): untyped =
  ## Adds `exportc` and a `__global__`-prefixed `codegenDecl` to `fn`, and
  ## returns the definition wrapped in a push/pop pair that switches off
  ## stack traces and runtime checks.
  let kernelPragmas = quote:
    {.exportc, codegenDecl: "__global__ $# $#$#".}
  # Transfer both pragma entries to the routine itself.
  for entry in kernelPragmas:
    fn.addPragma(entry)
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}

macro hippoDevice*(fn: untyped): untyped =
  ## Adds `exportc` and a `__device__`-prefixed `codegenDecl` to `fn`, and
  ## returns the definition wrapped in a push/pop pair that switches off
  ## stack traces and runtime checks.
  let devicePragmas = quote:
    {.exportc, codegenDecl: "__device__ $# $#$#".}
  # Transfer both pragma entries to the routine itself.
  for entry in devicePragmas:
    fn.addPragma(entry)
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}


macro hippoHost*(fn: untyped): untyped =
  ## Adds `exportc` and a `__host__`-prefixed `codegenDecl` to `fn`, and
  ## returns the definition wrapped in a push/pop pair that switches off
  ## stack traces and runtime checks.
  let hostPragmas = quote:
    {.exportc, codegenDecl: "__host__ $# $#$#".}
  # Transfer both pragma entries to the routine itself.
  for entry in hostPragmas:
    fn.addPragma(entry)
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}
45 changes: 45 additions & 0 deletions src/hippo.nim
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,48 @@ template hippoLaunchKernel*(
args: tuple
) =
handleError(launchKernel(kernel, gridDim, blockDim, sharedMemBytes, stream, args))


## Macros

proc wrapWithQualifier(fn: NimNode; qualifier: string): NimNode =
  ## Shared implementation of `hippoGlobal`, `hippoDevice` and `hippoHost`.
  ##
  ## Attaches `exportc` and `codegenDecl: "<qualifier> $# $#$#"` to the
  ## routine `fn`, then returns `fn` wrapped in a push/pop pair that turns
  ## off stack traces and runtime checks for that definition.
  fn.addPragma(ident"exportc")
  fn.addPragma(
    newColonExpr(ident"codegenDecl", newLit(qualifier & " $# $#$#")))
  result = quote do:
    {.push stackTrace: off, checks: off.}
    `fn`
    {.pop.}

macro hippoGlobal*(fn: untyped): untyped =
  ## Emits `fn` with the `__global__` qualifier in front of its generated
  ## declaration.
  wrapWithQualifier(fn, "__global__")

macro hippoDevice*(fn: untyped): untyped =
  ## Emits `fn` with the `__device__` qualifier in front of its generated
  ## declaration.
  wrapWithQualifier(fn, "__device__")

macro hippoHost*(fn: untyped): untyped =
  ## Emits `fn` with the `__host__` qualifier in front of its generated
  ## declaration.
  wrapWithQualifier(fn, "__host__")


macro hippoShared*(v: untyped): untyped =
  ## `{.hippoShared.}` pragma for shared GPU memory.
  ##
  ## Re-emits the annotated declaration `v` between a push/pop pair that
  ## applies `noinit`, `exportc` and a `__shared__`-prefixed `codegenDecl`,
  ## with stack traces and runtime checks switched off.
  let decl = v
  result = quote do:
    {.push stackTrace: off, checks: off, noinit, exportc, codegenDecl: "__shared__ $# $#".}
    `decl`
    {.pop.}
3 changes: 2 additions & 1 deletion tests/hip/config.nims
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
--define:"pixieNoSimd"
--define:"zippyNoSimd"
--define:"crunchyNoSimd"
--cc:hipcc
--cc:hipcc
--define:"useMalloc"
11 changes: 1 addition & 10 deletions tests/hip/dot.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,8 @@ const
ThreadsPerBlock: int = 256
BlocksPerGrid: int = min(32, ((N + ThreadsPerBlock - 1) div ThreadsPerBlock))

# TODO improve this
{.pragma: hippoShared, exportc, codegenDecl: "__shared__ $# $#".}

proc dot(a, b, c: ptr[float64]){.hippoGlobal.} =

#var cache {.hippoShared.}: ptr[float64]
#var cache {.importc: "cache"}: ptr[float64]
# __shared__ float cache[threadsPerBlock
{.emit:["""__shared__ float cache[256];"""].}
let cache {.codeGenDecl:"" importc noinit.}: ptr UncheckedArray[float64]
#var cache {.hippoShared.}: ptr[float64]
var cache {.hippoShared.}: array[256, float]

# TODO figure out how to do this properly
let aArray = cast[ptr UncheckedArray[float64]](a)
Expand Down

0 comments on commit a13e089

Please sign in to comment.