diff --git a/src/cuda.nim b/src/cuda.nim index 3c4c561..0b1e3de 100644 --- a/src/cuda.nim +++ b/src/cuda.nim @@ -104,37 +104,3 @@ let blockIdx* {.importc, inject, header: "cuda_runtime.h".}: BlockIdx gridDim* {.importc, inject, header: "cuda_runtime.h".}: GridDim threadIdx* {.importc, inject, header: "cuda_runtime.h".}: ThreadIdx - -macro hippoGlobal*(fn: untyped): untyped = - let globalPragma: NimNode = quote: - {. exportc, codegenDecl: "__global__ $# $#$#".} - - fn.addPragma(globalPragma[0]) - fn.addPragma(globalPragma[1]) - quote do: - {.push stackTrace: off, checks: off.} - `fn` - {.pop.} - -macro hippoDevice*(fn: untyped): untyped = - let globalPragma: NimNode = quote: - {. exportc, codegenDecl: "__device__ $# $#$#".} - - fn.addPragma(globalPragma[0]) - fn.addPragma(globalPragma[1]) - quote do: - {.push stackTrace: off, checks: off.} - `fn` - {.pop.} - - -macro hippoHost*(fn: untyped): untyped = - let globalPragma: NimNode = quote: - {. exportc, codegenDecl: "__host__ $# $#$#".} - - fn.addPragma(globalPragma[0]) - fn.addPragma(globalPragma[1]) - quote do: - {.push stackTrace: off, checks: off.} - `fn` - {.pop.} diff --git a/src/hip.nim b/src/hip.nim index bd13604..0d1fff4 100644 --- a/src/hip.nim +++ b/src/hip.nim @@ -100,37 +100,3 @@ let blockIdx* {.importc, inject, header: "hip/hip_runtime.h".}: BlockIdx gridDim* {.importc, inject, header: "hip/hip_runtime.h".}: GridDim threadIdx* {.importc, inject, header: "hip/hip_runtime.h".}: ThreadIdx - -macro hippoGlobal*(fn: untyped): untyped = - let globalPragma: NimNode = quote: - {. exportc, codegenDecl: "__global__ $# $#$#".} - - fn.addPragma(globalPragma[0]) - fn.addPragma(globalPragma[1]) - quote do: - {.push stackTrace: off, checks: off.} - `fn` - {.pop.} - -macro hippoDevice*(fn: untyped): untyped = - let globalPragma: NimNode = quote: - {. exportc, codegenDecl: "__device__ $# $#$#".} - - fn.addPragma(globalPragma[0]) - fn.addPragma(globalPragma[1]) - quote do: - {.push stackTrace: off, checks: off.} - `fn` - {.pop.} - - -macro hippoHost*(fn: untyped): untyped = - let globalPragma: NimNode = quote: - {. exportc, codegenDecl: "__host__ $# $#$#".} - - fn.addPragma(globalPragma[0]) - fn.addPragma(globalPragma[1]) - quote do: - {.push stackTrace: off, checks: off.} - `fn` - {.pop.} diff --git a/src/hippo.nim b/src/hippo.nim index 3d79fd0..9b39d68 100644 --- a/src/hippo.nim +++ b/src/hippo.nim @@ -162,3 +162,48 @@ template hippoLaunchKernel*( args: tuple ) = handleError(launchKernel(kernel, gridDim, blockDim, sharedMemBytes, stream, args)) + + +## Macros + +macro hippoGlobal*(fn: untyped): untyped = + let globalPragma: NimNode = quote: + {. exportc, codegenDecl: "__global__ $# $#$#".} + + fn.addPragma(globalPragma[0]) + fn.addPragma(globalPragma[1]) + quote do: + {.push stackTrace: off, checks: off.} + `fn` + {.pop.} + +macro hippoDevice*(fn: untyped): untyped = + let globalPragma: NimNode = quote: + {. exportc, codegenDecl: "__device__ $# $#$#".} + + fn.addPragma(globalPragma[0]) + fn.addPragma(globalPragma[1]) + quote do: + {.push stackTrace: off, checks: off.} + `fn` + {.pop.} + + +macro hippoHost*(fn: untyped): untyped = + let globalPragma: NimNode = quote: + {. exportc, codegenDecl: "__host__ $# $#$#".} + + fn.addPragma(globalPragma[0]) + fn.addPragma(globalPragma[1]) + quote do: + {.push stackTrace: off, checks: off.} + `fn` + {.pop.} + + +## {.hippoShared.} pragma for shared GPU memory +macro hippoShared*(v: untyped): untyped = + quote do: + {.push stackTrace: off, checks: off, noinit, exportc, codegenDecl: "__shared__ $# $#".} + `v` + {.pop.} \ No newline at end of file diff --git a/tests/hip/config.nims b/tests/hip/config.nims index 4cc25dd..2c63256 100644 --- a/tests/hip/config.nims +++ b/tests/hip/config.nims @@ -3,4 +3,5 @@ --define:"pixieNoSimd" --define:"zippyNoSimd" --define:"crunchyNoSimd" ---cc:hipcc \ No newline at end of file +--cc:hipcc +--define:"useMalloc" \ No newline at end of file diff --git a/tests/hip/dot.nim b/tests/hip/dot.nim index 565d4b5..3b96f4a 100644 --- a/tests/hip/dot.nim +++ b/tests/hip/dot.nim @@ -7,17 +7,8 @@ const ThreadsPerBlock: int = 256 BlocksPerGrid: int = min(32, ((N + ThreadsPerBlock - 1) div ThreadsPerBlock)) -# TODO improve this -{.pragma: hippoShared, exportc, codegenDecl: "__shared__ $# $#".} - proc dot(a, b, c: ptr[float64]){.hippoGlobal.} = - - #var cache {.hippoShared.}: ptr[float64] - #var cache {.importc: "cache"}: ptr[float64] - # __shared__ float cache[threadsPerBlock - {.emit:["""__shared__ float cache[256];"""].} - let cache {.codeGenDecl:"" importc noinit.}: ptr UncheckedArray[float64] - #var cache {.hippoShared.}: ptr[float64] + var cache {.hippoShared.}: array[256, float] # TODO figure out how to do this properly let aArray = cast[ptr UncheckedArray[float64]](a)