Skip to content

Commit

Permalink
[ROCm] fixes ambiguous calls to shfl* where there is no explicit ty…
Browse files Browse the repository at this point in the history
…pe conversion from `c10::Half` to `__half` (#360)

[ROCm] fixes ambiguous calls to `shfl*` where there is no explicit type
conversion from `c10::Half` to `__half`
  • Loading branch information
ashwinma authored Jan 23, 2024
1 parent 05b62b1 commit bf2591a
Showing 1 changed file with 25 additions and 3 deletions.
28 changes: 25 additions & 3 deletions csrc/cuda/utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor")
// Forwards the condition `x` to AT_ASSERTM with the fixed message
// "Input mismatch".
#define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch")

__device__ __inline__ at::Half
__shfl_sync(const unsigned mask, const at::Half var, const int srcLane) {
return __shfl_sync(mask, var.operator __half(), srcLane);
// at::Half overload of __shfl_up_sync.
//
// Unwraps `var` into a native __half through the explicit conversion
// operator, so that the call below is made with a __half argument, and
// returns that call's result as at::Half.
//
//   mask  - participant mask, passed through unchanged
//   var   - value contributed by the calling lane
//   delta - lane offset, passed through unchanged
__device__ __inline__ at::Half __shfl_up_sync(const unsigned mask,
                                              const at::Half var,
                                              const unsigned int delta) {
  // Spell out the conversion operator rather than relying on an implicit
  // conversion when selecting the shuffle overload.
  const __half native = var.operator __half();
  return __shfl_up_sync(mask, native, delta);
}

__device__ __inline__ at::Half __shfl_down_sync(const unsigned mask,
Expand All @@ -17,6 +18,27 @@ __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask,
return __shfl_down_sync(mask, var.operator __half(), delta);
}

// at::Half overload of __shfl_sync.
//
// Unwraps `var` into a native __half through the explicit conversion
// operator, so that the call below is made with a __half argument, and
// returns that call's result as at::Half.
//
//   mask  - participant mask, passed through unchanged
//   var   - value contributed by the calling lane
//   delta - forwarded as the third argument of __shfl_sync
//           (NOTE(review): for the non-directional shuffle this is
//           presumably the source lane index rather than an offset; confirm)
__device__ __inline__ at::Half __shfl_sync(const unsigned mask,
                                           const at::Half var,
                                           const int delta) {
  // Spell out the conversion operator rather than relying on an implicit
  // conversion when selecting the shuffle overload.
  const __half native = var.operator __half();
  return __shfl_sync(mask, native, delta);
}

// at::Half overload of the mask-less __shfl_up.
//
// Unwraps `var` into a native __half through the explicit conversion
// operator, forwards it together with `delta`, and returns the result as
// at::Half.
//
// NOTE(review): mask-less shuffles are the legacy form and are not available
// on Volta+ CUDA targets; presumably this overload is only meant for the
// ROCm/HIP build. Confirm whether it should live under USE_ROCM.
__device__ __inline__ at::Half __shfl_up(const at::Half var,
                                         const unsigned int delta) {
  // Spell out the conversion operator rather than relying on an implicit
  // conversion when selecting the shuffle overload.
  const __half native = var.operator __half();
  return __shfl_up(native, delta);
}

// at::Half overload of the mask-less __shfl_down.
//
// Unwraps `var` into a native __half through the explicit conversion
// operator, forwards it together with `delta`, and returns the result as
// at::Half.
//
// NOTE(review): mask-less shuffles are the legacy form and are not available
// on Volta+ CUDA targets; presumably this overload is only meant for the
// ROCm/HIP build. Confirm whether it should live under USE_ROCM.
__device__ __inline__ at::Half __shfl_down(const at::Half var,
                                           const unsigned int delta) {
  // Spell out the conversion operator rather than relying on an implicit
  // conversion when selecting the shuffle overload.
  const __half native = var.operator __half();
  return __shfl_down(native, delta);
}

// at::Half overload of the mask-less __shfl.
//
// Unwraps `var` into a native __half through the explicit conversion
// operator, forwards it together with `delta`, and returns the result as
// at::Half.
//
// NOTE(review): mask-less shuffles are the legacy form and are not available
// on Volta+ CUDA targets; presumably this overload is only meant for the
// ROCm/HIP build. Confirm whether it should live under USE_ROCM.
__device__ __inline__ at::Half __shfl(const at::Half var, const int delta) {
  // Spell out the conversion operator rather than relying on an implicit
  // conversion when selecting the shuffle overload.
  const __half native = var.operator __half();
  return __shfl(native, delta);
}

#ifdef USE_ROCM
__device__ __inline__ at::Half __ldg(const at::Half* ptr) {
return __ldg(reinterpret_cast<const __half*>(ptr));
Expand Down

0 comments on commit bf2591a

Please sign in to comment.