Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

cuBLAS GEMM: add fp64 (double-precision) support #1421

Merged
merged 2 commits on
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions cinn/runtime/cuda/cublas_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,23 @@ inline cublasStatus_t cublasGemm(cudaDataType_t dtype,
reinterpret_cast<const float *>(&beta),
reinterpret_cast<float *>(C),
ldc);
} else if (dtype == CUDA_R_64F) {
const double alpha_fp64 = static_cast<double>(alpha);
const double beta_fp64 = static_cast<double>(beta);
return cublasDgemm(handle,
transa,
transb,
m,
n,
k,
&alpha_fp64,
reinterpret_cast<const double *>(A),
lda,
reinterpret_cast<const double *>(B),
ldb,
&beta_fp64,
reinterpret_cast<double *>(C),
ldc);
} else if (dtype == CUDA_R_16F) {
common::float16 alpha_fp16{alpha};
common::float16 beta_fp16{beta};
Expand Down Expand Up @@ -135,6 +152,27 @@ inline cublasStatus_t cublasGemmStridedBatched(cudaDataType_t dtype,
ldc,
strideC,
batchCount);
} else if (dtype == CUDA_R_64F) {
const double alpha_fp64 = static_cast<double>(alpha);
const double beta_fp64 = static_cast<double>(beta);
return cublasDgemmStridedBatched(handle,
transa,
transb,
m,
n,
k,
&alpha_fp64,
reinterpret_cast<const double *>(A),
lda,
strideA,
reinterpret_cast<const double *>(B),
ldb,
strideB,
&beta_fp64,
reinterpret_cast<double *>(C),
ldc,
strideC,
batchCount);
} else if (dtype == CUDA_R_16F) {
common::float16 alpha_fp16{alpha};
common::float16 beta_fp16{beta};
Expand Down Expand Up @@ -220,6 +258,24 @@ inline cublasStatus_t cublasGemmBatched(cudaDataType_t dtype,
reinterpret_cast<float **>(C),
ldc,
batchCount);
} else if (dtype == CUDA_R_64F) {
const double alpha_fp64 = static_cast<double>(alpha);
const double beta_fp64 = static_cast<double>(beta);
return cublasDgemmBatched(handle,
transa,
transb,
m,
n,
k,
&alpha_fp64,
reinterpret_cast<double **>(A),
lda,
reinterpret_cast<double **>(B),
ldb,
&beta_fp64,
reinterpret_cast<double **>(C),
ldc,
batchCount);
} else if (dtype == CUDA_R_16F) {
__half alpha_fp16{alpha};
__half beta_fp16{beta};
Expand Down
4 changes: 4 additions & 0 deletions cinn/runtime/cuda/cuda_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ void cinn_call_cublas(void *v_args,
cuda_dtype = CUDA_R_16F;
} else if (is_float && bytes == sizeof(float)) {
cuda_dtype = CUDA_R_32F;
} else if (is_float && bytes == sizeof(double)) {
cuda_dtype = CUDA_R_64F;
} else if (is_bfloat16) {
cuda_dtype = CUDA_R_16BF;
} else {
Expand Down Expand Up @@ -326,6 +328,8 @@ void cinn_call_batched_cublas(void *v_args,
cuda_dtype = CUDA_R_16F;
} else if (is_float && bytes == sizeof(float)) {
cuda_dtype = CUDA_R_32F;
} else if (is_float && bytes == sizeof(double)) {
cuda_dtype = CUDA_R_64F;
} else if (is_bfloat16) {
cuda_dtype = CUDA_R_16BF;
} else {
Expand Down
Loading