You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
What is your question?
How do I implement conv3d using CUTLASS on NVIDIA's Hopper architecture? Here is my code.
`
// A matrix configuration
using ElementA = CutlassT; // Element type for A matrix operand
using LayoutA = cutlass::layout::TensorNDHWC; // Layout type for A matrix operand
constexpr int AlignmentA = 128 / cutlass::sizeof_bits::value;
// B matrix configuration
using ElementB = CutlassT; // Element type for B matrix operand
using LayoutB = cutlass::layout::TensorNDHWC; // Layout type for B matrix operand
constexpr int AlignmentB = 128 / cutlass::sizeof_bits<ElementB>::value;
// C/D matrix configuration
using ElementC = CutlassT; // Element type for C and D matrix operands
using LayoutC = cutlass::layout::TensorNDHWC; // Layout type for C and D matrix operands
constexpr int AlignmentC = 128 / cutlass::sizeof_bits<ElementC>::value; // Memory access granularity/alignment of C matrix in units of elements (up to 16 bytes)
// Core kernel configurations
using ElementAccumulator = float; // Element type for internal accumulation
using ArchTag = cutlass::arch::Sm90; // Tag indicating the minimum SM that supports the intended feature
using OperatorClass = cutlass::arch::OpClassTensorOp; // Operator class tag
using TileShape = cutlass::Shape<cute::Int<BLOCK_M>,cute::_128,cute::_64>;// Threadblock-level tile size
using ClusterShape = cutlass::Shape<cute::_1,cute::_1,cute::_1>; // Shape of the threadblocks in a cluster
using StageCountType = cutlass::conv::collective::StageCount<Stages>; // Stage count maximized based on the tile size
// using KernelSchedule = cutlass::gemm::collective::KernelTmaWarpSpecializedPingpong; // Kernel to launch based on the default setting in the Collective Builder
// using KernelSchedule = cutlass::gemm::KernelTmaWarpSpecializedPingpong;
using KernelSchedule = cutlass::conv::KernelImplicitTmaWarpSpecializedSm90;
using CollectiveMainloop = typename cutlass::conv::collective::CollectiveBuilder<
ArchTag, OperatorClass,
cutlass::conv::Operator::kFprop,
ElementA, LayoutA, AlignmentA,
ElementB, LayoutB, AlignmentB,
ElementAccumulator,
TileShape, ClusterShape,
StageCountType,
KernelSchedule>::CollectiveOp;
using CollectiveEpilogue = typename cutlass::epilogue::collective::CollectiveBuilder<
cutlass::arch::Sm90,
cutlass::arch::OpClassTensorOp,
TileShape, ClusterShape,
cutlass::epilogue::collective::EpilogueTileAuto,
ElementAccumulator, ElementAccumulator,
ElementC, LayoutC, AlignmentC,
ElementC, LayoutC, AlignmentC,
cutlass::epilogue::collective::EpilogueScheduleAuto>::CollectiveOp;
using ConvUniversal = cutlass::conv::kernel::ConvUniversal<
cutlass::conv::ConvProblemShape<cutlass::conv::Operator::kFprop, 3>, // Indicates ProblemShape
CollectiveMainloop,
CollectiveEpilogue,
cutlass::gemm::PersistentScheduler>;
using ConvUniversalAdapter = cutlass::conv::device::ConvUniversalAdapter<ConvUniversal>;`
The text was updated successfully, but these errors were encountered:
What is your question?
How do I implement conv3d using CUTLASS on NVIDIA's Hopper architecture? Here is my code.
`
// A matrix configuration
using ElementA = CutlassT; // Element type for A matrix operand
using LayoutA = cutlass::layout::TensorNDHWC; // Layout type for A matrix operand
constexpr int AlignmentA = 128 / cutlass::sizeof_bits::value;
The text was updated successfully, but these errors were encountered: