Back to Cutlass

CUTLASS: cutlass::gemm::kernel Namespace Reference

docs/namespacecutlass_1_1gemm_1_1kernel.html

4.4.2 · 11.4 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Namespaces | Classes | Functions

cutlass::gemm::kernel Namespace Reference

|

Namespaces

| | | detail | | |

|

Classes

| | struct | DefaultGemm | | | | struct | DefaultGemm< ElementA, layout::ColumnMajorInterleaved< InterleavedK >, kAlignmentA, ElementB, layout::RowMajorInterleaved< InterleavedK >, kAlignmentB, ElementC, layout::ColumnMajorInterleaved< InterleavedK >, int32_t, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, IsBetaZero > | | | Partial specialization for Turing Integer Matrix Multiply Interleaved layout. More...
| | | | struct | DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 1 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator > | | | Partial specialization for SIMT. More...
| | | | struct | DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm70, ThreadblockShape, WarpShape, GemmShape< 8, 8, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator > | | | Partial specialization for Volta architecture. More...
| | | | struct | DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator > | | | Partial specialization for Turing Architecture. More...
| | | | struct | DefaultGemm< int8_t, LayoutA, kAlignmentA, int8_t, LayoutB, kAlignmentB, ElementC, LayoutC, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, false > | | | Partial specialization for SIMT DP4A. More...
| | | | struct | DefaultGemmSplitKParallel | | | | struct | DefaultGemv | | | | struct | Gemm | | | | struct | GemmBatched | | | | struct | GemmSplitKParallel | | |

|

Functions

| | template<typename Mma , typename Epilogue , typename ThreadblockSwizzle > | | __global__ void | GemmPipelined (cutlass::gemm::GemmCoord problem_size, cutlass::gemm::GemmCoord grid_tiled_shape, typename Mma::IteratorA::Params params_A, typename Mma::IteratorA::TensorRef ref_A, typename Mma::IteratorB::Params params_B, typename Mma::IteratorB::TensorRef ref_B, typename Epilogue::Params params_epilogue) | | | | template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero = false> | | CUTLASS_DEVICE void | GemvBatchedStridedDevice (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, ElementAlphaBeta beta, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_C, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldc, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | | | template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero> | | __global__ void | GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, ElementAlphaBeta beta, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_C, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldc, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | | | template<typename GemvKernel , typename ElementAlphaBeta > | | __global__ void | GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | | | template<typename GemvKernel > | | __global__ void | GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | |

Function Documentation

template<typename Mma , typename Epilogue , typename ThreadblockSwizzle >

| __global__ void cutlass::gemm::kernel::GemmPipelined | ( | cutlass::gemm::GemmCoord | problem_size, | | | | cutlass::gemm::GemmCoord | grid_tiled_shape, | | | | typename Mma::IteratorA::Params | params_A, | | | | typename Mma::IteratorA::TensorRef | ref_A, | | | | typename Mma::IteratorB::Params | params_B, | | | | typename Mma::IteratorB::TensorRef | ref_B, | | | | typename Epilogue::Params | params_epilogue | | | ) | | |

template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero>

| __global__ void cutlass::gemm::kernel::GemvBatchedStrided | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | ElementAlphaBeta | alpha, | | | | ElementAlphaBeta | beta, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_C, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldc, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |

template<typename GemvKernel , typename ElementAlphaBeta >

| __global__ void cutlass::gemm::kernel::GemvBatchedStrided | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | ElementAlphaBeta | alpha, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |

template<typename GemvKernel >

| __global__ void cutlass::gemm::kernel::GemvBatchedStrided | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |

template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero = false>

| CUTLASS_DEVICE void cutlass::gemm::kernel::GemvBatchedStridedDevice | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | ElementAlphaBeta | alpha, | | | | ElementAlphaBeta | beta, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_C, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldc, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |


Generated by Doxygen 1.8.11