docs/namespacecutlass_1_1gemm_1_1kernel.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Namespaces | Classes | Functions
cutlass::gemm::kernel Namespace Reference
|
| | | detail | | |
|
|
| struct | DefaultGemm |
| |
| struct | DefaultGemm< ElementA, layout::ColumnMajorInterleaved< InterleavedK >, kAlignmentA, ElementB, layout::RowMajorInterleaved< InterleavedK >, kAlignmentB, ElementC, layout::ColumnMajorInterleaved< InterleavedK >, int32_t, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, IsBetaZero > |
| | Partial specialization for Turing Integer Matrix Multiply Interleaved layout. More...
|
| |
| struct | DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 1 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator > |
| | Partial specialization for SIMT. More...
|
| |
| struct | DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm70, ThreadblockShape, WarpShape, GemmShape< 8, 8, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator > |
| | Partial specialization for Volta architecture. More...
|
| |
| struct | DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator > |
| | Partial specialization for Turing architecture. More...
|
| |
| struct | DefaultGemm< int8_t, LayoutA, kAlignmentA, int8_t, LayoutB, kAlignmentB, ElementC, LayoutC, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, false > |
| | Partial specialization for SIMT DP4A. More...
|
| |
| struct | DefaultGemmSplitKParallel |
| |
| struct | DefaultGemv |
| |
| struct | Gemm |
| |
| struct | GemmBatched |
| |
| struct | GemmSplitKParallel |
| |
|
| | template<typename Mma , typename Epilogue , typename ThreadblockSwizzle > | | __global__ void | GemmPipelined (cutlass::gemm::GemmCoord problem_size, cutlass::gemm::GemmCoord grid_tiled_shape, typename Mma::IteratorA::Params params_A, typename Mma::IteratorA::TensorRef ref_A, typename Mma::IteratorB::Params params_B, typename Mma::IteratorB::TensorRef ref_B, typename Epilogue::Params params_epilogue) | | | | template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero = false> | | CUTLASS_DEVICE void | GemvBatchedStridedDevice (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, ElementAlphaBeta beta, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_C, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldc, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | | | template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero> | | __global__ void | GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, ElementAlphaBeta beta, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_C, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldc, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | | | template<typename GemvKernel , typename ElementAlphaBeta > | | __global__ void | GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | | | template<typename GemvKernel > | | __global__ void | GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd) | | |
template<typename Mma , typename Epilogue , typename ThreadblockSwizzle >
| __global__ void cutlass::gemm::kernel::GemmPipelined | ( | cutlass::gemm::GemmCoord | problem_size, | | | | cutlass::gemm::GemmCoord | grid_tiled_shape, | | | | typename Mma::IteratorA::Params | params_A, | | | | typename Mma::IteratorA::TensorRef | ref_A, | | | | typename Mma::IteratorB::Params | params_B, | | | | typename Mma::IteratorB::TensorRef | ref_B, | | | | typename Epilogue::Params | params_epilogue | | | ) | | |
template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero>
| __global__ void cutlass::gemm::kernel::GemvBatchedStrided | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | ElementAlphaBeta | alpha, | | | | ElementAlphaBeta | beta, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_C, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldc, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |
template<typename GemvKernel , typename ElementAlphaBeta >
| __global__ void cutlass::gemm::kernel::GemvBatchedStrided | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | ElementAlphaBeta | alpha, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |
template<typename GemvKernel >
| __global__ void cutlass::gemm::kernel::GemvBatchedStrided | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |
template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero = false>
| CUTLASS_DEVICE void cutlass::gemm::kernel::GemvBatchedStridedDevice | ( | cutlass::gemm::BatchedGemmCoord | problem_size, | | | | ElementAlphaBeta | alpha, | | | | ElementAlphaBeta | beta, | | | | typename GemvKernel::IteratorA::TensorRef | ref_A, | | | | typename GemvKernel::IteratorA::TensorRef::LongIndex | lda, | | | | typename GemvKernel::IteratorB::TensorRef | ref_B, | | | | typename GemvKernel::IteratorB::TensorRef::LongIndex | ldb, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_C, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldc, | | | | typename GemvKernel::IteratorCD::TensorRef | ref_D, | | | | typename GemvKernel::IteratorCD::TensorRef::LongIndex | ldd | | | ) | | |
Generated by Doxygen 1.8.11