Back to Cutlass

CUTLASS: cutlass::gemm::kernel::DefaultGemmSplitKParallel< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementC_, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator > Struct Template Reference

docs/structcutlass_1_1gemm_1_1kernel_1_1DefaultGemmSplitKParallel.html

4.4.27.1 KB
Original Source

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Public Types | List of all members

cutlass::gemm::kernel::DefaultGemmSplitKParallel< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementC_, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator > Struct Template Reference

#include <default_gemm_splitk_parallel.h>

|

Public Types

| Kind | Declaration | Description |
|---|---|---|
| using | Default = DefaultGemm< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementAccumulator, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, false, Operator > | |
| using | Mma = typename Default::Mma | Define the matrix multiply operator. More... |
| using | Epilogue = typename Default::Epilogue | Define the epilogue. More... |
| using | GemmKernel = kernel::GemmSplitKParallel< Mma, Epilogue, ThreadblockSwizzle > | Define the kernel-level GEMM operator. More... |

Member Typedef Documentation

template<typename ElementA_ , typename LayoutA_ , int kAlignmentA, typename ElementB_ , typename LayoutB_ , int kAlignmentB, typename ElementC_ , typename LayoutC_ , typename ElementAccumulator , typename OperatorClass , typename ArchTag , typename ThreadblockShape , typename WarpShape , typename InstructionShape , typename EpilogueOutputOp , typename ThreadblockSwizzle , int Stages, typename Operator >

| using cutlass::gemm::kernel::DefaultGemmSplitKParallel< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementC_, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator >::Default = DefaultGemm< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementAccumulator, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, false, Operator > |

Define the threadblock-scoped matrix multiply-accumulate using the basic GEMM's mainloop.

template<typename ElementA_ , typename LayoutA_ , int kAlignmentA, typename ElementB_ , typename LayoutB_ , int kAlignmentB, typename ElementC_ , typename LayoutC_ , typename ElementAccumulator , typename OperatorClass , typename ArchTag , typename ThreadblockShape , typename WarpShape , typename InstructionShape , typename EpilogueOutputOp , typename ThreadblockSwizzle , int Stages, typename Operator >

| using cutlass::gemm::kernel::DefaultGemmSplitKParallel< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementC_, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator >::Epilogue = typename Default::Epilogue |

template<typename ElementA_ , typename LayoutA_ , int kAlignmentA, typename ElementB_ , typename LayoutB_ , int kAlignmentB, typename ElementC_ , typename LayoutC_ , typename ElementAccumulator , typename OperatorClass , typename ArchTag , typename ThreadblockShape , typename WarpShape , typename InstructionShape , typename EpilogueOutputOp , typename ThreadblockSwizzle , int Stages, typename Operator >

| using cutlass::gemm::kernel::DefaultGemmSplitKParallel< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementC_, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator >::GemmKernel = kernel::GemmSplitKParallel<Mma, Epilogue, ThreadblockSwizzle> |

template<typename ElementA_ , typename LayoutA_ , int kAlignmentA, typename ElementB_ , typename LayoutB_ , int kAlignmentB, typename ElementC_ , typename LayoutC_ , typename ElementAccumulator , typename OperatorClass , typename ArchTag , typename ThreadblockShape , typename WarpShape , typename InstructionShape , typename EpilogueOutputOp , typename ThreadblockSwizzle , int Stages, typename Operator >

| using cutlass::gemm::kernel::DefaultGemmSplitKParallel< ElementA_, LayoutA_, kAlignmentA, ElementB_, LayoutB_, kAlignmentB, ElementC_, LayoutC_, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, Operator >::Mma = typename Default::Mma |


The documentation for this struct was generated from the following file:

  • [default_gemm_splitk_parallel.h](default__gemm__splitk__parallel_8h_source.html)

Generated by Doxygen 1.8.11