docs/structcutlass_1_1gemm_1_1kernel_1_1Gemm.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Classes | Public Types | Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial > Struct Template Reference
#include <gemm.h>
|
|
| struct | Params |
| | Parameters structure. More...
|
| |
| union | SharedStorage |
| | Shared memory storage structure. More...
|
| |
|
|
| using | Mma = Mma_ |
| |
| using | Epilogue = Epilogue_ |
| |
| using | OutputOp = typename Epilogue::OutputOp |
| |
| using | ThreadblockSwizzle = ThreadblockSwizzle_ |
| |
| using | WarpCount = typename Mma::WarpCount |
| | Warp count (concept: GemmShape) More...
|
| |
|
|
| CUTLASS_HOST_DEVICE | Gemm () |
| |
| CUTLASS_DEVICE void | operator() (Params const &params, SharedStorage &shared_storage) |
| | Executes one GEMM. More...
|
| |
|
|
| static Status | can_implement (cutlass::gemm::GemmCoord const &problem_size, typename Mma::IteratorA::TensorRef ref_A, typename Mma::IteratorB::TensorRef ref_B, typename Epilogue::OutputTileIterator::TensorRef ref_C, typename Epilogue::OutputTileIterator::TensorRef ref_D) |
| | Determines whether kernel satisfies alignment. More...
|
| |
|
| static bool const | kSplitKSerial = SplitKSerial |
| |
| static int const | kThreadCount = 32 * WarpCount::kCount |
| |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
| using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::Epilogue = Epilogue_ |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
| using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::Mma = Mma_ |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
| using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::OutputOp = typename Epilogue::OutputOp |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
| using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::ThreadblockSwizzle = ThreadblockSwizzle_ |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
| using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::WarpCount = typename Mma::WarpCount |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
|
| CUTLASS_HOST_DEVICE cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::Gemm () |
| inline |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
|
| static Status cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::can_implement (cutlass::gemm::GemmCoord const &problem_size, typename Mma::IteratorA::TensorRef ref_A, typename Mma::IteratorB::TensorRef ref_B, typename Epilogue::OutputTileIterator::TensorRef ref_C, typename Epilogue::OutputTileIterator::TensorRef ref_D) |
| inline static |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
|
| CUTLASS_DEVICE void cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::operator() (Params const &params, SharedStorage &shared_storage) |
| inline |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
|
| bool const cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::kSplitKSerial = SplitKSerial |
| static |
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
|
| int const cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::kThreadCount = 32 * WarpCount::kCount |
| static |
The documentation for this struct was generated from the following file: gemm.h
Generated by Doxygen 1.8.11