docs/structcutlass_1_1epilogue_1_1threadblock_1_1InterleavedOutputTileThreadMap.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Classes | Public Types | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize > Struct Template Reference
#include <output_tile_thread_map.h>
|
| | struct | Detail | | |
|
| | using | WarpCount = WarpCount_ | | | | using | MmaCount = MmaCount_ | | | | using | Iterations = MmaCount | | | | using | Delta = layout::PitchLinearShape< kWarpSize * kElementsPerAccess, 1 > | | |
|
|
| static CUTLASS_HOST_DEVICE layout::PitchLinearCoord | initial_offset (int thread_idx) |
| | Initial offset function. More...
|
| |
|
| | static int const | kWarpSize = 32 | | | | static int const | kThreads = Threads | | | | static int const | kWarpCount = kThreads / kWarpSize | | | | static int const | kElementsPerAccess = ElementsPerAccess | | | | static int const | kElementSize = ElementSize | | |
Template metaprogram for partitioning a 3D interleaved layout across warps to achieve several performance objectives:
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
| using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::Delta = layout::PitchLinearShape<kWarpSize * kElementsPerAccess, 1> |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
| using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::Iterations = MmaCount |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
| using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::MmaCount = MmaCount_ |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
| using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::WarpCount = WarpCount_ |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
|
| static CUTLASS_HOST_DEVICE layout::PitchLinearCoord cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::initial_offset | ( | int | thread_idx | ) | |
| inline static |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
|
| int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kElementSize = ElementSize |
| static |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
|
| int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kElementsPerAccess = ElementsPerAccess |
| static |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
|
| int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kThreads = Threads |
| static |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
|
| int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kWarpCount = kThreads / kWarpSize |
| static |
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
|
| int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kWarpSize = 32 |
| static |
The documentation for this struct was generated from the following file: output_tile_thread_map.h
Generated by Doxygen 1.8.11