docs/classcutlass_1_1epilogue_1_1EpilogueWorkspace.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ > Class Template Reference
#include <epilogue_workspace.h>
|
|
| struct | Params |
| | Parameters structure. More...
|
| |
| struct | SharedStorage |
| | Shared storage allocation needed by the epilogue. More...
|
| |
|
| | using | Shape = Shape_ | | | | using | FragmentC = FragmentC_ | | | | using | ElementC = typename FragmentC::value_type | | |
|
|
| CUTLASS_DEVICE | EpilogueWorkspace (Params const &params, SharedStorage &, int warp_idx, int lane_idx) |
| | Constructor. More...
|
| |
| CUTLASS_DEVICE void | operator() (cutlass::gemm::GemmCoord problem_size, cutlass::gemm::GemmCoord tb_tile_coord, FragmentC const &accum) |
| | Streams the result to global memory. More...
|
| |
|
|
| static int const | kWarpCount = WarpCount |
| |
| static int const | kAccessSizeInBits = 128 |
| | Optimize for 128b accesses. More...
|
| |
| static int const | kWarpSize = 32 |
| | Warp size from the perspective of memory operations. More...
|
| |
| static int const | kElementsPerAccess |
| | Vector length of accesses. More...
|
| |
| static int const | kIterations = FragmentC::kElements / kElementsPerAccess |
| | Number of stores per thread. More...
|
| |
| static int const | kWarpAccesses = kIterations * kWarpSize |
| | Total number of vectorized accesses in warp (in units of vector) More...
|
| |
| static int const | kThreadblockAccesses = kWarpAccesses * kWarpCount |
| | Total number of vectorized accesses in threadblock tile (in units of vector) More...
|
| |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
| using cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::ElementC = typename FragmentC::value_type |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
| using cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::FragmentC = FragmentC_ |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
| using cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::Shape = Shape_ |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| CUTLASS_DEVICE cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::EpilogueWorkspace | ( | Params const & | params, | | | | SharedStorage & | , | | | | int | warp_idx, | | | | int | lane_idx | | | ) | | |
| inline |
Parameters
| params | Host-constructable params object | | warp_idx | ID of warp within threadblock | | lane_idx | ID of thread within warp |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| CUTLASS_DEVICE void cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::operator() | ( | cutlass::gemm::GemmCoord | problem_size, | | | | cutlass::gemm::GemmCoord | tb_tile_coord, | | | | FragmentC const & | accum | | | ) | | |
| inline |
Parameters
| problem_size | Problem size of GEMM (units of ElementC) | | tb_tile_coord | Threadblock tile coordinate in GEMM (in units of threadblock tiles) | | accum | Accumulator tile |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kAccessSizeInBits = 128 |
| static |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kElementsPerAccess |
| static |
Initial value:
= kAccessSizeInBits / sizeof_bits<ElementC>::value
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kIterations = FragmentC::kElements / kElementsPerAccess |
| static |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kThreadblockAccesses = kWarpAccesses * kWarpCount |
| static |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kWarpAccesses = kIterations * kWarpSize |
| static |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kWarpCount = WarpCount |
| static |
template<typename Shape_ , int WarpCount, typename FragmentC_ >
|
| int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kWarpSize = 32 |
| static |
The documentation for this class was generated from the following file: epilogue_workspace.h
Generated by Doxygen 1.8.11