docs/classcutlass_1_1epilogue_1_1threadblock_1_1InterleavedEpilogue.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero > Class Template Reference
Epilogue operator without split-K.
#include <interleaved_epilogue.h>
|
|
| struct | SharedStorage |
| | Shared storage allocation needed by the epilogue. More...
|
| |
|
|
| using | Shape = Shape_ |
| |
| using | WarpMmaOperator = WarpMmaOperator_ |
| |
| using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ |
| |
| using | OutputTileIterator = OutputTileIterator_ |
| |
| using | OutputOp = OutputOp_ |
| |
| using | Layout = layout::ColumnMajorInterleaved< InterleavedK > |
| | Output layout is always column-major interleaved (layout::ColumnMajorInterleaved< InterleavedK >). More...
|
| |
| using | AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile |
| | The complete warp-level accumulator tile. More...
|
| |
| using | ElementAccumulator = typename AccumulatorTile::Element |
| | Accumulator element. More...
|
| |
| using | ElementOutput = typename OutputTileIterator::Element |
| | Output element. More...
|
| |
| using | TensorRef = typename OutputTileIterator::TensorRef |
| | Tensor reference to destination tensor. More...
|
| |
| using | SyncTensorRef = typename cutlass::TensorRef< int, cutlass::layout::PackedVectorLayout > |
| | Tensor reference to sync tensor. More...
|
| |
| using | ConstTensorRef = typename OutputTileIterator::ConstTensorRef |
| | Const tensor reference to source tensor. More...
|
| |
| using | OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess > |
| | Array type used to output. More...
|
| |
| using | AccumulatorAccessType = Array< ElementAccumulator, OutputTileIterator::kElementsPerAccess > |
| | Array type used by output functor. More...
|
| |
| using | WarpCount = gemm::GemmShape< Shape::kM/WarpMmaOperator::Shape::kM, Shape::kN/WarpMmaOperator::Shape::kN, kPartitionsK > |
| | Number of warps. More...
|
| |
|
|
| CUTLASS_DEVICE | InterleavedEpilogue (SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx) |
| | Constructor. More...
|
| |
| CUTLASS_DEVICE void | operator() (OutputOp const &output_op, OutputTileIterator destination_iterator, AccumulatorTile const &accumulators, OutputTileIterator source_iterator) |
| | Streams the result to global memory. More...
|
| |
|
|
| static int const | kPartitionsK = PartitionsK |
| |
| static int const | kElementsPerAccess = OutputTileIterator::kElementsPerAccess |
| | Output access size. More...
|
| |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::AccumulatorAccessType = Array<ElementAccumulator, OutputTileIterator::kElementsPerAccess> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::AccumulatorFragmentIterator = AccumulatorFragmentIterator_ |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::ConstTensorRef = typename OutputTileIterator::ConstTensorRef |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::ElementAccumulator = typename AccumulatorTile::Element |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::ElementOutput = typename OutputTileIterator::Element |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::Layout = layout::ColumnMajorInterleaved<InterleavedK> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::OutputAccessType = Array<typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::OutputOp = OutputOp_ |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::OutputTileIterator = OutputTileIterator_ |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::Shape = Shape_ |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::SyncTensorRef = typename cutlass::TensorRef<int, cutlass::layout::PackedVectorLayout> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::TensorRef = typename OutputTileIterator::TensorRef |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::WarpCount = gemm::GemmShape<Shape::kM / WarpMmaOperator::Shape::kM, Shape::kN / WarpMmaOperator::Shape::kN, kPartitionsK> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
| using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::WarpMmaOperator = WarpMmaOperator_ |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
|
| CUTLASS_DEVICE cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::InterleavedEpilogue | ( | SharedStorage & | shared_storage, | | | | int | thread_idx, | | | | int | warp_idx, | | | | int | lane_idx | | | ) | | |
| inline |
Parameters
| shared_storage | Shared storage object | | thread_idx | ID of the thread within the threadblock | | warp_idx | ID of the warp within the threadblock | | lane_idx | ID of the thread within the warp |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::operator() | ( | OutputOp const & | output_op, | | | | OutputTileIterator | destination_iterator, | | | | AccumulatorTile const & | accumulators, | | | | OutputTileIterator | source_iterator | | | ) | | |
| inline |
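Streams the result to global memory. (Note: the source also carries a misplaced trailing comment on this function's last parameter: "Threadblock tile coordinate in GEMM (in units of threadblock tiles)".)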
Parameters
| output_op | Output operator | | destination_iterator | Tile iterator for destination | | accumulators | Complete warp-level accumulator tile | | source_iterator | Tile iterator for source |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
|
| int const cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::kElementsPerAccess = OutputTileIterator::kElementsPerAccess |
| static |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
|
| int const cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::kPartitionsK = PartitionsK |
| static |
The documentation for this class was generated from the following file: interleaved_epilogue.h
Generated by Doxygen 1.8.11