Back to Cutlass

CUTLASS: cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ > Class Template Reference

docs/classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue.html

4.4.229.6 KB
Original Source

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Public Types | Public Member Functions | Static Public Attributes | List of all members

cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ > Class Template Reference

Epilogue operator without split-K.

#include <epilogue.h>

Inheritance diagram for cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >:

![Inheritance graph](classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue__inherit__graph.png) [legend]

Collaboration diagram for cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >:

![Collaboration graph](classcutlass_1_1epilogue_1_1threadblock_1_1Epilogue__coll__graph.png) [legend]

|

Public Types

| | using | Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > | | | | using | Shape = Shape_ | | | | using | WarpMmaOperator = WarpMmaOperator_ | | | | using | OutputTileIterator = OutputTileIterator_ | | | | using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ | | | | using | WarpTileIterator = WarpTileIterator_ | | | | using | SharedLoadIterator = SharedLoadIterator_ | | | | using | OutputOp = OutputOp_ | | | | using | Padding = Padding_ | | | | using | Layout = layout::RowMajor | | | Output layout is always row-major. More...
| | | | using | LongIndex = typename Layout::LongIndex | | | | using | AccumulatorTile = typename Base::AccumulatorTile | | | The complete warp-level accumulator tile. More...
| | | | using | ElementAccumulator = typename WarpTileIterator::Element | | | Accumulator element. More...
| | | | using | ElementOutput = typename OutputTileIterator::Element | | | Output element. More...
| | | | using | TensorRef = typename OutputTileIterator::TensorRef | | | Tensor reference to destination tensor. More...
| | | | using | SyncTensorRef = typename cutlass::TensorRef< int, cutlass::layout::PackedVectorLayout > | | | Tensor reference to sync tensor. More...
| | | | using | ConstTensorRef = typename OutputTileIterator::ConstTensorRef | | | Const tensor reference to source tensor. More...
| | | | using | OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess > | | | Array type used to output. More...
| | | | using | AccumulatorAccessType = Array< typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess > | | | Array type used by output functor. More...
| | | | using | WarpCount = typename Base::WarpCount | | | Number of warps. More...
| | | | Public Types inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > | | using | Shape = Shape_ | | | | using | WarpMmaOperator = WarpMmaOperator_ | | | | using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ | | | | using | WarpTileIterator = WarpTileIterator_ | | | | using | Padding = Padding_ | | | | using | Layout = layout::RowMajor | | | Output layout is always row-major. More...
| | | | using | AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile | | | The complete warp-level accumulator tile. More...
| | | | using | ElementAccumulator = typename AccumulatorTile::Element | | | Accumulator element. More...
| | | | using | WarpCount = gemm::GemmShape< Shape::kM/WarpMmaOperator::Shape::kM, Shape::kN/WarpMmaOperator::Shape::kN, kPartitionsK > | | | Number of warps. More...
| | |

|

Public Member Functions

| | CUTLASS_DEVICE | Epilogue (typename Base::SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx) | | | Constructor. More...
| | | | CUTLASS_DEVICE void | operator() (OutputOp const &output_op, OutputTileIterator destination_iterator, AccumulatorTile const &accumulators, OutputTileIterator source_iterator) | | | Streams the result to global memory. More...
| | | | Public Member Functions inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > | | CUTLASS_DEVICE | EpilogueBase (SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx) | | | Constructor. More...
| | |

|

Static Public Attributes

| | static int const | kPartitionsK = PartitionsK | | | | static int const | kElementsPerAccess = OutputTileIterator::kElementsPerAccess | | | Output access size. More...
| | | | Static Public Attributes inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > | | static int const | kPartitionsK = PartitionsK | | |

|

Additional Inherited Members

| | Protected Attributes inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > | | SharedStorage & | shared_storage_ | | | | WarpTileIterator | warp_tile_iterator_ | | | Stores a warp's fragment of accumulators to SMEM. More...
| | |

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::AccumulatorAccessType = Array<typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess> |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::AccumulatorFragmentIterator = AccumulatorFragmentIterator_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::AccumulatorTile = typename Base::AccumulatorTile |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_> |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::ConstTensorRef = typename OutputTileIterator::ConstTensorRef |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::ElementAccumulator = typename WarpTileIterator::Element |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::ElementOutput = typename OutputTileIterator::Element |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Layout = layout::RowMajor |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::LongIndex = typename Layout::LongIndex |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess> |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputOp = OutputOp_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputTileIterator = OutputTileIterator_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Padding = Padding_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Shape = Shape_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::SharedLoadIterator = SharedLoadIterator_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::SyncTensorRef = typename cutlass::TensorRef<int, cutlass::layout::PackedVectorLayout> |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::TensorRef = typename OutputTileIterator::TensorRef |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::WarpCount = typename Base::WarpCount |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::WarpMmaOperator = WarpMmaOperator_ |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

| using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::WarpTileIterator = WarpTileIterator_ |

Constructor & Destructor Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

|

| CUTLASS_DEVICE cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Epilogue | ( | typename Base::SharedStorage & | shared_storage, | | | | int | thread_idx, | | | | int | warp_idx, | | | | int | lane_idx | | | ) | | |

| inline |

Parameters

| shared_storage | Shared storage object | | thread_idx | ID of a thread within the threadblock | | warp_idx | ID of warp within threadblock | | lane_idx | ID of thread within warp |

Member Function Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

|

| CUTLASS_DEVICE void cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::operator() | ( | OutputOp const & | output_op, | | | | OutputTileIterator | destination_iterator, | | | | AccumulatorTile const & | accumulators, | | | | OutputTileIterator | source_iterator | | | ) | | |

| inline |

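Streams the result to global memory.

Note: the text "Threadblock tile coordinate in GEMM (in units of threadblock tiles)" is a trailing `///<` source comment that Doxygen emitted here; it appears to be attached to the final parameter, `source_iterator`, which has no entry in the parameter table below.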

Parameters

| output_op | Output operator | | destination_iterator | Tile iterator for destination | | accumulators | Complete warp-level accumulator tile |

Member Data Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

|

| int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kElementsPerAccess = OutputTileIterator::kElementsPerAccess |

| static |

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >

|

| int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kPartitionsK = PartitionsK |

| static |


The documentation for this class was generated from the following file:

- epilogue.h


Generated by Doxygen 1.8.11