Back to Cutlass

CUTLASS: cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess > Struct Template Reference

docs/structcutlass_1_1epilogue_1_1threadblock_1_1DefaultEpilogueVoltaTensorOp.html

4.4.219.7 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Public Types | Static Public Attributes | List of all members

cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess > Struct Template Reference

Defines sensible defaults for epilogues for TensorOps.

#include <default_epilogue_volta_tensor_op.h>

|

Public Types

| | using | Shape = Shape_ | | | | using | WarpMmaTensorOp = WarpMmaTensorOp_ | | | | using | OutputOp = OutputOp_ | | | | using | ElementOutput = typename OutputOp::ElementOutput | | | | using | LayoutC = typename WarpMmaTensorOp::LayoutC | | | | using | ElementAccumulator = typename WarpMmaTensorOp::ElementC | | | | using | OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, ElementAccumulator >::Type | | | | using | OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput > | | | | using | AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape< 32, 32, 4 >, ElementAccumulator, LayoutC > | | | | using | WarpTileIterator = cutlass::epilogue::warp::TileIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape< 32, 32, 4 >, ElementAccumulator, LayoutC > | | | | using | SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator, kSharedMemAlignment > | | | | using | Padding = typename WarpTileIterator::Padding | | | Hard-coded padding elements added. More...
| | | | using | Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding > | | |

|

Static Public Attributes

| | static int const | kPartitionsK = PartitionsK | | | | static int const | kElementsPerAccess = ElementsPerAccess | | | | static int const | kSharedMemAlignment = sizeof_bits<ElementAccumulator>::value * WarpTileIterator::kElementsPerAccess / 8 | | |

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape<32, 32, 4>, ElementAccumulator, LayoutC > |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::ElementAccumulator = typename WarpMmaTensorOp::ElementC |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::ElementOutput = typename OutputOp::ElementOutput |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding > |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::LayoutC = typename WarpMmaTensorOp::LayoutC |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputOp = OutputOp_ |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput > |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, ElementAccumulator >::Type |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Padding = typename WarpTileIterator::Padding |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Shape = Shape_ |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator, kSharedMemAlignment > |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::WarpMmaTensorOp = WarpMmaTensorOp_ |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

| using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::WarpTileIterator = cutlass::epilogue::warp::TileIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape<32, 32, 4>, ElementAccumulator, LayoutC > |

Member Data Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

|

| int const cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kElementsPerAccess = ElementsPerAccess |

| static |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

|

| int const cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kPartitionsK = PartitionsK |

| static |

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>

|

| int const cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kSharedMemAlignment = sizeof_bits<ElementAccumulator>::value * WarpTileIterator::kElementsPerAccess / 8 |

| static |


The documentation for this struct was generated from the following file:

  • [default_epilogue_volta_tensor_op.h](default epilogue volta tensor op_8h_source.html)

Generated by 1.8.11