docs/classcutlass_1_1epilogue_1_1threadblock_1_1DirectEpilogueTensorOp.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Classes | Public Types | Public Member Functions | List of all members
cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ > Class Template Reference
Epilogue operator.
#include <direct_epilogue_tensor_op.h>
|
|
| struct | Params |
| | Parameters structure for host-constructible state. More...
|
| |
| struct | SharedStorage |
| | Shared storage allocation needed by the epilogue. More...
|
| |
|
|
| using | Shape = Shape_ |
| |
| using | Operator = Operator_ |
| |
| using | WarpCount = gemm::GemmShape< Shape::kM/Operator::Shape::kM, Shape::kN/Operator::Shape::kN, PartitionsK > |
| | Number of warps spanning threadblock-scoped tile. More...
|
| |
| using | FragmentC = typename Operator::FragmentC |
| | Accumulator tile is really the warp-scoped tile. More...
|
| |
| using | Element = Element_ |
| | Data type of output tensor. More...
|
| |
| using | Layout = layout::RowMajor |
| | Output layout is always row-major. More...
|
| |
| using | OutputOp = OutputOp_ |
| | Function operator computing final output. More...
|
| |
| using | ConvertOp = ConvertOp_ |
| | Conversion operator to shared memory. More...
|
| |
| using | TensorRef = TensorRef< Element, Layout::kRank, Layout > |
| | Reference to source and destination tensors. More...
|
| |
|
|
| CUTLASS_DEVICE | DirectEpilogueTensorOp (Params const &params, SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx) |
| | Constructor. More...
|
| |
| CUTLASS_DEVICE void | operator() (gemm::GemmCoord problem_size, gemm::GemmCoord tb_tile_coord, FragmentC const &accumulators) |
| | Streams the result to global memory. More...
|
| |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::ConvertOp = ConvertOp_ |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Element = Element_ |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::FragmentC = typename Operator::FragmentC |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Layout = layout::RowMajor |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Operator = Operator_ |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::OutputOp = OutputOp_ |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Shape = Shape_ |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::TensorRef = TensorRef<Element, Layout::kRank, Layout> |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
| using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::WarpCount = gemm::GemmShape< Shape::kM / Operator::Shape::kM, Shape::kN / Operator::Shape::kN, PartitionsK > |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
|
| CUTLASS_DEVICE cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::DirectEpilogueTensorOp | ( | Params const & | params, | | | | SharedStorage & | shared_storage, | | | | int | thread_idx, | | | | int | warp_idx, | | | | int | lane_idx | | | ) | | |
| inline |
Parameters
| params | Host-constructible params object | | shared_storage | Shared storage object | | thread_idx | ID of a thread within the threadblock | | warp_idx | ID of warp within threadblock | | lane_idx | ID of thread within warp |
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::operator() | ( | gemm::GemmCoord | problem_size, | | | | gemm::GemmCoord | tb_tile_coord, | | | | FragmentC const & | accumulators | | | ) | | |
| inline |
Accumulator tile
Number of mma operations performed
Parameters
| problem_size | Problem size of GEMM (units of ElementC) | | tb_tile_coord | Threadblock tile coordinate in GEMM (in units of threadblock tiles) |
The documentation for this class was generated from the following file: direct_epilogue_tensor_op.h
Generated by Doxygen 1.8.11