Log in Get started

Back to Cutlass

CUTLASS: cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage > Class Template Reference

docs/classcutlass_1_1reduction_1_1kernel_1_1ReduceSplitK.html

4.4.2 · 13.5 KB

Original Source

| CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Classes | Public Types | Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members

cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage > Class Template Reference

#include <reduce_split_k.h>

|

Classes

| | struct | Params | | | Params structure. More...
| | | | struct | SharedStorage | | |

|

Public Types

| | using | Shape = Shape_ | | | | using | ReductionOp = ReductionOp_ | | | | using | OutputOp = OutputOp_ | | | | using | ElementWorkspace = typename ReductionOp::Element | | | | using | ElementAccumulator = typename ReductionOp::ElementAccumulator | | | | using | ElementOutput = typename OutputOp::ElementOutput | | | | using | WorkspaceTensorRef = TensorRef< ElementWorkspace, layout::RowMajor > | | | | using | OutputTensorRef = TensorRef< ElementOutput, layout::RowMajor > | | | | using | FragmentWorkspace = AlignedArray< ElementWorkspace, kElementsPerAccess > | | | | using | FragmentAccumulator = Array< ElementAccumulator, kElementsPerAccess > | | | | using | FragmentOutput = AlignedArray< ElementOutput, kElementsPerAccess > | | |

|

Public Member Functions

| | CUTLASS_DEVICE void | operator() (Params const &params, SharedStorage &storage) | | | Perform a reduction. More...
| | |

|

Static Public Member Functions

| | static CUTLASS_HOST_DEVICE dim3 | grid_shape (cutlass::MatrixCoord problem_size) | | | Computes the grid size given a chosen threadblock shape. More...
| | | | static CUTLASS_HOST_DEVICE dim3 | block_shape () | | | Determines the threadblock shape. More...
| | |

|

Static Public Attributes

| | static int const | kElementsPerAccess = OutputOp::kCount | | | | static int const | kPartitionsPerStage = PartitionsPerStage | | |

Member Typedef Documentation

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::ElementAccumulator = typename ReductionOp::ElementAccumulator |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::ElementOutput = typename OutputOp::ElementOutput |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::ElementWorkspace = typename ReductionOp::Element |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::FragmentAccumulator = Array<ElementAccumulator, kElementsPerAccess> |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::FragmentOutput = AlignedArray<ElementOutput, kElementsPerAccess> |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::FragmentWorkspace = AlignedArray<ElementWorkspace, kElementsPerAccess> |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::OutputOp = OutputOp_ |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::OutputTensorRef = TensorRef<ElementOutput, layout::RowMajor> |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::ReductionOp = ReductionOp_ |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::Shape = Shape_ |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

| using cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::WorkspaceTensorRef = TensorRef<ElementWorkspace, layout::RowMajor> |

Member Function Documentation

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

|

| static CUTLASS_HOST_DEVICE dim3 cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::block_shape | ( | | ) | |

| inline static |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

|

| static CUTLASS_HOST_DEVICE dim3 cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::grid_shape | ( | cutlass::MatrixCoord | problem_size | ) | |

| inline static |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

|

| CUTLASS_DEVICE void cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::operator() | ( | Params const & | params, SharedStorage & storage | ) | |

| inline |

Member Data Documentation

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

|

| int const cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::kElementsPerAccess = OutputOp::kCount |

| static |

template<typename Shape_ , typename OutputOp_ , typename ReductionOp_ , int PartitionsPerStage = 4>

|

| int const cutlass::reduction::kernel::ReduceSplitK< Shape_, OutputOp_, ReductionOp_, PartitionsPerStage >::kPartitionsPerStage = PartitionsPerStage |

| static |

The documentation for this class was generated from the following file:

[reduce_split_k.h](reduce__split__k_8h_source.html)

Generated by Doxygen 1.8.11