Back to Cutlass

CUTLASS: cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ > Struct Template Reference

docs/structcutlass_1_1reduction_1_1BatchedReductionTraits.html

4.4.223.0 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Classes | Public Types | Static Public Attributes | List of all members

cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ > Struct Template Reference

#include <batched_reduction_traits.h>

|

Classes

| | struct | Params | | |

|

Public Types

| | typedef BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ > | This_ | | | | typedef cutlass::reduction::BatchedReduction< This_ > | KernelClass | | | The struct that consumes this Traits. More...
| | | | typedef OutputTile_ | OutputTile | | | | typedef SubTile_ | SubTile | | | | typedef ThreadShape_ | ThreadShape | | | | typedef ScalarA_ | ScalarA | | | The input pointer type. More...
| | | | typedef ScalarC_ | ScalarC | | | | typedef ScalarD_ | ScalarD | | | The output pointer type. More...
| | | | typedef ScalarAlphaBeta_ | ScalarAlphaBeta | | | The alpha beta type. More...
| | | | typedef ScalarAccum_ | ScalarAccum | | | The type for accumulation. More...
| | | | typedef Index_ | Index | | | The index. More...
| | | | typedef BlockSwizzle_ | BlockSwizzle | | | The thread block swizzle. More...
| | | | typedef Functor_ | Functor | | |

|

Static Public Attributes

| | static const int | ReductionSize = ReductionSize_ | | | | static const bool | ThreadShapeMultiple2 = (ThreadShape::kW % 2 == 0) | | | Check if ThreadShape is a multiple of 2. More...
| | | | static int const | kThreads = SubTile::kW / ThreadShape::kW | | | | static int const | maxInReg = maxInReg_ | | | | static int const | maxOutReg = maxOutReg_ | | |

Member Typedef Documentation

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef BlockSwizzle_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::BlockSwizzle |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef Functor_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Functor |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef Index_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Index |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef cutlass::reduction::BatchedReduction<This_> cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::KernelClass |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef OutputTile_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::OutputTile |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef ScalarA_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarA |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef ScalarAccum_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarAccum |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef ScalarAlphaBeta_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarAlphaBeta |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef ScalarC_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarC |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef ScalarD_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarD |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef SubTile_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::SubTile |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef BatchedReductionTraits<ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_> cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::This_ |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

| typedef ThreadShape_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ThreadShape |

Member Data Documentation

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

|

| int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::kThreads = SubTile::kW / ThreadShape::kW |

| static |

Parameters object constructible on the host. The number of threads per thread block (can be deduced).

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

|

| int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::maxInReg = maxInReg_ |

| static |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

|

| int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::maxOutReg = maxOutReg_ |

| static |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

|

| const int cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ReductionSize = ReductionSize_ |

| static |

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

|

| const bool cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ThreadShapeMultiple2 = (ThreadShape::kW % 2 == 0) |

| static |


The documentation for this struct was generated from the following file:

  • [batched_reduction_traits.h](batched__reduction__traits_8h_source.html)

Generated by Doxygen 1.8.11