docs/namespacecutlass_1_1reference_1_1device_1_1kernel.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
cutlass::reference::device::kernel Namespace Reference
|
|
| | detail |
| | Defines several helpers.
|
| |
|
|
| template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > |
| __global__ void | Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum) |
| |
| template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > |
| __global__ void | BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum) |
| |
| template<typename T > |
| __global__ void | TensorInitializeUniform (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| | Kernel to initialize tensor to uniform random distribution. More...
|
| |
| template<typename T > |
| __global__ void | TensorInitializeGaussian (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| | Kernel to initialize tensor to Gaussian random distribution. More...
|
| |
| template<typename T > |
| __global__ void | TensorInitializeLinear (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| | Kernel to initialize tensor with linearly varying values. More...
|
| |
| template<typename T > |
| __global__ void | TensorInitializeIdentity (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| | Kernel to initialize tensor to an identity matrix. More...
|
| |
| template<typename Func , int Rank, typename Params > |
| __global__ void | TensorForEach (Coord< Rank > size, Params params=Params()) |
| | Kernel calls a functor for each element in a tensor's index space. More...
|
| |
| template<typename Func , int Rank, typename Params > |
| __global__ void | TensorDiagonalForEach (Coord< Rank > size, Params params, int start, int end) |
| | Kernel calls a functor for each element along a tensor's diagonal. More...
|
| |
| template<typename Element , typename Func > |
| __global__ void | BlockForEach (Element *ptr, size_t capacity, typename Func::Params params) |
| |
| template<typename Element > |
| __global__ void | BlockCompareEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity) |
| |
| template<typename Element > |
| __global__ void | BlockCompareRelativelyEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor) |
| |
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
| __global__ void cutlass::reference::device::kernel::BatchedGemm | ( | gemm::GemmCoord | problem_size, | | | | ScalarType | alpha, | | | | TensorRefCollectionA | tensor_collection_a, | | | | TensorRefCollectionB | tensor_collection_b, | | | | ScalarType | beta, | | | | TensorRefCollectionC | tensor_collection_c, | | | | AccumulatorType | initial_accum | | | ) | | |
Computes a batch of general matrix products among matrices (tensors of rank=2) pointed to by TensorRef collection objects.
template<typename Element >
| __global__ void cutlass::reference::device::kernel::BlockCompareEqual | ( | int * | equal, | | | | Element const * | ptr_A, | | | | Element const * | ptr_B, | | | | size_t | capacity | | | ) | | |
template<typename Element >
| __global__ void cutlass::reference::device::kernel::BlockCompareRelativelyEqual | ( | int * | equal, | | | | Element const * | ptr_A, | | | | Element const * | ptr_B, | | | | size_t | capacity, | | | | Element | epsilon, | | | | Element | nonzero_floor | | | ) | | |
template<typename Element , typename Func >
| __global__ void cutlass::reference::device::kernel::BlockForEach | ( | Element * | ptr, | | | | size_t | capacity, | | | | typename Func::Params | params | | | ) | | |
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
| __global__ void cutlass::reference::device::kernel::Gemm | ( | gemm::GemmCoord | problem_size, | | | | ScalarType | alpha, | | | | TensorRefA | tensor_a, | | | | TensorRefB | tensor_b, | | | | ScalarType | beta, | | | | TensorRefC | tensor_c, | | | | TensorRefC | tensor_d, | | | | AccumulatorType | initial_accum | | | ) | | |
Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.
template<typename Func , int Rank, typename Params >
| __global__ void cutlass::reference::device::kernel::TensorDiagonalForEach | ( | Coord< Rank > | size, | | | | Params | params, | | | | int | start, | | | | int | end | | | ) | | |
template<typename Func , int Rank, typename Params >
| __global__ void cutlass::reference::device::kernel::TensorForEach | ( | Coord< Rank > | size, | | | | Params | params = Params() | | | ) | | |
template<typename T >
| __global__ void cutlass::reference::device::kernel::TensorInitializeGaussian | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |
template<typename T >
| __global__ void cutlass::reference::device::kernel::TensorInitializeIdentity | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |
template<typename T >
| __global__ void cutlass::reference::device::kernel::TensorInitializeLinear | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |
template<typename T >
| __global__ void cutlass::reference::device::kernel::TensorInitializeUniform | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |
Generated by Doxygen 1.8.11