Back to Cutlass

CUTLASS: cutlass::reference::device::kernel Namespace Reference

docs/namespacecutlass_1_1reference_1_1device_1_1kernel.html

4.4.2 · 9.3 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Namespaces | Functions

cutlass::reference::device::kernel Namespace Reference

|

Namespaces

| | | detail | | | Defines several helpers.
| | |

|

Functions

| | template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > | | __global__ void | Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum) | | | | template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > | | __global__ void | BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum) | | | | template<typename T > | | __global__ void | TensorInitializeUniform (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to uniform random distribution. More...
| | | | template<typename T > | | __global__ void | TensorInitializeGaussian (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to Gaussian random distribution. More...
| | | | template<typename T > | | __global__ void | TensorInitializeLinear (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to a linear function of its element coordinates. More...
| | | | template<typename T > | | __global__ void | TensorInitializeIdentity (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to an identity matrix. More...
| | | | template<typename Func , int Rank, typename Params > | | __global__ void | TensorForEach (Coord< Rank > size, Params params=Params()) | | | Kernel calls a functor for each element in a tensor's index space. More...
| | | | template<typename Func , int Rank, typename Params > | | __global__ void | TensorDiagonalForEach (Coord< Rank > size, Params params, int start, int end) | | | Kernel calls a functor for each element along a tensor's diagonal. More...
| | | | template<typename Element , typename Func > | | __global__ void | BlockForEach (Element *ptr, size_t capacity, typename Func::Params params) | | | | template<typename Element > | | __global__ void | BlockCompareEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity) | | | | template<typename Element > | | __global__ void | BlockCompareRelativelyEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor) | | |

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >

| __global__ void cutlass::reference::device::kernel::BatchedGemm | ( | gemm::GemmCoord | problem_size, | | | | ScalarType | alpha, | | | | TensorRefCollectionA | tensor_collection_a, | | | | TensorRefCollectionB | tensor_collection_b, | | | | ScalarType | beta, | | | | TensorRefCollectionC | tensor_collection_c, | | | | AccumulatorType | initial_accum | | | ) | | |

Computes a batch of general matrix products among matrices (tensors of rank=2) pointed to by TensorRef collection objects.

template<typename Element >

| __global__ void cutlass::reference::device::kernel::BlockCompareEqual | ( | int * | equal, | | | | Element const * | ptr_A, | | | | Element const * | ptr_B, | | | | size_t | capacity | | | ) | | |

template<typename Element >

| __global__ void cutlass::reference::device::kernel::BlockCompareRelativelyEqual | ( | int * | equal, | | | | Element const * | ptr_A, | | | | Element const * | ptr_B, | | | | size_t | capacity, | | | | Element | epsilon, | | | | Element | nonzero_floor | | | ) | | |

template<typename Element , typename Func >

| __global__ void cutlass::reference::device::kernel::BlockForEach | ( | Element * | ptr, | | | | size_t | capacity, | | | | typename Func::Params | params | | | ) | | |

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >

| __global__ void cutlass::reference::device::kernel::Gemm | ( | gemm::GemmCoord | problem_size, | | | | ScalarType | alpha, | | | | TensorRefA | tensor_a, | | | | TensorRefB | tensor_b, | | | | ScalarType | beta, | | | | TensorRefC | tensor_c, | | | | TensorRefC | tensor_d, | | | | AccumulatorType | initial_accum | | | ) | | |

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Func , int Rank, typename Params >

| __global__ void cutlass::reference::device::kernel::TensorDiagonalForEach | ( | Coord< Rank > | size, | | | | Params | params, | | | | int | start, | | | | int | end | | | ) | | |

template<typename Func , int Rank, typename Params >

| __global__ void cutlass::reference::device::kernel::TensorForEach | ( | Coord< Rank > | size, | | | | Params | params = Params() | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeGaussian | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeIdentity | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeLinear | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeUniform | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |


Generated by Doxygen 1.8.11