CUTLASS: cutlass::reference::device::kernel Namespace Reference - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

cutlass::reference::device::kernel Namespace Reference

Namespaces

Functions

| | template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > | | __global__ void | Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum) | | | | template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > | | __global__ void | BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum) | | | | template<typename T > | | __global__ void | TensorInitializeUniform (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to uniform random distribution. More...
| | | | template<typename T > | | __global__ void | TensorInitializeGaussian (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to a Gaussian distribution. More...
| | | | template<typename T > | | __global__ void | TensorInitializeLinear (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to a linear distribution. More...
| | | | template<typename T > | | __global__ void | TensorInitializeIdentity (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) | | | Kernel to initialize tensor to an identity matrix. More...
| | | | template<typename Func , int Rank, typename Params > | | __global__ void | TensorForEach (Coord< Rank > size, Params params=Params()) | | | Kernel calls a functor for each element in a tensor's index space. More...
| | | | template<typename Func , int Rank, typename Params > | | __global__ void | TensorDiagonalForEach (Coord< Rank > size, Params params, int start, int end) | | | Kernel calls a functor for each element along a tensor's diagonal. More...
| | | | template<typename Element , typename Func > | | __global__ void | BlockForEach (Element *ptr, size_t capacity, typename Func::Params params) | | | | template<typename Element > | | __global__ void | BlockCompareEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity) | | | | template<typename Element > | | __global__ void | BlockCompareRelativelyEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor) | | |

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Element >

template<typename Element >

template<typename Element , typename Func >

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Func , int Rank, typename Params >

| __global__ void cutlass::reference::device::kernel::TensorDiagonalForEach | ( | Coord< Rank > | size, | | | | Params | params, | | | | int | start, | | | | int | end | | | ) | | |

template<typename Func , int Rank, typename Params >

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeGaussian | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeIdentity | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeLinear | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

template<typename T >

| __global__ void cutlass::reference::device::kernel::TensorInitializeUniform | ( | Distribution | dist, | | | | int64_t | seed, | | | | int | dim_contiguous, | | | | int | dim_strided, | | | | T * | tensor, | | | | int | ldm | | | ) | | |

Generated by Doxygen 1.8.11