CUTLASS: cutlass::reference::device Namespace Reference - Cutlass

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

cutlass::reference::device Namespace Reference

Namespaces

Classes

Functions

| | template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>> | | void | compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, AccumulatorType initial_accum) | | | | template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>> | | void | compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, AccumulatorType initial_accum) | | | | template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename InnerProductOp , typename ConvertOp > | | void | BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum) | | | Computes a batch of GEMMs over a set of matrices of common dimension. More...
| | | | template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType > | | void | BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c) | | | | template<typename Element > | | bool | BlockCompareEqual (Element const *ptr_A, Element const *ptr_B, size_t capacity, int grid_size=0, int block_size=0) | | | Performs a bit-level equality check between two blocks. More...
| | | | template<typename Element > | | bool | BlockCompareRelativelyEqual (Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor, int grid_size=0, int block_size=0) | | | Performs a relative equality check between two blocks, parameterized by epsilon and nonzero_floor. More...
| | | | template<typename Element , typename Layout > | | void | TensorFillRandomGaussian (TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1) | | | Fills a tensor with random values with a Gaussian distribution. More...
| | | | template<typename Element > | | void | BlockFillRandomGaussian (Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1) | | | Fills a tensor with random values with a Gaussian distribution. More...
| | | | template<typename Element , typename Layout > | | void | TensorFillRandomUniform (TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1) | | | Fills a tensor with random values with a uniform random distribution. More...
| | | | template<typename Element > | | void | BlockFillRandomUniform (Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1) | | | Fills a tensor with random values with a uniform random distribution. More...
| | | | template<typename Element , typename Layout > | | void | TensorFillDiagonal (TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0)) | | | Fills a tensor everywhere with a unique value for its diagonal. More...
| | | | template<typename Element , typename Layout > | | void | TensorFill (TensorView< Element, Layout > view, Element val=Element(0)) | | | Fills a tensor with a uniform value. More...
| | | | template<typename Element , typename Layout > | | void | TensorFillIdentity (TensorView< Element, Layout > view) | | | Fills a tensor's diagonal with 1 and 0 everywhere else. More...
| | | | template<typename Element , typename Layout > | | void | TensorUpdateDiagonal (TensorView< Element, Layout > view, Element diag=Element(1)) | | | Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements. More...
| | | | template<typename Element , typename Layout > | | void | TensorUpdateOffDiagonal (TensorView< Element, Layout > view, Element other=Element(1)) | | | Writes a uniform value to all elements in the tensor without modifying diagonal elements. More...
| | | | template<typename Element , typename Layout > | | void | TensorFillLinear (TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0)) | | | Fills tensor with a linear combination of its coordinate and another vector. More...
| | | | template<typename Element > | | void | BlockFillSequential (Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0)) | | | Fills a block of data with sequential elements. More...
| | | | template<typename Element > | | void | BlockFillRandom (Element *ptr, size_t capacity, uint64_t seed, Distribution dist) | | | Fills a block of data with random elements drawn from the given distribution. More...
| | | | template<typename Element , typename Layout > | | void | TensorCopyDiagonalIn (TensorView< Element, Layout > view, Element const *ptr) | | | Copies a diagonal in from host memory without modifying off-diagonal elements. More...
| | | | template<typename Element , typename Layout > | | void | TensorCopyDiagonalOut (Element *ptr, TensorView< Element, Layout > view) | | | Copies the diagonal of a tensor into a dense buffer in host memory. More...
| | |

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename InnerProductOp , typename ConvertOp >

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Element >

template<typename Element >

template<typename Element >

template<typename Element >

Element type

bits: If non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing the precision of the data.

Parameters

template<typename Element >

bits: If non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing the precision of the data.

Parameters

template<typename Element >

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

Explicitly naming types needed by this template can be cumbersome, particularly for the accumulator type, so a function argument 'initial_accum' is exposed. Passing AccumulatorType(0) as the last function argument can be easier than naming all template arguments explicitly.

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

This assumes the accumulator type is the same type as the scalars.