docs/namespacecutlass_1_1arch.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
cutlass::arch Namespace Reference
|
|
| struct | Mma |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< double >, LayoutA, complex< double >, LayoutB, complex< double >, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< double >, LayoutA, double, LayoutB, complex< double >, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< float >, LayoutA, complex< float >, LayoutB, complex< float >, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, complex< float >, LayoutA, float, LayoutB, complex< float >, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, double, LayoutA, complex< double >, LayoutB, complex< double >, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, double, LayoutA, double, LayoutB, double, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator > |
| | Matrix multiply-add operation - specialized for 1x1x1x1 matrix multiply operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, float, LayoutA, complex< float >, LayoutB, complex< float >, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, float, LayoutA, float, LayoutB, float, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, half_t, LayoutA, half_t, LayoutB, float, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 1 >, 1, int, LayoutA, int, LayoutB, int, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 2 >, 1, int16_t, layout::RowMajor, int16_t, layout::ColumnMajor, int, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 1, 4 >, 1, int8_t, LayoutA, int8_t, LayoutB, int, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 1, 2, 1 >, 1, half_t, LayoutA, half_t, LayoutB, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 16, 16, 4 >, 32, half_t, LayoutA, half_t, LayoutB, ElementC, LayoutC, Operator > |
| | Matrix multiply-add operation specialized for the entire warp. More...
|
| |
| struct | Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F32 = F16 * F16 + F32. More...
|
| |
| struct | Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation - F16 = F16 * F16 + F16. More...
|
| |
| struct | Mma< gemm::GemmShape< 2, 1, 1 >, 1, half_t, LayoutA, half_t, LayoutB, half_t, LayoutC, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 2, 2, 1 >, 1, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::ColumnMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 2, 2, 1 >, 1, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc > |
| | Matrix multiply-add operation. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = S8 * S8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = S8 * S8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = S8 * U8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = S8 * U8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = U8 * S8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = U8 * S8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = U8 * U8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = U8 * U8 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = S4 * S4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = S4 * S4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = S4 * U4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = S4 * U4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = U4 * S4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = U4 * S4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: S32 = U4 * U4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate > |
| | Matrix multiply-add operation: S32 = U4 * U4 + S32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F32 = F16 * F16 + F32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F16 = F16 * F16 + F16. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::RowMajor, float, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F32 = F16 * F16 + F32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::ColumnMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F16 = F16 * F16 + F16. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F32 = F16 * F16 + F32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F16 = F16 * F16 + F16. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::RowMajor, float, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F32 = F16 * F16 + F32. More...
|
| |
| struct | Mma< gemm::GemmShape< 8, 8, 4 >, 8, half_t, layout::RowMajor, half_t, layout::RowMajor, half_t, layout::RowMajor, OpMultiplyAdd > |
| | Matrix multiply-add operation: F16 = F16 * F16 + F16. More...
|
| |
| struct | PtxWmma |
| | WMMA Matrix multiply-add operation. More...
|
| |
| struct | PtxWmmaLoadA |
| | WMMA PTX string load for A, B, and C matrices. More...
|
| |
| struct | PtxWmmaLoadB |
| |
| struct | PtxWmmaLoadC |
| |
| struct | PtxWmmaStoreD |
| | WMMA store for matrix D. More...
|
| |
| struct | Sm50 |
| |
| struct | Sm60 |
| |
| struct | Sm61 |
| |
| struct | Sm70 |
| |
| struct | Sm72 |
| |
| struct | Sm75 |
| |
| struct | [Wmma< Shape_, cutlass::half_t, LayoutA_, cutlass::half_t, LayoutB_, ElementC_, LayoutC_, cutlass::arch::OpMultiplyAdd >](structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1half__t_00_01LayoutA___00_01cutlass_1_84e30c8cc93eeb7ca02f651bd16d4c38.html) |
| |
| struct | [Wmma< Shape_, cutlass::int4b_t, LayoutA_, cutlass::int4b_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd >](structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1int4b__t_00_01LayoutA___00_01cutlass_16fd808a90b3cf9d7cfc99f30888ca3fe.html) |
| |
| struct | [Wmma< Shape_, cutlass::uint1b_t, LayoutA_, cutlass::uint1b_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpXorPopc >](structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01cutlass_1_1uint1b__t_00_01LayoutA___00_01cutlass_c80a7ea4d219cd9b13b560b493338028.html) |
| |
| struct | [Wmma< Shape_, int8_t, LayoutA_, int8_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd >](structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01int8__t_00_01LayoutA___00_01int8__t_00_01LayoutB_505c57bb6818a941dc16f00cf35a9ec0.html) |
| |
| struct | [Wmma< Shape_, uint8_t, LayoutA_, uint8_t, LayoutB_, int32_t, LayoutC_, cutlass::arch::OpMultiplyAdd >](structcutlass_1_1arch_1_1Wmma_3_01Shape___00_01uint8__t_00_01LayoutA___00_01uint8__t_00_01Layout219a464a1248ebfc37aa29bcb10cb1b0.html) |
| |
|
|
| template<typename Layout , int MatrixCount> |
| __device__ void | ldsm (Array< unsigned, MatrixCount > &D, void const *ptr) |
| |
| template<> |
| __device__ void | ldsm< layout::RowMajor, 1 > (Array< unsigned, 1 > &D, void const *ptr) |
| |
| template<> |
| __device__ void | ldsm< layout::RowMajor, 2 > (Array< unsigned, 2 > &D, void const *ptr) |
| |
| template<> |
| __device__ void | ldsm< layout::RowMajor, 4 > (Array< unsigned, 4 > &D, void const *ptr) |
| |
| template<> |
| __device__ void | ldsm< layout::ColumnMajor, 1 > (Array< unsigned, 1 > &D, void const *ptr) |
| |
| template<> |
| __device__ void | ldsm< layout::ColumnMajor, 2 > (Array< unsigned, 2 > &D, void const *ptr) |
| |
| template<> |
| __device__ void | ldsm< layout::ColumnMajor, 4 > (Array< unsigned, 4 > &D, void const *ptr) |
| |
| template<typename T , int N> |
| CUTLASS_HOST_DEVICE Array< T, N > | operator* (Array< T, N > const &a, Array< T, N > const &b) |
| |
| template<typename T , int N> |
| CUTLASS_HOST_DEVICE Array< T, N > | operator+ (Array< T, N > const &a, Array< T, N > const &b) |
| |
| template<typename T , int N> |
| CUTLASS_HOST_DEVICE Array< T, N > | operator- (Array< T, N > const &a, Array< T, N > const &b) |
| |
| template<typename T , int N> |
| CUTLASS_HOST_DEVICE Array< T, N > | mac (Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c) |
| |
| template<typename Element , typename Accumulator , int N> |
CUTLASS_HOST_DEVICE Accumulator | dot (Array< Element, N > const &a, Array< Element, N > const &b, Accumulator accum) |
| |
| template<> |
| CUTLASS_HOST_DEVICE Array< half_t, 2 > | operator* (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b) |
| |
| template<> |
CUTLASS_HOST_DEVICE Array< half_t, 2 > | operator+ (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b) |
| |
| template<> |
| CUTLASS_HOST_DEVICE Array< half_t, 2 > | operator- (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b) |
| |
| template<> |
| CUTLASS_HOST_DEVICE Array< half_t, 2 > | mac (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, Array< half_t, 2 > const &c) |
| | Multiply-accumulate operators - specialized for half_t x 2. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE half_t | dot (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, half_t accum) |
| | Dot product operator - specialized for half_t <- (half_t * half_t) x 2 + half_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE float | dot (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, float accum) |
| | Dot product operator - specialized for float <- (half_t * half_t) x 2 + float. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (int8_t * int8_t) x 4 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< uint8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (uint8_t * int8_t) x 4 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< int8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (int8_t * uint8_t) x 4 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< uint8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (uint8_t * uint8_t) x 4 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (int16_t * int8_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (uint16_t * int8_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (int16_t * uint8_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (uint16_t * uint8_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (int16_t * int16_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (uint16_t * int16_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (int16_t * uint16_t) x 2 + int32_t. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum) |
| | Dot product operator - specialized for int32_t <- (uint16_t * uint16_t) x 2 + int32_t. More...
|
| |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int8_t, 4 > const & | a, | | | | Array< int8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint8_t, 4 > const & | a, | | | | Array< int8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int8_t, 4 > const & | a, | | | | Array< uint8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint8_t, 4 > const & | a, | | | | Array< uint8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< int8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< int8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE half_t cutlass::arch::dot | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b, | | | | half_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< uint8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< uint8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE float cutlass::arch::dot | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b, | | | | float | accum | | | ) | | |
template<typename Element , typename Accumulator , int N>
| CUTLASS_HOST_DEVICE Accumulator cutlass::arch::dot | ( | Array< Element, N > const & | a, | | | | Array< Element, N > const & | b, | | | | Accumulator | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< int16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< int16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< uint16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< uint16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |
template<typename Layout , int MatrixCount>
|
| __device__ void cutlass::arch::ldsm | ( | Array< unsigned, MatrixCount > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
|
| __device__ void cutlass::arch::ldsm< layout::ColumnMajor, 1 > | ( | Array< unsigned, 1 > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
|
| __device__ void cutlass::arch::ldsm< layout::ColumnMajor, 2 > | ( | Array< unsigned, 2 > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
|
| __device__ void cutlass::arch::ldsm< layout::ColumnMajor, 4 > | ( | Array< unsigned, 4 > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
|
| __device__ void cutlass::arch::ldsm< layout::RowMajor, 1 > | ( | Array< unsigned, 1 > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
|
| __device__ void cutlass::arch::ldsm< layout::RowMajor, 2 > | ( | Array< unsigned, 2 > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
|
| __device__ void cutlass::arch::ldsm< layout::RowMajor, 4 > | ( | Array< unsigned, 4 > & | D, | | | | void const * | ptr | | | ) | | |
| inline |
template<>
| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::mac | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b, | | | | Array< half_t, 2 > const & | c | | | ) | | |
template<typename T , int N>
| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::mac | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b, | | | | Array< T, N > const & | c | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::operator* | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b | | | ) | | |
template<typename T , int N>
| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::operator* | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::operator+ | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b | | | ) | | |
template<typename T , int N>
| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::operator+ | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::operator- | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b | | | ) | | |
template<typename T , int N>
| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::operator- | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b | | | ) | | |
Generated by Doxygen 1.8.11