CUTLASS: cutlass::arch Namespace Reference - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

cutlass::arch Namespace Reference

Classes

Functions

| | template<typename Layout , int MatrixCount> | | __device__ void | ldsm (Array< unsigned, MatrixCount > &D, void const *ptr) | | | | template<> | | __device__ void | ldsm< layout::RowMajor, 1 > (Array< unsigned, 1 > &D, void const *ptr) | | | | template<> | | __device__ void | ldsm< layout::RowMajor, 2 > (Array< unsigned, 2 > &D, void const *ptr) | | | | template<> | | __device__ void | ldsm< layout::RowMajor, 4 > (Array< unsigned, 4 > &D, void const *ptr) | | | | template<> | | __device__ void | ldsm< layout::ColumnMajor, 1 > (Array< unsigned, 1 > &D, void const *ptr) | | | | template<> | | __device__ void | ldsm< layout::ColumnMajor, 2 > (Array< unsigned, 2 > &D, void const *ptr) | | | | template<> | | __device__ void | ldsm< layout::ColumnMajor, 4 > (Array< unsigned, 4 > &D, void const *ptr) | | | | template<typename T , int N> | | CUTLASS_HOST_DEVICE Array< T, N > | operator* (Array< T, N > const &a, Array< T, N > const &b) | | | | template<typename T , int N> | | CUTLASS_HOST_DEVICE Array< T, N > | operator+ (Array< T, N > const &a, Array< T, N > const &b) | | | | template<typename T , int N> | | CUTLASS_HOST_DEVICE Array< T, N > | operator- (Array< T, N > const &a, Array< T, N > const &b) | | | | template<typename T , int N> | | CUTLASS_HOST_DEVICE Array< T, N > | mac (Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c) | | | | template<typename Element , typename Accumulator , int N> | | CUTLASS_HOST_DEVICE Accumulator | dot (Array< T, N > const &a, Array< T, N > const &b, Accumulator accum) | | | | template<> | | CUTLASS_HOST_DEVICE Array< half_t, 2 > | operator* (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b) | | | | template<> | | CUTLASS_HOST_DEVICE Array< half_t, 2 > | operator+ (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b) | | | | template<> | | CUTLASS_HOST_DEVICE Array< half_t, 2 > | operator- (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b) | | | | template<> | | CUTLASS_HOST_DEVICE Array< 
half_t, 2 > | mac (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, Array< half_t, 2 > const &c) | | | Multiply-accumulate operators - specialized for half_t x 2. More...
| | | | template<> | | CUTLASS_HOST_DEVICE half_t | dot (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, half_t accum) | | | Dot product operator - specialized for half_t <- (half_t * half_t) x 2 + half_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE float | dot (Array< half_t, 2 > const &a, Array< half_t, 2 > const &b, float accum) | | | Dot product operator - specialized for float <- (half_t * half_t) x 2 + float. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (int8_t * int8_t) x 4 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< uint8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (uint8_t * int8_t) x 4 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< int8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (int8_t * uint8_t) x 4 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< uint8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (uint8_t * uint8_t) x 4 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (int16_t * int8_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (uint16_t * int8_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (int16_t * uint8_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (uint16_t * uint8_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (int16_t * int16_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (uint16_t * int16_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< int16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (int16_t * uint16_t) x 2 + int32_t. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int32_t | dot (Array< uint16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum) | | | Dot product operator - specialized for int32_t <- (uint16_t * uint16_t) x 2 + int32_t. More...
| | |

Function Documentation

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int8_t, 4 > const & | a, | | | | Array< int8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint8_t, 4 > const & | a, | | | | Array< int8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int8_t, 4 > const & | a, | | | | Array< uint8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint8_t, 4 > const & | a, | | | | Array< uint8_t, 4 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< int8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< int8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE half_t cutlass::arch::dot | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b, | | | | half_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< uint8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< uint8_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE float cutlass::arch::dot | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b, | | | | float | accum | | | ) | | |

template<typename Element , typename Accumulator , int N>

| CUTLASS_HOST_DEVICE Accumulator cutlass::arch::dot | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b, | | | | Accumulator | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< int16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< int16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< int16_t, 2 > const & | a, | | | | Array< uint16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot | ( | Array< uint16_t, 2 > const & | a, | | | | Array< uint16_t, 2 > const & | b, | | | | int32_t | accum | | | ) | | |

template<typename Layout , int MatrixCount>

| __device__ void cutlass::arch::ldsm | ( | Array< unsigned, MatrixCount > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| __device__ void cutlass::arch::ldsm< layout::ColumnMajor, 1 > | ( | Array< unsigned, 1 > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| __device__ void cutlass::arch::ldsm< layout::ColumnMajor, 2 > | ( | Array< unsigned, 2 > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| __device__ void cutlass::arch::ldsm< layout::ColumnMajor, 4 > | ( | Array< unsigned, 4 > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| __device__ void cutlass::arch::ldsm< layout::RowMajor, 1 > | ( | Array< unsigned, 1 > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| __device__ void cutlass::arch::ldsm< layout::RowMajor, 2 > | ( | Array< unsigned, 2 > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| __device__ void cutlass::arch::ldsm< layout::RowMajor, 4 > | ( | Array< unsigned, 4 > & | D, | | | | void const * | ptr | | | ) | | |

| inline |

template<>

| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::mac | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b, | | | | Array< half_t, 2 > const & | c | | | ) | | |

template<typename T , int N>

| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::mac | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b, | | | | Array< T, N > const & | c | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::operator* | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b | | | ) | | |

template<typename T , int N>

| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::operator* | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::operator+ | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b | | | ) | | |

template<typename T , int N>

| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::operator+ | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE Array<half_t, 2> cutlass::arch::operator- | ( | Array< half_t, 2 > const & | a, | | | | Array< half_t, 2 > const & | b | | | ) | | |

template<typename T , int N>

| CUTLASS_HOST_DEVICE Array<T, N> cutlass::arch::operator- | ( | Array< T, N > const & | a, | | | | Array< T, N > const & | b | | | ) | | |

Generated by Doxygen 1.8.11