docs/namespacecutlass.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Namespaces | Classes | Typedefs | Enumerations | Functions
cutlass Namespace Reference
|
| | | arch | | | | | debug | | | | | detail | | | | | device_memory | | | | | epilogue | | | | | gemm | | | | | layout | | | | | library | | | | | platform | | | | | reduction | | | | | reference | | | | | thread | | | | | transform | | |
|
|
| class | AlignedArray |
| | Aligned array type. More...
|
| |
| struct | AlignedBuffer |
| | Modifies semantics of cutlass::Array<> to provide guaranteed alignment. More...
|
| |
| class | Array< T, N, false > |
| | Statically sized array for any data type. More...
|
| |
| class | Array< T, N, true > |
| | Statically sized array for any data type. More...
|
| |
| struct | CommandLine |
| |
| class | complex |
| |
| class | ConstSubbyteReference |
| |
| struct | Coord |
| | Statically-sized array specifying Coords within a tensor. More...
|
| |
| class | cuda_exception |
| | C++ exception wrapper for CUDA cudaError_t. More...
|
| |
| struct | Distribution |
| | Distribution type. More...
|
| |
| struct | divide_assert |
| |
| struct | divides |
| |
| struct | divides< Array< half_t, N > > |
| |
| struct | divides< Array< T, N > > |
| |
| struct | FloatType |
| | Defines a floating-point type based on the number of exponent and mantissa bits. More...
|
| |
| struct | FloatType< 11, 52 > |
| |
| struct | FloatType< 5, 10 > |
| |
| struct | FloatType< 8, 23 > |
| |
| struct | half_t |
| | IEEE half-precision floating-point type. More...
|
| |
| class | HostTensor |
| | Host tensor. More...
|
| |
| class | IdentityTensorLayout |
| |
| struct | integer_subbyte |
| | 4-bit signed integer type More...
|
| |
| struct | IntegerType |
| | Defines integers based on size and whether they are signed. More...
|
| |
| struct | IntegerType< 1, false > |
| |
| struct | IntegerType< 1, true > |
| |
| struct | IntegerType< 16, false > |
| |
| struct | IntegerType< 16, true > |
| |
| struct | IntegerType< 32, false > |
| |
| struct | IntegerType< 32, true > |
| |
| struct | IntegerType< 4, false > |
| |
| struct | IntegerType< 4, true > |
| |
| struct | IntegerType< 64, false > |
| |
| struct | IntegerType< 64, true > |
| |
| struct | IntegerType< 8, false > |
| |
| struct | IntegerType< 8, true > |
| |
| struct | is_pow2 |
| |
| struct | KernelLaunchConfiguration |
| | Structure containing the basic launch configuration of a CUDA kernel. More...
|
| |
| struct | log2_down |
| |
| struct | log2_down< N, 1, Count > |
| |
| struct | log2_up |
| |
| struct | log2_up< N, 1, Count > |
| |
| struct | MatrixCoord |
| |
| struct | MatrixShape |
| | Describes the size of a matrix tile. More...
|
| |
| struct | Max |
| |
| struct | maximum |
| |
| struct | maximum< Array< T, N > > |
| |
| struct | maximum< float > |
| |
| struct | Min |
| |
| struct | minimum |
| |
| struct | minimum< Array< T, N > > |
| |
| struct | minimum< float > |
| |
| struct | minus |
| |
| struct | minus< Array< half_t, N > > |
| |
| struct | minus< Array< T, N > > |
| |
| struct | multiplies |
| |
| struct | multiplies< Array< half_t, N > > |
| |
| struct | multiplies< Array< T, N > > |
| |
| struct | multiply_add |
| | Fused multiply-add. More...
|
| |
| struct | [multiply_add< Array< half_t, N >, Array< half_t, N >, Array< half_t, N > >](structcutlass_1_1multiply__add_3_01Array_3_01half__t_00_01N_01_4_00_01Array_3_01half__t_00_01N_01adaeadb27c0e4439444709c0eb30963.html) |
| | Fused multiply-add. [More...](structcutlass_1_1multiply__add_3_01Array_3_01half__t_00_01N_01_4_00_01Array_3_01half__t_00_01N_01adaeadb27c0e4439444709c0eb30963.html#details)
|
| |
| struct | multiply_add< Array< T, N >, Array< T, N >, Array< T, N > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< complex< T >, complex< T >, complex< T > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< complex< T >, T, complex< T > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< T, complex< T >, complex< T > > |
| | Fused multiply-add. More...
|
| |
| struct | negate |
| |
| struct | negate< Array< half_t, N > > |
| |
| struct | negate< Array< T, N > > |
| |
| struct | NumericArrayConverter |
| | Conversion operator for Array. More...
|
| |
| struct | NumericArrayConverter< float, half_t, 2, Round > |
| | Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. More...
|
| |
| struct | NumericArrayConverter< float, half_t, N, Round > |
| | Partial specialization for Array<float> <= Array<half_t> More...
|
| |
| struct | [NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html) |
| | Partial specialization for Array<half, 2> <= Array<float, 2>, round to nearest. [More...](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html#details)
|
| |
| struct | NumericArrayConverter< half_t, float, N, Round > |
| | Partial specialization for Array<half> <= Array<float> More...
|
| |
| struct | NumericConverter |
| |
| struct | NumericConverter< float, half_t, Round > |
| | Partial specialization for float <= half_t. More...
|
| |
| struct | [NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html) |
| | Specialization for round-to-nearest. [More...](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html#details)
|
| |
| struct | [NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__toward__zero_01_4.html) |
| | Specialization for round-toward-zero. [More...](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__toward__zero_01_4.html#details)
|
| |
| struct | NumericConverter< int8_t, float, Round > |
| |
| struct | NumericConverter< T, T, Round > |
| | Partial specialization for identical source and destination types (T <= T). More...
|
| |
| struct | NumericConverterClamp |
| |
| struct | plus |
| |
| struct | plus< Array< half_t, N > > |
| |
| struct | plus< Array< T, N > > |
| |
| struct | PredicateVector |
| | Statically sized array of bits implementing the predicate vector concept. More...
|
| |
| struct | RealType |
| | Used to determine the real-valued underlying type of a numeric type T. More...
|
| |
| struct | RealType< complex< T > > |
| | Partial specialization for complex-valued type. More...
|
| |
| struct | ReferenceFactory |
| |
| struct | ReferenceFactory< Element, false > |
| |
| struct | ReferenceFactory< Element, true > |
| |
| struct | ScalarIO |
| | Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
|
| |
| class | Semaphore |
| | CTA-wide semaphore for inter-CTA synchronization. More...
|
| |
| struct | sizeof_bits |
| | Defines the size of an element in bits. More...
|
| |
| struct | sizeof_bits< Array< T, N, RegisterSized > > |
| | Defines the size in bits - specialized for Array<T, N, RegisterSized>. More...
|
| |
| struct | [sizeof_bits< bin1_t >](structcutlass_1_1sizeof__bits_3_01bin1__t_01_4.html) |
| | Defines the size of an element in bits - specialized for bin1_t. [More...](structcutlass_1_1sizeof__bits_3_01bin1__t_01_4.html#details)
|
| |
| struct | [sizeof_bits< int4b_t >](structcutlass_1_1sizeof__bits_3_01int4b__t_01_4.html) |
| | Defines the size of an element in bits - specialized for int4b_t. [More...](structcutlass_1_1sizeof__bits_3_01int4b__t_01_4.html#details)
|
| |
| struct | [sizeof_bits< uint1b_t >](structcutlass_1_1sizeof__bits_3_01uint1b__t_01_4.html) |
| | Defines the size of an element in bits - specialized for uint1b_t. [More...](structcutlass_1_1sizeof__bits_3_01uint1b__t_01_4.html#details)
|
| |
| struct | [sizeof_bits< uint4b_t >](structcutlass_1_1sizeof__bits_3_01uint4b__t_01_4.html) |
| | Defines the size of an element in bits - specialized for uint4b_t. [More...](structcutlass_1_1sizeof__bits_3_01uint4b__t_01_4.html#details)
|
| |
| struct | sqrt_est |
| |
| class | SubbyteReference |
| |
| struct | Tensor4DCoord |
| | Defines a canonical 4D coordinate used by tensor operations. More...
|
| |
| class | TensorRef |
| |
| class | TensorView |
| |
| struct | TypeTraits |
| |
| struct | TypeTraits< complex< double > > |
| |
| struct | TypeTraits< complex< float > > |
| |
| struct | TypeTraits< complex< half > > |
| |
| struct | TypeTraits< complex< half_t > > |
| |
| struct | TypeTraits< double > |
| |
| struct | TypeTraits< float > |
| |
| struct | TypeTraits< half_t > |
| |
| struct | TypeTraits< int > |
| |
| struct | TypeTraits< int64_t > |
| |
| struct | TypeTraits< int8_t > |
| |
| struct | TypeTraits< uint64_t > |
| |
| struct | TypeTraits< uint8_t > |
| |
| struct | TypeTraits< unsigned > |
| |
| struct | xor_add |
| | Fused XOR-add. More...
|
| |
|
|
| using | uint1b_t = integer_subbyte< 1, false > |
| | 1-bit Unsigned integer type More...
|
| |
| using | int4b_t = integer_subbyte< 4, true > |
| | 4-bit Integer type More...
|
| |
| using | uint4b_t = integer_subbyte< 4, false > |
| | 4-bit Unsigned integer type More...
|
| |
| using | bin1_t = bool |
| | 1-bit binary type More...
|
| |
|
|
| enum | ComplexTransform { ComplexTransform::kNone, ComplexTransform::kConjugate } |
| | Enumerated type describing a transformation on a complex value. More...
|
| |
| enum | Status {
Status::kSuccess, Status::kErrorMisalignedOperand, Status::kErrorInvalidLayout, Status::kErrorInvalidProblem,
Status::kErrorNotSupported, Status::kErrorWorkspaceNull, Status::kErrorInternal, Status::kInvalid
} |
| | Status code returned by CUTLASS operations. More...
|
| |
| enum | MatrixLayout { MatrixLayout::kColumnMajor, MatrixLayout::kRowMajor } |
| |
| enum | MatrixTransform { MatrixTransform::kNone, MatrixTransform::kTranspose, MatrixTransform::kConjugate, MatrixTransform::kHermitian } |
| | Transformation applied to matrix operands. More...
|
| |
| enum | FloatRoundStyle {
FloatRoundStyle::round_indeterminate, FloatRoundStyle::round_toward_zero, FloatRoundStyle::round_to_nearest, FloatRoundStyle::round_toward_infinity,
FloatRoundStyle::round_toward_neg_infinity, FloatRoundStyle::round_half_ulp_truncate
} |
| |
|
|
| CUTLASS_HOST_DEVICE constexpr bool | ispow2 (unsigned x) |
| | Returns true if the argument is a power of 2. More...
|
| |
| CUTLASS_HOST_DEVICE constexpr unsigned | floor_pow_2 (unsigned x) |
| | Returns the largest power of two not greater than the argument. More...
|
| |
| CUTLASS_HOST_DEVICE float const & | real (cuFloatComplex const &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE float & | real (cuFloatComplex &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double const & | real (cuDoubleComplex const &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double & | real (cuDoubleComplex &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE float const & | imag (cuFloatComplex const &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE float & | imag (cuFloatComplex &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double const & | imag (cuDoubleComplex const &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double & | imag (cuDoubleComplex &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T const & | real (complex< T > const &z) |
| | Returns the real part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T & | real (complex< T > &z) |
| | Returns the real part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T const & | imag (complex< T > const &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T & | imag (complex< T > &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| template<typename T > |
| std::ostream & | operator<< (std::ostream &out, complex< T > const &z) |
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T | abs (complex< T > const &z) |
| | Returns the magnitude of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T | arg (complex< T > const &z) |
| | Returns the phase angle (argument) of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T | norm (T const &z) |
| | Returns the squared magnitude of a real number. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int8_t | norm (int8_t const &z) |
| | Returns the squared magnitude of a real number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE double | norm (complex< T > const &z) |
| | Returns the squared magnitude of a complex number. More...
|
| |
| template<typename T , typename R > |
| CUTLASS_HOST_DEVICE R | norm_accumulate (T const &x, R const &accumulator) |
| | Norm-accumulate calculation. More...
|
| |
| template<typename T , typename R > |
| CUTLASS_HOST_DEVICE R | norm_accumulate (complex< T > const &z, R const &accumulator) |
| | Norm accumulate specialized for complex types. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | conj (complex< T > const &z) |
| | Returns the complex conjugate. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | proj (complex< T > const &z) |
| | Projects the complex number z onto the Riemann sphere. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | polar (T const &r, T const &theta=T()) |
| | Returns a complex number with magnitude r and phase theta. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | exp (complex< T > const &z) |
| | Computes the complex exponential of z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | log (complex< T > const &z) |
| | Computes the complex natural logarithm of z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | log10 (complex< T > const &z) |
| | Computes the complex base-10 logarithm of z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | sqrt (complex< T > const &z) |
| | Computes the square root of complex number z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | cos (complex< T > const &z) |
| | Computes the cosine of complex z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | sin (complex< T > const &z) |
| | Computes the sine of complex z. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE cutlass::complex< half_t > | from_real< cutlass::complex< half_t > > (double r) |
| |
| template<> |
| CUTLASS_HOST_DEVICE cutlass::complex< float > | from_real< cutlass::complex< float > > (double r) |
| |
| template<> |
| CUTLASS_HOST_DEVICE cutlass::complex< double > | from_real< cutlass::complex< double > > (double r) |
| |
| template<int Rank, typename Index > |
| CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Index s, Coord< Rank, Index > coord) |
| | Scalar division. More...
|
| |
| template<int Rank, typename Index > |
| CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Coord< Rank, Index > coord, Index s) |
| | Scalar division. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) |
| | Helper to make a 1-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) |
| | Helper to make a 2-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) |
| | Helper to make a 3-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) |
| | Helper to make a 4-element coordinate. More...
|
| |
| template<int Rank> |
| std::ostream & | operator<< (std::ostream &out, Coord< Rank > const &coord) |
| |
| std::istream & | operator>> (std::istream &stream, half_t &x) |
| |
| std::ostream & | operator<< (std::ostream &out, half_t const &x) |
| |
| template<typename T > |
| std::ostream & | operator<< (std::ostream &out, ScalarIO< T > const &scalar) |
| | Default printing to ostream. More...
|
| |
| template<> |
| std::ostream & | operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar) |
| | Printing to ostream of int8_t as integer rather than character. More...
|
| |
| template<> |
| std::ostream & | operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar) |
| | Printing to ostream of uint8_t as integer rather than character. More...
|
| |
| template<typename Operator > |
| __global__ void | Kernel (typename Operator::Params params) |
| | Generic CUTLASS kernel template. More...
|
| |
| template<typename dividend_t , typename divisor_t > |
| CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | clz (value_t x) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | find_log2 (value_t x) |
| |
| CUTLASS_HOST_DEVICE void | find_divisor (unsigned int &mul, unsigned int &shr, unsigned int denom) |
| |
| CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr) |
| |
| CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int64_t &rem, int64_t src, int div, unsigned int mul, unsigned int shr) |
| |
| CUTLASS_HOST_DEVICE constexpr int | const_min (int a, int b) |
| |
| CUTLASS_HOST_DEVICE constexpr int | const_max (int a, int b) |
| |
| CUTLASS_HOST_DEVICE bool | signbit (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE cutlass::half_t | abs (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE bool | isnan (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE bool | isfinite (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE cutlass::half_t | nanh (const char *) |
| |
| CUTLASS_HOST_DEVICE bool | isinf (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE bool | isnormal (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE int | fpclassify (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE cutlass::half_t | sqrt (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE half_t | copysign (half_t const &a, half_t const &b) |
| |
| CUTLASS_HOST_DEVICE bool | operator== (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator!= (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator< (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator<= (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator> (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator>= (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator+ (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator* (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator/ (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator+= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator-= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator*= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator/= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator++ (half_t &lhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator-- (half_t &lhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator++ (half_t &lhs, int) |
| |
| CUTLASS_HOST_DEVICE half_t | operator-- (half_t &lhs, int) |
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE bool | relatively_equal (T a, T b, T epsilon, T nonzero_floor) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint1b_t > (uint1b_t a, uint1b_t b, uint1b_t, uint1b_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int4b_t > (int4b_t a, int4b_t b, int4b_t, int4b_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint4b_t > (uint4b_t a, uint4b_t b, uint4b_t, uint4b_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int8_t > (int8_t a, int8_t b, int8_t, int8_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint8_t > (uint8_t a, uint8_t b, uint8_t, uint8_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int16_t > (int16_t a, int16_t b, int16_t, int16_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint16_t > (uint16_t a, uint16_t b, uint16_t, uint16_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int32_t > (int32_t a, int32_t b, int32_t, int32_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint32_t > (uint32_t a, uint32_t b, uint32_t, uint32_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int64_t > (int64_t a, int64_t b, int64_t, int64_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint64_t > (uint64_t a, uint64_t b, uint64_t, uint64_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< half_t > (half_t a, half_t b, half_t epsilon, half_t nonzero_floor) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< float > (float a, float b, float epsilon, float nonzero_floor) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< double > (double a, double b, double epsilon, double nonzero_floor) |
| |
| template<typename Element , typename Layout > |
| CUTLASS_HOST_DEVICE TensorRef< Element, Layout > | make_TensorRef (Element *ptr, Layout const &layout) |
| | Constructs a TensorRef, deducing types from arguments. More...
|
| |
| template<typename Element , typename Layout > |
| bool | TensorRef_aligned (TensorRef< Element, Layout > const &ref, int alignment) |
| |
| template<typename Element , typename Layout > |
| CUTLASS_HOST_DEVICE TensorView< Element, Layout > | make_TensorView (Element *ptr, Layout const &layout, typename Layout::TensorCoord const &extent) |
| | Constructs a TensorView, deducing types from arguments. More...
|
| |
| __host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) |
| | The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
|
| |
| std::ostream & | operator<< (std::ostream &out, cudaError_t result) |
| | Writes a cudaError_t to an output stream. More...
|
| |
| std::ostream & | operator<< (std::ostream &out, cuda_exception const &e) |
| | Writes a cuda_exception instance to an output stream. More...
|
| |
| template<int Interleaved, typename Element , typename Layout > |
| void | reorder_column (TensorRef< Element, Layout > dest, TensorRef< Element, Layout > src, cutlass::gemm::GemmCoord problem_size) |
| |
| template<typename Element , typename Layout > |
| std::ostream & | TensorViewWrite (std::ostream &out, TensorView< Element, Layout > const &view) |
| | Prints human-readable representation of a TensorView to an ostream. More...
|
| |
| template<typename Element , typename Layout > |
| std::ostream & | operator<< (std::ostream &out, TensorView< Element, Layout > const &view) |
| | Prints human-readable representation of a TensorView to an ostream. More...
|
| |
| using cutlass::bin1_t = typedef bool |
| using cutlass::int4b_t = typedef integer_subbyte<4, true> |
| using cutlass::uint1b_t = typedef integer_subbyte<1, false> |
| using cutlass::uint4b_t = typedef integer_subbyte<4, false> |
|
| enum cutlass::ComplexTransform |
| strong |
| Enumerator |
|---|
| kNone |
| kConjugate |
|
| enum cutlass::FloatRoundStyle |
| strong |
Floating-point rounding style similar to the Standard Library's formats but supporting additional rounding options.
| Enumerator |
|---|
| round_indeterminate |
rounding mode unknown
| | round_toward_zero |
round toward zero
| | round_to_nearest |
round to nearest even
| | round_toward_infinity |
round toward infinity
| | round_toward_neg_infinity |
round toward negative infinity
| | round_half_ulp_truncate |
add 0.5ulp to integer representation then round toward zero
|
|
| enum cutlass::MatrixLayout |
| strong |
| Enumerator |
|---|
| kColumnMajor |
| kRowMajor |
|
| enum cutlass::MatrixTransform |
| strong |
| Enumerator |
|---|
| kNone |
no operation
| | kTranspose |
transpose operation
| | kConjugate |
conjugate
| | kHermitian |
conjugate transpose
|
|
| enum cutlass::Status |
| strong |
| Enumerator |
|---|
| kSuccess |
Operation was successful.
| | kErrorMisalignedOperand |
operands fail alignment requirements.
| | kErrorInvalidLayout |
Layout fails alignment requirement.
| | kErrorInvalidProblem |
Specified problem size is not supported by operator.
| | kErrorNotSupported |
Operation is not supported on current device.
| | kErrorWorkspaceNull |
The given workspace is null when it is required to be non-null.
| | kErrorInternal |
An error within CUTLASS occurred.
| | kInvalid |
Status is unspecified.
|
template<typename T >
| CUTLASS_HOST_DEVICE T cutlass::abs | ( | complex< T > const & | z | ) | |
| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::abs | ( | cutlass::half_t const & | h | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE T cutlass::arg | ( | complex< T > const & | z | ) | |
template<typename value_t >
| CUTLASS_HOST_DEVICE value_t cutlass::clz | ( | value_t | x | ) | |
log2 computation, what's the difference between the below codes and log2_up/down codes?
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::conj | ( | complex< T > const & | z | ) | |
| CUTLASS_HOST_DEVICE constexpr int cutlass::const_max | ( | int | a, | | | | int | b | | | ) | | |
| CUTLASS_HOST_DEVICE constexpr int cutlass::const_min | ( | int | a, | | | | int | b | | | ) | | |
| CUTLASS_HOST_DEVICE half_t cutlass::copysign | ( | half_t const & | a, | | | | half_t const & | b | | | ) | | |
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::cos | ( | complex< T > const & | z | ) | |
| __host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl | ( | cudaError_t | error, | | | | const char * | filename, | | | | int | line | | | ) | | |
Returns: The CUDA error.
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::exp | ( | complex< T > const & | z | ) | |
| CUTLASS_HOST_DEVICE void cutlass::fast_divmod | ( | int & | quo, | | | | int & | rem, | | | | int | src, | | | | int | div, | | | | unsigned int | mul, | | | | unsigned int | shr | | | ) | | |
Find quotient and remainder using device-side intrinsics
| CUTLASS_HOST_DEVICE void cutlass::fast_divmod | ( | int & | quo, | | | | int64_t & | rem, | | | | int64_t | src, | | | | int | div, | | | | unsigned int | mul, | | | | unsigned int | shr | | | ) | | |
| CUTLASS_HOST_DEVICE void cutlass::find_divisor | ( | unsigned int & | mul, | | | | unsigned int & | shr, | | | | unsigned int | denom | | | ) | | |
Find divisor, using find_log2
template<typename value_t >
| CUTLASS_HOST_DEVICE value_t cutlass::find_log2 | ( | value_t | x | ) | |
| CUTLASS_HOST_DEVICE constexpr unsigned cutlass::floor_pow_2 | ( | unsigned | x | ) | |
| CUTLASS_HOST_DEVICE int cutlass::fpclassify | ( | cutlass::half_t const & | h | ) | |
template<>
| CUTLASS_HOST_DEVICE cutlass::complex<double> cutlass::from_real< cutlass::complex< double > > | ( | double | r | ) | |
template<>
| CUTLASS_HOST_DEVICE cutlass::complex<float> cutlass::from_real< cutlass::complex< float > > | ( | double | r | ) | |
template<>
| CUTLASS_HOST_DEVICE cutlass::complex<half_t> cutlass::from_real< cutlass::complex< half_t > > | ( | double | r | ) | |
template<typename value_t >
| CUTLASS_HOST_DEVICE value_t cutlass::gcd | ( | value_t | a, | | | | value_t | b | | | ) | | |
Greatest common divisor
| CUTLASS_HOST_DEVICE float const& cutlass::imag | ( | cuFloatComplex const & | z | ) | |
| CUTLASS_HOST_DEVICE float& cutlass::imag | ( | cuFloatComplex & | z | ) | |
| CUTLASS_HOST_DEVICE double const& cutlass::imag | ( | cuDoubleComplex const & | z | ) | |
| CUTLASS_HOST_DEVICE double& cutlass::imag | ( | cuDoubleComplex & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE T const& cutlass::imag | ( | complex< T > const & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE T& cutlass::imag | ( | complex< T > & | z | ) | |
| CUTLASS_HOST_DEVICE bool cutlass::isfinite | ( | cutlass::half_t const & | h | ) | |
| CUTLASS_HOST_DEVICE bool cutlass::isinf | ( | cutlass::half_t const & | h | ) | |
| CUTLASS_HOST_DEVICE bool cutlass::isnan | ( | cutlass::half_t const & | h | ) | |
| CUTLASS_HOST_DEVICE bool cutlass::isnormal | ( | cutlass::half_t const & | h | ) | |
| CUTLASS_HOST_DEVICE constexpr bool cutlass::ispow2 | ( | unsigned | x | ) | |
template<typename Operator >
| __global__ void cutlass::Kernel | ( | typename Operator::Params | params | ) | |
template<typename value_t >
| CUTLASS_HOST_DEVICE value_t cutlass::lcm | ( | value_t | a, | | | | value_t | b | | | ) | | |
Least common multiple
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::log | ( | complex< T > const & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::log10 | ( | complex< T > const & | z | ) | |
| CUTLASS_HOST_DEVICE Coord<1> cutlass::make_Coord | ( | int | _0 | ) | |
| CUTLASS_HOST_DEVICE Coord<2> cutlass::make_Coord | ( | int | _0, | | | | int | _1 | | | ) | | |
| CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord | ( | int | _0, | | | | int | _1, | | | | int | _2 | | | ) | | |
| CUTLASS_HOST_DEVICE Coord<4> cutlass::make_Coord | ( | int | _0, | | | | int | _1, | | | | int | _2, | | | | int | _3 | | | ) | | |
template<typename Element , typename Layout >
| CUTLASS_HOST_DEVICE TensorRef<Element, Layout> cutlass::make_TensorRef | ( | Element * | ptr, | | | | Layout const & | layout | | | ) | | |
template<typename Element , typename Layout >
| CUTLASS_HOST_DEVICE TensorView<Element, Layout> cutlass::make_TensorView | ( | Element * | ptr, | | | | Layout const & | layout, | | | | typename Layout::TensorCoord const & | extent | | | ) | | |
| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::nanh | ( | const char * | | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE T cutlass::norm | ( | T const & | z | ) | |
template<>
| CUTLASS_HOST_DEVICE int8_t cutlass::norm | ( | int8_t const & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE double cutlass::norm | ( | complex< T > const & | z | ) | |
template<typename T , typename R >
| CUTLASS_HOST_DEVICE R cutlass::norm_accumulate | ( | T const & | x, | | | | R const & | accumulator | | | ) | | |
template<typename T , typename R >
| CUTLASS_HOST_DEVICE R cutlass::norm_accumulate | ( | complex< T > const & | z, | | | | R const & | accumulator | | | ) | | |
| CUTLASS_HOST_DEVICE bool cutlass::operator!= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator* | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t& cutlass::operator*= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator+ | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t& cutlass::operator++ | ( | half_t & | lhs | ) | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator++ | ( | half_t & | lhs, | | | | int | | | | ) | | |
| CUTLASS_HOST_DEVICE half_t& cutlass::operator+= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator- | ( | half_t const & | lhs | ) | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator- | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t& cutlass::operator-- | ( | half_t & | lhs | ) | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator-- | ( | half_t & | lhs, | | | | int | | | | ) | | |
| CUTLASS_HOST_DEVICE half_t& cutlass::operator-= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |
template<int Rank, typename Index >
| CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/ | ( | Index | s, | | | | Coord< Rank, Index > | coord | | | ) | | |
template<int Rank, typename Index >
| CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/ | ( | Coord< Rank, Index > | coord, | | | | Index | s | | | ) | | |
| CUTLASS_HOST_DEVICE half_t cutlass::operator/ | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE half_t& cutlass::operator/= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE bool cutlass::operator< | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
template<int Rank>
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | Coord< Rank > const & | coord | | | ) | | |
| inline |
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | cudaError_t | result | | | ) | | |
| inline |
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | cuda_exception const & | e | | | ) | | |
| inline |
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | half_t const & | x | | | ) | | |
| inline |
template<typename T >
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< T > const & | scalar | | | ) | | |
| inline |
template<>
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< int8_t > const & | scalar | | | ) | | |
| inline |
template<>
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< uint8_t > const & | scalar | | | ) | | |
| inline |
template<typename Element , typename Layout >
|
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | TensorView< Element, Layout > const & | view | | | ) | | |
| inline |
template<typename T >
| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | complex< T > const & | z | | | ) | | |
| CUTLASS_HOST_DEVICE bool cutlass::operator<= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE bool cutlass::operator== | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE bool cutlass::operator> | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
| CUTLASS_HOST_DEVICE bool cutlass::operator>= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |
|
| std::istream& cutlass::operator>> | ( | std::istream & | stream, | | | | half_t & | x | | | ) | | |
| inline |
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::polar | ( | T const & | r, | | | | T const & | theta = T() | | | ) | | |
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::proj | ( | complex< T > const & | z | ) | |
| CUTLASS_HOST_DEVICE float const& cutlass::real | ( | cuFloatComplex const & | z | ) | |
| CUTLASS_HOST_DEVICE float& cutlass::real | ( | cuFloatComplex & | z | ) | |
| CUTLASS_HOST_DEVICE double const& cutlass::real | ( | cuDoubleComplex const & | z | ) | |
| CUTLASS_HOST_DEVICE double& cutlass::real | ( | cuDoubleComplex & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE T const& cutlass::real | ( | complex< T > const & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE T& cutlass::real | ( | complex< T > & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal | ( | T | a, | | | | T | b, | | | | T | epsilon, | | | | T | nonzero_floor | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< double > | ( | double | a, | | | | double | b, | | | | double | epsilon, | | | | double | nonzero_floor | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< float > | ( | float | a, | | | | float | b, | | | | float | epsilon, | | | | float | nonzero_floor | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< half_t > | ( | half_t | a, | | | | half_t | b, | | | | half_t | epsilon, | | | | half_t | nonzero_floor | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int16_t > | ( | int16_t | a, | | | | int16_t | b, | | | | int16_t | , | | | | int16_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int32_t > | ( | int32_t | a, | | | | int32_t | b, | | | | int32_t | , | | | | int32_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int4b_t > | ( | int4b_t | a, | | | | int4b_t | b, | | | | int4b_t | , | | | | int4b_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int64_t > | ( | int64_t | a, | | | | int64_t | b, | | | | int64_t | , | | | | int64_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int8_t > | ( | int8_t | a, | | | | int8_t | b, | | | | int8_t | , | | | | int8_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint16_t > | ( | uint16_t | a, | | | | uint16_t | b, | | | | uint16_t | , | | | | uint16_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint1b_t > | ( | uint1b_t | a, | | | | uint1b_t | b, | | | | uint1b_t | , | | | | uint1b_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint32_t > | ( | uint32_t | a, | | | | uint32_t | b, | | | | uint32_t | , | | | | uint32_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint4b_t > | ( | uint4b_t | a, | | | | uint4b_t | b, | | | | uint4b_t | , | | | | uint4b_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint64_t > | ( | uint64_t | a, | | | | uint64_t | b, | | | | uint64_t | , | | | | uint64_t | | | | ) | | |
template<>
| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint8_t > | ( | uint8_t | a, | | | | uint8_t | b, | | | | uint8_t | , | | | | uint8_t | | | | ) | | |
template<int Interleaved, typename Element , typename Layout >
| void cutlass::reorder_column | ( | TensorRef< Element, Layout > | dest, | | | | TensorRef< Element, Layout > | src, | | | | cutlass::gemm::GemmCoord | problem_size | | | ) | | |
template<typename dividend_t , typename divisor_t >
| CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest | ( | dividend_t | dividend, | | | | divisor_t | divisor | | | ) | | |
Round dividend up to the nearest multiple of divisor.
| CUTLASS_HOST_DEVICE bool cutlass::signbit | ( | cutlass::half_t const & | h | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::sin | ( | complex< T > const & | z | ) | |
template<typename T >
| CUTLASS_HOST_DEVICE complex<T> cutlass::sqrt | ( | complex< T > const & | z | ) | |
| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::sqrt | ( | cutlass::half_t const & | h | ) | |
template<typename Element , typename Layout >
| bool cutlass::TensorRef_aligned | ( | TensorRef< Element, Layout > const & | ref, | | | | int | alignment | | | ) | | |
template<typename Element , typename Layout >
|
| std::ostream& cutlass::TensorViewWrite | ( | std::ostream & | out, | | | | TensorView< Element, Layout > const & | view | | | ) | | |
| inline |
<!-- contents --><!-- start footer part -->