Back to Cutlass

CUTLASS: cutlass Namespace Reference

docs/namespacecutlass.html

4.4.282.9 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Namespaces | Classes | Typedefs | Enumerations | Functions

cutlass Namespace Reference

|

Namespaces

| | | arch | | | | | debug | | | | | detail | | | | | device_memory | | | | | epilogue | | | | | gemm | | | | | layout | | | | | library | | | | | platform | | | | | reduction | | | | | reference | | | | | thread | | | | | transform | | |

|

Classes

| | class | AlignedArray | | | Aligned array type. More...
| | | | struct | AlignedBuffer | | | Modifies semantics of cutlass::Array<> to provide guaranteed alignment. More...
| | | | class | Array< T, N, false > | | | Statically sized array for any data type. More...
| | | | class | Array< T, N, true > | | | Statically sized array for any data type. More...
| | | | struct | CommandLine | | | | class | complex | | | | class | ConstSubbyteReference | | | | struct | Coord | | | Statically-sized array specifying Coords within a tensor. More...
| | | | class | cuda_exception | | | C++ exception wrapper for CUDA cudaError_t. More...
| | | | struct | Distribution | | | Distribution type. More...
| | | | struct | divide_assert | | | | struct | divides | | | | struct | divides< Array< half_t, N > > | | | | struct | divides< Array< T, N > > | | | | struct | FloatType | | | Defines a floating-point type based on the number of exponent and mantissa bits. More...
| | | | struct | FloatType< 11, 52 > | | | | struct | FloatType< 5, 10 > | | | | struct | FloatType< 8, 23 > | | | | struct | half_t | | | IEEE half-precision floating-point type. More...
| | | | class | HostTensor | | | Host tensor. More...
| | | | class | IdentityTensorLayout | | | | struct | integer_subbyte | | | 4-bit signed integer type More...
| | | | struct | IntegerType | | | Defines integers based on size and whether they are signed. More...
| | | | struct | IntegerType< 1, false > | | | | struct | IntegerType< 1, true > | | | | struct | IntegerType< 16, false > | | | | struct | IntegerType< 16, true > | | | | struct | IntegerType< 32, false > | | | | struct | IntegerType< 32, true > | | | | struct | IntegerType< 4, false > | | | | struct | IntegerType< 4, true > | | | | struct | IntegerType< 64, false > | | | | struct | IntegerType< 64, true > | | | | struct | IntegerType< 8, false > | | | | struct | IntegerType< 8, true > | | | | struct | is_pow2 | | | | struct | KernelLaunchConfiguration | | | Structure containing the basic launch configuration of a CUDA kernel. More...
| | | | struct | log2_down | | | | struct | log2_down< N, 1, Count > | | | | struct | log2_up | | | | struct | log2_up< N, 1, Count > | | | | struct | MatrixCoord | | | | struct | MatrixShape | | | Describes the size of a matrix tile. More...
| | | | struct | Max | | | | struct | maximum | | | | struct | maximum< Array< T, N > > | | | | struct | maximum< float > | | | | struct | Min | | | | struct | minimum | | | | struct | minimum< Array< T, N > > | | | | struct | minimum< float > | | | | struct | minus | | | | struct | minus< Array< half_t, N > > | | | | struct | minus< Array< T, N > > | | | | struct | multiplies | | | | struct | multiplies< Array< half_t, N > > | | | | struct | multiplies< Array< T, N > > | | | | struct | multiply_add | | | Fused multiply-add. More...
| | | | struct | [multiply_add< Array< half_t, N >, Array< half_t, N >, Array< half_t, N > >](structcutlass_1_1multiply__add_3_01Array_3_01half__t_00_01N_01_4_00_01Array_3_01half__t_00_01N_01adaeadb27c0e4439444709c0eb30963.html) | | | Fused multiply-add. [More...](structcutlass_1_1multiply__add_3_01Array_3_01half__t_00_01N_01_4_00_01Array_3_01half__t_00_01N_01adaeadb27c0e4439444709c0eb30963.html#details)
| | | | struct | multiply_add< Array< T, N >, Array< T, N >, Array< T, N > > | | | Fused multiply-add. More...
| | | | struct | multiply_add< complex< T >, complex< T >, complex< T > > | | | Fused multiply-add. More...
| | | | struct | multiply_add< complex< T >, T, complex< T > > | | | Fused multiply-add. More...
| | | | struct | multiply_add< T, complex< T >, complex< T > > | | | Fused multiply-add. More...
| | | | struct | negate | | | | struct | negate< Array< half_t, N > > | | | | struct | negate< Array< T, N > > | | | | struct | NumericArrayConverter | | | Conversion operator for Array. More...
| | | | struct | NumericArrayConverter< float, half_t, 2, Round > | | | Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. More...
| | | | struct | NumericArrayConverter< float, half_t, N, Round > | | | Partial specialization for Array<float> <= Array<half> More...
| | | | struct | [NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html) | | | Partial specialization for Array<half, 2> <= Array<float, 2>, round to nearest. [More...](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html#details)
| | | | struct | NumericArrayConverter< half_t, float, N, Round > | | | Partial specialization for Array<half> <= Array<float> More...
| | | | struct | NumericConverter | | | | struct | NumericConverter< float, half_t, Round > | | | Partial specialization for float <= half_t. More...
| | | | struct | [NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html) | | | Specialization for round-to-nearest. [More...](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__to__nearest_01_4.html#details)
| | | | struct | [NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__toward__zero_01_4.html) | | | Specialization for round-toward-zero. [More...](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round__toward__zero_01_4.html#details)
| | | | struct | NumericConverter< int8_t, float, Round > | | | | struct | NumericConverter< T, T, Round > | | | Partial specialization for identical source and destination types (T <= T). More...
| | | | struct | NumericConverterClamp | | | | struct | plus | | | | struct | plus< Array< half_t, N > > | | | | struct | plus< Array< T, N > > | | | | struct | PredicateVector | | | Statically sized array of bits implementing. More...
| | | | struct | RealType | | | Used to determine the real-valued underlying type of a numeric type T. More...
| | | | struct | RealType< complex< T > > | | | Partial specialization for complex-valued type. More...
| | | | struct | ReferenceFactory | | | | struct | ReferenceFactory< Element, false > | | | | struct | ReferenceFactory< Element, true > | | | | struct | ScalarIO | | | Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
| | | | class | Semaphore | | | CTA-wide semaphore for inter-CTA synchronization. More...
| | | | struct | sizeof_bits | | | Defines the size of an element in bits. More...
| | | | struct | sizeof_bits< Array< T, N, RegisterSized > > | | | Statically sized array for any data type. More...
| | | | struct | [sizeof_bits< bin1_t >](structcutlass_1_1sizeof__bits_3_01bin1__t_01_4.html) | | | Defines the size of an element in bits - specialized for bin1_t. [More...](structcutlass_1_1sizeof__bits_3_01bin1__t_01_4.html#details)
| | | | struct | [sizeof_bits< int4b_t >](structcutlass_1_1sizeof__bits_3_01int4b__t_01_4.html) | | | Defines the size of an element in bits - specialized for int4b_t. [More...](structcutlass_1_1sizeof__bits_3_01int4b__t_01_4.html#details)
| | | | struct | [sizeof_bits< uint1b_t >](structcutlass_1_1sizeof__bits_3_01uint1b__t_01_4.html) | | | Defines the size of an element in bits - specialized for uint1b_t. [More...](structcutlass_1_1sizeof__bits_3_01uint1b__t_01_4.html#details)
| | | | struct | [sizeof_bits< uint4b_t >](structcutlass_1_1sizeof__bits_3_01uint4b__t_01_4.html) | | | Defines the size of an element in bits - specialized for uint4b_t. [More...](structcutlass_1_1sizeof__bits_3_01uint4b__t_01_4.html#details)
| | | | struct | sqrt_est | | | | class | SubbyteReference | | | | struct | Tensor4DCoord | | | Defines a canonical 4D coordinate used by tensor operations. More...
| | | | class | TensorRef | | | | class | TensorView | | | | struct | TypeTraits | | | | struct | TypeTraits< complex< double > > | | | | struct | TypeTraits< complex< float > > | | | | struct | TypeTraits< complex< half > > | | | | struct | TypeTraits< complex< half_t > > | | | | struct | TypeTraits< double > | | | | struct | TypeTraits< float > | | | | struct | TypeTraits< half_t > | | | | struct | TypeTraits< int > | | | | struct | TypeTraits< int64_t > | | | | struct | TypeTraits< int8_t > | | | | struct | TypeTraits< uint64_t > | | | | struct | TypeTraits< uint8_t > | | | | struct | TypeTraits< unsigned > | | | | struct | xor_add | | | Fused XOR-add. More...
| | |

|

Typedefs

| | using | uint1b_t = integer_subbyte< 1, false > | | | 1-bit Unsigned integer type More...
| | | | using | int4b_t = integer_subbyte< 4, true > | | | 4-bit Integer type More...
| | | | using | uint4b_t = integer_subbyte< 4, false > | | | 4-bit Unsigned integer type More...
| | | | using | bin1_t = bool | | | 1-bit binary type More...
| | |

|

Enumerations

| | enum | ComplexTransform { ComplexTransform::kNone, ComplexTransform::kConjugate } | | | Enumerated type describing a transformation on a complex value. More...
| | | | enum | Status {
Status::kSuccess, Status::kErrorMisalignedOperand, Status::kErrorInvalidLayout, Status::kErrorInvalidProblem,
Status::kErrorNotSupported, Status::kErrorWorkspaceNull, Status::kErrorInternal, Status::kInvalid
} | | | Status code returned by CUTLASS operations. More...
| | | | enum | MatrixLayout { MatrixLayout::kColumnMajor, MatrixLayout::kRowMajor } | | | | enum | MatrixTransform { MatrixTransform::kNone, MatrixTransform::kTranspose, MatrixTransform::kConjugate, MatrixTransform::kHermitian } | | | Transformation applied to matrix operands. More...
| | | | enum | FloatRoundStyle {
FloatRoundStyle::round_indeterminate, FloatRoundStyle::round_toward_zero, FloatRoundStyle::round_to_nearest, FloatRoundStyle::round_toward_infinity,
FloatRoundStyle::round_toward_neg_infinity, FloatRoundStyle::round_half_ulp_truncate
} | | |

|

Functions

| | CUTLASS_HOST_DEVICE constexpr bool | ispow2 (unsigned x) | | | Returns true if the argument is a power of 2. More...
| | | | CUTLASS_HOST_DEVICE constexpr unsigned | floor_pow_2 (unsigned x) | | | Returns the largest power of two not greater than the argument. More...
| | | | CUTLASS_HOST_DEVICE float const & | real (cuFloatComplex const &z) | | | Returns the real part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE float & | real (cuFloatComplex &z) | | | Returns the real part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE double const & | real (cuDoubleComplex const &z) | | | Returns the real part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE double & | real (cuDoubleComplex &z) | | | Returns the real part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE float const & | imag (cuFloatComplex const &z) | | | Returns the imaginary part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE float & | imag (cuFloatComplex &z) | | | Returns the imaginary part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE double const & | imag (cuDoubleComplex const &z) | | | Returns the imaginary part of the complex number. More...
| | | | CUTLASS_HOST_DEVICE double & | imag (cuDoubleComplex &z) | | | Returns the imaginary part of the complex number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE T const & | real (complex< T > const &z) | | | Returns the real part of the complex number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE T & | real (complex< T > &z) | | | Returns the real part of the complex number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE T const & | imag (complex< T > const &z) | | | Returns the imaginary part of the complex number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE T & | imag (complex< T > &z) | | | Returns the imaginary part of the complex number. More...
| | | | template<typename T > | | std::ostream & | operator<< (std::ostream &out, complex< T > const &z) | | | | template<typename T > | | CUTLASS_HOST_DEVICE T | abs (complex< T > const &z) | | | Returns the magnitude of the complex number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE T | arg (complex< T > const &z) | | | Returns the phase angle (argument) of the complex number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE T | norm (T const &z) | | | Returns the squared magnitude of a real number. More...
| | | | template<> | | CUTLASS_HOST_DEVICE int8_t | norm (int8_t const &z) | | | Returns the squared magnitude of a real number. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE double | norm (complex< T > const &z) | | | Returns the squared magnitude of a complex number. More...
| | | | template<typename T , typename R > | | CUTLASS_HOST_DEVICE R | norm_accumulate (T const &x, R const &accumulator) | | | Norm-accumulate calculation. More...
| | | | template<typename T , typename R > | | CUTLASS_HOST_DEVICE R | norm_accumulate (complex< T > const &z, R const &accumulator) | | | Norm accumulate specialized for complex types. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | conj (complex< T > const &z) | | | Returns the complex conjugate. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | proj (complex< T > const &z) | | | Projects the complex number z onto the Riemann sphere. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | polar (T const &r, T const &theta=T()) | | | Returns a complex number with magnitude r and phase theta. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | exp (complex< T > const &z) | | | Computes the complex exponential of z. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | log (complex< T > const &z) | | | Computes the complex natural logarithm of z. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | log10 (complex< T > const &z) | | | Computes the complex base-10 logarithm of z. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | sqrt (complex< T > const &z) | | | Computes the square root of complex number z. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | cos (complex< T > const &z) | | | Computes the cosine of complex z. More...
| | | | template<typename T > | | CUTLASS_HOST_DEVICE complex< T > | sin (complex< T > const &z) | | | Computes the sin of complex z. More...
| | | | template<> | | CUTLASS_HOST_DEVICE cutlass::complex< half_t > | from_real< cutlass::complex< half_t > > (double r) | | | | template<> | | CUTLASS_HOST_DEVICE cutlass::complex< float > | from_real< cutlass::complex< float > > (double r) | | | | template<> | | CUTLASS_HOST_DEVICE cutlass::complex< double > | from_real< cutlass::complex< double > > (double r) | | | | template<int Rank, typename Index > | | CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Index s, Coord< Rank, Index > coord) | | | Scalar division. More...
| | | | template<int Rank, typename Index > | | CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Coord< Rank, Index > coord, Index s) | | | Scalar division. More...
| | | | CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) | | | Helper to make a 1-element coordinate. More...
| | | | CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) | | | Helper to make a 2-element coordinate. More...
| | | | CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) | | | Helper to make a 3-element coordinate. More...
| | | | CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) | | | Helper to make a 4-element coordinate. More...
| | | | template<int Rank> | | std::ostream & | operator<< (std::ostream &out, Coord< Rank > const &coord) | | | | std::istream & | operator>> (std::istream &stream, half_t &x) | | | | std::ostream & | operator<< (std::ostream &out, half_t const &x) | | | | template<typename T > | | std::ostream & | operator<< (std::ostream &out, ScalarIO< T > const &scalar) | | | Default printing to ostream. More...
| | | | template<> | | std::ostream & | operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar) | | | Printing to ostream of int8_t as integer rather than character. More...
| | | | template<> | | std::ostream & | operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar) | | | Printing to ostream of uint8_t as integer rather than character. More...
| | | | template<typename Operator > | | __global__ void | Kernel (typename Operator::Params params) | | | Generic CUTLASS kernel template. More...
| | | | template<typename dividend_t , typename divisor_t > | | CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) | | | | template<typename value_t > | | CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) | | | | template<typename value_t > | | CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) | | | | template<typename value_t > | | CUTLASS_HOST_DEVICE value_t | clz (value_t x) | | | | template<typename value_t > | | CUTLASS_HOST_DEVICE value_t | find_log2 (value_t x) | | | | CUTLASS_HOST_DEVICE void | find_divisor (unsigned int &mul, unsigned int &shr, unsigned int denom) | | | | CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr) | | | | CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int64_t &rem, int64_t src, int div, unsigned int mul, unsigned int shr) | | | | CUTLASS_HOST_DEVICE constexpr int | const_min (int a, int b) | | | | CUTLASS_HOST_DEVICE constexpr int | const_max (int a, int b) | | | | CUTLASS_HOST_DEVICE bool | signbit (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE cutlass::half_t | abs (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE bool | isnan (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE bool | isfinite (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE cutlass::half_t | nanh (const char *) | | | | CUTLASS_HOST_DEVICE bool | isinf (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE bool | isnormal (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE int | fpclassify (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE cutlass::half_t | sqrt (cutlass::half_t const &h) | | | | CUTLASS_HOST_DEVICE half_t | copysign (half_t const &a, half_t const &b) | | | | CUTLASS_HOST_DEVICE bool | operator== (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE bool | operator!= (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE bool | operator< (half_t const &lhs, half_t const &rhs) | 
| | | CUTLASS_HOST_DEVICE bool | operator<= (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE bool | operator> (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE bool | operator>= (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t | operator+ (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs) | | | | CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t | operator* (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t | operator/ (half_t const &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t & | operator+= (half_t &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t & | operator-= (half_t &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t & | operator*= (half_t &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t & | operator/= (half_t &lhs, half_t const &rhs) | | | | CUTLASS_HOST_DEVICE half_t & | operator++ (half_t &lhs) | | | | CUTLASS_HOST_DEVICE half_t & | operator-- (half_t &lhs) | | | | CUTLASS_HOST_DEVICE half_t | operator++ (half_t &lhs, int) | | | | CUTLASS_HOST_DEVICE half_t | operator-- (half_t &lhs, int) | | | | template<typename T > | | CUTLASS_HOST_DEVICE bool | relatively_equal (T a, T b, T epsilon, T nonzero_floor) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< uint1b_t > (uint1b_t a, uint1b_t b, uint1b_t, uint1b_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< int4b_t > (int4b_t a, int4b_t b, int4b_t, int4b_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< uint4b_t > (uint4b_t a, uint4b_t b, uint4b_t, uint4b_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< int8_t > (int8_t a, int8_t b, int8_t, int8_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< uint8_t > (uint8_t a, uint8_t b, uint8_t, uint8_t) | | | | template<> | | 
CUTLASS_HOST_DEVICE bool | relatively_equal< int16_t > (int16_t a, int16_t b, int16_t, int16_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< uint16_t > (uint16_t a, uint16_t b, uint16_t, uint16_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< int32_t > (int32_t a, int32_t b, int32_t, int32_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< uint32_t > (uint32_t a, uint32_t b, uint32_t, uint32_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< int64_t > (int64_t a, int64_t b, int64_t, int64_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< uint64_t > (uint64_t a, uint64_t b, uint64_t, uint64_t) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< half_t > (half_t a, half_t b, half_t epsilon, half_t nonzero_floor) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< float > (float a, float b, float epsilon, float nonzero_floor) | | | | template<> | | CUTLASS_HOST_DEVICE bool | relatively_equal< double > (double a, double b, double epsilon, double nonzero_floor) | | | | template<typename Element , typename Layout > | | CUTLASS_HOST_DEVICE TensorRef< Element, Layout > | make_TensorRef (Element *ptr, Layout const &layout) | | | Constructs a TensorRef, deducing types from arguments. More...
| | | | template<typename Element , typename Layout > | | bool | TensorRef_aligned (TensorRef< Element, Layout > const &ref, int alignment) | | | | template<typename Element , typename Layout > | | CUTLASS_HOST_DEVICE TensorView< Element, Layout > | make_TensorView (Element *ptr, Layout const &layout, typename Layout::TensorCoord const &extent) | | | Constructs a TensorView, deducing types from arguments. More...
| | | | __host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) | | | The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
| | | | std::ostream & | operator<< (std::ostream &out, cudaError_t result) | | | Writes a cudaError_t to an output stream. More...
| | | | std::ostream & | operator<< (std::ostream &out, cuda_exception const &e) | | | Writes a cuda_exception instance to an output stream. More...
| | | | template<int Interleaved, typename Element , typename Layout > | | void | reorder_column (TensorRef< Element, Layout > dest, TensorRef< Element, Layout > src, cutlass::gemm::GemmCoord problem_size) | | | | template<typename Element , typename Layout > | | std::ostream & | TensorViewWrite (std::ostream &out, TensorView< Element, Layout > const &view) | | | Prints human-readable representation of a TensorView to an ostream. More...
| | | | template<typename Element , typename Layout > | | std::ostream & | operator<< (std::ostream &out, TensorView< Element, Layout > const &view) | | | Prints human-readable representation of a TensorView to an ostream. More...
| | |

Typedef Documentation

| using cutlass::bin1_t = typedef bool |

| using cutlass::int4b_t = typedef integer_subbyte<4, true> |

| using cutlass::uint1b_t = typedef integer_subbyte<1, false> |

| using cutlass::uint4b_t = typedef integer_subbyte<4, false> |

Enumeration Type Documentation

|

| enum cutlass::ComplexTransform |

| strong |

Enumerator
kNone
kConjugate

|

| enum cutlass::FloatRoundStyle |

| strong |

Floating-point rounding style similar to the Standard Library's formats but supporting additional rounding options.

Enumerator
round_indeterminate

rounding mode unknown

| | round_toward_zero |

round toward zero

| | round_to_nearest |

round to nearest even

| | round_toward_infinity |

round toward infinity

| | round_toward_neg_infinity |

round toward negative infinity

| | round_half_ulp_truncate |

add 0.5ulp to integer representation then round toward zero

|

|

| enum cutlass::MatrixLayout |

| strong |

Enumerator
kColumnMajor
kRowMajor

|

| enum cutlass::MatrixTransform |

| strong |

Enumerator
kNone

no operation

| | kTranspose |

transpose operation

| | kConjugate |

conjugate

| | kHermitian |

conjugate transpose

|

|

| enum cutlass::Status |

| strong |

Enumerator
kSuccess

Operation was successful.

| | kErrorMisalignedOperand |

operands fail alignment requirements.

| | kErrorInvalidLayout |

Layout fails alignment requirement.

| | kErrorInvalidProblem |

Specified problem size is not supported by operator.

| | kErrorNotSupported |

Operation is not supported on current device.

| | kErrorWorkspaceNull |

The given workspace is null when it is required to be non-null.

| | kErrorInternal |

An error within CUTLASS occurred.

| | kInvalid |

Status is unspecified.

|

Function Documentation

template<typename T >

| CUTLASS_HOST_DEVICE T cutlass::abs | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::abs | ( | cutlass::half_t const & | h | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T cutlass::arg | ( | complex< T > const & | z | ) | |

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::clz | ( | value_t | x | ) | |

log2 computation, what's the difference between the below codes and log2_up/down codes?

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::conj | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE constexpr int cutlass::const_max | ( | int | a, | | | | int | b | | | ) | | |

| CUTLASS_HOST_DEVICE constexpr int cutlass::const_min | ( | int | a, | | | | int | b | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::copysign | ( | half_t const & | a, | | | | half_t const & | b | | | ) | | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::cos | ( | complex< T > const & | z | ) | |

| __host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl | ( | cudaError_t | error, | | | | const char * | filename, | | | | int | line | | | ) | | |

Returns: The CUDA error.

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::exp | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE void cutlass::fast_divmod | ( | int & | quo, | | | | int & | rem, | | | | int | src, | | | | int | div, | | | | unsigned int | mul, | | | | unsigned int | shr | | | ) | | |

Find quotient and remainder using device-side intrinsics

| CUTLASS_HOST_DEVICE void cutlass::fast_divmod | ( | int & | quo, | | | | int64_t & | rem, | | | | int64_t | src, | | | | int | div, | | | | unsigned int | mul, | | | | unsigned int | shr | | | ) | | |

| CUTLASS_HOST_DEVICE void cutlass::find_divisor | ( | unsigned int & | mul, | | | | unsigned int & | shr, | | | | unsigned int | denom | | | ) | | |

Find divisor, using find_log2

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::find_log2 | ( | value_t | x | ) | |

| CUTLASS_HOST_DEVICE constexpr unsigned cutlass::floor_pow_2 | ( | unsigned | x | ) | |

| CUTLASS_HOST_DEVICE int cutlass::fpclassify | ( | cutlass::half_t const & | h | ) | |

template<>

| CUTLASS_HOST_DEVICE cutlass::complex<double> cutlass::from_real< cutlass::complex< double > > | ( | double | r | ) | |

template<>

| CUTLASS_HOST_DEVICE cutlass::complex<float> cutlass::from_real< cutlass::complex< float > > | ( | double | r | ) | |

template<>

| CUTLASS_HOST_DEVICE cutlass::complex<half_t> cutlass::from_real< cutlass::complex< half_t > > | ( | double | r | ) | |

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::gcd | ( | value_t | a, | | | | value_t | b | | | ) | | |

Greatest common divisor

| CUTLASS_HOST_DEVICE float const& cutlass::imag | ( | cuFloatComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE float& cutlass::imag | ( | cuFloatComplex & | z | ) | |

| CUTLASS_HOST_DEVICE double const& cutlass::imag | ( | cuDoubleComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE double& cutlass::imag | ( | cuDoubleComplex & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T const& cutlass::imag | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T& cutlass::imag | ( | complex< T > & | z | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isfinite | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isinf | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isnan | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isnormal | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE constexpr bool cutlass::ispow2 | ( | unsigned | x | ) | |

template<typename Operator >

| __global__ void cutlass::Kernel | ( | typename Operator::Params | params | ) | |

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::lcm | ( | value_t | a, | | | | value_t | b | | | ) | | |

Least common multiple

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::log | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::log10 | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE Coord<1> cutlass::make_Coord | ( | int | _0 | ) | |

| CUTLASS_HOST_DEVICE Coord<2> cutlass::make_Coord | ( | int | _0, | | | | int | _1 | | | ) | | |

| CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord | ( | int | _0, | | | | int | _1, | | | | int | _2 | | | ) | | |

| CUTLASS_HOST_DEVICE Coord<4> cutlass::make_Coord | ( | int | _0, | | | | int | _1, | | | | int | _2, | | | | int | _3 | | | ) | | |

template<typename Element , typename Layout >

| CUTLASS_HOST_DEVICE TensorRef<Element, Layout> cutlass::make_TensorRef | ( | Element * | ptr, | | | | Layout const & | layout | | | ) | | |

template<typename Element , typename Layout >

| CUTLASS_HOST_DEVICE TensorView<Element, Layout> cutlass::make_TensorView | ( | Element * | ptr, | | | | Layout const & | layout, | | | | typename Layout::TensorCoord const & | extent | | | ) | | |

| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::nanh | ( | const char * | | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T cutlass::norm | ( | T const & | z | ) | |

template<>

| CUTLASS_HOST_DEVICE int8_t cutlass::norm | ( | int8_t const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE double cutlass::norm | ( | complex< T > const & | z | ) | |

template<typename T , typename R >

| CUTLASS_HOST_DEVICE R cutlass::norm_accumulate | ( | T const & | x, | | | | R const & | accumulator | | | ) | | |

template<typename T , typename R >

| CUTLASS_HOST_DEVICE R cutlass::norm_accumulate | ( | complex< T > const & | z, | | | | R const & | accumulator | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator!= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator* | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator*= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator+ | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator++ | ( | half_t & | lhs | ) | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator++ | ( | half_t & | lhs, | | | | int | | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator+= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator- | ( | half_t const & | lhs | ) | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator- | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator-- | ( | half_t & | lhs | ) | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator-- | ( | half_t & | lhs, | | | | int | | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator-= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

template<int Rank, typename Index >

| CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/ | ( | Index | s, | | | | Coord< Rank, Index > | coord | | | ) | | |

template<int Rank, typename Index >

| CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/ | ( | Coord< Rank, Index > | coord, | | | | Index | s | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator/ | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator/= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator< | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

template<int Rank>

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | Coord< Rank > const & | coord | | | ) | | |

| inline |

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | cudaError_t | result | | | ) | | |

| inline |

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | cuda_exception const & | e | | | ) | | |

| inline |

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | half_t const & | x | | | ) | | |

| inline |

template<typename T >

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< T > const & | scalar | | | ) | | |

| inline |

template<>

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< int8_t > const & | scalar | | | ) | | |

| inline |

template<>

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< uint8_t > const & | scalar | | | ) | | |

| inline |

template<typename Element , typename Layout >

|

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | TensorView< Element, Layout > const & | view | | | ) | | |

| inline |

template<typename T >

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | complex< T > const & | z | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator<= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator== | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator> | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator>= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

|

| std::istream& cutlass::operator>> | ( | std::istream & | stream, | | | | half_t & | x | | | ) | | |

| inline |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::polar | ( | T const & | r, | | | | T const & | theta = T() | | | ) | | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::proj | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE float const& cutlass::real | ( | cuFloatComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE float& cutlass::real | ( | cuFloatComplex & | z | ) | |

| CUTLASS_HOST_DEVICE double const& cutlass::real | ( | cuDoubleComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE double& cutlass::real | ( | cuDoubleComplex & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T const& cutlass::real | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T& cutlass::real | ( | complex< T > & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal | ( | T | a, | | | | T | b, | | | | T | epsilon, | | | | T | nonzero_floor | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< double > | ( | double | a, | | | | double | b, | | | | double | epsilon, | | | | double | nonzero_floor | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< float > | ( | float | a, | | | | float | b, | | | | float | epsilon, | | | | float | nonzero_floor | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< half_t > | ( | half_t | a, | | | | half_t | b, | | | | half_t | epsilon, | | | | half_t | nonzero_floor | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int16_t > | ( | int16_t | a, | | | | int16_t | b, | | | | int16_t | , | | | | int16_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int32_t > | ( | int32_t | a, | | | | int32_t | b, | | | | int32_t | , | | | | int32_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int4b_t > | ( | int4b_t | a, | | | | int4b_t | b, | | | | int4b_t | , | | | | int4b_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int64_t > | ( | int64_t | a, | | | | int64_t | b, | | | | int64_t | , | | | | int64_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int8_t > | ( | int8_t | a, | | | | int8_t | b, | | | | int8_t | , | | | | int8_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint16_t > | ( | uint16_t | a, | | | | uint16_t | b, | | | | uint16_t | , | | | | uint16_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint1b_t > | ( | uint1b_t | a, | | | | uint1b_t | b, | | | | uint1b_t | , | | | | uint1b_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint32_t > | ( | uint32_t | a, | | | | uint32_t | b, | | | | uint32_t | , | | | | uint32_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint4b_t > | ( | uint4b_t | a, | | | | uint4b_t | b, | | | | uint4b_t | , | | | | uint4b_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint64_t > | ( | uint64_t | a, | | | | uint64_t | b, | | | | uint64_t | , | | | | uint64_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint8_t > | ( | uint8_t | a, | | | | uint8_t | b, | | | | uint8_t | , | | | | uint8_t | | | | ) | | |

template<int Interleaved, typename Element , typename Layout >

| void cutlass::reorder_column | ( | TensorRef< Element, Layout > | dest, | | | | TensorRef< Element, Layout > | src, | | | | cutlass::gemm::GemmCoord | problem_size | | | ) | | |

template<typename dividend_t , typename divisor_t >

| CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest | ( | dividend_t | dividend, | | | | divisor_t | divisor | | | ) | | |

Rounds `dividend` up to the nearest multiple of `divisor`.

| CUTLASS_HOST_DEVICE bool cutlass::signbit | ( | cutlass::half_t const & | h | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::sin | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::sqrt | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::sqrt | ( | cutlass::half_t const & | h | ) | |

template<typename Element , typename Layout >

| bool cutlass::TensorRef_aligned | ( | TensorRef< Element, Layout > const & | ref, | | | | int | alignment | | | ) | | |

template<typename Element , typename Layout >

|

| std::ostream& cutlass::TensorViewWrite | ( | std::ostream & | out, | | | | TensorView< Element, Layout > const & | view | | | ) | | |

| inline |

<!-- contents --><!-- start footer part -->
<address class="footer"><small> Generated by Doxygen 1.8.11 </small></address>