CUTLASS: cutlass Namespace Reference - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Namespaces | Classes | Typedefs | Enumerations | Functions

cutlass Namespace Reference

Namespaces

Classes

Typedefs

Enumerations

| | enum | ComplexTransform { ComplexTransform::kNone, ComplexTransform::kConjugate } | | | Enumerated type describing a transformation on a complex value. More...
| | | | enum | Status {
Status::kSuccess, Status::kErrorMisalignedOperand, Status::kErrorInvalidLayout, Status::kErrorInvalidProblem,
Status::kErrorNotSupported, Status::kErrorWorkspaceNull, Status::kErrorInternal, Status::kInvalid
} | | | Status code returned by CUTLASS operations. More...
| | | | enum | MatrixLayout { MatrixLayout::kColumnMajor, MatrixLayout::kRowMajor } | | | | enum | MatrixTransform { MatrixTransform::kNone, MatrixTransform::kTranspose, MatrixTransform::kConjugate, MatrixTransform::kHermitian } | | | Transformation applied to matrix operands. More...
| | | | enum | FloatRoundStyle {
FloatRoundStyle::round_indeterminate, FloatRoundStyle::round_toward_zero, FloatRoundStyle::round_to_nearest, FloatRoundStyle::round_toward_infinity,
FloatRoundStyle::round_toward_neg_infinity, FloatRoundStyle::round_half_ulp_truncate
} | | |

Functions

Typedef Documentation

| using cutlass::bin1_t = typedef bool |

| using cutlass::int4b_t = typedef integer_subbyte<4, true> |

| using cutlass::uint1b_t = typedef integer_subbyte<1, false> |

| using cutlass::uint4b_t = typedef integer_subbyte<4, false> |

Enumeration Type Documentation

| enum cutlass::ComplexTransform |

| strong |

Enumerator
kNone
kConjugate

| enum cutlass::FloatRoundStyle |

| strong |

Floating-point rounding style similar to Standard Library's formats but supporting additional rounding options.

Enumerator
round_indeterminate

rounding mode unknown

| | round_toward_zero |

round toward zero

| | round_to_nearest |

round to nearest even

| | round_toward_infinity |

round toward infinity

| | round_toward_neg_infinity |

round toward negative infinity

| | round_half_ulp_truncate |

add 0.5ulp to integer representation then round toward zero

| enum cutlass::MatrixLayout |

| strong |

Enumerator
kColumnMajor
kRowMajor

| enum cutlass::MatrixTransform |

| strong |

Enumerator
kNone

no operation

| | kTranspose |

transpose operation

| | kConjugate |

conjugate

| | kHermitian |

conjugate transpose

| enum cutlass::Status |

| strong |

Enumerator
kSuccess

Operation was successful.

| | kErrorMisalignedOperand |

Operands fail alignment requirements.

| | kErrorInvalidLayout |

Layout fails alignment requirement.

| | kErrorInvalidProblem |

Specified problem size is not supported by operator.

| | kErrorNotSupported |

Operation is not supported on current device.

| | kErrorWorkspaceNull |

The given workspace is null when it is required to be non-null.

| | kErrorInternal |

An error within CUTLASS occurred.

| | kInvalid |

Status is unspecified.

Function Documentation

template<typename T >

| CUTLASS_HOST_DEVICE T cutlass::abs | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::abs | ( | cutlass::half_t const & | h | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T cutlass::arg | ( | complex< T > const & | z | ) | |

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::clz | ( | value_t | x | ) | |

log2 computation, what's the difference between the below codes and log2_up/down codes?

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::conj | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE constexpr int cutlass::const_max | ( | int | a, | | | | int | b | | | ) | | |

| CUTLASS_HOST_DEVICE constexpr int cutlass::const_min | ( | int | a, | | | | int | b | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::copysign | ( | half_t const & | a, | | | | half_t const & | b | | | ) | | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::cos | ( | complex< T > const & | z | ) | |

Returns: The CUDA error.

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::exp | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE void cutlass::fast_divmod | ( | int & | quo, | | | | int & | rem, | | | | int | src, | | | | int | div, | | | | unsigned int | mul, | | | | unsigned int | shr | | | ) | | |

Find quotient and remainder using device-side intrinsics

| CUTLASS_HOST_DEVICE void cutlass::fast_divmod | ( | int & | quo, | | | | int64_t & | rem, | | | | int64_t | src, | | | | int | div, | | | | unsigned int | mul, | | | | unsigned int | shr | | | ) | | |

| CUTLASS_HOST_DEVICE void cutlass::find_divisor | ( | unsigned int & | mul, | | | | unsigned int & | shr, | | | | unsigned int | denom | | | ) | | |

Find divisor, using find_log2

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::find_log2 | ( | value_t | x | ) | |

| CUTLASS_HOST_DEVICE constexpr unsigned cutlass::floor_pow_2 | ( | unsigned | x | ) | |

| CUTLASS_HOST_DEVICE int cutlass::fpclassify | ( | cutlass::half_t const & | h | ) | |

template<>

| CUTLASS_HOST_DEVICE cutlass::complex<double> cutlass::from_real< cutlass::complex< double > > | ( | double | r | ) | |

template<>

| CUTLASS_HOST_DEVICE cutlass::complex<float> cutlass::from_real< cutlass::complex< float > > | ( | double | r | ) | |

template<>

| CUTLASS_HOST_DEVICE cutlass::complex<half_t> cutlass::from_real< cutlass::complex< half_t > > | ( | double | r | ) | |

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::gcd | ( | value_t | a, | | | | value_t | b | | | ) | | |

Greatest common divisor

| CUTLASS_HOST_DEVICE float const& cutlass::imag | ( | cuFloatComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE float& cutlass::imag | ( | cuFloatComplex & | z | ) | |

| CUTLASS_HOST_DEVICE double const& cutlass::imag | ( | cuDoubleComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE double& cutlass::imag | ( | cuDoubleComplex & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T const& cutlass::imag | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T& cutlass::imag | ( | complex< T > & | z | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isfinite | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isinf | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isnan | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE bool cutlass::isnormal | ( | cutlass::half_t const & | h | ) | |

| CUTLASS_HOST_DEVICE constexpr bool cutlass::ispow2 | ( | unsigned | x | ) | |

template<typename Operator >

template<typename value_t >

| CUTLASS_HOST_DEVICE value_t cutlass::lcm | ( | value_t | a, | | | | value_t | b | | | ) | | |

Least common multiple

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::log | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::log10 | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE Coord<1> cutlass::make_Coord | ( | int | _0 | ) | |

| CUTLASS_HOST_DEVICE Coord<2> cutlass::make_Coord | ( | int | _0, | | | | int | _1 | | | ) | | |

| CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord | ( | int | _0, | | | | int | _1, | | | | int | _2 | | | ) | | |

| CUTLASS_HOST_DEVICE Coord<4> cutlass::make_Coord | ( | int | _0, | | | | int | _1, | | | | int | _2, | | | | int | _3 | | | ) | | |

template<typename Element , typename Layout >

| CUTLASS_HOST_DEVICE TensorRef<Element, Layout> cutlass::make_TensorRef | ( | Element * | ptr, | | | | Layout const & | layout | | | ) | | |

template<typename Element , typename Layout >

| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::nanh | ( | const char * | | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T cutlass::norm | ( | T const & | z | ) | |

template<>

| CUTLASS_HOST_DEVICE int8_t cutlass::norm | ( | int8_t const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE double cutlass::norm | ( | complex< T > const & | z | ) | |

template<typename T , typename R >

| CUTLASS_HOST_DEVICE R cutlass::norm_accumulate | ( | T const & | x, | | | | R const & | accumulator | | | ) | | |

template<typename T , typename R >

| CUTLASS_HOST_DEVICE R cutlass::norm_accumulate | ( | complex< T > const & | z, | | | | R const & | accumulator | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator!= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator* | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator*= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator+ | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator++ | ( | half_t & | lhs | ) | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator++ | ( | half_t & | lhs, | | | | int | | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator+= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator- | ( | half_t const & | lhs | ) | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator- | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator-- | ( | half_t & | lhs | ) | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator-- | ( | half_t & | lhs, | | | | int | | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator-= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

template<int Rank, typename Index >

| CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/ | ( | Index | s, | | | | Coord< Rank, Index > | coord | | | ) | | |

template<int Rank, typename Index >

| CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/ | ( | Coord< Rank, Index > | coord, | | | | Index | s | | | ) | | |

| CUTLASS_HOST_DEVICE half_t cutlass::operator/ | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE half_t& cutlass::operator/= | ( | half_t & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator< | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

template<int Rank>

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | Coord< Rank > const & | coord | | | ) | | |

| inline |

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | cudaError_t | result | | | ) | | |

| inline |

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | cuda_exception const & | e | | | ) | | |

| inline |

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | half_t const & | x | | | ) | | |

| inline |

template<typename T >

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< T > const & | scalar | | | ) | | |

| inline |

template<>

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< int8_t > const & | scalar | | | ) | | |

| inline |

template<>

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | ScalarIO< uint8_t > const & | scalar | | | ) | | |

| inline |

template<typename Element , typename Layout >

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | TensorView< Element, Layout > const & | view | | | ) | | |

| inline |

template<typename T >

| std::ostream& cutlass::operator<< | ( | std::ostream & | out, | | | | complex< T > const & | z | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator<= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator== | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator> | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| CUTLASS_HOST_DEVICE bool cutlass::operator>= | ( | half_t const & | lhs, | | | | half_t const & | rhs | | | ) | | |

| std::istream& cutlass::operator>> | ( | std::istream & | stream, | | | | half_t & | x | | | ) | | |

| inline |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::polar | ( | T const & | r, | | | | T const & | theta = T() | | | ) | | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::proj | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE float const& cutlass::real | ( | cuFloatComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE float& cutlass::real | ( | cuFloatComplex & | z | ) | |

| CUTLASS_HOST_DEVICE double const& cutlass::real | ( | cuDoubleComplex const & | z | ) | |

| CUTLASS_HOST_DEVICE double& cutlass::real | ( | cuDoubleComplex & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T const& cutlass::real | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE T& cutlass::real | ( | complex< T > & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal | ( | T | a, | | | | T | b, | | | | T | epsilon, | | | | T | nonzero_floor | | | ) | | |

template<>

template<>

template<>

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int16_t > | ( | int16_t | a, | | | | int16_t | b, | | | | int16_t | , | | | | int16_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int32_t > | ( | int32_t | a, | | | | int32_t | b, | | | | int32_t | , | | | | int32_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int4b_t > | ( | int4b_t | a, | | | | int4b_t | b, | | | | int4b_t | , | | | | int4b_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int64_t > | ( | int64_t | a, | | | | int64_t | b, | | | | int64_t | , | | | | int64_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int8_t > | ( | int8_t | a, | | | | int8_t | b, | | | | int8_t | , | | | | int8_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint16_t > | ( | uint16_t | a, | | | | uint16_t | b, | | | | uint16_t | , | | | | uint16_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint1b_t > | ( | uint1b_t | a, | | | | uint1b_t | b, | | | | uint1b_t | , | | | | uint1b_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint32_t > | ( | uint32_t | a, | | | | uint32_t | b, | | | | uint32_t | , | | | | uint32_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint4b_t > | ( | uint4b_t | a, | | | | uint4b_t | b, | | | | uint4b_t | , | | | | uint4b_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint64_t > | ( | uint64_t | a, | | | | uint64_t | b, | | | | uint64_t | , | | | | uint64_t | | | | ) | | |

template<>

| CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint8_t > | ( | uint8_t | a, | | | | uint8_t | b, | | | | uint8_t | , | | | | uint8_t | | | | ) | | |

template<int Interleaved, typename Element , typename Layout >

template<typename dividend_t , typename divisor_t >

Round dividend up to the nearest multiple of divisor

| CUTLASS_HOST_DEVICE bool cutlass::signbit | ( | cutlass::half_t const & | h | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::sin | ( | complex< T > const & | z | ) | |

template<typename T >

| CUTLASS_HOST_DEVICE complex<T> cutlass::sqrt | ( | complex< T > const & | z | ) | |

| CUTLASS_HOST_DEVICE cutlass::half_t cutlass::sqrt | ( | cutlass::half_t const & | h | ) | |

template<typename Element , typename Layout >

| bool cutlass::TensorRef_aligned | ( | TensorRef< Element, Layout > const & | ref, | | | | int | alignment | | | ) | | |

template<typename Element , typename Layout >

| std::ostream& cutlass::TensorViewWrite | ( | std::ostream & | out, | | | | TensorView< Element, Layout > const & | view | | | ) | | |

| inline |

Generated by Doxygen 1.8.11