CUTLASS: linear_combination_relu.h Source File - Cutlass

CUTLASS_HOST_DEVICE FragmentOutput operator()(FragmentAccumulator const &accumulator, FragmentOutput const &source, ElementCompute uniform=ElementCompute(0)) const

Computes linear scaling, D = alpha * accumulator + beta * source, then applies the ReLU threshold to the result.

Definition: linear_combination_relu.h:150

cutlass::epilogue::thread::LinearCombinationRelu::Params::Params

CUTLASS_HOST_DEVICE Params()

Definition: linear_combination_relu.h:87

cutlass

Definition: aligned_buffer.h:35

cutlass::epilogue::thread::LinearCombinationRelu::Params::Params

CUTLASS_HOST_DEVICE Params(ElementCompute const *alpha_ptr, ElementCompute const *beta_ptr, ElementCompute threshold=ElementCompute(0))

Definition: linear_combination_relu.h:104

cutlass::epilogue::thread::LinearCombinationRelu::Params::beta

ElementCompute beta

scales source tensor

Definition: linear_combination_relu.h:77

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::Params

CUTLASS_HOST_DEVICE Params(ElementCompute alpha, ElementCompute beta, ElementCompute threshold=ElementCompute(0))

Definition: linear_combination_relu.h:233

cutlass::epilogue::thread::LinearCombinationRelu::FragmentOutput

Array< ElementOutput, kCount > FragmentOutput

Definition: linear_combination_relu.h:67

cutlass::epilogue::thread::LinearCombinationRelu::FragmentAccumulator

Array< ElementAccumulator, kCount > FragmentAccumulator

Definition: linear_combination_relu.h:68

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::beta_ptr

ElementCompute const * beta_ptr

pointer to source scalar - if not null, loads it from memory

Definition: linear_combination_relu.h:218

cutlass::epilogue::thread::LinearCombinationRelu

Definition: linear_combination_relu.h:58

cutlass::maximum

Definition: functional.h:235

cutlass::epilogue::thread::LinearCombinationRelu::Params::beta_ptr

ElementCompute const * beta_ptr

pointer to source scalar - if not null, loads it from memory

Definition: linear_combination_relu.h:80

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::LinearCombinationRelu

CUTLASS_HOST_DEVICE LinearCombinationRelu(Params const &params)

Constructs the function object, possibly loading from pointers in host memory.

Definition: linear_combination_relu.h:265

cutlass::epilogue::thread::LinearCombinationRelu::ElementCompute

ElementCompute_ ElementCompute

Definition: linear_combination_relu.h:63

array.h

Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::set_k_partition

CUTLASS_HOST_DEVICE void set_k_partition(int k_partition)

Functionally required for serial reduction in the epilogue.

Definition: linear_combination_relu.h:280

numeric_conversion.h

Boost-like numeric conversion operator for CUTLASS numeric types.

nullptr

#define nullptr

nullptr

Definition: platform.h:144

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::alpha

ElementCompute alpha

scales accumulators

Definition: linear_combination_relu.h:214

cutlass::epilogue::thread::LinearCombinationRelu::LinearCombinationRelu

CUTLASS_HOST_DEVICE LinearCombinationRelu(Params const &params)

Constructs the function object, possibly loading from pointers in host memory.

Definition: linear_combination_relu.h:127

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::beta

ElementCompute beta

scales source tensor

Definition: linear_combination_relu.h:215

cutlass::epilogue::thread::LinearCombinationRelu::Params::threshold

ElementCompute threshold

Relu threshold.

Definition: linear_combination_relu.h:78

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::FragmentOutput

Array< ElementOutput, kCount > FragmentOutput

Definition: linear_combination_relu.h:205

cutlass::epilogue::thread::LinearCombinationRelu::Params::Params

CUTLASS_HOST_DEVICE Params(ElementCompute alpha, ElementCompute beta, ElementCompute threshold=ElementCompute(0))

Definition: linear_combination_relu.h:95

cutlass::multiplies

Definition: functional.h:64

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::epilogue::thread::LinearCombinationRelu::kRound

static FloatRoundStyle const kRound

Definition: linear_combination_relu.h:71

numeric_types.h

Top-level include for all CUTLASS numeric types.

cutlass::epilogue::thread::LinearCombinationRelu::ComputeFragment

Array< ElementCompute, kCount > ComputeFragment

Definition: linear_combination_relu.h:69

cutlass::epilogue::thread::LinearCombinationRelu::set_k_partition

CUTLASS_HOST_DEVICE void set_k_partition(int k_partition)

Functionally required for serial reduction in the epilogue.

Definition: linear_combination_relu.h:142

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::Params

CUTLASS_HOST_DEVICE Params()

Definition: linear_combination_relu.h:225

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::ComputeFragment

Array< ElementCompute, kCount > ComputeFragment

Definition: linear_combination_relu.h:207

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::ElementOutput

ElementOutput_ ElementOutput

Definition: linear_combination_relu.h:199

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::is_source_needed

CUTLASS_HOST_DEVICE bool is_source_needed() const

Returns true if source is needed.

Definition: linear_combination_relu.h:274

cutlass::FloatRoundStyle::round_to_nearest

round to nearest even

cutlass::FloatRoundStyle

FloatRoundStyle

Definition: numeric_conversion.h:43

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::alpha_ptr

ElementCompute const * alpha_ptr

pointer to accumulator scalar - if not null, loads it from memory

Definition: linear_combination_relu.h:217

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::Params

CUTLASS_HOST_DEVICE Params(ElementCompute const *alpha_ptr, ElementCompute const *beta_ptr, ElementCompute threshold=ElementCompute(0))

Definition: linear_combination_relu.h:242

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::Params::threshold

ElementCompute threshold

Relu threshold.

Definition: linear_combination_relu.h:216

cutlass::NumericArrayConverter

Conversion operator for Array.

Definition: numeric_conversion.h:294

cutlass::epilogue::thread::LinearCombinationRelu::Params::alpha

ElementCompute alpha

scales accumulators

Definition: linear_combination_relu.h:76

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::ElementAccumulator

int ElementAccumulator

Definition: linear_combination_relu.h:200

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::ElementCompute

float ElementCompute

Definition: linear_combination_relu.h:201

cutlass::epilogue::thread::LinearCombinationRelu::ElementAccumulator

ElementAccumulator_ ElementAccumulator

Definition: linear_combination_relu.h:62

cutlass::epilogue::thread::LinearCombinationRelu::is_source_needed

CUTLASS_HOST_DEVICE bool is_source_needed() const

Returns true if source is needed.

Definition: linear_combination_relu.h:136

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::operator()

CUTLASS_HOST_DEVICE FragmentOutput operator()(FragmentAccumulator const &accumulator, FragmentOutput const &source, ElementCompute uniform=ElementCompute(0)) const

Computes linear scaling, D = alpha * accumulator + beta * source, then applies the ReLU threshold to the result.

Definition: linear_combination_relu.h:288

cutlass::epilogue::thread::LinearCombinationRelu::kCount

static int const kCount

Definition: linear_combination_relu.h:65

cutlass.h

Basic include for CUTLASS.

cutlass::epilogue::thread::LinearCombinationRelu::Params::alpha_ptr

ElementCompute const * alpha_ptr

pointer to accumulator scalar - if not null, loads it from memory

Definition: linear_combination_relu.h:79

cutlass::epilogue::thread::LinearCombinationRelu< ElementOutput_, Count, int, float, Round >::FragmentAccumulator