CUTLASS: gemv.h Source File - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

gemv.h

Go to the documentation of this file.

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

31 #include "cutlass/cutlass.h"

32 #include "cutlass/array.h"

33 #include "cutlass/numeric_types.h"

34 #include "cutlass/matrix_shape.h"

36 #include "cutlass/gemm/gemm.h"

40 namespace cutlass {

41 namespace gemm {

42 namespace threadblock {

/// Structure to compute the matrix-vector product using SIMT math instructions.
///
/// Every type this class uses (tile shape, math operator, operand iterators)
/// is taken from the single policy parameter Core_.
47 template <

48class Core_ ///< Policy type; must provide Shape, Operator, IteratorA, IteratorB, IteratorC and ThreadShape

49 >

50 class Gemv {

51 public:

/// Tile shape taken from Core_; its kK member is the step by which the
/// mainloop walks the K dimension.
52using Shape = typename Core_::Shape;

/// The MMA operator that computes GEMV.
55using Operator = typename Core_::Operator;

/// Iterates over A in global memory.
58using IteratorA = typename Core_::IteratorA;

/// Iterates over B in global memory.
61using IteratorB = typename Core_::IteratorB;

/// Iterator type for operand C, as named by Core_. It is only re-exported
/// here; nothing in this class's body uses it.
64using IteratorC = typename Core_::IteratorC;

/// Fragment of operand A loaded from global memory.
67using FragmentA = typename IteratorA::Fragment;

/// Fragment of operand B loaded from global memory.
70using FragmentB = typename IteratorB::Fragment;

/// Accumulator fragment type, as defined by the MMA operator.
73using FragmentC = typename Operator::FragmentC;

/// Shape of the per-thread GEMV operation. Only re-exported here; nothing in
/// this class's body uses it.
76using ThreadShape = typename Core_::ThreadShape;

78 public:

/// Default constructor; the class holds no state.
79 CUTLASS_DEVICE

80 Gemv() { }

/// Accumulates the product of the A and B fragments into `accum`, stepping
/// over the K dimension in increments of Shape::kK.
///
/// The iterators are taken by value, so the advances performed here act on
/// this function's own copies.
///
/// NOTE(review): `src_accum` is never read in this body. Each step calls
/// thread_mma with `accum` as both the first and last argument, so the
/// accumulation starts from whatever `accum` holds on entry. Confirm callers
/// rely on that (e.g. by passing the same fragment for both) before changing.
82 CUTLASS_DEVICE

83void operator()(

/// Problem extent; only k() is read here.
84GemmCoord const &problem_size,

/// Accumulator fragment, read and updated in place on every K step.
85FragmentC &accum,

/// Iterator over operand A (local copy, advanced once per load).
86IteratorA iterator_A,

/// Iterator over operand B (local copy, advanced once per load).
87IteratorB iterator_B,

/// Source accumulator fragment; currently unused (see NOTE above).
88FragmentC const &src_accum) {

90//

91// Prologue

92//

94FragmentA frag_A;

95FragmentB frag_B;

// Clear both fragments before the first load (presumably so that elements a
// masked load skips still hold a defined value -- TODO confirm against the
// iterator implementation).
96 frag_A.clear();

97 frag_B.clear();

// Fetch the first pair of fragments and step both iterators past them.
99 iterator_A.load(frag_A);

100 iterator_B.load(frag_B);

101 ++iterator_A;

102 ++iterator_B;

103

104//

105// Mainloop

106//

107Operator thread_mma;

// Remaining extent along K; reduced by Shape::kK on every loop iteration.
108int gemm_k = problem_size.k();

109

// Less than one full K tile in total: clear both iterators' masks before any
// further load is issued (presumably this disables the later loads -- confirm
// against the iterators' clear_mask()).
110if (gemm_k < Shape::kK)

111 {

112 iterator_A.clear_mask();

113 iterator_B.clear_mask();

114 }

115

116// iterate over K to accumulate result

117CUTLASS_GEMM_LOOP

118for (; gemm_k > 0; gemm_k -= Shape::kK) {

// Consume the fragments fetched by the previous load; accum is both the
// input and the output of the operator.
119 thread_mma(accum, frag_A, frag_B, accum);

120

// Fetch the fragments for the next iteration and advance. Note this load is
// also issued on the final iteration, where its result is never consumed.
121 iterator_A.load(frag_A);

122 iterator_B.load(frag_B);

123 ++iterator_A;

124 ++iterator_B;

125

// Same check as before the loop, evaluated with the value gemm_k has before
// this iteration's decrement.
126if (gemm_k < Shape::kK)

127 {

128 iterator_A.clear_mask();

129 iterator_B.clear_mask();

130 }

131 }

132

133 }

134 };

135

137

138 } // namespace threadblock

139 } // namespace gemm

140 } // namespace cutlass

cutlass

Definition: aligned_buffer.h:35

cutlass::gemm::threadblock::Gemv::Gemv

CUTLASS_DEVICE Gemv()

Definition: gemv.h:80

cutlass::gemm::threadblock::Gemv::FragmentB

typename IteratorB::Fragment FragmentB

Fragment of operand B loaded from global memory.

Definition: gemv.h:70

cutlass::gemm::GemmCoord

Definition: include/cutlass/gemm/gemm.h:94

gemm.h

Defines common types used for all GEMM-like operators.

cutlass::gemm::threadblock::Gemv::Operator

typename Core_::Operator Operator

The MMA operator that computes GEMV.

Definition: gemv.h:55

cutlass::gemm::GemmCoord::k

CUTLASS_HOST_DEVICE Index const & k() const

Returns the GEMM K coordinate.

Definition: include/cutlass/gemm/gemm.h:145

array.h

Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

cutlass::gemm::threadblock::Gemv

Structure to compute the matrix-vector product using SIMT math instructions.

Definition: gemv.h:50

matrix_shape.h

Defines a Shape template for matrix tiles.

numeric_types.h

Top-level include for all CUTLASS numeric types.

cutlass::gemm::threadblock::Gemv::ThreadShape

typename Core_::ThreadShape ThreadShape

Shape of the per-thread GEMV operation.

Definition: gemv.h:76

cutlass::gemm::threadblock::Gemv::IteratorA

typename Core_::IteratorA IteratorA

Iterates over A in global memory.

Definition: gemv.h:58

cutlass::gemm::threadblock::Gemv::FragmentC

typename Operator::FragmentC FragmentC

Fragment of operand accumulator loaded/stored to global memory.

Definition: gemv.h:73

cutlass::gemm::threadblock::Gemv::IteratorB

typename Core_::IteratorB IteratorB

Iterates over B in global memory.

Definition: gemv.h:61

CUTLASS_GEMM_LOOP

#define CUTLASS_GEMM_LOOP

Definition: cutlass.h:112

cutlass::gemm::threadblock::Gemv::FragmentA

typename IteratorA::Fragment FragmentA

Fragment of operand A loaded from global memory.

Definition: gemv.h:67

cutlass::gemm::threadblock::Gemv::operator()

CUTLASS_DEVICE void operator()(GemmCoord const &problem_size, FragmentC &accum, IteratorA iterator_A, IteratorB iterator_B, FragmentC const &src_accum)

Definition: gemv.h:83

cutlass.h

Basic include for CUTLASS.

cutlass::gemm::threadblock::Gemv::Shape

typename Core_::Shape Shape

Definition: gemv.h:52

cutlass::gemm::threadblock::Gemv::IteratorC

typename Core_::IteratorC IteratorC

Iterates over operand C.

Definition: gemv.h:64

Generated by Doxygen 1.8.11