CUTLASS: mma.h Source File - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

arch/mma.h

Go to the documentation of this file.

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

31 #include "cutlass/array.h"

32 #include "cutlass/numeric_types.h"

33 #include "cutlass/gemm/gemm.h"

37 namespace cutlass {

38 namespace arch {

/// Tag types, forward-declared only: none of them is given a definition in this
/// header, so here they are incomplete types that can be named (e.g. as template
/// arguments) but not instantiated.
///
/// NOTE(review): the Op* names (multiply-add, saturating multiply-add, xor+popcount)
/// presumably select the arithmetic requested through the `Operator` template
/// parameter of `Mma` - confirm against the per-architecture specializations.
43 struct OpMultiplyAdd;

48 struct OpMultiplyAddSaturate;

53 struct OpXorPopc;

/// NOTE(review): the OpClass* names presumably identify the class of hardware
/// instruction (SIMT / tensor op / WMMA tensor op) chosen by code outside this
/// header; nothing in this file consumes them - confirm against callers.
58 struct OpClassSimt;

63 struct OpClassTensorOp;

67 struct OpClassWmmaTensorOp;

/// Matrix multiply-add operation (primary template).
///
/// Only declared here: there is no generic definition, so every usable
/// combination of arguments must be supplied by a specialization. One is defined
/// later in this header; the `mma_sm*.h` headers included at the end of the file
/// are described there as "Specializations for each compute capability".
72 template <

/// Shape of the operation; the specialization in this header passes a gemm::GemmShape<...>
74typename Shape_,

/// Integer thread count (presumably the number of threads that cooperate on one
/// operation - TODO confirm); the specialization in this header uses 1
76int kThreads_,

/// Element type of the A operand
78typename ElementA,

/// Layout tag for the A operand
80typename LayoutA,

/// Element type of the B operand
82typename ElementB,

/// Layout tag for the B operand
84typename LayoutB,

/// Element type of the C operand (and of the result, in the specialization in this header)
86typename ElementC,

/// Layout tag for the C operand
88typename LayoutC,

/// Tag selecting the arithmetic performed (presumably one of the Op* tags - TODO confirm)
90typename Operator

91 >

92 struct Mma;

/// Partial specialization of Mma for a gemm::GemmShape<1, 1, 1> shape and a thread
/// count of 1: a single scalar multiply-add.
///
/// The element types stay generic. LayoutA, LayoutB, LayoutC and Operator take
/// part in matching the specialization but are not referenced anywhere in its
/// body, so the same arithmetic is performed whatever is passed for them.
97 template <

/// Element type of the A operand
99typename ElementA,

/// Layout tag for A (unused by this specialization)
101typename LayoutA,

/// Element type of the B operand
103typename ElementB,

/// Layout tag for B (unused by this specialization)
105typename LayoutB,

/// Element type of the C operand and of the result
107typename ElementC,

/// Layout tag for C (unused by this specialization)
109typename LayoutC,

/// Operation tag (unused by this specialization)
111typename Operator

112 >

113 struct Mma<gemm::GemmShape<1, 1, 1>, 1, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator> {

114

/// Shape handled by this specialization, re-exported as a member type
115using Shape = gemm::GemmShape<1, 1, 1>;

116

/// Computes d[0] = a[0] * b[0] + c[0].
///
/// \param d  receives the result (single element, written)
/// \param a  A operand (single element, read only)
/// \param b  B operand (single element, read only)
/// \param c  accumulator input (single element, read only)
///
/// The product is formed with whatever operator* exists for ElementA and
/// ElementB, then added to c[0] and assigned to an ElementC; any conversion
/// or rounding is therefore determined by those element types, not by this code.
///
/// NOTE(review): CUTLASS_HOST_DEVICE is a macro defined in cutlass.h; presumably
/// it marks the function callable from both host and device code - confirm.
117CUTLASS_HOST_DEVICE

118void operator()(

119 Array<ElementC, 1> &d,

120 Array<ElementA, 1> const &a,

121 Array<ElementB, 1> const &b,

122 Array<ElementC, 1> const &c

123 ) {

124

125 d[0] = a[0] * b[0] + c[0];

126 }

127 };

128

130

131 } // namespace arch

132 } // namespace cutlass

133

135

136 //

137 // Specializations for each compute capability

138 //

139

140 #include "cutlass/arch/mma_sm50.h"

141 #include "cutlass/arch/mma_sm60.h"

142 #include "cutlass/arch/mma_sm61.h"

143 #include "cutlass/arch/mma_sm70.h"

144 #include "cutlass/arch/mma_sm75.h"

mma_sm70.h

Matrix multiply.

cutlass

Definition: aligned_buffer.h:35

gemm.h

Defines common types used for all GEMM-like operators.

array.h

Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

mma_sm60.h

Matrix multiply.

mma_sm61.h

Matrix multiply.

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

numeric_types.h

Top-level include for all CUTLASS numeric types.

cutlass::gemm::GemmShape

Shape of a matrix multiply-add operation.

Definition: include/cutlass/gemm/gemm.h:57

cutlass::arch::Mma

Matrix multiply-add operation.

Definition: arch/mma.h:92

mma_sm75.h

Matrix multiply for SM75.

mma_sm50.h

Matrix multiply.

cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator >::operator()

CUTLASS_HOST_DEVICE void operator()(Array< ElementC, 1 > &d, Array< ElementA, 1 > const &a, Array< ElementB, 1 > const &b, Array< ElementC, 1 > const &c)

Definition: arch/mma.h:118

Generated by 1.8.11