CUTLASS: volta_tensor_op_policy.h Source File - Cutlass

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

volta_tensor_op_policy.h

[Go to the documentation of this file.](volta__tensor__op__policy_8h.html)

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

31 #pragma once

33 #include "cutlass/matrix_shape.h"

34 #include "cutlass/layout/matrix.h"

35 #include "cutlass/gemm/gemm.h"

39 namespace cutlass {

40 namespace epilogue {

41 namespace warp {

/// Policy details related to the epilogue.
///
/// Primary template: declared here without a body. The definitions visible in this
/// file are partial specializations that fix InterleavedTileShape, ElementC and Layout
/// and leave only the warp shape as a free parameter.
///
/// Template parameters (names as declared below):
///   WarpShape            - type providing kM / kN (the specializations read both)
///   InterleavedTileShape - type providing kM / kN (the specializations read both)
///   ElementC             - element type stored in the Fragment / AccumulatorTile arrays
///   Layout               - layout tag; only layout::RowMajor is specialized in this file
46 template <

47typename WarpShape,

48typename InterleavedTileShape,

49typename ElementC,

50typename Layout

51 >

52 struct VoltaTensorOpPolicy;

/// Partial specialization of the epilogue policy for InterleavedTileShape =
/// gemm::GemmShape<32, 32, 4>, ElementC = half_t and Layout = layout::RowMajor.
/// Only the warp shape (WarpShape_) remains a template parameter.
57 template <

58typename WarpShape_

59 >

60 struct VoltaTensorOpPolicy<WarpShape_, gemm::GemmShape<32, 32, 4>, half_t, layout::RowMajor> {

// Re-export the template arguments so users of the policy can name them.
62using WarpShape = WarpShape_;

63using InterleavedTileShape = gemm::GemmShape<32, 32, 4>;

64using ElementC = half_t;

65using Layout = layout::RowMajor;

// NOTE(review): presumably the shape of the underlying Volta tensor-op instruction - confirm.
68using InstructionShape = gemm::GemmShape<16, 16, 4>;

// Quotients of the interleaved tile's kM / kN by the instruction's kM / kN.
71using MmaIterations = MatrixShape<

72 InterleavedTileShape::kM / InstructionShape::kM,

73 InterleavedTileShape::kN / InstructionShape::kN

74 >;

// Quotients of the warp's kM / kN by the interleaved tile's kM / kN.
// NOTE(review): integer division; assumes WarpShape::kM and kN are multiples of the
// interleaved tile dimensions, nothing here enforces it - confirm against callers.
77using TileIterations = MatrixShape<

78 WarpShape::kM / InterleavedTileShape::kM,

79 WarpShape::kN / InterleavedTileShape::kN

80 >;

// Scales the AccumulatorTile element count below (elements contributed per MMA iteration).
83static int const kElementsPerMma = 8;

// Not referenced inside this struct; NOTE(review): presumably consumed by the epilogue
// iterators that take this policy - confirm.
84static int const kRowsPerIteration = 16;

86//

87// Hard-coded constants regarding Tensor Operations

88//

// Width, in elements, of AccessType below.
91static int const kElementsPerAccess = 4;

// Multiplier in the Fragment element count below.
94static int const kAccessesPerInterleavedTile = 4;

// TileIterations::kRow times a literal 2.
// NOTE(review): the float specialization in this file writes the second factor as
// MmaIterations::kRow; whether the literal here stands for the same quantity is not
// stated - confirm.
97static int const kIterations = TileIterations::kRow * 2;

99//

100// Derived types

101//

102

// Aligned array of kElementsPerAccess elements of ElementC.
104using AccessType = AlignedArray<ElementC, kElementsPerAccess>;

105

/// This is the fragment size produced by one access of the iterator.
// With the constants above this is 4 * 4 * TileIterations::kColumn elements.
107using Fragment = Array<

108 ElementC,

109 kElementsPerAccess * kAccessesPerInterleavedTile * TileIterations::kColumn>;

110

/// This is the complete warp-level accumulator tile.
112using AccumulatorTile = Array<

113 ElementC,

114 TileIterations::kCount * MmaIterations::kCount * kElementsPerMma>;

115 };

116

118

/// Partial specialization of the epilogue policy for InterleavedTileShape =
/// gemm::GemmShape<32, 32, 4>, ElementC = float and Layout = layout::RowMajor.
/// Only the warp shape (WarpShape_) remains a template parameter.
120 template <

121typename WarpShape_

122 >

123 struct VoltaTensorOpPolicy<WarpShape_, gemm::GemmShape<32, 32, 4>, float, layout::RowMajor> {

124

// Re-export the template arguments so users of the policy can name them.
125using WarpShape = WarpShape_;

126using InterleavedTileShape = gemm::GemmShape<32, 32, 4>;

127using ElementC = float;

128using Layout = layout::RowMajor;

129

// NOTE(review): presumably the shape of the underlying Volta tensor-op instruction - confirm.
131using InstructionShape = gemm::GemmShape<16, 16, 4>;

132

// Quotients of the interleaved tile's kM / kN by the instruction's kM / kN.
134using MmaIterations = MatrixShape<

135 InterleavedTileShape::kM / InstructionShape::kM,

136 InterleavedTileShape::kN / InstructionShape::kN

137 >;

138

// Quotients of the warp's kM / kN by the interleaved tile's kM / kN.
// NOTE(review): integer division; assumes WarpShape::kM and kN are multiples of the
// interleaved tile dimensions, nothing here enforces it - confirm against callers.
140using TileIterations = MatrixShape<

141 WarpShape::kM / InterleavedTileShape::kM,

142 WarpShape::kN / InterleavedTileShape::kN

143 >;

144

// Scales the AccumulatorTile element count below (elements contributed per MMA iteration).
146static int const kElementsPerMma = 8;

// Not referenced inside this struct; NOTE(review): presumably consumed by the epilogue
// iterators that take this policy - confirm.
147static int const kRowsPerIteration = 16;

148

149//

150// Hard-coded constants regarding Tensor Operations

151//

152

// Width, in elements, of AccessType below.
154static int const kElementsPerAccess = 2;

155

// Multiplier in the Fragment element count below.
157static int const kAccessesPerInterleavedTile = 8;

158

// Not referenced inside this struct; NOTE(review): meaning is not visible here - confirm
// against the iterators that use this policy.
160static int const kRowsPerMmaTile = 2;

161

// Product of the row counts of TileIterations and MmaIterations.
163static int const kIterations = TileIterations::kRow * MmaIterations::kRow;

164

165//

166// Derived types

167//

168

// Aligned array of kElementsPerAccess elements of ElementC.
170using AccessType = AlignedArray<ElementC, kElementsPerAccess>;

171

/// This is the fragment size produced by one access of the iterator.
// With the constants above this is 2 * 8 * TileIterations::kColumn elements.
173using Fragment = Array<

174 ElementC,

175 kElementsPerAccess * kAccessesPerInterleavedTile * TileIterations::kColumn>;

176

/// This is the complete warp-level accumulator tile.
178using AccumulatorTile = Array<

179 ElementC,

180 TileIterations::kCount * MmaIterations::kCount * kElementsPerMma>;

181 };

182

184

185 } // namespace warp

186 } // namespace epilogue

187 } // namespace cutlass

188

cutlass::MatrixShape

Describes the size of a matrix tile.

Definition: matrix_shape.h:42

cutlass

Definition: aligned_buffer.h:35

cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Fragment

Array< ElementC, kElementsPerAccess *kAccessesPerInterleavedTile *TileIterations::kColumn > Fragment

This is the fragment size produced by one access of the iterator.

Definition: volta_tensor_op_policy.h:175

cutlass::AlignedArray

Aligned array type.

Definition: array.h:511

cutlass::half_t

IEEE half-precision floating-point type.

Definition: half.h:126

gemm.h

Defines common types used for all GEMM-like operators.

matrix_shape.h

Defines a Shape template for matrix tiles.

cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::WarpShape

WarpShape_ WarpShape

Definition: volta_tensor_op_policy.h:125

cutlass::gemm::GemmShape

Shape of a matrix multiply-add operation.

Definition: include/cutlass/gemm/gemm.h:57

cutlass::layout::RowMajor

Mapping function for row-major matrices.

Definition: layout/matrix.h:50

cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Fragment

Array< ElementC, kElementsPerAccess *kAccessesPerInterleavedTile *TileIterations::kColumn > Fragment

This is the fragment size produced by one access of the iterator.

Definition: volta_tensor_op_policy.h:109

matrix.h

Defines layout functions used by TensorRef and derived classes.

cutlass::epilogue::warp::VoltaTensorOpPolicy

Policy details related to the epilogue.

Definition: volta_tensor_op_policy.h:52

cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::AccumulatorTile

Array< ElementC, TileIterations::kCount *MmaIterations::kCount *kElementsPerMma > AccumulatorTile

This is the complete warp-level accumulator tile.

Definition: volta_tensor_op_policy.h:180

cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::AccumulatorTile

Array< ElementC, TileIterations::kCount *MmaIterations::kCount *kElementsPerMma > AccumulatorTile

This is the complete warp-level accumulator tile.

Definition: volta_tensor_op_policy.h:114

cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::WarpShape

WarpShape_ WarpShape

Definition: volta_tensor_op_policy.h:62

Generated by Doxygen 1.8.11