docs/fragment__iterator__tensor__op_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
fragment_iterator_tensor_op.h
[Go to the documentation of this file.](fragment__iterator__tensor__op_8h.html)
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
38 #pragma once
39
40 #include "cutlass/array.h"
41 #include "cutlass/layout/matrix.h"
42
43 #include "[cutlass/epilogue/warp/tensor_op_policy.h](tensor__op__policy_8h.html)"
44
46
47 namespace cutlass {
48 namespace epilogue {
49 namespace warp {
50
52
/// Iterator presenting a warp-level TensorOp accumulator tile as a sequence of
/// fragments; specialized below for each supported output layout.
template <
  typename WarpShape,         ///< warp-level GEMM tile shape
  typename OperatorShape,     ///< shape of one TensorOp instruction
  typename OperatorElementC,  ///< accumulator element type
  typename OperatorFragmentC, ///< accumulator fragment of one instruction
  typename Layout             ///< target layout of the epilogue output
>
class FragmentIteratorTensorOp;
62
64
66 template <
67typename WarpShape_,
68typename OperatorShape_,
69typename OperatorElementC_,
70typename OperatorFragmentC_
71 >
72 class FragmentIteratorTensorOp<WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor> {
73 public:
74
75using WarpShape = WarpShape_;
76using OperatorShape = OperatorShape_;
77using OperatorElementC = OperatorElementC_;
78using OperatorFragmentC = OperatorFragmentC_;
79using Layout = layout::RowMajor;
80
81using Policy = TensorOpPolicy<WarpShape, OperatorShape, Layout>;
82
84using Fragment = Array<
85 OperatorElementC,
86 Policy::OperatorCount::kColumn * Policy::kElementsPerAccess>;
87
89using AccumulatorTile = Array<
90 OperatorElementC,
91 OperatorFragmentC::kElements * Policy::OperatorCount::kRow * Policy::OperatorCount::kColumn>;
92
93using OutputAccumulatorTile = AccumulatorTile;
94
96static int const kIterations = Policy::kIterations;
97
98 private:
99
101using AccessType = Array<OperatorElementC, Policy::kElementsPerAccess>;
102
103 private:
104
105//
106// Data members
107//
108
110 AccessType const *accumulators_;
111
113int index_;
114
115 public:
116
119FragmentIteratorTensorOp(AccumulatorTile const &accum):
120 accumulators_(reinterpret_cast<AccessType const *>(&accum)),
121 index_(0) {
122 }
123
126FragmentIteratorTensorOp &operator++() {
127 ++index_;
128return *this;
129 }
130
133FragmentIteratorTensorOp &operator--() {
134 --index_;
135return *this;
136 }
137
140void load(Fragment &frag, int index_offset = 0) const {
141
142int index = index_ + index_offset;
143
144 AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
145
147for (int n = 0; n < Policy::OperatorCount::kColumn; ++n) {
148
149int accumulator_access_offset =
150 index + n * Policy::kAccumulatorColumnStride / Policy::kElementsPerAccess;
151
152 frag_ptr[n] = accumulators_[accumulator_access_offset];
153 }
154 }
155 };
156
158
160 template <
162typename WarpShape_,
164typename OperatorShape_,
166typename OperatorElementC_,
168typename OperatorFragmentC_,
170int InterleavedK>
171 class FragmentIteratorTensorOp<WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_,
172 layout::ColumnMajorInterleaved<InterleavedK>> {
173public:
174using WarpShape = WarpShape_;
175using OperatorShape = OperatorShape_;
176using OperatorElementC = OperatorElementC_;
177using OperatorFragmentC = OperatorFragmentC_;
178static int const kInterleavedK = InterleavedK;
179using Layout = layout::ColumnMajorInterleaved<kInterleavedK>;
180
181using Policy = TensorOpPolicy<WarpShape, OperatorShape, Layout>;
182
184using Fragment =
185 Array<OperatorElementC,
186 Policy::kElementsPerAccess * InterleavedK / OperatorShape::kN>;
187
189using AccumulatorTile =
190 Array<OperatorElementC, OperatorFragmentC::kElements *
191 Policy::OperatorCount::kRow *
192 Policy::OperatorCount::kColumn>;
193
195static int const kIterations = Policy::kIterations;
196
197private:
199using AccessType =
200 Array<OperatorElementC, Policy::kElementsPerAccess>;
201
202private:
203//
204// Data members
205//
206
208 AccessType const *accumulators_;
209
211int index_;
212
213public:
216FragmentIteratorTensorOp(AccumulatorTile const &accum)
217 : accumulators_(reinterpret_cast<AccessType const *>(&accum)),
218 index_(0) {}
219
222FragmentIteratorTensorOp &operator++() {
223 ++index_;
224return *this;
225 }
226
229FragmentIteratorTensorOp &operator--() {
230 --index_;
231return *this;
232 }
233
236void load(Fragment &frag, int index_offset = 0) const {
237int index = index_ + index_offset;
238
239 AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
240
242for (int n = 0; n < (InterleavedK / OperatorShape::kN); ++n) {
243int index_m = index % (Policy::OperatorCount::kRow *
244 Policy::kIterationsPerInstruction);
245int index_n = index / (Policy::OperatorCount::kRow *
246 Policy::kIterationsPerInstruction);
247int accumulator_access_offset =
248 (index_m / Policy::kIterationsPerInstruction) *
249 (Policy::OperatorCount::kColumn *
250 Policy::kIterationsPerInstruction) +
251 (index_m % Policy::kIterationsPerInstruction) +
252 index_n * (InterleavedK / OperatorShape::kN) *
253 Policy::kIterationsPerInstruction +
254 n * Policy::kIterationsPerInstruction;
255
256 frag_ptr[n] = accumulators_[accumulator_access_offset];
257 }
258 }
259 };
260
262
263 } // namespace warp
264 } // namespace epilogue
265 } // namespace cutlass
266
WarpShape_ WarpShape
Definition: fragment_iterator_tensor_op.h:75
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const
Loads a fragment from the referenced part of the accumulator tile.
Definition: fragment_iterator_tensor_op.h:140
Definition: aligned_buffer.h:35
[tensor_op_policy.h](tensor__op__policy_8h.html)
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration.
AccumulatorTile OutputAccumulatorTile
Definition: fragment_iterator_tensor_op.h:93
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const
Loads a fragment from the referenced part of the accumulator tile.
Definition: fragment_iterator_tensor_op.h:236
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator--()
Decrements.
Definition: fragment_iterator_tensor_op.h:229
WarpShape_ WarpShape
Definition: fragment_iterator_tensor_op.h:174
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Array< OperatorElementC, OperatorFragmentC::kElements *Policy::OperatorCount::kRow *Policy::OperatorCount::kColumn > AccumulatorTile
This is the complete warp-level accumulator tile.
Definition: fragment_iterator_tensor_op.h:91
Array< OperatorElementC, Policy::OperatorCount::kColumn *Policy::kElementsPerAccess > Fragment
This is the fragment size produced by one access of the iterator.
Definition: fragment_iterator_tensor_op.h:86
cutlass::epilogue::warp::TensorOpPolicy
Policy details related to the epilogue.
Definition: tensor_op_policy.h:50
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator++()
Increments.
Definition: fragment_iterator_tensor_op.h:222
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp(AccumulatorTile const &accum)
Constructs an iterator.
Definition: fragment_iterator_tensor_op.h:119
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp(AccumulatorTile const &accum)
Constructs an iterator.
Definition: fragment_iterator_tensor_op.h:216
cutlass::epilogue::warp::FragmentIteratorTensorOp
Definition: fragment_iterator_tensor_op.h:61
Array< OperatorElementC, Policy::kElementsPerAccess *InterleavedK/OperatorShape::kN > Fragment
This is the fragment size produced by one access of the iterator.
Definition: fragment_iterator_tensor_op.h:186
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator++()
Increments.
Definition: fragment_iterator_tensor_op.h:126
Defines layout functions used by TensorRef and derived classes.
cutlass::layout::ColumnMajorInterleaved
Definition: layout/matrix.h:343
Array< OperatorElementC, OperatorFragmentC::kElements *Policy::OperatorCount::kRow *Policy::OperatorCount::kColumn > AccumulatorTile
This is the complete warp-level accumulator tile.
Definition: fragment_iterator_tensor_op.h:192
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator--()
Decrements.
Definition: fragment_iterator_tensor_op.h:133
Generated by 1.8.11