docs/fragment__iterator__tensor__op_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
fragment_iterator_tensor_op.h
[Go to the documentation of this file.](fragment__iterator__tensor__op_8h.html)
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
38 #pragma once
39
40 #include "cutlass/array.h"
41 #include "cutlass/layout/matrix.h"
42
43 #include "[cutlass/epilogue/warp/tensor_op_policy.h](tensor__op__policy_8h.html)"
44
46
47 namespace cutlass {
48 namespace epilogue {
49 namespace warp {
50
52
/// Iterator presenting a warp-level TensorOp accumulator tile as a sequence of
/// fragments; specialized below for each supported output layout.
template <
  typename WarpShape,         ///< warp-level GEMM tile shape
  typename OperatorShape,     ///< shape of one TensorOp instruction
  typename OperatorElementC,  ///< accumulator element type
  typename OperatorFragmentC, ///< accumulator fragment of one instruction
  typename Layout             ///< target layout of the epilogue output
>
class FragmentIteratorTensorOp;
62
64
66 template <
67typename WarpShape_,
68typename OperatorShape_,
69typename OperatorElementC_,
70typename OperatorFragmentC_
71 >
72 class FragmentIteratorTensorOp<WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_, layout::RowMajor> {
73 public:
74
75using WarpShape = WarpShape_;
76using OperatorShape = OperatorShape_;
77using OperatorElementC = OperatorElementC_;
78using OperatorFragmentC = OperatorFragmentC_;
79using Layout = layout::RowMajor;
80
81using Policy = TensorOpPolicy<WarpShape, OperatorShape, Layout>;
82
84using Fragment = Array<
85 OperatorElementC,
86 Policy::OperatorCount::kColumn * Policy::kElementsPerAccess>;
87
89using AccumulatorTile = Array<
90 OperatorElementC,
91 OperatorFragmentC::kElements * Policy::OperatorCount::kRow * Policy::OperatorCount::kColumn>;
92
93using OutputAccumulatorTile = AccumulatorTile;
94
96static int const kIterations = Policy::kIterations;
97
98 private:
99
101using AccessType = Array<OperatorElementC, Policy::kElementsPerAccess>;
102
103 private:
104
105//
106// Data members
107//
108
110 AccessType const *accumulators_;
111
113int index_;
114
115 public:
116
119FragmentIteratorTensorOp(AccumulatorTile const &accum):
120 accumulators_(reinterpret_cast<AccessType const *>(&accum)),
121 index_(0) {
122 }
123
126FragmentIteratorTensorOp &operator++() {
127 ++index_;
128return *this;
129 }
130
133FragmentIteratorTensorOp &operator--() {
134 --index_;
135return *this;
136 }
137
140void load(Fragment &frag, int index_offset = 0) const {
141
142int index = index_ + index_offset;
143
144 AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
145
147for (int n = 0; n < Policy::OperatorCount::kColumn; ++n) {
148
149int accumulator_access_offset =
150 index + n * Policy::kAccumulatorColumnStride / Policy::kElementsPerAccess;
151
152 frag_ptr[n] = accumulators_[accumulator_access_offset];
153 }
154 }
155 };
156
158
160 template <
162typename WarpShape_,
164typename OperatorShape_,
166typename OperatorElementC_,
168typename OperatorFragmentC_,
170int InterleavedK>
171 class FragmentIteratorTensorOp<WarpShape_, OperatorShape_, OperatorElementC_, OperatorFragmentC_,
172 layout::ColumnMajorInterleaved<InterleavedK>> {
173public:
174using WarpShape = WarpShape_;
175using OperatorShape = OperatorShape_;
176using OperatorElementC = OperatorElementC_;
177using OperatorFragmentC = OperatorFragmentC_;
178static int const kInterleavedK = InterleavedK;
179using Layout = layout::ColumnMajorInterleaved<kInterleavedK>;
180
181using Policy = TensorOpPolicy<WarpShape, OperatorShape, Layout>;
182
184using Fragment =
185 Array<OperatorElementC,
186 Policy::kElementsPerAccess * InterleavedK / OperatorShape::kN>;
187
189using AccumulatorTile =
190 Array<OperatorElementC, OperatorFragmentC::kElements *
191 Policy::OperatorCount::kRow *
192 Policy::OperatorCount::kColumn>;
193
195static int const kIterations = Policy::kIterations;
196
197private:
199using AccessType =
200 Array<OperatorElementC, Policy::kElementsPerAccess>;
201
202private:
203//
204// Data members
205//
206
208 AccessType const *accumulators_;
209
211int index_;
212
213public:
216FragmentIteratorTensorOp(AccumulatorTile const &accum)
217 : accumulators_(reinterpret_cast<AccessType const *>(&accum)),
218 index_(0) {}
219
222FragmentIteratorTensorOp &operator++() {
223 ++index_;
224return *this;
225 }
226
229FragmentIteratorTensorOp &operator--() {
230 --index_;
231return *this;
232 }
233
236void load(Fragment &frag, int index_offset = 0) const {
237int index = index_ + index_offset;
238
239 AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
240
242for (int n = 0; n < (InterleavedK / OperatorShape::kN); ++n) {
243int index_m = index % (Policy::OperatorCount::kRow *
244 Policy::kIterationsPerInstruction);
245int index_n = index / (Policy::OperatorCount::kRow *
246 Policy::kIterationsPerInstruction);
247int accumulator_access_offset =
248 (index_m / Policy::kIterationsPerInstruction) *
249 (Policy::OperatorCount::kColumn *
250 Policy::kIterationsPerInstruction) +
251 (index_m % Policy::kIterationsPerInstruction) +
252 index_n * (InterleavedK / OperatorShape::kN) *
253 Policy::kIterationsPerInstruction +
254 n * Policy::kIterationsPerInstruction;
255
256 frag_ptr[n] = accumulators_[accumulator_access_offset];
257 }
258 }
259 };
260
262
263 } // namespace warp
264 } // namespace epilogue
265 } // namespace cutlass
266
WarpShape_ WarpShape
Definition: fragment_iterator_tensor_op.h:75
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const
Loads a fragment from the referenced part of the accumulator tile.
Definition: fragment_iterator_tensor_op.h:140
Definition: aligned_buffer.h:35
[tensor_op_policy.h](tensor__op__policy_8h.html)
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration.
AccumulatorTile OutputAccumulatorTile
Definition: fragment_iterator_tensor_op.h:93
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const
Loads a fragment from the referenced part of the accumulator tile.
Definition: fragment_iterator_tensor_op.h:236
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator--()
Decrements.
Definition: fragment_iterator_tensor_op.h:229
WarpShape_ WarpShape
Definition: fragment_iterator_tensor_op.h:174
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Array< OperatorElementC, OperatorFragmentC::kElements *Policy::OperatorCount::kRow *Policy::OperatorCount::kColumn > AccumulatorTile
This is the complete warp-level accumulator tile.
Definition: fragment_iterator_tensor_op.h:91
Array< OperatorElementC, Policy::OperatorCount::kColumn *Policy::kElementsPerAccess > Fragment
This is the fragment size produced by one access of the iterator.
Definition: fragment_iterator_tensor_op.h:86
cutlass::epilogue::warp::TensorOpPolicy
Policy details related to the epilogue.
Definition: tensor_op_policy.h:50
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator++()
Increments.
Definition: fragment_iterator_tensor_op.h:222
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp(AccumulatorTile const &accum)
Constructs an iterator.
Definition: fragment_iterator_tensor_op.h:119
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp(AccumulatorTile const &accum)
Constructs an iterator.
Definition: fragment_iterator_tensor_op.h:216
cutlass::epilogue::warp::FragmentIteratorTensorOp
Definition: fragment_iterator_tensor_op.h:61
Array< OperatorElementC, Policy::kElementsPerAccess *InterleavedK/OperatorShape::kN > Fragment
This is the fragment size produced by one access of the iterator.
Definition: fragment_iterator_tensor_op.h:186
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator++()
Increments.
Definition: fragment_iterator_tensor_op.h:126
Defines layout functions used by TensorRef and derived classes.
cutlass::layout::ColumnMajorInterleaved
Definition: layout/matrix.h:343
Array< OperatorElementC, OperatorFragmentC::kElements *Policy::OperatorCount::kRow *Policy::OperatorCount::kColumn > AccumulatorTile
This is the complete warp-level accumulator tile.
Definition: fragment_iterator_tensor_op.h:192
CUTLASS_HOST_DEVICE FragmentIteratorTensorOp & operator--()
Decrements.
Definition: fragment_iterator_tensor_op.h:133
Generated by 1.8.11