docs/regular__tile__iterator__pitch__linear_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
regular_tile_iterator_pitch_linear.h
[Go to the documentation of this file.](regular tile iterator pitch linear_8h.html)
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
35 #pragma once
36
37 #include "cutlass/cutlass.h"
38 #include "cutlass/tensor_ref.h"
39 #include "cutlass/layout/matrix.h"
40 #include "cutlass/layout/pitch_linear.h"
41
42 #include "[regular_tile_iterator.h](regular tile iterator_8h.html)"
43
45
46 namespace cutlass {
47 namespace transform {
48 namespace threadblock {
49
51
53 template <
54typename Shape_,
55typename Element_,
56int AdvanceRank,
57typename ThreadMap_,
58int Alignment
59 >
[60](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html) class RegularTileIterator<Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment> {
61 public:
62
[63](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#accb13157290b31815c4eaa4f25cf2b0b)using [Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#accb13157290b31815c4eaa4f25cf2b0b) = Shape_;
[64](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aa09850c3fe1f9b2b61b32b42cc71c729)using [Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aa09850c3fe1f9b2b61b32b42cc71c729) = Element_;
[65](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a802d26c95029a3eab7632a845e04b6f0)using Layout = layout::PitchLinear;
[66](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5cfe40a22b6405c12cf6f17206ea87da)static int const kAdvanceRank = AdvanceRank;
[67](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f)using [ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f) = ThreadMap_;
[68](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a9995cd0108feb5cc5d4858181fd761b3)static int const kAlignment = Alignment;
69
[70](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144)using [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) = typename Layout::Index;
[71](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a)using [LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a) = typename Layout::LongIndex;
72
[73](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae8d92bf17068b69fe278ca39009456b3)using TensorRef = TensorRef<Element, Layout>;
[74](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae)using [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) = typename Layout::TensorCoord;
75
[76](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156)using [Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
77
78static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,
79"Advance rank may only be along the contiguous or strided dimensions.");
80
81 private:
82
83//
84// Types
85//
86
87using AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess, kAlignment>;
88
89//
90// Data members
91//
92
94 uint8_t *pointer_;
95
97[Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) stride_;
98
100[Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) increment_strided_;
101
103[Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) increment_advance_;
104
105 public:
106
107 CUTLASS_DEVICE
[108](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a77af9f2f9a530fbd63703bb83190135b)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a77af9f2f9a530fbd63703bb83190135b)(): pointer_(nullptr), increment_strided_(0), increment_advance_(0) { }
109
110 CUTLASS_DEVICE
[111](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aaeef1f0d4985612d1d68b6e52e0e0af8)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aaeef1f0d4985612d1d68b6e52e0e0af8)(
112TensorRef const &ref,
113int thread_idx
114 ):
115 pointer_(reinterpret_cast<uint8_t *>(ref.data()) + (ref.offset([ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f)::initial_offset(thread_idx)) * sizeof_bits<Element>::value / 8)) {
116
117 stride_ = ref.stride()[0];
118 increment_strided_ = (ref.stride()[0] * sizeof_bits<Element>::value) * ThreadMap::Delta::kStrided / 8;
119
120 increment_advance_ =
121 (kAdvanceRank == 0 ?
122 Shape::kContiguous * sizeof_bits<Element>::value / 8 :
123 Shape::kStrided * (ref.stride()[0] * sizeof_bits<Element>::value / 8));
124 }
125
127 CUTLASS_DEVICE
[128](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#adcf3e59a87e7cadf97f9b60c1170a94d)void [load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#adcf3e59a87e7cadf97f9b60c1170a94d)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) pointer_offset) {
129
130AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
131 uint8_t const *byte_pointer = pointer_ + pointer_offset * sizeof_bits<Element>::value / 8;
132
134for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
135
136AccessType const *access_ptr = reinterpret_cast<AccessType const *>(byte_pointer);
137
139for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
140
141int idx = c + s * ThreadMap::Iterations::kContiguous;
142 frag_ptr[idx] = access_ptr[c * ThreadMap::Delta::kContiguous];
143 }
144
145if (s + 1 < ThreadMap::Iterations::kStrided) {
146 byte_pointer += increment_strided_;
147 }
148 }
149 }
150
[153](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a66796895c463231ee0bd9fa34bc74aec)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a66796895c463231ee0bd9fa34bc74aec)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) const & tile_offset) {
154 load_with_pointer_offset(
155 frag,
156 tile_offset.contiguous() * Shape::kContiguous / ThreadMap::kElementsPerAccess +
157 tile_offset.strided() * Shape::kStrided * stride_
158 );
159 }
160
[163](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#af316509e35afa13869cf2ee4a91f3a29)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#af316509e35afa13869cf2ee4a91f3a29)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) &frag) {
164 load_with_pointer_offset(frag, 0);
165 }
166
[169](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ac90756fa87f005512468e8d3c6ad935a)void [store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ac90756fa87f005512468e8d3c6ad935a)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) const &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) pointer_offset) {
170
171AccessType const *frag_ptr = reinterpret_cast<AccessType const*>(&frag);
172 uint8_t *byte_pointer = pointer_ + pointer_offset * sizeof_bits<Element>::value / 8;
173
175for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
176
177AccessType *access_ptr = reinterpret_cast<AccessType *>(byte_pointer);
178
180for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
181
182int idx = c + s * ThreadMap::Iterations::kContiguous;
183 access_ptr[c * ThreadMap::Delta::kContiguous] = frag_ptr[idx];
184 }
185
186if (s + 1 < ThreadMap::Iterations::kStrided) {
187 byte_pointer += increment_strided_;
188 }
189 }
190 }
191
[194](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a8ced8039f5936ca545faa2fae38efcad)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a8ced8039f5936ca545faa2fae38efcad)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) const &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) const & tile_offset) {
195 store_with_pointer_offset(
196 frag,
197 tile_offset.contiguous() * Shape::kContiguous + tile_offset.strided() * Shape::kStrided * stride_
198 );
199 }
200
[203](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a768e8c118a274348eaaac9dd8ca058e7)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a768e8c118a274348eaaac9dd8ca058e7)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) const &frag) {
204 store_with_pointer_offset(frag, 0);
205 }
206
[209](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3874988a2d312ab538f242e16f6a3b83)RegularTileIterator &[operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3874988a2d312ab538f242e16f6a3b83)() {
210 pointer_ += increment_advance_;
211return *this;
212 }
213
[216](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae59497fa13e9b0d4c695ea7030cd2fca)RegularTileIterator &[operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae59497fa13e9b0d4c695ea7030cd2fca)() {
217 pointer_ -= increment_advance_;
218return *this;
219 }
220
[223](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ab41c9c517802602b07f0c9e487d4124f)void [add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ab41c9c517802602b07f0c9e487d4124f)([LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a) pointer_offset) {
224 pointer_ += pointer_offset;
225 }
226
228 CUTLASS_DEVICE
[229](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a672751196b76630f5b3c3f257de64bca)void [add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a672751196b76630f5b3c3f257de64bca)([TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) const &coord) {
230int offset = sizeof_bits<Element>::value *
231 (coord.contiguous() * Shape::kContiguous + coord.strided() * Shape::kStrided * stride_) / 8;
232 add_pointer_offset(offset);
233 }
234
235 };
236
238
240 template <
241typename Shape_,
242typename Element_,
243int AdvanceRank,
244typename ThreadMap_,
245int Alignment
246 >
[247](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html) class RegularTileIterator<Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment> {
248 public:
249
[250](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5f4a6ad99a9af0cd500c1894d4ff552)using [Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5f4a6ad99a9af0cd500c1894d4ff552) = Shape_;
[251](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241)using [Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241) = Element_;
[252](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e2e59ccbd6d1098913f2669cd69b6ea)using Layout = layout::RowMajor;
[253](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a1f7a2c177973a9c407df31ef4e7b900d)static int const kAdvanceRank = AdvanceRank;
[254](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb)using [ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb) = ThreadMap_;
[255](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a79f4dbf9da4bf9ba8b3b1559bea9835f)static int const kAlignment = Alignment;
256
[257](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8)using [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8) = typename Layout::Index;
[258](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e)using [LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e) = typename Layout::LongIndex;
259
[260](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0bdde358dd31c74b51ef2568e81a0f0b)using TensorRef = TensorRef<Element, Layout>;
[261](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c)using [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) = typename Layout::TensorCoord;
262
[263](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af)using [Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
264
265using Underlying = RegularTileIterator<
266layout::PitchLinearShape<Shape::kColumn, Shape::kRow>,
267[Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241),
269 (kAdvanceRank == 0 ? 1 : 0),
270[ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb),
271 kAlignment
[272](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a1d3913359089881a0bb91579dc0c7d65) >;
273
274static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,
275"Advance rank may only be along the row or column dimensions.");
276
277 private:
278
279Underlying iterator_;
280
281 public:
282
283 CUTLASS_DEVICE
[284](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#aad803b85a96da22725a335b911975718)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#aad803b85a96da22725a335b911975718)() { }
285
286 CUTLASS_DEVICE
[287](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e9c82df0fcde08f06a65d9efbefede6)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e9c82df0fcde08f06a65d9efbefede6)(
288TensorRef const &ref,
289int thread_idx
290 ):
291 iterator_({ref.data(), ref.stride()}, thread_idx) {
292
293 }
294
[297](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5b5df5c59e77b1f42c76f049248e12c)void [load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5b5df5c59e77b1f42c76f049248e12c)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8) pointer_offset) {
298 iterator_.load_with_pointer_offset(frag, pointer_offset);
299 }
300
[303](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ada36f68ddee26776788ff3b4a882364b)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ada36f68ddee26776788ff3b4a882364b)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) const & tile_offset) {
304 iterator_.load_with_pointer_offset(frag, {tile_offset.column(), tile_offset.row()});
305 }
306
[309](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a2cfc11a28f5d0f1f1186f31850c9c645)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a2cfc11a28f5d0f1f1186f31850c9c645)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) &frag) {
310 iterator_.load_with_pointer_offset(frag, 0);
311 }
312
[315](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a63b486d1d77de7d40efa3cefca144ef4)void [store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a63b486d1d77de7d40efa3cefca144ef4)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) const &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8) pointer_offset) {
316 iterator_.store_with_pointer_offset(frag, pointer_offset);
317 }
318
[321](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a33af1791659cf60aff5da1b71747f125)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a33af1791659cf60aff5da1b71747f125)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) const &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) const & tile_offset) {
322 iterator_.store_with_pointer_offset(frag, {tile_offset.column(), tile_offset.row()});
323 }
324
[327](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a607ac75aa0bc44400f6f80320134675a)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a607ac75aa0bc44400f6f80320134675a)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) const &frag) {
328 iterator_.store_with_pointer_offset(frag, 0);
329 }
330
[333](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4d9b37a6c2bed1209a61fdcbf225f691)RegularTileIterator &[operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4d9b37a6c2bed1209a61fdcbf225f691)() {
334 ++iterator_;
335return *this;
336 }
337
[340](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a8b62546dd2f7373ec5a45b0b309bbd64)RegularTileIterator &[operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a8b62546dd2f7373ec5a45b0b309bbd64)() {
341 --iterator_;
342return *this;
343 }
344
[347](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a7470527cd1b851258eee044faaed080f)void [add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a7470527cd1b851258eee044faaed080f)([LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e) pointer_offset) {
348 iterator_.add_pointer_offset(pointer_offset);
349 }
350
352 CUTLASS_DEVICE
[353](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a46c1d1c3d66ea73696b7fd8dd0dd72f9)void [add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a46c1d1c3d66ea73696b7fd8dd0dd72f9)([TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) const &coord) {
354 iterator_.add_tile_offset({coord.column(), coord.row()});
355 }
356
357 };
358
360
362 template <
363typename Shape_,
364typename Element_,
365int AdvanceRank,
366typename ThreadMap_,
367int Alignment
368 >
[369](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html) class RegularTileIterator<Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment> {
370 public:
371
[372](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a17ba448c167114fd55c9d527c8143bde)using [Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a17ba448c167114fd55c9d527c8143bde) = Shape_;
[373](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c)using [Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c) = Element_;
[374](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aa38acb9ec06003a9f1d0b727576fca33)using Layout = layout::ColumnMajor;
[375](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a780965d9703eeaeb65aa2b2b47bdd93a)static int const kAdvanceRank = AdvanceRank;
[376](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3)using [ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3) = ThreadMap_;
[377](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#acf5b3c865149cac8d1ec6ff4e96e275c)static int const kAlignment = Alignment;
378
[379](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10)using [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10) = typename Layout::Index;
[380](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948)using [LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948) = typename Layout::LongIndex;
381
[382](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a676aa87744c8f6fe933503513fd7d7cb)using TensorRef = TensorRef<Element, Layout>;
[383](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7)using [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) = typename Layout::TensorCoord;
384
[385](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3)using [Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
386
387using Underlying = RegularTileIterator<
388layout::PitchLinearShape<Shape::kRow, Shape::kColumn>,
389[Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c),
391 (kAdvanceRank == 0 ? 0 : 1),
392[ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3)
[393](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a36e5de138cd6bfc6614f6c96c84d59a8) >;
394
395static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,
396"Advance rank may only be along the row or column dimensions.");
397
398 private:
399
400Underlying iterator_;
401
402 public:
403
404 CUTLASS_DEVICE
[405](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a90dedd03e47edabf0f9fd6d51e70272e)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a90dedd03e47edabf0f9fd6d51e70272e)() { }
406
407 CUTLASS_DEVICE
[408](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aaa48072b3ce5b8753710ac6b8c4e99d7)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aaa48072b3ce5b8753710ac6b8c4e99d7)(
409TensorRef const &ref,
410int thread_idx
411 ):
412 iterator_({ref.data(), ref.stride()}, thread_idx) {
413
414 }
415
[418](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a176ee419e3705e96940b29c24121160c)void [load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a176ee419e3705e96940b29c24121160c)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10) pointer_offset) {
419 iterator_.load_with_pointer_offset(frag, pointer_offset);
420 }
421
[424](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a52146adf93a143916d527a153a3d9cb4)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a52146adf93a143916d527a153a3d9cb4)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) const & tile_offset) {
425 iterator_.load_with_pointer_offset(frag, {tile_offset.row(), tile_offset.column()});
426 }
427
[430](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a192d119355b9bfc22199ddf5e489eb38)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a192d119355b9bfc22199ddf5e489eb38)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) &frag) {
431 iterator_.load_with_pointer_offset(frag, 0);
432 }
433
[436](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a587114fd23909a534b5c1a77a39d5916)void [store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a587114fd23909a534b5c1a77a39d5916)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) const &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10) pointer_offset) {
437 iterator_.store_with_pointer_offset(frag, pointer_offset);
438 }
439
[442](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a8f5567f445b60e912c35be20983ae7c0)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a8f5567f445b60e912c35be20983ae7c0)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) const &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) const & tile_offset) {
443 iterator_.store_with_pointer_offset(frag, {tile_offset.row(), tile_offset.column()});
444 }
445
[448](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#add75f3a1ec8d7b64a717342ba2a14420)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#add75f3a1ec8d7b64a717342ba2a14420)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) const &frag) {
449 iterator_.store_with_pointer_offset(frag, 0);
450 }
451
[454](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a215a832690fd3f0b770cc64eabc3eb2f)RegularTileIterator &[operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a215a832690fd3f0b770cc64eabc3eb2f)() {
455 ++iterator_;
456return *this;
457 }
458
[461](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3b0cbdbbec0e63e759f86131cbcad32e)RegularTileIterator &[operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3b0cbdbbec0e63e759f86131cbcad32e)() {
462 --iterator_;
463return *this;
464 }
465
[468](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3aa895ffd980f4fe94862c0fe75f6cbe)void [add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3aa895ffd980f4fe94862c0fe75f6cbe)([LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948) pointer_offset) {
469 iterator_.add_pointer_offset(pointer_offset);
470 }
471
473 CUTLASS_DEVICE
[474](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3e078be0830303a162c3a0f91e017fc9)void [add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3e078be0830303a162c3a0f91e017fc9)([TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) const &coord) {
475 iterator_.add_tile_offset({coord.row(), coord.column()});
476 }
477
478 };
479
481
482 } // namespace threadblock
483 } // namespace transform
484 } // namespace cutlass
485
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#aad803b85a96da22725a335b911975718)
CUTLASS_DEVICE RegularTileIterator()
Definition: regular_tile_iterator_pitch_linear.h:284
cutlass::layout::RowMajor::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:62
Definition: aligned_buffer.h:35
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3aa895ffd980f4fe94862c0fe75f6cbe)
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_iterator_pitch_linear.h:468
cutlass::layout::PitchLinearCoord
Coordinate in pitch-linear space.
Definition: pitch_linear.h:52
Defines a structure containing strides, bounds, and a pointer to tensor data.
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a33af1791659cf60aff5da1b71747f125)
CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:321
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae59497fa13e9b0d4c695ea7030cd2fca)
CUTLASS_HOST_DEVICE RegularTileIterator & operator--()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:216
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ab41c9c517802602b07f0c9e487d4124f)
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_iterator_pitch_linear.h:223
CUTLASS_HOST_DEVICE Element * data() const
Returns the pointer to referenced data.
Definition: tensor_ref.h:254
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a)
typename Layout::LongIndex LongIndex
Definition: regular_tile_iterator_pitch_linear.h:71
Mapping function for pitch-linear memory.
Definition: pitch_linear.h:163
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aaeef1f0d4985612d1d68b6e52e0e0af8)
CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)
Definition: regular_tile_iterator_pitch_linear.h:111
cutlass::layout::ColumnMajor::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:154
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3e078be0830303a162c3a0f91e017fc9)
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_iterator_pitch_linear.h:474
Aligned array type.
Definition: array.h:511
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aa09850c3fe1f9b2b61b32b42cc71c729)
Element_ Element
Definition: regular_tile_iterator_pitch_linear.h:64
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c)
Element_ Element
Definition: regular_tile_iterator_pitch_linear.h:373
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a52146adf93a143916d527a153a3d9cb4)
CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:424
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4d9b37a6c2bed1209a61fdcbf225f691)
CUTLASS_HOST_DEVICE RegularTileIterator & operator++()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:333
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#accb13157290b31815c4eaa4f25cf2b0b)
Shape_ Shape
Definition: regular_tile_iterator_pitch_linear.h:63
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a90dedd03e47edabf0f9fd6d51e70272e)
CUTLASS_DEVICE RegularTileIterator()
Definition: regular_tile_iterator_pitch_linear.h:405
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c)
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_iterator_pitch_linear.h:261
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a192d119355b9bfc22199ddf5e489eb38)
CUTLASS_HOST_DEVICE void load(Fragment &frag)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:430
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae)
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_iterator_pitch_linear.h:74
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ada36f68ddee26776788ff3b4a882364b)
CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:303
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f)
ThreadMap_ ThreadMap
Definition: regular_tile_iterator_pitch_linear.h:67
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aaa48072b3ce5b8753710ac6b8c4e99d7)
CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)
Definition: regular_tile_iterator_pitch_linear.h:408
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3)
Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment
Definition: regular_tile_iterator_pitch_linear.h:385
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a176ee419e3705e96940b29c24121160c)
CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:418
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156)
Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment
Definition: regular_tile_iterator_pitch_linear.h:76
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#add75f3a1ec8d7b64a717342ba2a14420)
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:448
cutlass::layout::PitchLinearShape
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ac90756fa87f005512468e8d3c6ad935a)
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:169
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5b5df5c59e77b1f42c76f049248e12c)
CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:297
cutlass::layout::RowMajor::Index
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:59
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e9c82df0fcde08f06a65d9efbefede6)
CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)
Definition: regular_tile_iterator_pitch_linear.h:287
cutlass::layout::PitchLinear::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: pitch_linear.h:175
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7)
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_iterator_pitch_linear.h:383
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948)
typename Layout::LongIndex LongIndex
Definition: regular_tile_iterator_pitch_linear.h:380
CUTLASS_HOST_DEVICE Stride stride() const
Returns the layout object's stride vector.
Definition: tensor_ref.h:277
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a46c1d1c3d66ea73696b7fd8dd0dd72f9)
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_iterator_pitch_linear.h:353
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a215a832690fd3f0b770cc64eabc3eb2f)
CUTLASS_HOST_DEVICE RegularTileIterator & operator++()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:454
Defines the size of an element in bits.
Definition: numeric_types.h:42
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#af316509e35afa13869cf2ee4a91f3a29)
CUTLASS_HOST_DEVICE void load(Fragment &frag)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:163
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a66796895c463231ee0bd9fa34bc74aec)
CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:153
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3874988a2d312ab538f242e16f6a3b83)
CUTLASS_HOST_DEVICE RegularTileIterator & operator++()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:209
#define nullptr
nullptr
Definition: platform.h:144
cutlass::TensorRef< Element, Layout >
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a8ced8039f5936ca545faa2fae38efcad)
CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:194
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a587114fd23909a534b5c1a77a39d5916)
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:436
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a8b62546dd2f7373ec5a45b0b309bbd64)
CUTLASS_HOST_DEVICE RegularTileIterator & operator--()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:340
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a8f5567f445b60e912c35be20983ae7c0)
CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:442
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a607ac75aa0bc44400f6f80320134675a)
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:327
cutlass::transform::threadblock::RegularTileIterator
Definition: regular_tile_iterator.h:50
#define static_assert(__e, __m)
Definition: platform.h:153
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb)
ThreadMap_ ThreadMap
Definition: regular_tile_iterator_pitch_linear.h:254
cutlass::layout::PitchLinear::Index
int32_t Index
Index type used for coordinates.
Definition: pitch_linear.h:172
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a672751196b76630f5b3c3f257de64bca)
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_iterator_pitch_linear.h:229
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a17ba448c167114fd55c9d527c8143bde)
Shape_ Shape
Definition: regular_tile_iterator_pitch_linear.h:372
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a63b486d1d77de7d40efa3cefca144ef4)
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:315
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a768e8c118a274348eaaac9dd8ca058e7)
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:203
[regular_tile_iterator.h](regular tile iterator_8h.html)
Templates implementing storing of tiles from pitch-linear rank=2 tensors.
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a77af9f2f9a530fbd63703bb83190135b)
CUTLASS_DEVICE RegularTileIterator()
Definition: regular_tile_iterator_pitch_linear.h:108
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a7470527cd1b851258eee044faaed080f)
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_iterator_pitch_linear.h:347
Defines layout functions used by TensorRef and derived classes.
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5f4a6ad99a9af0cd500c1894d4ff552)
Shape_ Shape
Definition: regular_tile_iterator_pitch_linear.h:250
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a2cfc11a28f5d0f1f1186f31850c9c645)
CUTLASS_HOST_DEVICE void load(Fragment &frag)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:309
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8)
typename Layout::Index Index
Definition: regular_tile_iterator_pitch_linear.h:257
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.
cutlass::layout::ColumnMajor::Index
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:151
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10)
typename Layout::Index Index
Definition: regular_tile_iterator_pitch_linear.h:379
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3b0cbdbbec0e63e759f86131cbcad32e)
CUTLASS_HOST_DEVICE RegularTileIterator & operator--()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:461
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144)
typename Layout::Index Index
Definition: regular_tile_iterator_pitch_linear.h:70
Basic include for CUTLASS.
Definition: matrix_coord.h:39
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241)
Element_ Element
Definition: regular_tile_iterator_pitch_linear.h:251
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e)
typename Layout::LongIndex LongIndex
Definition: regular_tile_iterator_pitch_linear.h:258
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#adcf3e59a87e7cadf97f9b60c1170a94d)
CUTLASS_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:128
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3)
ThreadMap_ ThreadMap
Definition: regular_tile_iterator_pitch_linear.h:376
[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af)
Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment
Definition: regular_tile_iterator_pitch_linear.h:263
Generated by 1.8.11