Back to Cutlass

CUTLASS: regular_tile_iterator_pitch_linear.h Source File

docs/regular__tile__iterator__pitch__linear_8h_source.html

4.4.279.0 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

regular_tile_iterator_pitch_linear.h

[Go to the documentation of this file.](regular tile iterator pitch linear_8h.html)

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

35 #pragma once

36

37 #include "cutlass/cutlass.h"

38 #include "cutlass/tensor_ref.h"

39 #include "cutlass/layout/matrix.h"

40 #include "cutlass/layout/pitch_linear.h"

41

42 #include "[regular_tile_iterator.h](regular tile iterator_8h.html)"

43

45

46 namespace cutlass {

47 namespace transform {

48 namespace threadblock {

49

51

53 template <

54typename Shape_,

55typename Element_,

56int AdvanceRank,

57typename ThreadMap_,

58int Alignment

59 >

[60](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html) class RegularTileIterator<Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment> {

61 public:

62

[63](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#accb13157290b31815c4eaa4f25cf2b0b)using [Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#accb13157290b31815c4eaa4f25cf2b0b) = Shape_;

[64](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aa09850c3fe1f9b2b61b32b42cc71c729)using [Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aa09850c3fe1f9b2b61b32b42cc71c729) = Element_;

[65](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a802d26c95029a3eab7632a845e04b6f0)using Layout = layout::PitchLinear;

[66](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5cfe40a22b6405c12cf6f17206ea87da)static int const kAdvanceRank = AdvanceRank;

[67](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f)using [ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f) = ThreadMap_;

[68](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a9995cd0108feb5cc5d4858181fd761b3)static int const kAlignment = Alignment;

69

[70](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144)using [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) = typename Layout::Index;

[71](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a)using [LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a) = typename Layout::LongIndex;

72

[73](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae8d92bf17068b69fe278ca39009456b3)using TensorRef = TensorRef<Element, Layout>;

[74](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae)using [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) = typename Layout::TensorCoord;

75

[76](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156)using [Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;

77

78static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,

79"Advance rank may only be along the contiguous or strided dimensions.");

80

81 private:

82

83//

84// Types

85//

86

87using AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess, kAlignment>;

88

89//

90// Data members

91//

92

94 uint8_t *pointer_;

95

97[Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) stride_;

98

100[Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) increment_strided_;

101

103[Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) increment_advance_;

104

105 public:

106

107 CUTLASS_DEVICE

[108](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a77af9f2f9a530fbd63703bb83190135b)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a77af9f2f9a530fbd63703bb83190135b)(): pointer_(nullptr), increment_strided_(0), increment_advance_(0) { }

109

110 CUTLASS_DEVICE

[111](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aaeef1f0d4985612d1d68b6e52e0e0af8)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aaeef1f0d4985612d1d68b6e52e0e0af8)(

112TensorRef const &ref,

113int thread_idx

114 ):

115 pointer_(reinterpret_cast<uint8_t *>(ref.data()) + (ref.offset([ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f)::initial_offset(thread_idx)) * sizeof_bits<Element>::value / 8)) {

116

117 stride_ = ref.stride()[0];

118 increment_strided_ = (ref.stride()[0] * sizeof_bits<Element>::value) * ThreadMap::Delta::kStrided / 8;

119

120 increment_advance_ =

121 (kAdvanceRank == 0 ?

122 Shape::kContiguous * sizeof_bits<Element>::value / 8 :

123 Shape::kStrided * (ref.stride()[0] * sizeof_bits<Element>::value / 8));

124 }

125

127 CUTLASS_DEVICE

[128](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#adcf3e59a87e7cadf97f9b60c1170a94d)void [load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#adcf3e59a87e7cadf97f9b60c1170a94d)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) pointer_offset) {

129

130AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);

131 uint8_t const *byte_pointer = pointer_ + pointer_offset * sizeof_bits<Element>::value / 8;

132

133CUTLASS_PRAGMA_UNROLL

134for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {

135

136AccessType const *access_ptr = reinterpret_cast<AccessType const *>(byte_pointer);

137

138CUTLASS_PRAGMA_UNROLL

139for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {

140

141int idx = c + s * ThreadMap::Iterations::kContiguous;

142 frag_ptr[idx] = access_ptr[c * ThreadMap::Delta::kContiguous];

143 }

144

145if (s + 1 < ThreadMap::Iterations::kStrided) {

146 byte_pointer += increment_strided_;

147 }

148 }

149 }

150

152CUTLASS_HOST_DEVICE

[153](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a66796895c463231ee0bd9fa34bc74aec)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a66796895c463231ee0bd9fa34bc74aec)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) const & tile_offset) {

154 load_with_pointer_offset(

155 frag,

156 tile_offset.contiguous() * Shape::kContiguous / ThreadMap::kElementsPerAccess +

157 tile_offset.strided() * Shape::kStrided * stride_

158 );

159 }

160

162CUTLASS_HOST_DEVICE

[163](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#af316509e35afa13869cf2ee4a91f3a29)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#af316509e35afa13869cf2ee4a91f3a29)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) &frag) {

164 load_with_pointer_offset(frag, 0);

165 }

166

168CUTLASS_HOST_DEVICE

[169](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ac90756fa87f005512468e8d3c6ad935a)void [store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ac90756fa87f005512468e8d3c6ad935a)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) const &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144) pointer_offset) {

170

171AccessType const *frag_ptr = reinterpret_cast<AccessType const*>(&frag);

172 uint8_t *byte_pointer = pointer_ + pointer_offset * sizeof_bits<Element>::value / 8;

173

174CUTLASS_PRAGMA_UNROLL

175for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {

176

177AccessType *access_ptr = reinterpret_cast<AccessType *>(byte_pointer);

178

179CUTLASS_PRAGMA_UNROLL

180for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {

181

182int idx = c + s * ThreadMap::Iterations::kContiguous;

183 access_ptr[c * ThreadMap::Delta::kContiguous] = frag_ptr[idx];

184 }

185

186if (s + 1 < ThreadMap::Iterations::kStrided) {

187 byte_pointer += increment_strided_;

188 }

189 }

190 }

191

193CUTLASS_HOST_DEVICE

[194](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a8ced8039f5936ca545faa2fae38efcad)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a8ced8039f5936ca545faa2fae38efcad)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) const &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) const & tile_offset) {

195 store_with_pointer_offset(

196 frag,

197 tile_offset.contiguous() * Shape::kContiguous + tile_offset.strided() * Shape::kStrided * stride_

198 );

199 }

200

202CUTLASS_HOST_DEVICE

[203](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a768e8c118a274348eaaac9dd8ca058e7)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a768e8c118a274348eaaac9dd8ca058e7)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156) const &frag) {

204 store_with_pointer_offset(frag, 0);

205 }

206

208CUTLASS_HOST_DEVICE

[209](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3874988a2d312ab538f242e16f6a3b83)RegularTileIterator &[operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3874988a2d312ab538f242e16f6a3b83)() {

210 pointer_ += increment_advance_;

211return *this;

212 }

213

215CUTLASS_HOST_DEVICE

[216](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae59497fa13e9b0d4c695ea7030cd2fca)RegularTileIterator &[operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae59497fa13e9b0d4c695ea7030cd2fca)() {

217 pointer_ -= increment_advance_;

218return *this;

219 }

220

222CUTLASS_HOST_DEVICE

[223](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ab41c9c517802602b07f0c9e487d4124f)void [add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ab41c9c517802602b07f0c9e487d4124f)([LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a) pointer_offset) {

224 pointer_ += pointer_offset;

225 }

226

228 CUTLASS_DEVICE

[229](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a672751196b76630f5b3c3f257de64bca)void [add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a672751196b76630f5b3c3f257de64bca)([TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae) const &coord) {

230int offset = sizeof_bits<Element>::value *

231 (coord.contiguous() * Shape::kContiguous + coord.strided() * Shape::kStrided * stride_) / 8;

232 add_pointer_offset(offset);

233 }

234

235 };

236

238

240 template <

241typename Shape_,

242typename Element_,

243int AdvanceRank,

244typename ThreadMap_,

245int Alignment

246 >

[247](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html) class RegularTileIterator<Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment> {

248 public:

249

[250](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5f4a6ad99a9af0cd500c1894d4ff552)using [Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5f4a6ad99a9af0cd500c1894d4ff552) = Shape_;

[251](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241)using [Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241) = Element_;

[252](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e2e59ccbd6d1098913f2669cd69b6ea)using Layout = layout::RowMajor;

[253](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a1f7a2c177973a9c407df31ef4e7b900d)static int const kAdvanceRank = AdvanceRank;

[254](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb)using [ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb) = ThreadMap_;

[255](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a79f4dbf9da4bf9ba8b3b1559bea9835f)static int const kAlignment = Alignment;

256

[257](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8)using [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8) = typename Layout::Index;

[258](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e)using [LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e) = typename Layout::LongIndex;

259

[260](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0bdde358dd31c74b51ef2568e81a0f0b)using TensorRef = TensorRef<Element, Layout>;

[261](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c)using [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) = typename Layout::TensorCoord;

262

[263](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af)using [Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;

264

265using Underlying = RegularTileIterator<

266layout::PitchLinearShape<Shape::kColumn, Shape::kRow>,

267[Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241),

268layout::PitchLinear,

269 (kAdvanceRank == 0 ? 1 : 0),

270[ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb),

271 kAlignment

[272](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a1d3913359089881a0bb91579dc0c7d65) >;

273

274static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,

275"Advance rank may only be along the row or column dimensions.");

276

277 private:

278

279Underlying iterator_;

280

281 public:

282

283 CUTLASS_DEVICE

[284](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#aad803b85a96da22725a335b911975718)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#aad803b85a96da22725a335b911975718)() { }

285

286 CUTLASS_DEVICE

[287](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e9c82df0fcde08f06a65d9efbefede6)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e9c82df0fcde08f06a65d9efbefede6)(

288TensorRef const &ref,

289int thread_idx

290 ):

291 iterator_({ref.data(), ref.stride()}, thread_idx) {

292

293 }

294

296CUTLASS_HOST_DEVICE

[297](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5b5df5c59e77b1f42c76f049248e12c)void [load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5b5df5c59e77b1f42c76f049248e12c)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8) pointer_offset) {

298 iterator_.load_with_pointer_offset(frag, pointer_offset);

299 }

300

302CUTLASS_HOST_DEVICE

[303](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ada36f68ddee26776788ff3b4a882364b)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ada36f68ddee26776788ff3b4a882364b)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) const & tile_offset) {

304 iterator_.load_with_pointer_offset(frag, {tile_offset.column(), tile_offset.row()});

305 }

306

308CUTLASS_HOST_DEVICE

[309](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a2cfc11a28f5d0f1f1186f31850c9c645)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a2cfc11a28f5d0f1f1186f31850c9c645)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) &frag) {

310 iterator_.load_with_pointer_offset(frag, 0);

311 }

312

314CUTLASS_HOST_DEVICE

[315](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a63b486d1d77de7d40efa3cefca144ef4)void [store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a63b486d1d77de7d40efa3cefca144ef4)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) const &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8) pointer_offset) {

316 iterator_.store_with_pointer_offset(frag, pointer_offset);

317 }

318

320CUTLASS_HOST_DEVICE

[321](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a33af1791659cf60aff5da1b71747f125)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a33af1791659cf60aff5da1b71747f125)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) const &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) const & tile_offset) {

322 iterator_.store_with_pointer_offset(frag, {tile_offset.column(), tile_offset.row()});

323 }

324

326CUTLASS_HOST_DEVICE

[327](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a607ac75aa0bc44400f6f80320134675a)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a607ac75aa0bc44400f6f80320134675a)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af) const &frag) {

328 iterator_.store_with_pointer_offset(frag, 0);

329 }

330

332CUTLASS_HOST_DEVICE

[333](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4d9b37a6c2bed1209a61fdcbf225f691)RegularTileIterator &[operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4d9b37a6c2bed1209a61fdcbf225f691)() {

334 ++iterator_;

335return *this;

336 }

337

339CUTLASS_HOST_DEVICE

[340](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a8b62546dd2f7373ec5a45b0b309bbd64)RegularTileIterator &[operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a8b62546dd2f7373ec5a45b0b309bbd64)() {

341 --iterator_;

342return *this;

343 }

344

346CUTLASS_HOST_DEVICE

[347](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a7470527cd1b851258eee044faaed080f)void [add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a7470527cd1b851258eee044faaed080f)([LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e) pointer_offset) {

348 iterator_.add_pointer_offset(pointer_offset);

349 }

350

352 CUTLASS_DEVICE

[353](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a46c1d1c3d66ea73696b7fd8dd0dd72f9)void [add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a46c1d1c3d66ea73696b7fd8dd0dd72f9)([TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c) const &coord) {

354 iterator_.add_tile_offset({coord.column(), coord.row()});

355 }

356

357 };

358

360

362 template <

363typename Shape_,

364typename Element_,

365int AdvanceRank,

366typename ThreadMap_,

367int Alignment

368 >

[369](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html) class RegularTileIterator<Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment> {

370 public:

371

[372](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a17ba448c167114fd55c9d527c8143bde)using [Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a17ba448c167114fd55c9d527c8143bde) = Shape_;

[373](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c)using [Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c) = Element_;

[374](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aa38acb9ec06003a9f1d0b727576fca33)using Layout = layout::ColumnMajor;

[375](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a780965d9703eeaeb65aa2b2b47bdd93a)static int const kAdvanceRank = AdvanceRank;

[376](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3)using [ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3) = ThreadMap_;

[377](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#acf5b3c865149cac8d1ec6ff4e96e275c)static int const kAlignment = Alignment;

378

[379](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10)using [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10) = typename Layout::Index;

[380](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948)using [LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948) = typename Layout::LongIndex;

381

[382](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a676aa87744c8f6fe933503513fd7d7cb)using TensorRef = TensorRef<Element, Layout>;

[383](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7)using [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) = typename Layout::TensorCoord;

384

[385](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3)using [Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;

386

387using Underlying = RegularTileIterator<

388layout::PitchLinearShape<Shape::kRow, Shape::kColumn>,

389[Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c),

390layout::PitchLinear,

391 (kAdvanceRank == 0 ? 0 : 1),

392[ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3)

[393](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a36e5de138cd6bfc6614f6c96c84d59a8) >;

394

395static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,

396"Advance rank may only be along the row or column dimensions.");

397

398 private:

399

400Underlying iterator_;

401

402 public:

403

404 CUTLASS_DEVICE

[405](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a90dedd03e47edabf0f9fd6d51e70272e)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a90dedd03e47edabf0f9fd6d51e70272e)() { }

406

407 CUTLASS_DEVICE

[408](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aaa48072b3ce5b8753710ac6b8c4e99d7)[RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aaa48072b3ce5b8753710ac6b8c4e99d7)(

409TensorRef const &ref,

410int thread_idx

411 ):

412 iterator_({ref.data(), ref.stride()}, thread_idx) {

413

414 }

415

417CUTLASS_HOST_DEVICE

[418](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a176ee419e3705e96940b29c24121160c)void [load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a176ee419e3705e96940b29c24121160c)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10) pointer_offset) {

419 iterator_.load_with_pointer_offset(frag, pointer_offset);

420 }

421

423CUTLASS_HOST_DEVICE

[424](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a52146adf93a143916d527a153a3d9cb4)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a52146adf93a143916d527a153a3d9cb4)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) const & tile_offset) {

425 iterator_.load_with_pointer_offset(frag, {tile_offset.row(), tile_offset.column()});

426 }

427

429CUTLASS_HOST_DEVICE

[430](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a192d119355b9bfc22199ddf5e489eb38)void [load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a192d119355b9bfc22199ddf5e489eb38)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) &frag) {

431 iterator_.load_with_pointer_offset(frag, 0);

432 }

433

435CUTLASS_HOST_DEVICE

[436](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a587114fd23909a534b5c1a77a39d5916)void [store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a587114fd23909a534b5c1a77a39d5916)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) const &frag, [Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10) pointer_offset) {

437 iterator_.store_with_pointer_offset(frag, pointer_offset);

438 }

439

441CUTLASS_HOST_DEVICE

[442](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a8f5567f445b60e912c35be20983ae7c0)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a8f5567f445b60e912c35be20983ae7c0)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) const &frag, [TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) const & tile_offset) {

443 iterator_.store_with_pointer_offset(frag, {tile_offset.row(), tile_offset.column()});

444 }

445

447CUTLASS_HOST_DEVICE

[448](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#add75f3a1ec8d7b64a717342ba2a14420)void [store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#add75f3a1ec8d7b64a717342ba2a14420)([Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3) const &frag) {

449 iterator_.store_with_pointer_offset(frag, 0);

450 }

451

453CUTLASS_HOST_DEVICE

[454](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a215a832690fd3f0b770cc64eabc3eb2f)RegularTileIterator &[operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a215a832690fd3f0b770cc64eabc3eb2f)() {

455 ++iterator_;

456return *this;

457 }

458

460CUTLASS_HOST_DEVICE

[461](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3b0cbdbbec0e63e759f86131cbcad32e)RegularTileIterator &[operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3b0cbdbbec0e63e759f86131cbcad32e)() {

462 --iterator_;

463return *this;

464 }

465

467CUTLASS_HOST_DEVICE

[468](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3aa895ffd980f4fe94862c0fe75f6cbe)void [add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3aa895ffd980f4fe94862c0fe75f6cbe)([LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948) pointer_offset) {

469 iterator_.add_pointer_offset(pointer_offset);

470 }

471

473 CUTLASS_DEVICE

[474](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3e078be0830303a162c3a0f91e017fc9)void [add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3e078be0830303a162c3a0f91e017fc9)([TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7) const &coord) {

475 iterator_.add_tile_offset({coord.row(), coord.column()});

476 }

477

478 };

479

481

482 } // namespace threadblock

483 } // namespace transform

484 } // namespace cutlass

485

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#aad803b85a96da22725a335b911975718)

CUTLASS_DEVICE RegularTileIterator()

Definition: regular_tile_iterator_pitch_linear.h:284

cutlass::layout::RowMajor::LongIndex

int64_t LongIndex

Long index type used for offsets.

Definition: layout/matrix.h:62

cutlass

Definition: aligned_buffer.h:35

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3aa895ffd980f4fe94862c0fe75f6cbe)

CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)

Adds a pointer offset in units of Element.

Definition: regular_tile_iterator_pitch_linear.h:468

cutlass::layout::PitchLinearCoord

Coordinate in pitch-linear space.

Definition: pitch_linear.h:52

tensor_ref.h

Defines a structure containing strides, bounds, and a pointer to tensor data.

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a33af1791659cf60aff5da1b71747f125)

CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:321

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ae59497fa13e9b0d4c695ea7030cd2fca)

CUTLASS_HOST_DEVICE RegularTileIterator & operator--()

Advances the pointer.

Definition: regular_tile_iterator_pitch_linear.h:216

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ab41c9c517802602b07f0c9e487d4124f)

CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)

Adds a pointer offset in units of Element.

Definition: regular_tile_iterator_pitch_linear.h:223

cutlass::TensorRef::data

CUTLASS_HOST_DEVICE Element * data() const

Returns the pointer to referenced data.

Definition: tensor_ref.h:254

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a83a6797234c779650f57dd7ed54b932a)

typename Layout::LongIndex LongIndex

Definition: regular_tile_iterator_pitch_linear.h:71

cutlass::layout::PitchLinear

Mapping function for pitch-linear memory.

Definition: pitch_linear.h:163

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aaeef1f0d4985612d1d68b6e52e0e0af8)

CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)

Definition: regular_tile_iterator_pitch_linear.h:111

cutlass::layout::ColumnMajor::LongIndex

int64_t LongIndex

Long index type used for offsets.

Definition: layout/matrix.h:154

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3e078be0830303a162c3a0f91e017fc9)

CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)

Adds a tile offset.

Definition: regular_tile_iterator_pitch_linear.h:474

cutlass::AlignedArray

Aligned array type.

Definition: array.h:511

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#aa09850c3fe1f9b2b61b32b42cc71c729)

Element_ Element

Definition: regular_tile_iterator_pitch_linear.h:64

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a765905f488050279a971c3193571545c)

Element_ Element

Definition: regular_tile_iterator_pitch_linear.h:373

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a52146adf93a143916d527a153a3d9cb4)

CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:424

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4d9b37a6c2bed1209a61fdcbf225f691)

CUTLASS_HOST_DEVICE RegularTileIterator & operator++()

Advances the pointer.

Definition: regular_tile_iterator_pitch_linear.h:333

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#accb13157290b31815c4eaa4f25cf2b0b)

Shape_ Shape

Definition: regular_tile_iterator_pitch_linear.h:63

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a90dedd03e47edabf0f9fd6d51e70272e)

CUTLASS_DEVICE RegularTileIterator()

Definition: regular_tile_iterator_pitch_linear.h:405

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af3c08cdac1db52061c850ec4473c733c)

typename Layout::TensorCoord TensorCoord

Definition: regular_tile_iterator_pitch_linear.h:261

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a192d119355b9bfc22199ddf5e489eb38)

CUTLASS_HOST_DEVICE void load(Fragment &frag)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:430

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a82266de9a558d551b8bbfe4327b9a3ae)

typename Layout::TensorCoord TensorCoord

Definition: regular_tile_iterator_pitch_linear.h:74

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ada36f68ddee26776788ff3b4a882364b)

CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:303

cutlass::layout::ColumnMajor

Mapping function for column-major matrices.

Definition: layout/matrix.h:142

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a5f0a8db7449d465243c121725168211f)

ThreadMap_ ThreadMap

Definition: regular_tile_iterator_pitch_linear.h:67

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#aaa48072b3ce5b8753710ac6b8c4e99d7)

CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)

Definition: regular_tile_iterator_pitch_linear.h:408

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a5e1532d6ba9ceb740f5cdbafb9bad6c3)

Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment

Definition: regular_tile_iterator_pitch_linear.h:385

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a176ee419e3705e96940b29c24121160c)

CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:418

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a23214640848bd844f182c8276d495156)

Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment

Definition: regular_tile_iterator_pitch_linear.h:76

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#add75f3a1ec8d7b64a717342ba2a14420)

CUTLASS_HOST_DEVICE void store(Fragment const &frag)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:448

cutlass::layout::PitchLinearShape

Template defining a shape used by pitch-linear operators.

Definition: pitch_linear.h:43

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#ac90756fa87f005512468e8d3c6ad935a)

CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:169

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5b5df5c59e77b1f42c76f049248e12c)

CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:297

cutlass::layout::RowMajor::Index

int32_t Index

Index type used for coordinates.

Definition: layout/matrix.h:59

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a4e9c82df0fcde08f06a65d9efbefede6)

CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)

Definition: regular_tile_iterator_pitch_linear.h:287

cutlass::layout::PitchLinear::LongIndex

int64_t LongIndex

Long index type used for offsets.

Definition: pitch_linear.h:175

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a600fe9c105a29ac8dc9e0d52ef251aa7)

typename Layout::TensorCoord TensorCoord

Definition: regular_tile_iterator_pitch_linear.h:383

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a57a1ee8a67d2b206fb7eec6751a14948)

typename Layout::LongIndex LongIndex

Definition: regular_tile_iterator_pitch_linear.h:380

cutlass::TensorRef::stride

CUTLASS_HOST_DEVICE Stride stride() const

Returns the layout object's stride vector.

Definition: tensor_ref.h:277

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a46c1d1c3d66ea73696b7fd8dd0dd72f9)

CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)

Adds a tile offset.

Definition: regular_tile_iterator_pitch_linear.h:353

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a215a832690fd3f0b770cc64eabc3eb2f)

CUTLASS_HOST_DEVICE RegularTileIterator & operator++()

Advances the pointer.

Definition: regular_tile_iterator_pitch_linear.h:454

cutlass::sizeof_bits

Defines the size of an element in bits.

Definition: numeric_types.h:42

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#af316509e35afa13869cf2ee4a91f3a29)

CUTLASS_HOST_DEVICE void load(Fragment &frag)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:163

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a66796895c463231ee0bd9fa34bc74aec)

CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:153

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator++](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3874988a2d312ab538f242e16f6a3b83)

CUTLASS_HOST_DEVICE RegularTileIterator & operator++()

Advances the pointer.

Definition: regular_tile_iterator_pitch_linear.h:209

nullptr

#define nullptr

nullptr

Definition: platform.h:144

cutlass::TensorRef< Element, Layout >

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a8ced8039f5936ca545faa2fae38efcad)

CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:194

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a587114fd23909a534b5c1a77a39d5916)

CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:436

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a8b62546dd2f7373ec5a45b0b309bbd64)

CUTLASS_HOST_DEVICE RegularTileIterator & operator--()

Advances the pointer.

Definition: regular_tile_iterator_pitch_linear.h:340

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a8f5567f445b60e912c35be20983ae7c0)

CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:442

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a607ac75aa0bc44400f6f80320134675a)

CUTLASS_HOST_DEVICE void store(Fragment const &frag)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:327

cutlass::transform::threadblock::RegularTileIterator

Definition: regular_tile_iterator.h:50

static_assert

#define static_assert(__e, __m)

Definition: platform.h:153

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a11fb9f9360fea4134f002c41f4901acb)

ThreadMap_ ThreadMap

Definition: regular_tile_iterator_pitch_linear.h:254

cutlass::layout::PitchLinear::Index

int32_t Index

Index type used for coordinates.

Definition: pitch_linear.h:172

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a672751196b76630f5b3c3f257de64bca)

CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)

Adds a tile offset.

Definition: regular_tile_iterator_pitch_linear.h:229

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a17ba448c167114fd55c9d527c8143bde)

Shape_ Shape

Definition: regular_tile_iterator_pitch_linear.h:372

cutlass::layout::RowMajor

Mapping function for row-major matrices.

Definition: layout/matrix.h:50

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a63b486d1d77de7d40efa3cefca144ef4)

CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:315

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a768e8c118a274348eaaac9dd8ca058e7)

CUTLASS_HOST_DEVICE void store(Fragment const &frag)

Stores a fragment.

Definition: regular_tile_iterator_pitch_linear.h:203

[regular_tile_iterator.h](regular tile iterator_8h.html)

Templates implementing storing of tiles from pitch-linear rank=2 tensors.

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a77af9f2f9a530fbd63703bb83190135b)

CUTLASS_DEVICE RegularTileIterator()

Definition: regular_tile_iterator_pitch_linear.h:108

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a7470527cd1b851258eee044faaed080f)

CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)

Adds a pointer offset in units of Element.

Definition: regular_tile_iterator_pitch_linear.h:347

matrix.h

Defines layout functions used by TensorRef and derived classes.

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Shape](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ab5f4a6ad99a9af0cd500c1894d4ff552)

Shape_ Shape

Definition: regular_tile_iterator_pitch_linear.h:250

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a2cfc11a28f5d0f1f1186f31850c9c645)

CUTLASS_HOST_DEVICE void load(Fragment &frag)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:309

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#af808b8c3fc0bea942874f61c17bbc4f8)

typename Layout::Index Index

Definition: regular_tile_iterator_pitch_linear.h:257

pitch_linear.h

Defines layout functions used by TensorRef and derived classes for pitch-linear memory.

cutlass::layout::ColumnMajor::Index

int32_t Index

Index type used for coordinates.

Definition: layout/matrix.h:151

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a236cea40e2359a0b5fe35ff1a3519e10)

typename Layout::Index Index

Definition: regular_tile_iterator_pitch_linear.h:379

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator--](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a3b0cbdbbec0e63e759f86131cbcad32e)

CUTLASS_HOST_DEVICE RegularTileIterator & operator--()

Advances the pointer.

Definition: regular_tile_iterator_pitch_linear.h:461

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Index](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#a3a922bd28e511a085e2cd09377c04144)

typename Layout::Index Index

Definition: regular_tile_iterator_pitch_linear.h:70

cutlass.h

Basic include for CUTLASS.

cutlass::MatrixCoord

Definition: matrix_coord.h:39

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Element](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#ad917a6eee6c57d5d9817542a6c6c4241)

Element_ Element

Definition: regular_tile_iterator_pitch_linear.h:251

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a589c4fa14fed9f28631a5c291bd10e5e)

typename Layout::LongIndex LongIndex

Definition: regular_tile_iterator_pitch_linear.h:258

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0184a89653916f5d51ab59d1b386989a17.html#adcf3e59a87e7cadf97f9b60c1170a94d)

CUTLASS_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)

Loads a fragment.

Definition: regular_tile_iterator_pitch_linear.h:128

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_011d3637dbd8bc58bcb020b51bf57fbfc0.html#a93d65c5bbfe9415a158e8a863c0882a3)

ThreadMap_ ThreadMap

Definition: regular_tile_iterator_pitch_linear.h:376

[cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Fragment](classcutlass_1_1transform_1_1threadblock_1_1RegularTileIterator_3_01Shape 00_01Element 00_0149454d361ea5885cf5166a920b5145df.html#a0f8e337d13ada6404909a4a10572b8af)

Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment

Definition: regular_tile_iterator_pitch_linear.h:263


Generated by 1.8.11