Back to Cutlass

CUTLASS: tensor_ref.h Source File

docs/tensor__ref_8h_source.html

4.4.235.4 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

tensor_ref.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

28 #pragma once

29

30

31 #include "cutlass/cutlass.h"

32 #include "cutlass/coord.h"

33 #include "cutlass/platform/platform.h"

34 #include "cutlass/subbyte_reference.h"

35

36 namespace cutlass {

37

39

44 template <int Rank>

45 class IdentityTensorLayout {

46 public:

48static int const kRank = Rank;

49

51static int const kStrideRank = Rank;

52

54using Index = int32_t;

55

57using LongIndex = int64_t;

58

60using TensorCoord = Coord<kRank, Index>;

61

63using Stride = Coord<kStrideRank, Index>;

64

65 private:

66

67//

68// Data members

69//

70

72Stride stride_;

73

74 public:

75

76//

77// Methods

78//

79

80CUTLASS_HOST_DEVICE

81IdentityTensorLayout(Stride const &stride = Stride()): stride_(stride) { }

82

84CUTLASS_HOST_DEVICE

85LongIndex operator()(Coord<Rank> const &coord) const {

86return coord.dot(stride_);

87 }

88

90CUTLASS_HOST_DEVICE

91Stride stride() const {

92return stride_;

93 }

94

96CUTLASS_HOST_DEVICE

97Stride & stride() {

98return stride_;

99 }

100

102CUTLASS_HOST_DEVICE

103LongIndex capacity(TensorCoord const &size) const {

104int idx = stride_.max_dim_index();

105return stride_[idx] * size[idx];

106 }

107 };

108

110

111 /* \brief TensorRef is a template for objects pointing to the start of tensors of arbitrary rank

112 and layout within memory. A TensorRef combines a pointer and a Layout concept

113

114 Examples:

115

116 (These examples use helpers for matrix layouts defined in cutlass/layout/matrix.h)

117

118 1. Column-major matrix may be represented as a rank=2 tensor:

119

120 TensorRef<float, layout::ColumnMajor> A(ptr_A, ldm);

121

122 2. Row-major matrix may be represented as a rank=2 tensor:

123

124 TensorRef<float, layout::RowMajor> B(ptr_A, ldm);

125

126 3. An interleaved matrix may be represented as a rank=2 tensor:

127

128 TensorRef<int8_t, layout::ColumnMajorInterleaved<32> > C;

129

130 4. A helper exists to define a TensorRef for a contiguous matrix whose layout

131 is not known at compile time.

132

133 int ldm; // leading dimension

134 layout::Matrix kind; // Could be layout::Matrix::kRowMajor or layout::Matrix::kColumnMajor

135

136

137 TensorRef<int, layout::ContiguousMatrix> E(ptr_E, {ldm, kind});

138

139 */

140 template <

142typename Element_,

144typename Layout_

145 >

146 class TensorRef {

147public:

149using Element = Element_;

150

152using Layout = Layout_;

153

155using Reference = typename platform::conditional<

156sizeof_bits<Element>::value >= 8,

157Element &,

158SubbyteReference<Element>

159 >::type;

160

162static int const kRank = Layout::kRank;

163

165using Index = typename Layout::Index;

166

168using LongIndex = typename Layout::LongIndex;

169

171using TensorCoord = typename Layout::TensorCoord;

172

174using Stride = typename Layout::Stride;

175

177using ConstTensorRef = TensorRef<

178typename platform::remove_const<Element>::type const,

179Layout>;

180

182using NonConstTensorRef = TensorRef<

183typename platform::remove_const<Element>::type,

184Layout>;

185

189static_assert(kRank > 0, "Cannot define a zero-rank TensorRef");

190

191private:

192

194Element* ptr_;

195

197Layout layout_;

198

199public:

200

201//

202// Methods

203//

204

206CUTLASS_HOST_DEVICE

207TensorRef(

208Element *ptr = nullptr,

209Layout const &layout = Layout()

210 ):

211 ptr_(ptr), layout_(layout) {

212

213 }

214

216CUTLASS_HOST_DEVICE

217TensorRef(

218NonConstTensorRef const &ref

219 ):

220 ptr_(ref.data()), layout_(ref.layout()) { }

221

223CUTLASS_HOST_DEVICE

224ConstTensorRef const_ref() const {

225return ConstTensorRef(ptr_, layout_);

226 }

227

228CUTLASS_HOST_DEVICE

229NonConstTensorRef non_const_ref() const {

230return NonConstTensorRef(const_cast<typename platform::remove_const<Element>::type *>(ptr_), layout_);

231 }

232

234CUTLASS_HOST_DEVICE

235void reset(Element* ptr = nullptr) {

236 ptr_ = ptr;

237 }

238

240CUTLASS_HOST_DEVICE

241void reset(Element* ptr, Layout const &layout) {

242 ptr_ = ptr;

243 layout_ = layout;

244 }

245

247CUTLASS_HOST_DEVICE

248bool good() const {

249return ptr_ != nullptr;

250 }

251

253CUTLASS_HOST_DEVICE

254Element * data() const { return ptr_; }

255

257CUTLASS_HOST_DEVICE

258Reference data(LongIndex idx) const {

259return ReferenceFactory<typename platform::remove_const<Element>::type,

260 (sizeof_bits<Element>::value < 8)>::get(ptr_, idx);

261 }

262

264CUTLASS_HOST_DEVICE

265Layout & layout() {

266return layout_;

267 }

268

270CUTLASS_HOST_DEVICE

271Layout layout() const {

272return layout_;

273 }

274

276CUTLASS_HOST_DEVICE

277Stride stride() const {

278return layout_.stride();

279 }

280

282CUTLASS_HOST_DEVICE

283Stride & stride() {

284return layout_.stride();

285 }

286

288CUTLASS_HOST_DEVICE

289Index stride(int dim) const {

290return layout_.stride().at(dim);

291 }

292

294CUTLASS_HOST_DEVICE

295Index & stride(int dim) {

296return layout_.stride().at(dim);

297 }

298

300CUTLASS_HOST_DEVICE

301LongIndex offset(TensorCoord const& coord) const {

302return layout_(coord);

303 }

304

306CUTLASS_HOST_DEVICE

307Reference at(TensorCoord const& coord) const {

308return data(offset(coord));

309 }

310

312CUTLASS_HOST_DEVICE

313Reference operator[](TensorCoord const& coord) const {

314return data(offset(coord));

315 }

316

318CUTLASS_HOST_DEVICE

319TensorRef & add_pointer_offset(LongIndex offset_) {

320 ptr_ += offset_;

321return *this;

322 }

323

325CUTLASS_HOST_DEVICE

326TensorRef & add_coord_offset(TensorCoord const &coord) {

327 add_pointer_offset(offset(coord));

328return *this;

329 }

330

332CUTLASS_HOST_DEVICE

333TensorRef operator+(TensorCoord const& b) const {

334TensorRef result(*this);

335 result.add_coord_offset(b);

336return result;

337 }

338

340CUTLASS_HOST_DEVICE

341TensorRef & operator+=(TensorCoord const& b) {

342 add_coord_offset(b);

343return *this;

344 }

345

347CUTLASS_HOST_DEVICE

348TensorRef operator-(TensorCoord const& b) const {

349TensorRef result(*this);

350 result.add_pointer_offset(-offset(b));

351return result;

352 }

353

355CUTLASS_HOST_DEVICE

356TensorRef & operator-=(TensorCoord const& b) {

357 add_pointer_offset(-offset(b));

358return *this;

359 }

360 };

361

363 template <

364typename Element,

365typename Layout

366 >

367 CUTLASS_HOST_DEVICE

368 TensorRef<Element, Layout> make_TensorRef(Element *ptr, Layout const &layout) {

369return TensorRef<Element, Layout>(ptr, layout);

370 }

371

373 //

374 // Partial specializations to handle degenerate and sub-byte cases.

375 //

377

378 template <

379typename Element,

380typename Layout

381 >

382 bool TensorRef_aligned(TensorRef<Element, Layout> const &ref, int alignment) {

383

384int const kStrideRank = Layout::kStrideRank;

385

386if (reinterpret_cast<uintptr_t>(ref.data()) % alignment) {

387return false;

388 }

389

390CUTLASS_PRAGMA_UNROLL

391for (int i = 0; i < kStrideRank; ++i) {

392if (ref.stride(i) % alignment) {

393return false;

394 }

395 }

396

397return true;

398 }

399

401

402 } // namespace cutlass

cutlass::IdentityTensorLayout::stride

CUTLASS_HOST_DEVICE Stride stride() const

Returns the stride of the layout.

Definition: tensor_ref.h:91

cutlass

Definition: aligned_buffer.h:35

cutlass::IdentityTensorLayout::operator()

CUTLASS_HOST_DEVICE LongIndex operator()(Coord< Rank > const &coord) const

Returns the offset of a coordinate in linear memory.

Definition: tensor_ref.h:85

cutlass::TensorRef< Element, Layout >::stride

CUTLASS_HOST_DEVICE Index stride(int dim) const

Returns the layout object's stride in a given physical dimension.

Definition: tensor_ref.h:289

cutlass::TensorRef< Element, Layout >< Element, Layout >::Stride

typename Layout::Stride Stride

Layout's stride vector.

Definition: tensor_ref.h:174

cutlass::TensorRef< Element, Layout >::stride

CUTLASS_HOST_DEVICE Index & stride(int dim)

Returns the layout object's stride in a given physical dimension.

Definition: tensor_ref.h:295

cutlass::TensorRef< Element, Layout >::stride

CUTLASS_HOST_DEVICE Stride & stride()

Returns the layout object's stride vector.

Definition: tensor_ref.h:283

cutlass::platform::remove_const::type

T type

Definition: platform.h:351

cutlass::IdentityTensorLayout

Definition: tensor_ref.h:45

cutlass::TensorRef< Element, Layout >::data

CUTLASS_HOST_DEVICE Element * data() const

Returns the pointer to referenced data.

Definition: tensor_ref.h:254

cutlass::IdentityTensorLayout::Stride

Coord< kStrideRank, Index > Stride

Stride vector.

Definition: tensor_ref.h:63

cutlass::TensorRef< Element, Layout >::const_ref

CUTLASS_HOST_DEVICE ConstTensorRef const_ref() const

Returns a reference to constant-valued tensor.

Definition: tensor_ref.h:224

cutlass::IdentityTensorLayout::Index

int32_t Index

Index type used for coordinates.

Definition: tensor_ref.h:54

coord.h

A Coord is a coordinate of arbitrary rank into a tensor or matrix.

cutlass::TensorRef< Element, Layout >< Element, Layout >::Layout

Layout Layout

Mapping function from logical coordinate to linear memory.

Definition: tensor_ref.h:152

cutlass::IdentityTensorLayout::stride

CUTLASS_HOST_DEVICE Stride & stride()

Returns the stride of the layout.

Definition: tensor_ref.h:97

cutlass::TensorRef< Element, Layout >::reset

CUTLASS_HOST_DEVICE void reset(Element *ptr, Layout const &layout)

Updates the pointer and layout object.

Definition: tensor_ref.h:241

cutlass::TensorRef< Element, Layout >::operator-=

CUTLASS_HOST_DEVICE TensorRef & operator-=(TensorCoord const &b)

Returns a TensorRef offset by a given amount.

Definition: tensor_ref.h:356

cutlass::TensorRef< Element, Layout >::operator[]

CUTLASS_HOST_DEVICE Reference operator[](TensorCoord const &coord) const

Returns a reference to the element at a given Coord.

Definition: tensor_ref.h:313

platform.h

C++ features that may be otherwise unimplemented for CUDA device functions.

cutlass::TensorRef< Element, Layout >::TensorRef

CUTLASS_HOST_DEVICE TensorRef(Element *ptr=nullptr, Layout const &layout=Layout())

Constructs a TensorRef with a pointer and layout object.

Definition: tensor_ref.h:207

cutlass::TensorRef< Element, Layout >::layout

CUTLASS_HOST_DEVICE Layout layout() const

Returns the layout object.

Definition: tensor_ref.h:271

cutlass::TensorRef< Element, Layout >::add_coord_offset

CUTLASS_HOST_DEVICE TensorRef & add_coord_offset(TensorCoord const &coord)

Adds an offset to each pointer.

Definition: tensor_ref.h:326

cutlass::TensorRef< Element, Layout >< Element, Layout >::Element

Element Element

Data type of individual access.

Definition: tensor_ref.h:149

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::TensorRef< Element, Layout >::data

CUTLASS_HOST_DEVICE Reference data(LongIndex idx) const

Returns a reference to the element at a given linear index.

Definition: tensor_ref.h:258

cutlass::TensorRef< Element, Layout >::operator-

CUTLASS_HOST_DEVICE TensorRef operator-(TensorCoord const &b) const

Returns a TensorRef offset by a given amount.

Definition: tensor_ref.h:348

cutlass::TensorRef< Element, Layout >::TensorRef

CUTLASS_HOST_DEVICE TensorRef(NonConstTensorRef const &ref)

Converting constructor from TensorRef to non-constant data.

Definition: tensor_ref.h:217

cutlass::TensorRef< Element, Layout >::stride

CUTLASS_HOST_DEVICE Stride stride() const

Returns the layout object's stride vector.

Definition: tensor_ref.h:277

cutlass::TensorRef< Element, Layout >< Element, Layout >::TensorCoord

typename Layout::TensorCoord TensorCoord

Coordinate in logical tensor space.

Definition: tensor_ref.h:171

cutlass::TensorRef< Element, Layout >::good

CUTLASS_HOST_DEVICE bool good() const

Returns true if the TensorRef is non-null.

Definition: tensor_ref.h:248

cutlass::sizeof_bits

Defines the size of an element in bits.

Definition: numeric_types.h:42

cutlass::TensorRef< Element, Layout >::reset

CUTLASS_HOST_DEVICE void reset(Element *ptr=nullptr)

Updates only the pointer.

Definition: tensor_ref.h:235

cutlass::ReferenceFactory

Definition: subbyte_reference.h:557

cutlass::TensorRef

Definition: tensor_ref.h:146

cutlass::TensorRef< Element, Layout >< Element, Layout >::Reference

typename platform::conditional< sizeof_bits< Element >::value >=8, Element &, SubbyteReference< Element > >::type Reference

Reference type to an element.

Definition: tensor_ref.h:159

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::TensorRef< Element, Layout >::offset

CUTLASS_HOST_DEVICE LongIndex offset(TensorCoord const &coord) const

Computes the offset of an index from the origin of the tensor.

Definition: tensor_ref.h:301

cutlass::platform::conditional

std::conditional (true specialization)

Definition: platform.h:325

static_assert

#define static_assert(__e, __m)

Definition: platform.h:153

cutlass::IdentityTensorLayout::kRank

static int const kRank

Logical rank of tensor.

Definition: tensor_ref.h:48

cutlass::TensorRef< Element, Layout >::non_const_ref

CUTLASS_HOST_DEVICE NonConstTensorRef non_const_ref() const

Definition: tensor_ref.h:229

cutlass::Coord

Statically-sized array specifying Coords within a tensor.

Definition: coord.h:43

cutlass::IdentityTensorLayout::LongIndex

int64_t LongIndex

Long index type used for offsets.

Definition: tensor_ref.h:57

cutlass::make_TensorRef

CUTLASS_HOST_DEVICE TensorRef< Element, Layout > make_TensorRef(Element *ptr, Layout const &layout)

Constructs a TensorRef, deducing types from arguments.

Definition: tensor_ref.h:368

cutlass::TensorRef< Element, Layout >< Element, Layout >::Index

typename Layout::Index Index

Index type.

Definition: tensor_ref.h:165

cutlass::IdentityTensorLayout::IdentityTensorLayout

CUTLASS_HOST_DEVICE IdentityTensorLayout(Stride const &stride=Stride())

Definition: tensor_ref.h:81

cutlass::TensorRef< Element, Layout >::at

CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const

Returns a reference to the element at a given Coord.

Definition: tensor_ref.h:307

cutlass::SubbyteReference

Definition: subbyte_reference.h:294

cutlass::TensorRef< Element, Layout >::layout

CUTLASS_HOST_DEVICE Layout & layout()

Returns the layout object.

Definition: tensor_ref.h:265

cutlass::TensorRef_aligned

bool TensorRef_aligned(TensorRef< Element, Layout > const &ref, int alignment)

Definition: tensor_ref.h:382

cutlass::TensorRef< Element, Layout >::operator+

CUTLASS_HOST_DEVICE TensorRef operator+(TensorCoord const &b) const

Returns a TensorRef offset by a given amount.

Definition: tensor_ref.h:333

cutlass::TensorRef< Element, Layout >::operator+=

CUTLASS_HOST_DEVICE TensorRef & operator+=(TensorCoord const &b)

Returns a TensorRef offset by a given amount.

Definition: tensor_ref.h:341

cutlass::TensorRef< Element, Layout >::add_pointer_offset

CUTLASS_HOST_DEVICE TensorRef & add_pointer_offset(LongIndex offset_)

Adds an offset to each pointer.

Definition: tensor_ref.h:319

subbyte_reference.h

Provides a mechanism for packing and unpacking elements smaller than one byte.

cutlass::IdentityTensorLayout::kStrideRank

static int const kStrideRank

Rank of stride vector.

Definition: tensor_ref.h:51

cutlass::Coord::dot

CUTLASS_HOST_DEVICE LongIndex dot(Coord const &b, LongIndex sum=LongIndex(0)) const

Computes the dot product with anotherCoord object.

Definition: coord.h:246

cutlass::Coord::max_dim_index

CUTLASS_HOST_DEVICE int max_dim_index() const

Returns the index of the dimension with greatest value.

Definition: coord.h:130

cutlass.h

Basic include for CUTLASS.

cutlass::TensorRef< Element, Layout >< Element, Layout >::LongIndex

typename Layout::LongIndex LongIndex

Long index used for pointer offsets.

Definition: tensor_ref.h:168

cutlass::IdentityTensorLayout::capacity

CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &size) const

Compute the number of contiguous elements needed to store a tensor with the given size...

Definition: tensor_ref.h:103


Generated by 1.8.11