Back to Cutlass

CUTLASS: host_tensor.h Source File

docs/host__tensor_8h_source.html

4.4.251.9 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

host_tensor.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

25 #pragma once

26

39 #include <vector>

40

41 #include "cutlass/cutlass.h"

42 #include "cutlass/matrix_traits.h"

43 #include "cutlass/tensor_ref.h"

44 #include "cutlass/tensor_view.h"

45

46 #include "device_memory.h"

47

48 namespace cutlass {

49

51

53 template <

55typename Element_,

57typename Layout_

58 >

59 class HostTensor {

60 public:

61

63using Element = Element_;

64

66using Layout = Layout_;

67

69static int const kRank = Layout::kRank;

70

72using Index = typename Layout::Index;

73

75using LongIndex = typename Layout::LongIndex;

76

78using TensorCoord = typename Layout::TensorCoord;

79

81using Stride = typename Layout::Stride;

82

84using TensorRef = TensorRef<Element, Layout>;

85

87using ConstTensorRef = typename TensorRef::ConstTensorRef;

88

90using TensorView = TensorView<Element, Layout>;

91

93using ConstTensorView = typename TensorView::ConstTensorView;

94

96using Reference = typename TensorRef::Reference;

97

99using ConstReference = typename ConstTensorRef::Reference;

100

102static int const kElementsPerStoredItem = (sizeof_bits<Element>::value < 8 ? sizeof(Element) * 8 / sizeof_bits<Element>::value : 1);

103

104private:

105

106//

107// Data members

108//

109

111TensorCoord extent_;

112

114Layout layout_;

115

117 std::vector<Element> host_;

118

120device_memory::allocation<Element> device_;

121

122public:

123//

124// Device and Host Methods

125//

126

128HostTensor() {}

129

131HostTensor(

132TensorCoord const &extent,

133bool device_backed = true

134 ) {

135

136 this->reset(extent, Layout::packed(extent), device_backed);

137 }

138

140HostTensor(

141TensorCoord const &extent,

142Layout const &layout,

143bool device_backed = true

144 ) {

145

146 this->reset(extent, layout, device_backed);

147 }

148

149~HostTensor() { }

150

152void reset() {

153 extent_ = TensorCoord();

154 layout_ = Layout::packed(extent_);

155

156 host_.clear();

157 device_.reset();

158 }

159

161void reserve(

162size_t count,

163bool device_backed_ = true) {

164

165 device_.reset();

166 host_.clear();

167

168 count /= kElementsPerStoredItem;

169

170 host_.resize(count);

171

172// Allocate memory

173Element* device_memory = nullptr;

174if (device_backed_) {

175 device_memory = device_memory::allocate<Element>(count);

176 }

177 device_.reset(device_memory, device_backed_ ? count : 0);

178 }

179

182void reset(

183TensorCoord const &extent,

184Layout const &layout,

185bool device_backed_ = true) {

186

187 extent_ = extent;

188 layout_ = layout;

189

190reserve(size_t(layout_.capacity(extent_)), device_backed_);

191 }

192

195void reset(

196TensorCoord const &extent,

197bool device_backed_ = true) {

198

199reset(extent, Layout::packed(extent), device_backed_);

200 }

201

204void resize(

205TensorCoord const &extent,

206Layout const &layout,

207bool device_backed_ = true) {

208

209 extent_ = extent;

210 layout_ = layout;

211

212LongIndex new_size = size_t(layout_.capacity(extent_));

213

214if (static_cast<decltype(host_.size())>(new_size) > host_.size()) {

215reserve(new_size);

216 }

217 }

218

221void resize(

222TensorCoord const &extent,

223bool device_backed_ = true) {

224

225resize(extent, Layout::packed(extent), device_backed_);

226 }

227

229size_t size() const {

230return host_.size() * kElementsPerStoredItem;

231 }

232

234LongIndex capacity() const {

235return layout_.capacity(extent_) * kElementsPerStoredItem;

236 }

237

239Element * host_data() { return host_.data(); }

240

242Element * host_data_ptr_offset(LongIndex ptr_element_offset) { return host_.data() + ptr_element_offset; }

243

245Reference host_data(LongIndex idx) {

246return ReferenceFactory<Element>::get(host_data(), idx);

247 }

248

250Element const * host_data() const { return host_.data(); }

251

253ConstReference host_data(LongIndex idx) const {

254return ReferenceFactory<Element const>::get(host_data(), idx);

255 }

256

258Element * device_data() { return device_.get(); }

259

261Element * device_data_ptr_offset(LongIndex ptr_element_offset) { return device_.get() + ptr_element_offset; }

262

264Element const * device_data() const { return device_.get(); }

265

267TensorRef host_ref(LongIndex ptr_element_offset=0) { return TensorRef(host_data_ptr_offset(ptr_element_offset), layout_); }

268

270ConstTensorRef host_ref(LongIndex ptr_element_offset=0) const { return ConstTensorRef(host_data_ptr_offset(ptr_element_offset), layout_); }

271

273TensorRef device_ref(LongIndex ptr_element_offset=0) {

274return TensorRef(device_data_ptr_offset(ptr_element_offset), layout_);

275 }

276

278ConstTensorRef device_ref(LongIndex ptr_element_offset=0) const {

279return TensorRef(device_data_ptr_offset(ptr_element_offset), layout_);

280 }

281

283TensorView host_view(LongIndex ptr_element_offset=0) {

284return TensorView(host_data_ptr_offset(ptr_element_offset), layout_, extent_);

285 }

286

288ConstTensorView host_view(LongIndex ptr_element_offset=0) const {

289return ConstTensorView(host_data_ptr_offset(ptr_element_offset), layout_, extent_);

290 }

291

293TensorView device_view(LongIndex ptr_element_offset=0) {

294return TensorView(device_data_ptr_offset(ptr_element_offset), layout_, extent_);

295 }

296

298ConstTensorView device_view(LongIndex ptr_element_offset=0) const {

299return ConstTensorView(device_data_ptr_offset(ptr_element_offset), layout_, extent_);

300 }

301

303bool device_backed() const {

304return (device_.get() == nullptr) ? false : true;

305 }

306

307

309Layout & layout() {

310return layout_;

311 }

312

314Layout layout() const {

315return layout_;

316 }

317

319Stride stride() const {

320return layout_.stride();

321 }

322

324Stride & stride() {

325return layout_.stride();

326 }

327

329Index stride(int dim) const {

330return layout_.stride().at(dim);

331 }

332

334Index & stride(int dim) {

335return layout_.stride().at(dim);

336 }

337

339LongIndex offset(TensorCoord const& coord) const {

340return layout_(coord);

341 }

342

344Reference at(TensorCoord const& coord) {

345return host_data(offset(coord));

346 }

347

349ConstReference at(TensorCoord const& coord) const {

350return host_data(offset(coord));

351 }

352

354TensorCoord extent() const {

355return extent_;

356 }

357

359TensorCoord & extent() {

360return extent_;

361 }

362

364void sync_host() {

365if (device_backed()) {

366device_memory::copy_to_host(

367host_data(), device_data(), size());

368 }

369 }

370

372void sync_device() {

373if (device_backed()) {

374device_memory::copy_to_device(

375device_data(), host_data(), size());

376 }

377 }

378

380void copy_in_device_to_host(

381Element const* ptr_device,

382LongIndex count = -1) {

383

384if (count < 0) {

385 count = capacity();

386 }

387else {

388 count = __NV_STD_MIN(capacity(), count);

389 }

390device_memory::copy_to_host(

391host_data(), ptr_device, count);

392 }

393

395void copy_in_device_to_device(

396Element const* ptr_device,

397LongIndex count = -1) {

398

399if (count < 0) {

400 count = capacity();

401 }

402else {

403 count = __NV_STD_MIN(capacity(), count);

404 }

405device_memory::copy_device_to_device(

406device_data(), ptr_device, count);

407 }

408

410void copy_in_host_to_device(

411Element const* ptr_host,

412LongIndex count = -1) {

413

414if (count < 0) {

415 count = capacity();

416 }

417else {

418 count = __NV_STD_MIN(capacity(), count);

419 }

420device_memory::copy_to_device(

421device_data(), ptr_host, count);

422 }

423

425void copy_in_host_to_host(

426Element const* ptr_host,

427LongIndex count = -1) {

428

429if (count < 0) {

430 count = capacity();

431 }

432else {

433 count = __NV_STD_MIN(capacity(), count);

434 }

435device_memory::copy_host_to_host(

436host_data(), ptr_host, count);

437 }

438

440void copy_out_device_to_host(

441Element * ptr_host,

442LongIndex count = -1) const {

443

444if (count < 0) {

445 count = capacity();

446 }

447else {

448 count = __NV_STD_MIN(capacity(), count);

449 }

450device_memory::copy_to_host(

451 ptr_host, device_data(), count);

452 }

453

455void copy_out_device_to_device(

456Element * ptr_device,

457LongIndex count = -1) const {

458

459if (count < 0) {

460 count = capacity();

461 }

462else {

463 count = __NV_STD_MIN(capacity(), count);

464 }

465device_memory::copy_device_to_device(

466 ptr_device, device_data(), count);

467 }

468

470void copy_out_host_to_device(

471Element * ptr_device,

472LongIndex count = -1) const {

473

474if (count < 0) {

475 count = capacity();

476 }

477else {

478 count = __NV_STD_MIN(capacity(), count);

479 }

480device_memory::copy_to_device(

481 ptr_device, host_data(), count);

482 }

483

485void copy_out_host_to_host(

486Element * ptr_host,

487LongIndex count = -1) const {

488

489if (count < 0) {

490 count = capacity();

491 }

492else {

493 count = __NV_STD_MIN(capacity(), count);

494 }

495device_memory::copy_host_to_host(

496 ptr_host, host_data(), count);

497 }

498 };

499

501

502 } // namespace cutlass

cutlass::HostTensor::stride

Stride & stride()

Returns the layout object's stride vector.

Definition: host_tensor.h:324

cutlass

Definition: aligned_buffer.h:35

cutlass::HostTensor::HostTensor

HostTensor(TensorCoord const &extent, Layout const &layout, bool device_backed=true)

Constructs a tensor given an extent and layout.

Definition: host_tensor.h:140

tensor_ref.h

Defines a structure containing strides, bounds, and a pointer to tensor data.

cutlass::HostTensor::Stride

typename Layout::Stride Stride

Layout's stride vector.

Definition: host_tensor.h:81

cutlass::HostTensor::copy_in_host_to_device

void copy_in_host_to_device(Element const *ptr_host, LongIndex count=-1)

Copy data from a caller-supplied host pointer into device memory.

Definition: host_tensor.h:410

cutlass::HostTensor::stride

Index & stride(int dim)

Returns the layout object's stride in a given physical dimension.

Definition: host_tensor.h:334

cutlass::device_memory::copy_to_device

void copy_to_device(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:81

cutlass::HostTensor::device_view

ConstTensorView device_view(LongIndex ptr_element_offset=0) const

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:298

cutlass::HostTensor::LongIndex

typename Layout::LongIndex LongIndex

Long index used for pointer offsets.

Definition: host_tensor.h:75

cutlass::HostTensor::Layout

Layout_ Layout

Mapping function from logical coordinate to linear memory.

Definition: host_tensor.h:66

cutlass::HostTensor::host_view

ConstTensorView host_view(LongIndex ptr_element_offset=0) const

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:288

cutlass::HostTensor::capacity

LongIndex capacity() const

Returns the logical capacity based on extent and layout. May differ from size().

Definition: host_tensor.h:234

cutlass::HostTensor::copy_out_host_to_host

void copy_out_host_to_host(Element *ptr_host, LongIndex count=-1) const

Copy data out of host memory into a caller-supplied host pointer.

Definition: host_tensor.h:485

cutlass::HostTensor::sync_host

void sync_host()

Copies data from device to host.

Definition: host_tensor.h:364

cutlass::HostTensor::layout

Layout layout() const

Returns the layout object.

Definition: host_tensor.h:314

cutlass::HostTensor::copy_in_host_to_host

void copy_in_host_to_host(Element const *ptr_host, LongIndex count=-1)

Copy data from a caller-supplied host pointer into host memory.

Definition: host_tensor.h:425

cutlass::HostTensor::device_data

Element * device_data()

Gets pointer to device data.

Definition: host_tensor.h:258

cutlass::device_memory::allocation::get

T * get() const

Returns a pointer to the managed object.

Definition: device_memory.h:162

cutlass::device_memory::allocation::reset

void reset()

Deletes the managed object and resets capacity to zero.

Definition: device_memory.h:171

cutlass::HostTensor::device_data

Element const * device_data() const

Gets pointer to device data.

Definition: host_tensor.h:264

cutlass::HostTensor::kElementsPerStoredItem

static int const kElementsPerStoredItem

Used to handle packing of subbyte elements.

Definition: host_tensor.h:102

tensor_view.h

Defines a structure containing strides and a pointer to tensor data.

device_memory.h

C++ interface to CUDA device memory management functions.

cutlass::HostTensor::~HostTensor

~HostTensor()

Definition: host_tensor.h:149

cutlass::TensorRef< Element, Layout >::ConstTensorRef

TensorRef< typename platform::remove_const< Element >::type const, Layout > ConstTensorRef

TensorRef to constant data.

Definition: tensor_ref.h:179

cutlass::HostTensor::copy_in_device_to_device

void copy_in_device_to_device(Element const *ptr_device, LongIndex count=-1)

Copy data from a caller-supplied device pointer into device memory.

Definition: host_tensor.h:395

cutlass::HostTensor::ConstReference

typename ConstTensorRef::Reference ConstReference

Constant reference to element in tensor.

Definition: host_tensor.h:99

cutlass::HostTensor::stride

Index stride(int dim) const

Returns the layout object's stride in a given physical dimension.

Definition: host_tensor.h:329

cutlass::HostTensor::sync_device

void sync_device()

Copies data from host to device.

Definition: host_tensor.h:372

cutlass::TensorView< Element, Layout >::ConstTensorView

TensorView< typename platform::remove_const< Element >::type const, Layout > ConstTensorView

TensorView pointing to constant memory.

Definition: tensor_view.h:95

cutlass::HostTensor::TensorView

TensorView< Element, Layout > TensorView

Tensor reference to device memory.

Definition: host_tensor.h:90

cutlass::HostTensor::host_data_ptr_offset

Element * host_data_ptr_offset(LongIndex ptr_element_offset)

Gets pointer to host data with a pointer offset.

Definition: host_tensor.h:242

cutlass::HostTensor

Host tensor.

Definition: host_tensor.h:59

cutlass::HostTensor::device_view

TensorView device_view(LongIndex ptr_element_offset=0)

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:293

cutlass::TensorView< Element, Layout >

cutlass::HostTensor::copy_out_device_to_host

void copy_out_device_to_host(Element *ptr_host, LongIndex count=-1) const

Copy data out of device memory into a caller-supplied host pointer.

Definition: host_tensor.h:440

cutlass::HostTensor::copy_out_host_to_device

void copy_out_host_to_device(Element *ptr_device, LongIndex count=-1) const

Copy data out of host memory into a caller-supplied device pointer.

Definition: host_tensor.h:470

cutlass::HostTensor::at

ConstReference at(TensorCoord const &coord) const

Returns a const reference to the element at the logical Coord in host memory.

Definition: host_tensor.h:349

cutlass::sizeof_bits

Defines the size of an element in bits.

Definition: numeric_types.h:42

cutlass::device_memory::copy_host_to_host

void copy_host_to_host(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:96

cutlass::HostTensor::host_ref

ConstTensorRef host_ref(LongIndex ptr_element_offset=0) const

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:270

cutlass::HostTensor::extent

TensorCoord extent() const

Returns the extent of the tensor.

Definition: host_tensor.h:354

cutlass::HostTensor::host_data

Element * host_data()

Gets pointer to host data.

Definition: host_tensor.h:239

cutlass::HostTensor::reserve

void reserve(size_t count, bool device_backed_=true)

Resizes internal memory allocations without affecting layout or extent.

Definition: host_tensor.h:161

cutlass::ReferenceFactory

Definition: subbyte_reference.h:557

cutlass::TensorRef< Element, Layout >

__NV_STD_MIN

#define __NV_STD_MIN(a, b)

Select minimum(a, b)

Definition: platform.h:168

cutlass::HostTensor::device_backed

bool device_backed() const

Returns true if device memory is allocated.

Definition: host_tensor.h:303

cutlass::HostTensor::TensorCoord

typename Layout::TensorCoord TensorCoord

Coordinate in logical tensor space.

Definition: host_tensor.h:78

cutlass::TensorRef< Element, Layout >::Reference

typename platform::conditional< sizeof_bits< Element >::value >=8, Element &, SubbyteReference< Element > >::type Reference

Reference type to an element.

Definition: tensor_ref.h:159

cutlass::HostTensor::host_data

Reference host_data(LongIndex idx)

Gets a reference to an element in host memory.

Definition: host_tensor.h:245

cutlass::HostTensor::extent

TensorCoord & extent()

Returns the extent of the tensor.

Definition: host_tensor.h:359

cutlass::HostTensor::HostTensor

HostTensor()

Default constructor.

Definition: host_tensor.h:128

cutlass::HostTensor::ConstTensorView

typename TensorView::ConstTensorView ConstTensorView

Tensor reference to constant device memory.

Definition: host_tensor.h:93

cutlass::HostTensor::resize

void resize(TensorCoord const &extent, bool device_backed_=true)

Definition: host_tensor.h:221

cutlass::device_memory::copy_to_host

void copy_to_host(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:86

cutlass::HostTensor::copy_in_device_to_host

void copy_in_device_to_host(Element const *ptr_device, LongIndex count=-1)

Copy data from a caller-supplied device pointer into host memory.

Definition: host_tensor.h:380

cutlass::HostTensor::offset

LongIndex offset(TensorCoord const &coord) const

Computes the offset of an index from the origin of the tensor.

Definition: host_tensor.h:339

cutlass::HostTensor::host_view

TensorView host_view(LongIndex ptr_element_offset=0)

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:283

cutlass::HostTensor::HostTensor

HostTensor(TensorCoord const &extent, bool device_backed=true)

Constructs a tensor given an extent. Assumes a packed layout.

Definition: host_tensor.h:131

cutlass::HostTensor::device_ref

TensorRef device_ref(LongIndex ptr_element_offset=0)

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:273

cutlass::HostTensor::Element

Element_ Element

Data type of individual access.

Definition: host_tensor.h:63

cutlass::HostTensor::at

Reference at(TensorCoord const &coord)

Returns a reference to the element at the logical Coord in host memory.

Definition: host_tensor.h:344

cutlass::HostTensor::reset

void reset(TensorCoord const &extent, bool device_backed_=true)

Definition: host_tensor.h:195

cutlass::HostTensor::ConstTensorRef

typename TensorRef::ConstTensorRef ConstTensorRef

Tensor reference to constant device memory.

Definition: host_tensor.h:87

cutlass::HostTensor::host_data

ConstReference host_data(LongIndex idx) const

Gets a constant reference to an element in host memory.

Definition: host_tensor.h:253

cutlass::HostTensor::reset

void reset()

Clears the HostTensor allocation to size/capacity = 0.

Definition: host_tensor.h:152

cutlass::HostTensor::kRank

static int const kRank

Logical rank of tensor index space.

Definition: host_tensor.h:69

cutlass::HostTensor::device_data_ptr_offset

Element * device_data_ptr_offset(LongIndex ptr_element_offset)

Gets pointer to device data with a pointer offset.

Definition: host_tensor.h:261

cutlass::HostTensor::size

size_t size() const

Returns the number of elements stored in the host tensor.

Definition: host_tensor.h:229

cutlass::HostTensor::device_ref

ConstTensorRef device_ref(LongIndex ptr_element_offset=0) const

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:278

cutlass::HostTensor::layout

Layout & layout()

Returns the layout object.

Definition: host_tensor.h:309

cutlass::HostTensor::Reference

typename TensorRef::Reference Reference

Reference to element in tensor.

Definition: host_tensor.h:96

cutlass::HostTensor::resize

void resize(TensorCoord const &extent, Layout const &layout, bool device_backed_=true)

Definition: host_tensor.h:204

matrix_traits.h

Defines properties of matrices used to denote layout and operands to GEMM kernels.

cutlass::HostTensor::TensorRef

TensorRef< Element, Layout > TensorRef

Tensor reference to device memory.

Definition: host_tensor.h:84

cutlass::device_memory::allocation< Element >

cutlass::HostTensor::reset

void reset(TensorCoord const &extent, Layout const &layout, bool device_backed_=true)

Definition: host_tensor.h:182

cutlass::HostTensor::stride

Stride stride() const

Returns the layout object's stride vector.

Definition: host_tensor.h:319

cutlass::HostTensor::Index

typename Layout::Index Index

Index type.

Definition: host_tensor.h:72

cutlass.h

Basic include for CUTLASS.

cutlass::HostTensor::host_ref

TensorRef host_ref(LongIndex ptr_element_offset=0)

Accesses the tensor reference pointing to data.

Definition: host_tensor.h:267

cutlass::HostTensor::copy_out_device_to_device

void copy_out_device_to_device(Element *ptr_device, LongIndex count=-1) const

Copy data out of device memory into a caller-supplied device pointer.

Definition: host_tensor.h:455

cutlass::device_memory::copy_device_to_device

void copy_device_to_device(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:91

cutlass::HostTensor::host_data

Element const * host_data() const

Gets pointer to host data.

Definition: host_tensor.h:250


Generated by 1.8.11