docs/host__tensor_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
host_tensor.h
Go to the documentation of this file.
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
25 #pragma once
26
39 #include <vector>
40
41 #include "cutlass/cutlass.h"
42 #include "cutlass/matrix_traits.h"
43 #include "cutlass/tensor_ref.h"
44 #include "cutlass/tensor_view.h"
45
46 #include "device_memory.h"
47
48 namespace cutlass {
49
51
53 template <
55typename Element_,
57typename Layout_
58 >
59 class HostTensor {
60 public:
61
64
67
69static int const kRank = Layout::kRank;
70
72using Index = typename Layout::Index;
73
75using LongIndex = typename Layout::LongIndex;
76
78using TensorCoord = typename Layout::TensorCoord;
79
81using Stride = typename Layout::Stride;
82
84using TensorRef = TensorRef<Element, Layout>;
85
87using ConstTensorRef = typename TensorRef::ConstTensorRef;
88
90using TensorView = TensorView<Element, Layout>;
91
93using ConstTensorView = typename TensorView::ConstTensorView;
94
96using Reference = typename TensorRef::Reference;
97
99using ConstReference = typename ConstTensorRef::Reference;
100
102static int const kElementsPerStoredItem = (sizeof_bits<Element>::value < 8 ? sizeof(Element) * 8 / sizeof_bits<Element>::value : 1);
103
104private:
105
106//
107// Data members
108//
109
111TensorCoord extent_;
112
114Layout layout_;
115
117 std::vector<Element> host_;
118
120device_memory::allocation<Element> device_;
121
122public:
123//
124// Device and Host Methods
125//
126
128HostTensor() {}
129
132TensorCoord const &extent,
133bool device_backed = true
134 ) {
135
136 this->reset(extent, Layout::packed(extent), device_backed);
137 }
138
141TensorCoord const &extent,
143bool device_backed = true
144 ) {
145
146 this->reset(extent, layout, device_backed);
147 }
148
149~HostTensor() { }
150
153 extent_ = TensorCoord();
154 layout_ = Layout::packed(extent_);
155
156 host_.clear();
157 device_.reset();
158 }
159
162size_t count,
163bool device_backed_ = true) {
164
165 device_.reset();
166 host_.clear();
167
168 count /= kElementsPerStoredItem;
169
170 host_.resize(count);
171
172// Allocate memory
173Element* device_memory = nullptr;
174if (device_backed_) {
175 device_memory = device_memory::allocate<Element>(count);
176 }
177 device_.reset(device_memory, device_backed_ ? count : 0);
178 }
179
183TensorCoord const &extent,
185bool device_backed_ = true) {
186
187 extent_ = extent;
188 layout_ = layout;
189
190reserve(size_t(layout_.capacity(extent_)), device_backed_);
191 }
192
196TensorCoord const &extent,
197bool device_backed_ = true) {
198
199reset(extent, Layout::packed(extent), device_backed_);
200 }
201
205TensorCoord const &extent,
207bool device_backed_ = true) {
208
209 extent_ = extent;
210 layout_ = layout;
211
212LongIndex new_size = size_t(layout_.capacity(extent_));
213
214if (static_cast<decltype(host_.size())>(new_size) > host_.size()) {
215reserve(new_size);
216 }
217 }
218
222TensorCoord const &extent,
223bool device_backed_ = true) {
224
225resize(extent, Layout::packed(extent), device_backed_);
226 }
227
230return host_.size() * kElementsPerStoredItem;
231 }
232
234LongIndex capacity() const {
235return layout_.capacity(extent_) * kElementsPerStoredItem;
236 }
237
239Element * host_data() { return host_.data(); }
240
242Element * host_data_ptr_offset(LongIndex ptr_element_offset) { return host_.data() + ptr_element_offset; }
243
245Reference host_data(LongIndex idx) {
246return ReferenceFactory<Element>::get(host_data(), idx);
247 }
248
250Element const * host_data() const { return host_.data(); }
251
253ConstReference host_data(LongIndex idx) const {
254return ReferenceFactory<Element const>::get(host_data(), idx);
255 }
256
258Element * device_data() { return device_.get(); }
259
261Element * device_data_ptr_offset(LongIndex ptr_element_offset) { return device_.get() + ptr_element_offset; }
262
264Element const * device_data() const { return device_.get(); }
265
267TensorRef host_ref(LongIndex ptr_element_offset=0) { return TensorRef(host_data_ptr_offset(ptr_element_offset), layout_); }
268
270ConstTensorRef host_ref(LongIndex ptr_element_offset=0) const { return ConstTensorRef(host_data_ptr_offset(ptr_element_offset), layout_); }
271
273TensorRef device_ref(LongIndex ptr_element_offset=0) {
274return TensorRef(device_data_ptr_offset(ptr_element_offset), layout_);
275 }
276
278ConstTensorRef device_ref(LongIndex ptr_element_offset=0) const {
279return TensorRef(device_data_ptr_offset(ptr_element_offset), layout_);
280 }
281
283TensorView host_view(LongIndex ptr_element_offset=0) {
284return TensorView(host_data_ptr_offset(ptr_element_offset), layout_, extent_);
285 }
286
288ConstTensorView host_view(LongIndex ptr_element_offset=0) const {
289return ConstTensorView(host_data_ptr_offset(ptr_element_offset), layout_, extent_);
290 }
291
293TensorView device_view(LongIndex ptr_element_offset=0) {
294return TensorView(device_data_ptr_offset(ptr_element_offset), layout_, extent_);
295 }
296
298ConstTensorView device_view(LongIndex ptr_element_offset=0) const {
299return ConstTensorView(device_data_ptr_offset(ptr_element_offset), layout_, extent_);
300 }
301
303bool device_backed() const {
304return (device_.get() == nullptr) ? false : true;
305 }
306
307
310return layout_;
311 }
312
315return layout_;
316 }
317
320return layout_.stride();
321 }
322
325return layout_.stride();
326 }
327
329Index stride(int dim) const {
330return layout_.stride().at(dim);
331 }
332
335return layout_.stride().at(dim);
336 }
337
339LongIndex offset(TensorCoord const& coord) const {
340return layout_(coord);
341 }
342
344Reference at(TensorCoord const& coord) {
345return host_data(offset(coord));
346 }
347
349ConstReference at(TensorCoord const& coord) const {
350return host_data(offset(coord));
351 }
352
354TensorCoord extent() const {
355return extent_;
356 }
357
359TensorCoord & extent() {
360return extent_;
361 }
362
365if (device_backed()) {
366device_memory::copy_to_host(
367host_data(), device_data(), size());
368 }
369 }
370
372void sync_device() {
373if (device_backed()) {
374device_memory::copy_to_device(
375device_data(), host_data(), size());
376 }
377 }
378
380void copy_in_device_to_host(
381Element const* ptr_device,
382LongIndex count = -1) {
383
384if (count < 0) {
385 count = capacity();
386 }
387else {
388 count = __NV_STD_MIN(capacity(), count);
389 }
390device_memory::copy_to_host(
391host_data(), ptr_device, count);
392 }
393
395void copy_in_device_to_device(
396Element const* ptr_device,
397LongIndex count = -1) {
398
399if (count < 0) {
400 count = capacity();
401 }
402else {
403 count = __NV_STD_MIN(capacity(), count);
404 }
405device_memory::copy_device_to_device(
406device_data(), ptr_device, count);
407 }
408
410void copy_in_host_to_device(
411Element const* ptr_host,
412LongIndex count = -1) {
413
414if (count < 0) {
415 count = capacity();
416 }
417else {
418 count = __NV_STD_MIN(capacity(), count);
419 }
420device_memory::copy_to_device(
421device_data(), ptr_host, count);
422 }
423
425void copy_in_host_to_host(
426Element const* ptr_host,
427LongIndex count = -1) {
428
429if (count < 0) {
430 count = capacity();
431 }
432else {
433 count = __NV_STD_MIN(capacity(), count);
434 }
435device_memory::copy_host_to_host(
436host_data(), ptr_host, count);
437 }
438
440void copy_out_device_to_host(
441Element * ptr_host,
442LongIndex count = -1) const {
443
444if (count < 0) {
445 count = capacity();
446 }
447else {
448 count = __NV_STD_MIN(capacity(), count);
449 }
450device_memory::copy_to_host(
451 ptr_host, device_data(), count);
452 }
453
455void copy_out_device_to_device(
456Element * ptr_device,
457LongIndex count = -1) const {
458
459if (count < 0) {
460 count = capacity();
461 }
462else {
463 count = __NV_STD_MIN(capacity(), count);
464 }
465device_memory::copy_device_to_device(
466 ptr_device, device_data(), count);
467 }
468
470void copy_out_host_to_device(
471Element * ptr_device,
472LongIndex count = -1) const {
473
474if (count < 0) {
475 count = capacity();
476 }
477else {
478 count = __NV_STD_MIN(capacity(), count);
479 }
480device_memory::copy_to_device(
481 ptr_device, host_data(), count);
482 }
483
485void copy_out_host_to_host(
486Element * ptr_host,
487LongIndex count = -1) const {
488
489if (count < 0) {
490 count = capacity();
491 }
492else {
493 count = __NV_STD_MIN(capacity(), count);
494 }
495device_memory::copy_host_to_host(
496 ptr_host, host_data(), count);
497 }
498 };
499
501
502 } // namespace cutlass
Stride & stride()
Returns the layout object's stride vector.
Definition: host_tensor.h:324
Definition: aligned_buffer.h:35
cutlass::HostTensor::HostTensor
HostTensor(TensorCoord const &extent, Layout const &layout, bool device_backed=true)
Constructs a tensor given an extent and layout.
Definition: host_tensor.h:140
Defines a structure containing strides, bounds, and a pointer to tensor data.
typename Layout::Stride Stride
Layout's stride vector.
Definition: host_tensor.h:81
cutlass::HostTensor::copy_in_host_to_device
void copy_in_host_to_device(Element const *ptr_host, LongIndex count=-1)
Copies data from a caller-supplied host pointer into device memory.
Definition: host_tensor.h:410
Index & stride(int dim)
Returns the layout object's stride in a given physical dimension.
Definition: host_tensor.h:334
cutlass::device_memory::copy_to_device
void copy_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:81
cutlass::HostTensor::device_view
ConstTensorView device_view(LongIndex ptr_element_offset=0) const
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:298
cutlass::HostTensor::LongIndex
typename Layout::LongIndex LongIndex
Long index used for pointer offsets.
Definition: host_tensor.h:75
Layout_ Layout
Mapping function from logical coordinate to linear memory.
Definition: host_tensor.h:66
cutlass::HostTensor::host_view
ConstTensorView host_view(LongIndex ptr_element_offset=0) const
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:288
LongIndex capacity() const
Returns the logical capacity based on extent and layout. May differ from size().
Definition: host_tensor.h:234
cutlass::HostTensor::copy_out_host_to_host
void copy_out_host_to_host(Element *ptr_host, LongIndex count=-1) const
Copies data from host memory into a caller-supplied host pointer.
Definition: host_tensor.h:485
cutlass::HostTensor::sync_host
void sync_host()
Copies data from device to host.
Definition: host_tensor.h:364
Layout layout() const
Returns the layout object.
Definition: host_tensor.h:314
cutlass::HostTensor::copy_in_host_to_host
void copy_in_host_to_host(Element const *ptr_host, LongIndex count=-1)
Copies data from a caller-supplied host pointer into host memory.
Definition: host_tensor.h:425
cutlass::HostTensor::device_data
Element * device_data()
Gets pointer to device data.
Definition: host_tensor.h:258
cutlass::device_memory::allocation::get
T * get() const
Returns a pointer to the managed object.
Definition: device_memory.h:162
cutlass::device_memory::allocation::reset
void reset()
Deletes the managed object and resets capacity to zero.
Definition: device_memory.h:171
cutlass::HostTensor::device_data
Element const * device_data() const
Gets pointer to device data.
Definition: host_tensor.h:264
cutlass::HostTensor::kElementsPerStoredItem
static int const kElementsPerStoredItem
Used to handle packing of subbyte elements.
Definition: host_tensor.h:102
Defines a structure containing strides and a pointer to tensor data.
C++ interface to CUDA device memory management functions.
cutlass::HostTensor::~HostTensor
~HostTensor()
Definition: host_tensor.h:149
cutlass::TensorRef< Element, Layout >::ConstTensorRef
TensorRef< typename platform::remove_const< Element >::type const, Layout > ConstTensorRef
TensorRef to constant data.
Definition: tensor_ref.h:179
cutlass::HostTensor::copy_in_device_to_device
void copy_in_device_to_device(Element const *ptr_device, LongIndex count=-1)
Copies data from a caller-supplied device pointer into device memory.
Definition: host_tensor.h:395
cutlass::HostTensor::ConstReference
typename ConstTensorRef::Reference ConstReference
Constant reference to element in tensor.
Definition: host_tensor.h:99
Index stride(int dim) const
Returns the layout object's stride in a given physical dimension.
Definition: host_tensor.h:329
cutlass::HostTensor::sync_device
void sync_device()
Copies data from host to device.
Definition: host_tensor.h:372
cutlass::TensorView< Element, Layout >::ConstTensorView
TensorView< typename platform::remove_const< Element >::type const, Layout > ConstTensorView
TensorView pointing to constant memory.
Definition: tensor_view.h:95
cutlass::HostTensor::TensorView
TensorView< Element, Layout > TensorView
Tensor reference to device memory.
Definition: host_tensor.h:90
cutlass::HostTensor::host_data_ptr_offset
Element * host_data_ptr_offset(LongIndex ptr_element_offset)
Gets pointer to host data with a pointer offset.
Definition: host_tensor.h:242
Host tensor.
Definition: host_tensor.h:59
cutlass::HostTensor::device_view
TensorView device_view(LongIndex ptr_element_offset=0)
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:293
cutlass::TensorView< Element, Layout >
cutlass::HostTensor::copy_out_device_to_host
void copy_out_device_to_host(Element *ptr_host, LongIndex count=-1) const
Copies data from device memory into a caller-supplied host pointer.
Definition: host_tensor.h:440
cutlass::HostTensor::copy_out_host_to_device
void copy_out_host_to_device(Element *ptr_device, LongIndex count=-1) const
Copies data from host memory into a caller-supplied device pointer.
Definition: host_tensor.h:470
ConstReference at(TensorCoord const &coord) const
Returns a const reference to the element at the logical Coord in host memory.
Definition: host_tensor.h:349
Defines the size of an element in bits.
Definition: numeric_types.h:42
cutlass::device_memory::copy_host_to_host
void copy_host_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:96
ConstTensorRef host_ref(LongIndex ptr_element_offset=0) const
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:270
TensorCoord extent() const
Returns the extent of the tensor.
Definition: host_tensor.h:354
cutlass::HostTensor::host_data
Element * host_data()
Gets pointer to host data.
Definition: host_tensor.h:239
void reserve(size_t count, bool device_backed_=true)
Resizes internal memory allocations without affecting layout or extent.
Definition: host_tensor.h:161
Definition: subbyte_reference.h:557
cutlass::TensorRef< Element, Layout >
#define __NV_STD_MIN(a, b)
Select minimum(a, b)
Definition: platform.h:168
cutlass::HostTensor::device_backed
bool device_backed() const
Returns true if device memory is allocated.
Definition: host_tensor.h:303
cutlass::HostTensor::TensorCoord
typename Layout::TensorCoord TensorCoord
Coordinate in logical tensor space.
Definition: host_tensor.h:78
cutlass::TensorRef< Element, Layout >::Reference
typename platform::conditional< sizeof_bits< Element >::value >=8, Element &, SubbyteReference< Element > >::type Reference
Reference type to an element.
Definition: tensor_ref.h:159
cutlass::HostTensor::host_data
Reference host_data(LongIndex idx)
Gets a reference to an element in host memory.
Definition: host_tensor.h:245
TensorCoord & extent()
Returns the extent of the tensor.
Definition: host_tensor.h:359
cutlass::HostTensor::HostTensor
HostTensor()
Default constructor.
Definition: host_tensor.h:128
cutlass::HostTensor::ConstTensorView
typename TensorView::ConstTensorView ConstTensorView
Tensor reference to constant device memory.
Definition: host_tensor.h:93
void resize(TensorCoord const &extent, bool device_backed_=true)
Definition: host_tensor.h:221
cutlass::device_memory::copy_to_host
void copy_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:86
cutlass::HostTensor::copy_in_device_to_host
void copy_in_device_to_host(Element const *ptr_device, LongIndex count=-1)
Copy data from a caller-supplied device pointer into host memory.
Definition: host_tensor.h:380
LongIndex offset(TensorCoord const &coord) const
Computes the offset of an index from the origin of the tensor.
Definition: host_tensor.h:339
cutlass::HostTensor::host_view
TensorView host_view(LongIndex ptr_element_offset=0)
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:283
cutlass::HostTensor::HostTensor
HostTensor(TensorCoord const &extent, bool device_backed=true)
Constructs a tensor given an extent. Assumes a packed layout.
Definition: host_tensor.h:131
cutlass::HostTensor::device_ref
TensorRef device_ref(LongIndex ptr_element_offset=0)
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:273
Element_ Element
Data type of individual access.
Definition: host_tensor.h:63
Reference at(TensorCoord const &coord)
Returns a reference to the element at the logical Coord in host memory.
Definition: host_tensor.h:344
void reset(TensorCoord const &extent, bool device_backed_=true)
Definition: host_tensor.h:195
cutlass::HostTensor::ConstTensorRef
typename TensorRef::ConstTensorRef ConstTensorRef
Tensor reference to constant device memory.
Definition: host_tensor.h:87
cutlass::HostTensor::host_data
ConstReference host_data(LongIndex idx) const
Gets a constant reference to an element in host memory.
Definition: host_tensor.h:253
void reset()
Clears the HostTensor allocation to size/capacity = 0.
Definition: host_tensor.h:152
static int const kRank
Logical rank of tensor index space.
Definition: host_tensor.h:69
cutlass::HostTensor::device_data_ptr_offset
Element * device_data_ptr_offset(LongIndex ptr_element_offset)
Gets pointer to device data with a pointer offset.
Definition: host_tensor.h:261
size_t size() const
Returns the number of elements stored in the host tensor.
Definition: host_tensor.h:229
cutlass::HostTensor::device_ref
ConstTensorRef device_ref(LongIndex ptr_element_offset=0) const
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:278
Layout & layout()
Returns the layout object.
Definition: host_tensor.h:309
cutlass::HostTensor::Reference
typename TensorRef::Reference Reference
Reference to element in tensor.
Definition: host_tensor.h:96
void resize(TensorCoord const &extent, Layout const &layout, bool device_backed_=true)
Definition: host_tensor.h:204
Defines properties of matrices used to denote layout and operands to GEMM kernels.
cutlass::HostTensor::TensorRef
TensorRef< Element, Layout > TensorRef
Tensor reference to device memory.
Definition: host_tensor.h:84
cutlass::device_memory::allocation< Element >
void reset(TensorCoord const &extent, Layout const &layout, bool device_backed_=true)
Definition: host_tensor.h:182
Stride stride() const
Returns the layout object's stride vector.
Definition: host_tensor.h:319
typename Layout::Index Index
Index type.
Definition: host_tensor.h:72
Basic include for CUTLASS.
TensorRef host_ref(LongIndex ptr_element_offset=0)
Accesses the tensor reference pointing to data.
Definition: host_tensor.h:267
cutlass::HostTensor::copy_out_device_to_device
void copy_out_device_to_device(Element *ptr_device, LongIndex count=-1) const
Copies data from device memory into a caller-supplied device pointer.
Definition: host_tensor.h:455
cutlass::device_memory::copy_device_to_device
void copy_device_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:91
cutlass::HostTensor::host_data
Element const * host_data() const
Gets pointer to host data.
Definition: host_tensor.h:250
Generated by 1.8.11