docs/device__memory_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
device_memory.h
Go to the documentation of this file.
1 /******************************************************************************
2 * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are not permitted.
6 *
7 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
8 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
9 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
10 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
11 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
12 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
13 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
14 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
15 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
16 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17 *
18 ******************************************************************************/
19
20 #pragma once
21
27 #include <memory>
28
29 #include "cutlass/platform/platform.h"
30 #include "cutlass/numeric_types.h"
31 #include "exceptions.h"
32
33 namespace cutlass {
34 namespace device_memory {
35
36 /******************************************************************************
37 * Allocation lifetime
38 ******************************************************************************/
39
41 template <typename T>
42 T* allocate(size_t count = 1) {
43 T* ptr = 0;
44size_t bytes = sizeof(T) * count;
45
46 cudaError_t cuda_error = cudaMalloc((void**)&ptr, bytes);
47if (cuda_error != cudaSuccess) {
48throw cuda_exception("Failed to allocate memory", cuda_error);
49 }
50
51return ptr;
52 }
53
55 template <typename T>
57if (ptr) {
58 cudaError_t cuda_error = (cudaFree(ptr));
59if (cuda_error != cudaSuccess) {
60throw cuda_exception("Failed to free device memory", cuda_error);
61 }
62 }
63 }
64
65 /******************************************************************************
66 * Data movement
67 ******************************************************************************/
68
69 template <typename T>
70 void copy(T* dst, T const* src, size_t count, cudaMemcpyKind kind) {
71size_t bytes = count * sizeof_bits<T>::value / 8;
72if (bytes == 0 && count > 0)
73 bytes = 1;
74 cudaError_t cuda_error = (cudaMemcpy(dst, src, bytes, kind));
75if (cuda_error != cudaSuccess) {
76throw cuda_exception("cudaMemcpy() failed", cuda_error);
77 }
78 }
79
80 template <typename T>
81 void copy_to_device(T* dst, T const* src, size_t count = 1) {
82copy(dst, src, count, cudaMemcpyHostToDevice);
83 }
84
85 template <typename T>
86 void copy_to_host(T* dst, T const* src, size_t count = 1) {
87copy(dst, src, count, cudaMemcpyDeviceToHost);
88 }
89
90 template <typename T>
91 void copy_device_to_device(T* dst, T const* src, size_t count = 1) {
92copy(dst, src, count, cudaMemcpyDeviceToDevice);
93 }
94
95 template <typename T>
96 void copy_host_to_host(T* dst, T const* src, size_t count = 1) {
97copy(dst, src, count, cudaMemcpyHostToHost);
98 }
99
101 template <typename OutputIterator, typename T>
102 void insert_to_host(OutputIterator begin, OutputIterator end, T const* device_begin) {
103size_t elements = end - begin;
104copy_to_host(&*begin, device_begin, elements);
105 }
106
108 template <typename T, typename InputIterator>
109 void insert_to_device(T* device_begin, InputIterator begin, InputIterator end) {
110size_t elements = end - begin;
111copy_to_device(device_begin, &*begin, elements);
112 }
113
114 /******************************************************************************
115 * "Smart" device memory allocation
116 ******************************************************************************/
117
119 template <typename T>
120 struct allocation {
123void operator()(T* ptr) {
124 cudaError_t cuda_error = (cudaFree(ptr));
125if (cuda_error != cudaSuccess) {
126// noexcept
127// throw cuda_exception("cudaFree() failed", cuda_error);
128return;
129 }
130 }
131 };
132
133//
134// Data members
135//
136
139
141platform::unique_ptr<T, deleter> smart_ptr;
142
143//
144// Methods
145//
146
148allocation() : capacity(0) {}
149
151allocation(size_t _capacity) : smart_ptr(allocate<T>(_capacity)), capacity(_capacity) {}
152
154allocation(allocation const &p): smart_ptr(allocate<T>(p.capacity)), capacity(p.capacity) {
155copy_device_to_device(smart_ptr.get(), p.get(), capacity);
156 }
157
159~allocation() { reset(); }
160
162 T* get() const { return smart_ptr.get(); }
163
166 capacity = 0;
167return smart_ptr.release();
168 }
169
172 capacity = 0;
173 smart_ptr.reset();
174 }
175
177void reset(T* _ptr, size_t _capacity) {
178 smart_ptr.reset(_ptr);
179 capacity = _capacity;
180 }
181
183 T* operator->() const { return smart_ptr.get(); }
184
186 deleter& get_deleter() { return smart_ptr.get_deleter(); }
187
189const deleter& get_deleter() const { return smart_ptr.get_deleter(); }
190
192allocation & operator=(allocation const &p) {
193if (capacity != p.capacity) {
194 smart_ptr.reset(allocate<T>(p.capacity));
195 capacity = p.capacity;
196 }
197copy_device_to_device(smart_ptr.get(), p.get(), capacity);
198return *this;
199 }
200 };
201
202 } // namespace device_memory
203 } // namespace cutlass
Definition: aligned_buffer.h:35
cutlass::device_memory::allocation::allocation
allocation(size_t _capacity)
Constructor: allocates capacity elements on the current CUDA device.
Definition: device_memory.h:151
cutlass::device_memory::insert_to_device
void insert_to_device(T *device_begin, InputIterator begin, InputIterator end)
Copies elements to device memory from host-side range.
Definition: device_memory.h:109
cutlass::device_memory::allocation::get_deleter
deleter & get_deleter()
Returns the deleter object which would be used for destruction of the managed object.
Definition: device_memory.h:186
cutlass::device_memory::copy_to_device
void copy_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:81
void copy(T *dst, T const *src, size_t count, cudaMemcpyKind kind)
Definition: device_memory.h:70
cutlass::device_memory::allocation::deleter::operator()
void operator()(T *ptr)
Definition: device_memory.h:123
cutlass::device_memory::allocation::get
T * get() const
Returns a pointer to the managed object.
Definition: device_memory.h:162
cutlass::device_memory::allocation::reset
void reset()
Deletes the managed object and resets capacity to zero.
Definition: device_memory.h:171
C++ features that may be otherwise unimplemented for CUDA device functions.
cutlass::device_memory::allocation::deleter
Delete functor for CUDA device memory.
Definition: device_memory.h:122
std::unique_ptr
Definition: platform.h:712
cutlass::device_memory::allocation::release
T * release()
Releases the ownership of the managed object (without deleting) and resets capacity to zero...
Definition: device_memory.h:165
cutlass::platform::unique_ptr::get
pointer get() const noexcept
Returns a pointer to the managed object or nullptr if no object is owned.
Definition: platform.h:735
cutlass::device_memory::allocate
T * allocate(size_t count=1)
Allocate a buffer of count elements of type T on the current CUDA device.
Definition: device_memory.h:42
cutlass::device_memory::allocation::smart_ptr
platform::unique_ptr< T, deleter > smart_ptr
Smart pointer.
Definition: device_memory.h:141
cutlass::device_memory::allocation::capacity
size_t capacity
Number of elements of T allocated on the current CUDA device.
Definition: device_memory.h:138
Defines the size of an element in bits.
Definition: numeric_types.h:42
cutlass::device_memory::copy_host_to_host
void copy_host_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:96
C++ exception semantics for CUDA error codes.
Top-level include for all CUTLASS numeric types.
cutlass::platform::unique_ptr::get_deleter
Deleter & get_deleter() noexcept
Returns the deleter object.
Definition: platform.h:757
cutlass::device_memory::allocation::operator->
T * operator->() const
Returns a pointer to the object owned by *this.
Definition: device_memory.h:183
cutlass::platform::unique_ptr::reset
void reset(pointer p=pointer()) noexcept
Replaces the managed object, deleting the old object.
Definition: platform.h:745
cutlass::device_memory::copy_to_host
void copy_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:86
cutlass::device_memory::allocation::~allocation
~allocation()
Destructor.
Definition: device_memory.h:159
cutlass::device_memory::allocation::get_deleter
const deleter & get_deleter() const
Returns the deleter object which would be used for destruction of the managed object (const) ...
Definition: device_memory.h:189
C++ exception wrapper for CUDA cudaError_t.
Definition: exceptions.h:36
cutlass::device_memory::allocation::operator=
allocation & operator=(allocation const &p)
Copies a device-side memory allocation.
Definition: device_memory.h:192
cutlass::device_memory::allocation::allocation
allocation()
Constructor: allocates no memory.
Definition: device_memory.h:148
cutlass::device_memory::allocation::reset
void reset(T *_ptr, size_t _capacity)
Deletes managed object, if owned, and replaces its reference with a given pointer and capacity...
Definition: device_memory.h:177
void free(T *ptr)
Free the buffer pointed to by ptr.
Definition: device_memory.h:56
cutlass::device_memory::insert_to_host
void insert_to_host(OutputIterator begin, OutputIterator end, T const *device_begin)
Copies elements from device memory to host-side range.
Definition: device_memory.h:102
cutlass::device_memory::allocation
Device allocation abstraction that tracks size and capacity.
Definition: device_memory.h:120
cutlass::platform::unique_ptr::release
pointer release() noexcept
Releases ownership of the managed object, if any.
Definition: platform.h:738
cutlass::device_memory::allocation::allocation
allocation(allocation const &p)
Copy constructor.
Definition: device_memory.h:154
cutlass::device_memory::copy_device_to_device
void copy_device_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:91
Generated by 1.8.11