Back to Cutlass

CUTLASS: device_memory.h Source File

docs/device__memory_8h_source.html

4.4.222.0 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

device_memory.h

Go to the documentation of this file.

1 /******************************************************************************

2 * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without

5 * modification, are not permitted.

6 *

7 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

8 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

9 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

10 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY

11 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

12 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

13 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

14 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

15 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

16 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

17 *

18 ******************************************************************************/

19

20 #pragma once

21

27 #include <memory>

28

29 #include "cutlass/platform/platform.h"

30 #include "cutlass/numeric_types.h"

31 #include "exceptions.h"

32

33 namespace cutlass {

34 namespace device_memory {

35

36 /******************************************************************************

37 * Allocation lifetime

38 ******************************************************************************/

39

41 template <typename T>

42 T* allocate(size_t count = 1) {

43 T* ptr = 0;

44size_t bytes = sizeof(T) * count;

45

46 cudaError_t cuda_error = cudaMalloc((void**)&ptr, bytes);

47if (cuda_error != cudaSuccess) {

48throw cuda_exception("Failed to allocate memory", cuda_error);

49 }

50

51return ptr;

52 }

53

55 template <typename T>

56 void free(T* ptr) {

57if (ptr) {

58 cudaError_t cuda_error = (cudaFree(ptr));

59if (cuda_error != cudaSuccess) {

60throw cuda_exception("Failed to free device memory", cuda_error);

61 }

62 }

63 }

64

65 /******************************************************************************

66 * Data movement

67 ******************************************************************************/

68

69 template <typename T>

70 void copy(T* dst, T const* src, size_t count, cudaMemcpyKind kind) {

71size_t bytes = count * sizeof_bits<T>::value / 8;

72if (bytes == 0 && count > 0)

73 bytes = 1;

74 cudaError_t cuda_error = (cudaMemcpy(dst, src, bytes, kind));

75if (cuda_error != cudaSuccess) {

76throw cuda_exception("cudaMemcpy() failed", cuda_error);

77 }

78 }

79

80 template <typename T>

81 void copy_to_device(T* dst, T const* src, size_t count = 1) {

82copy(dst, src, count, cudaMemcpyHostToDevice);

83 }

84

85 template <typename T>

86 void copy_to_host(T* dst, T const* src, size_t count = 1) {

87copy(dst, src, count, cudaMemcpyDeviceToHost);

88 }

89

90 template <typename T>

91 void copy_device_to_device(T* dst, T const* src, size_t count = 1) {

92copy(dst, src, count, cudaMemcpyDeviceToDevice);

93 }

94

95 template <typename T>

96 void copy_host_to_host(T* dst, T const* src, size_t count = 1) {

97copy(dst, src, count, cudaMemcpyHostToHost);

98 }

99

101 template <typename OutputIterator, typename T>

102 void insert_to_host(OutputIterator begin, OutputIterator end, T const* device_begin) {

103size_t elements = end - begin;

104copy_to_host(&*begin, device_begin, elements);

105 }

106

108 template <typename T, typename InputIterator>

109 void insert_to_device(T* device_begin, InputIterator begin, InputIterator end) {

110size_t elements = end - begin;

111copy_to_device(device_begin, &*begin, elements);

112 }

113

114 /******************************************************************************

115 * "Smart" device memory allocation

116 ******************************************************************************/

117

119 template <typename T>

120 struct allocation {

122struct deleter {

123void operator()(T* ptr) {

124 cudaError_t cuda_error = (cudaFree(ptr));

125if (cuda_error != cudaSuccess) {

126// noexcept

127// throw cuda_exception("cudaFree() failed", cuda_error);

128return;

129 }

130 }

131 };

132

133//

134// Data members

135//

136

138size_t capacity;

139

141platform::unique_ptr<T, deleter> smart_ptr;

142

143//

144// Methods

145//

146

148allocation() : capacity(0) {}

149

151allocation(size_t _capacity) : smart_ptr(allocate<T>(_capacity)), capacity(_capacity) {}

152

154allocation(allocation const &p): smart_ptr(allocate<T>(p.capacity)), capacity(p.capacity) {

155copy_device_to_device(smart_ptr.get(), p.get(), capacity);

156 }

157

159~allocation() { reset(); }

160

162 T* get() const { return smart_ptr.get(); }

163

165 T* release() {

166 capacity = 0;

167return smart_ptr.release();

168 }

169

171void reset() {

172 capacity = 0;

173 smart_ptr.reset();

174 }

175

177void reset(T* _ptr, size_t _capacity) {

178 smart_ptr.reset(_ptr);

179 capacity = _capacity;

180 }

181

183 T* operator->() const { return smart_ptr.get(); }

184

186 deleter& get_deleter() { return smart_ptr.get_deleter(); }

187

189const deleter& get_deleter() const { return smart_ptr.get_deleter(); }

190

192allocation & operator=(allocation const &p) {

193if (capacity != p.capacity) {

194 smart_ptr.reset(allocate<T>(p.capacity));

195 capacity = p.capacity;

196 }

197copy_device_to_device(smart_ptr.get(), p.get(), capacity);

198return *this;

199 }

200 };

201

202 } // namespace device_memory

203 } // namespace cutlass

cutlass

Definition: aligned_buffer.h:35

cutlass::device_memory::allocation::allocation

allocation(size_t _capacity)

Constructor: allocates capacity elements on the current CUDA device.

Definition: device_memory.h:151

cutlass::device_memory::insert_to_device

void insert_to_device(T *device_begin, InputIterator begin, InputIterator end)

Copies elements to device memory from host-side range.

Definition: device_memory.h:109

cutlass::device_memory::allocation::get_deleter

deleter & get_deleter()

Returns the deleter object which would be used for destruction of the managed object.

Definition: device_memory.h:186

cutlass::device_memory::copy_to_device

void copy_to_device(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:81

cutlass::device_memory::copy

void copy(T *dst, T const *src, size_t count, cudaMemcpyKind kind)

Definition: device_memory.h:70

cutlass::device_memory::allocation::deleter::operator()

void operator()(T *ptr)

Definition: device_memory.h:123

cutlass::device_memory::allocation::get

T * get() const

Returns a pointer to the managed object.

Definition: device_memory.h:162

cutlass::device_memory::allocation::reset

void reset()

Deletes the managed object and resets capacity to zero.

Definition: device_memory.h:171

platform.h

C++ features that may be otherwise unimplemented for CUDA device functions.

cutlass::device_memory::allocation::deleter

Delete functor for CUDA device memory.

Definition: device_memory.h:122

cutlass::platform::unique_ptr

std::unique_ptr

Definition: platform.h:712

cutlass::device_memory::allocation::release

T * release()

Releases the ownership of the managed object (without deleting) and resets capacity to zero...

Definition: device_memory.h:165

cutlass::platform::unique_ptr::get

pointer get() const noexcept

Returns a pointer to the managed object or nullptr if no object is owned.

Definition: platform.h:735

cutlass::device_memory::allocate

T * allocate(size_t count=1)

Allocate a buffer of count elements of type T on the current CUDA device.

Definition: device_memory.h:42

cutlass::device_memory::allocation::smart_ptr

platform::unique_ptr< T, deleter > smart_ptr

Smart pointer.

Definition: device_memory.h:141

cutlass::device_memory::allocation::capacity

size_t capacity

Number of elements of T allocated on the current CUDA device.

Definition: device_memory.h:138

cutlass::sizeof_bits

Defines the size of an element in bits.

Definition: numeric_types.h:42

cutlass::device_memory::copy_host_to_host

void copy_host_to_host(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:96

exceptions.h

C++ exception semantics for CUDA error codes.

numeric_types.h

Top-level include for all CUTLASS numeric types.

cutlass::platform::unique_ptr::get_deleter

Deleter & get_deleter() noexcept

Returns the deleter object.

Definition: platform.h:757

cutlass::device_memory::allocation::operator->

T * operator->() const

Returns a pointer to the object owned by *this.

Definition: device_memory.h:183

cutlass::platform::unique_ptr::reset

void reset(pointer p=pointer()) noexcept

Replaces the managed object, deleting the old object.

Definition: platform.h:745

cutlass::device_memory::copy_to_host

void copy_to_host(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:86

cutlass::device_memory::allocation::~allocation

~allocation()

Destructor.

Definition: device_memory.h:159

cutlass::device_memory::allocation::get_deleter

const deleter & get_deleter() const

Returns the deleter object which would be used for destruction of the managed object (const) ...

Definition: device_memory.h:189

cutlass::cuda_exception

C++ exception wrapper for CUDA cudaError_t.

Definition: exceptions.h:36

cutlass::device_memory::allocation::operator=

allocation & operator=(allocation const &p)

Copies a device-side memory allocation.

Definition: device_memory.h:192

cutlass::device_memory::allocation::allocation

allocation()

Constructor: allocates no memory.

Definition: device_memory.h:148

cutlass::device_memory::allocation::reset

void reset(T *_ptr, size_t _capacity)

Deletes managed object, if owned, and replaces its reference with a given pointer and capacity...

Definition: device_memory.h:177

cutlass::device_memory::free

void free(T *ptr)

Free the buffer pointed to by ptr.

Definition: device_memory.h:56

cutlass::device_memory::insert_to_host

void insert_to_host(OutputIterator begin, OutputIterator end, T const *device_begin)

Copies elements from device memory to host-side range.

Definition: device_memory.h:102

cutlass::device_memory::allocation

Device allocation abstraction that tracks size and capacity.

Definition: device_memory.h:120

cutlass::platform::unique_ptr::release

pointer release() noexcept

Releases ownership of the managed object, if any.

Definition: platform.h:738

cutlass::device_memory::allocation::allocation

allocation(allocation const &p)

Copy constructor.

Definition: device_memory.h:154

cutlass::device_memory::copy_device_to_device

void copy_device_to_device(T *dst, T const *src, size_t count=1)

Definition: device_memory.h:91


Generated by 1.8.11