Taskflow: A General - Taskflow

| | Taskflow: A General-purpose Task-parallel Programming System |

Searching...

No Matches

cuda_device.hpp

1#pragma once

3#include "cuda_error.hpp"

10namespace tf {

15inline size_t cuda_get_num_devices() {

16int N = 0;

17 TF_CHECK_CUDA(cudaGetDeviceCount(&N), "failed to get device count");

18return static_cast<size_t>(N);

19}

24inline int cuda_get_device() {

25int id;

26 TF_CHECK_CUDA(cudaGetDevice(&id), "failed to get current device id");

27return id;

28}

33inline void cuda_set_device(int id) {

34 TF_CHECK_CUDA(cudaSetDevice(id), "failed to switch to device ", id);

35}

40inline void cuda_get_device_property(int i, cudaDeviceProp& p) {

41 TF_CHECK_CUDA(

42 cudaGetDeviceProperties(&p, i), "failed to get property of device ", i

43 );

44}

49inline cudaDeviceProp cuda_get_device_property(int i) {

50 cudaDeviceProp p;

51 TF_CHECK_CUDA(

52 cudaGetDeviceProperties(&p, i), "failed to get property of device ", i

53 );

54return p;

55}

60inline void cuda_dump_device_property(std::ostream& os, const cudaDeviceProp& p) {

62 os << "Major revision number: " << p.major << '\n'

63 << "Minor revision number: " << p.minor << '\n'

64 << "Name: " << p.name << '\n'

65 << "Total global memory: " << p.totalGlobalMem << '\n'

66 << "Total shared memory per block: " << p.sharedMemPerBlock << '\n'

67 << "Total registers per block: " << p.regsPerBlock << '\n'

68 << "Warp size: " << p.warpSize << '\n'

69 << "Maximum memory pitch: " << p.memPitch << '\n'

70 << "Maximum threads per block: " << p.maxThreadsPerBlock << '\n';

72 os << "Maximum dimension of block: ";

73for (int i = 0; i < 3; ++i) {

74if(i) os << 'x';

75 os << p.maxThreadsDim[i];

76 }

77 os << '\n';

79 os << "Maximum dimension of grid: ";

80for (int i = 0; i < 3; ++i) {

81if(i) os << 'x';

82 os << p.maxGridSize[i];;

83 }

84 os << '\n';

85 os << "Total constant memory: " << p.totalConstMem << '\n'

86 << "Texture alignment: " << p.textureAlignment << '\n'

87 << "Number of multiprocessors: " << p.multiProcessorCount << '\n'

88 << "GPU sharing Host Memory: " << p.integrated << '\n'

89 << "Host page-locked mem mapping: " << p.canMapHostMemory << '\n'

90 << "Alignment for Surfaces: " << p.surfaceAlignment << '\n'

91 << "Device has ECC support: " << p.ECCEnabled << '\n'

92 << "Unified Addressing (UVA): " << p.unifiedAddressing << '\n';

93}

98inline size_t cuda_get_device_max_threads_per_block(int d) {

99int threads = 0;

100 TF_CHECK_CUDA(

101 cudaDeviceGetAttribute(&threads, cudaDevAttrMaxThreadsPerBlock, d),

102"failed to query the maximum threads per block on device ", d

103 )

104return threads;

105}

106

110inline size_t cuda_get_device_max_x_dim_per_block(int d) {

111int dim = 0;

112 TF_CHECK_CUDA(

113 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimX, d),

114"failed to query the maximum x-dimension per block on device ", d

115 )

116return dim;

117}

118

122inline size_t cuda_get_device_max_y_dim_per_block(int d) {

123int dim = 0;

124 TF_CHECK_CUDA(

125 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimY, d),

126"failed to query the maximum y-dimension per block on device ", d

127 )

128return dim;

129}

130

134inline size_t cuda_get_device_max_z_dim_per_block(int d) {

135int dim = 0;

136 TF_CHECK_CUDA(

137 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimZ, d),

138"failed to query the maximum z-dimension per block on device ", d

139 )

140return dim;

141}

142

146inline size_t cuda_get_device_max_x_dim_per_grid(int d) {

147int dim = 0;

148 TF_CHECK_CUDA(

149 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimX, d),

150"failed to query the maximum x-dimension per grid on device ", d

151 )

152return dim;

153}

154

158inline size_t cuda_get_device_max_y_dim_per_grid(int d) {

159int dim = 0;

160 TF_CHECK_CUDA(

161 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimY, d),

162"failed to query the maximum y-dimension per grid on device ", d

163 )

164return dim;

165}

166

170inline size_t cuda_get_device_max_z_dim_per_grid(int d) {

171int dim = 0;

172 TF_CHECK_CUDA(

173 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimZ, d),

174"failed to query the maximum z-dimension per grid on device ", d

175 )

176return dim;

177}

178

182inline size_t cuda_get_device_max_shm_per_block(int d) {

183int num = 0;

184 TF_CHECK_CUDA(

185 cudaDeviceGetAttribute(&num, cudaDevAttrMaxSharedMemoryPerBlock, d),

186"failed to query the maximum shared memory per block on device ", d

187 )

188return num;

189}

190

194inline size_t cuda_get_device_warp_size(int d) {

195int num = 0;

196 TF_CHECK_CUDA(

197 cudaDeviceGetAttribute(&num, cudaDevAttrWarpSize, d),

198"failed to query the warp size per block on device ", d

199 )

200return num;

201}

202

206inline int cuda_get_device_compute_capability_major(int d) {

207int num = 0;

208 TF_CHECK_CUDA(

209 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMajor, d),

210"failed to query the major number of compute capability of device ", d

211 )

212return num;

213}

214

218inline int cuda_get_device_compute_capability_minor(int d) {

219int num = 0;

220 TF_CHECK_CUDA(

221 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMinor, d),

222"failed to query the minor number of compute capability of device ", d

223 )

224return num;

225}

226

230inline bool cuda_get_device_unified_addressing(int d) {

231int num = 0;

232 TF_CHECK_CUDA(

233 cudaDeviceGetAttribute(&num, cudaDevAttrUnifiedAddressing, d),

234"failed to query unified addressing status on device ", d

235 )

236return num;

237}

238

239// ----------------------------------------------------------------------------

240// CUDA Version

241// ----------------------------------------------------------------------------

242

246inline int cuda_get_driver_version() {

247int num = 0;

248 TF_CHECK_CUDA(

249 cudaDriverGetVersion(&num),

250"failed to query the latest cuda version supported by the driver"

251 );

252return num;

253}

254

258inline int cuda_get_runtime_version() {

259int num = 0;

260 TF_CHECK_CUDA(

261 cudaRuntimeGetVersion(&num), "failed to query cuda runtime version"

262 );

263return num;

264}

265

266// ----------------------------------------------------------------------------

267// cudaScopedDevice

268// ----------------------------------------------------------------------------

269

/**
@class cudaScopedDevice

@brief RAII-styled guard that switches to a device on construction and
       switches back to the previous device on destruction

Default, copy, and move construction are deleted.
*/
class cudaScopedDevice {

  public:

    /**
    @brief constructs an RAII-styled device switcher

    @param device device id to switch to for the lifetime of the guard
    */
    explicit cudaScopedDevice(int device);

    /**
    @brief destructs the guard and switches back to the previous device context
    */
    ~cudaScopedDevice();

  private:

    // A guard is always built from a target device and cannot be duplicated
    // or relocated.
    cudaScopedDevice(cudaScopedDevice&&) = delete;
    cudaScopedDevice(const cudaScopedDevice&) = delete;
    cudaScopedDevice() = delete;

    // Device to switch back to in the destructor; -1 when the requested
    // device was already current and nothing has to be restored.
    int _p;
};

313

314// Constructor

315inline cudaScopedDevice::cudaScopedDevice(int dev) {

316 TF_CHECK_CUDA(cudaGetDevice(&_p), "failed to get current device scope");

317if(_p == dev) {

318 _p = -1;

319 }

320else {

321 TF_CHECK_CUDA(cudaSetDevice(dev), "failed to scope on device ", dev);

322 }

323}

324

325// Destructor

326inline cudaScopedDevice::~cudaScopedDevice() {

327if(_p != -1) {

328 cudaSetDevice(_p);

329//TF_CHECK_CUDA(cudaSetDevice(_p), "failed to scope back to device ", _p);

330 }

331}

332

333} // end of namespace tf -----------------------------------------------------

334

335

336

337

338

tf::cudaScopedDevice::cudaScopedDevice

cudaScopedDevice(int device)

constructs an RAII-styled device switcher

Definition cuda_device.hpp:315

tf::cudaScopedDevice::~cudaScopedDevice

~cudaScopedDevice()

destructs the guard and switches back to the previous device context

Definition cuda_device.hpp:326

taskflow namespace

Definition small_vector.hpp:20

tf::cuda_get_device_max_z_dim_per_grid

size_t cuda_get_device_max_z_dim_per_grid(int d)

queries the maximum z-dimension per grid on a device

Definition cuda_device.hpp:170

tf::cuda_get_device_compute_capability_major

int cuda_get_device_compute_capability_major(int d)

queries the major number of compute capability of a device

Definition cuda_device.hpp:206

tf::cuda_get_device

int cuda_get_device()

gets the current device associated with the caller thread

Definition cuda_device.hpp:24

tf::cuda_get_runtime_version

int cuda_get_runtime_version()

queries the CUDA Runtime version (1000 * major + 10 * minor)

Definition cuda_device.hpp:258

tf::cuda_get_device_property

void cuda_get_device_property(int i, cudaDeviceProp &p)

obtains the device property

Definition cuda_device.hpp:40

tf::cuda_get_driver_version

int cuda_get_driver_version()

queries the latest CUDA version (1000 * major + 10 * minor) supported by the driver

Definition cuda_device.hpp:246

tf::cuda_get_device_max_z_dim_per_block

size_t cuda_get_device_max_z_dim_per_block(int d)

queries the maximum z-dimension per block on a device

Definition cuda_device.hpp:134

tf::cuda_get_device_max_x_dim_per_grid

size_t cuda_get_device_max_x_dim_per_grid(int d)

queries the maximum x-dimension per grid on a device

Definition cuda_device.hpp:146

tf::cuda_get_device_compute_capability_minor

int cuda_get_device_compute_capability_minor(int d)

queries the minor number of compute capability of a device

Definition cuda_device.hpp:218

tf::cuda_get_device_max_y_dim_per_grid

size_t cuda_get_device_max_y_dim_per_grid(int d)

queries the maximum y-dimension per grid on a device

Definition cuda_device.hpp:158

tf::cuda_get_device_max_y_dim_per_block

size_t cuda_get_device_max_y_dim_per_block(int d)

queries the maximum y-dimension per block on a device

Definition cuda_device.hpp:122

tf::cuda_get_device_max_threads_per_block

size_t cuda_get_device_max_threads_per_block(int d)

queries the maximum threads per block on a device

Definition cuda_device.hpp:98

tf::cuda_get_num_devices

size_t cuda_get_num_devices()

queries the number of available devices

Definition cuda_device.hpp:15

tf::cuda_get_device_unified_addressing

bool cuda_get_device_unified_addressing(int d)

queries if the device supports unified addressing

Definition cuda_device.hpp:230

tf::cuda_set_device

void cuda_set_device(int id)

switches to a given device context

Definition cuda_device.hpp:33

tf::cuda_get_device_warp_size

size_t cuda_get_device_warp_size(int d)

queries the warp size on a device

Definition cuda_device.hpp:194

tf::cuda_get_device_max_shm_per_block

size_t cuda_get_device_max_shm_per_block(int d)

queries the maximum shared memory size in bytes per block on a device

Definition cuda_device.hpp:182

tf::cuda_get_device_max_x_dim_per_block

size_t cuda_get_device_max_x_dim_per_block(int d)

queries the maximum x-dimension per block on a device

Definition cuda_device.hpp:110

tf::cuda_dump_device_property

void cuda_dump_device_property(std::ostream &os, const cudaDeviceProp &p)

dumps the device property

Definition cuda_device.hpp:60

taskflow
cuda
cuda_device.hpp
Maintained by Dr. Tsung-Wei Huang — Generated by 1.13.1