Back to Taskflow

Taskflow: A General-purpose Task-parallel Programming System

docs/cuda__graph__exec_8hpp_source.html

4.1.0 22.5 KB
Original Source

| | Taskflow: A General-purpose Task-parallel Programming System |

Loading...

Searching...

No Matches

cuda_graph_exec.hpp

1#pragma once

2

3#include "cuda_graph.hpp"

4

5

6namespace tf {

7

8// ----------------------------------------------------------------------------

9// cudaGraphExec

10// ----------------------------------------------------------------------------

11

19class cudaGraphExecCreator {

20

21public:

22

26 cudaGraphExec_t operator ()() const {

27return nullptr;

28 }

29

33 cudaGraphExec_t operator ()(cudaGraphExec_t exec) const {

34return exec;

35 }

36

40 cudaGraphExec_t operator ()(cudaGraph_t graph) const {

41 cudaGraphExec_t exec;

42 TF_CHECK_CUDA(

43 cudaGraphInstantiate(&exec, graph, nullptr, nullptr, 0),

44"failed to create an executable graph"

45 );

46return exec;

47 }

48

52template <typename C, typename D>

53 cudaGraphExec_t operator ()(const cudaGraphBase<C, D>& graph) const {

54return this->operator()(graph.get());

55 }

56};

57

65class cudaGraphExecDeleter {

66

67public:

68

76void operator ()(cudaGraphExec_t executable) const {

77 cudaGraphExecDestroy(executable);

78 }

79};

80

92template <typename Creator, typename Deleter>

93class cudaGraphExecBase : public std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter> {

94

95static_assert(std::is_pointer_v<cudaGraphExec_t>, "cudaGraphExec_t is not a pointer type");

96

97public:

98

102using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>;

103

111template <typename... ArgsT>

112explicit cudaGraphExecBase(ArgsT&& ... args) : base_type(

113 Creator{}(std::forward<ArgsT>(args)...), Deleter()

114 ) {}

115

119cudaGraphExecBase(cudaGraphExecBase&&) = default;

120

124cudaGraphExecBase& operator =(cudaGraphExecBase&&) = default;

125

126// ----------------------------------------------------------------------------------------------

127// Update Methods

128// ----------------------------------------------------------------------------------------------

129

135template <typename C>

136void host(cudaTask task, C&& callable, void* user_data);

137

145template <typename F, typename... ArgsT>

146void kernel(

147cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args

148 );

149

159void memset(cudaTask task, void* dst, int ch, size_t count);

160

170void memcpy(cudaTask task, void* tgt, const void* src, size_t bytes);

171

182template <typename T, std::enable_if_t<

183 is_pod_v<T> && (sizeof(T)==1 || sizeof(T)==2 || sizeof(T)==4), void>* = nullptr

184 >

185void zero(cudaTask task, T* dst, size_t count);

186

197template <typename T, std::enable_if_t<

198 is_pod_v<T> && (sizeof(T)==1 || sizeof(T)==2 || sizeof(T)==4), void>* = nullptr

199 >

200void fill(cudaTask task, T* dst, T value, size_t count);

201

211template <typename T,

212 std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr

213 >

214void copy(cudaTask task, T* tgt, const T* src, size_t num);

215

216//---------------------------------------------------------------------------

217// Algorithm Primitives

218//---------------------------------------------------------------------------

219

226template <typename C>

227void single_task(cudaTask task, C c);

228

232template <typename I, typename C, typename E = cudaDefaultExecutionPolicy>

233void for_each(cudaTask task, I first, I last, C callable);

234

238template <typename I, typename C, typename E = cudaDefaultExecutionPolicy>

239void for_each_index(cudaTask task, I first, I last, I step, C callable);

240

244template <typename I, typename O, typename C, typename E = cudaDefaultExecutionPolicy>

245void transform(cudaTask task, I first, I last, O output, C c);

246

250template <typename I1, typename I2, typename O, typename C, typename E = cudaDefaultExecutionPolicy>

251void transform(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c);

252

253

254private:

255

256cudaGraphExecBase(const cudaGraphExecBase&) = delete;

257

258cudaGraphExecBase& operator =(const cudaGraphExecBase&) = delete;

259};

260

261// ------------------------------------------------------------------------------------------------

262// update methods

263// ------------------------------------------------------------------------------------------------

264

265// Function: host

266template <typename Creator, typename Deleter>

267template <typename C>

268void cudaGraphExecBase<Creator, Deleter>::host(cudaTask task, C&& func, void* user_data) {

269 cudaHostNodeParams p {func, user_data};

270 TF_CHECK_CUDA(

271 cudaGraphExecHostNodeSetParams(this->get(), task._native_node, &p),

272"failed to update kernel parameters on ", task

273 );

274}

275

276// Function: update kernel parameters

277template <typename Creator, typename Deleter>

278template <typename F, typename... ArgsT>

279void cudaGraphExecBase<Creator, Deleter>::kernel(

280cudaTask task, dim3 g, dim3 b, size_t s, F f, ArgsT... args

  1. {

282 cudaKernelNodeParams p;

283

284void* arguments[sizeof...(ArgsT)] = { (void*)(&args)... };

285 p.func = (void*)f;

286 p.gridDim = g;

287 p.blockDim = b;

288 p.sharedMemBytes = s;

289 p.kernelParams = arguments;

290 p.extra = nullptr;

291

292 TF_CHECK_CUDA(

293 cudaGraphExecKernelNodeSetParams(this->get(), task._native_node, &p),

294"failed to update kernel parameters on ", task

295 );

296}

297

298// Function: update copy parameters

299template <typename Creator, typename Deleter>

300template <typename T, std::enable_if_t<!std::is_same_v<T, void>, void>*>

301void cudaGraphExecBase<Creator, Deleter>::copy(cudaTask task, T* tgt, const T* src, size_t num) {

302auto p = cuda_get_copy_parms(tgt, src, num);

303 TF_CHECK_CUDA(

304 cudaGraphExecMemcpyNodeSetParams(this->get(), task._native_node, &p),

305"failed to update memcpy parameters on ", task

306 );

307}

308

309// Function: update memcpy parameters

310template <typename Creator, typename Deleter>

311void cudaGraphExecBase<Creator, Deleter>::memcpy(

312cudaTask task, void* tgt, const void* src, size_t bytes

  1. {

314auto p = cuda_get_memcpy_parms(tgt, src, bytes);

315

316 TF_CHECK_CUDA(

317 cudaGraphExecMemcpyNodeSetParams(this->get(), task._native_node, &p),

318"failed to update memcpy parameters on ", task

319 );

320}

321

322// Procedure: memset

323template <typename Creator, typename Deleter>

324void cudaGraphExecBase<Creator, Deleter>::memset(cudaTask task, void* dst, int ch, size_t count) {

325auto p = cuda_get_memset_parms(dst, ch, count);

326 TF_CHECK_CUDA(

327 cudaGraphExecMemsetNodeSetParams(this->get(), task._native_node, &p),

328"failed to update memset parameters on ", task

329 );

330}

331

332// Procedure: fill

333template <typename Creator, typename Deleter>

334template <typename T, std::enable_if_t<

335 is_pod_v<T> && (sizeof(T)==1 || sizeof(T)==2 || sizeof(T)==4), void>*

336>

337void cudaGraphExecBase<Creator, Deleter>::fill(cudaTask task, T* dst, T value, size_t count) {

338auto p = cuda_get_fill_parms(dst, value, count);

339 TF_CHECK_CUDA(

340 cudaGraphExecMemsetNodeSetParams(this->get(), task._native_node, &p),

341"failed to update memset parameters on ", task

342 );

343}

344

345// Procedure: zero

346template <typename Creator, typename Deleter>

347template <typename T, std::enable_if_t<

348 is_pod_v<T> && (sizeof(T)==1 || sizeof(T)==2 || sizeof(T)==4), void>*

349>

350void cudaGraphExecBase<Creator, Deleter>::zero(cudaTask task, T* dst, size_t count) {

351auto p = cuda_get_zero_parms(dst, count);

352 TF_CHECK_CUDA(

353 cudaGraphExecMemsetNodeSetParams(this->get(), task._native_node, &p),

354"failed to update memset parameters on ", task

355 );

356}

357

358//-------------------------------------------------------------------------------------------------

359// forward declaration

360//-------------------------------------------------------------------------------------------------

361

365template <typename SC, typename SD>

366cudaStreamBase<SC, SD>& cudaStreamBase<SC, SD>::run(cudaGraphExec_t exec) {

367 TF_CHECK_CUDA(

368 cudaGraphLaunch(exec, this->get()), "failed to launch a CUDA executable graph"

369 );

370return *this;

371}

372

376template <typename SC, typename SD>

377template <typename EC, typename ED>

378cudaStreamBase<SC, SD>& cudaStreamBase<SC, SD>::run(const cudaGraphExecBase<EC, ED>& exec) {

379return run(exec.get());

380}

381

382

383

384} // end of namespace tf -------------------------------------------------------------------------

tf::cudaGraphBase

class to create a CUDA graph with unique ownership

Definition cuda_graph.hpp:531

tf::cudaGraphExecBase

class to create an executable CUDA graph with unique ownership

Definition cuda_graph_exec.hpp:93

tf::cudaGraphExecBase::zero

void zero(cudaTask task, T *dst, size_t count)

updates parameters of a memset task to a zero task

Definition cuda_graph_exec.hpp:350

tf::cudaGraphExecBase::cudaGraphExecBase

cudaGraphExecBase(ArgsT &&... args)

constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator

Definition cuda_graph_exec.hpp:112

tf::cudaGraphExecBase::transform

void transform(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c)

updates parameters of a transform kernel task created from the CUDA graph of *this

Definition transform.hpp:108

tf::cudaGraphExecBase::operator=

cudaGraphExecBase & operator=(cudaGraphExecBase &&)=default

assign the rhs to *this using move semantics

tf::cudaGraphExecBase::cudaGraphExecBase

cudaGraphExecBase(cudaGraphExecBase &&)=default

constructs a cudaGraphExec from the given rhs using move semantics

tf::cudaGraphExecBase::kernel

void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)

updates parameters of a kernel task

Definition cuda_graph_exec.hpp:279

tf::cudaGraphExecBase::for_each_index

void for_each_index(cudaTask task, I first, I last, I step, C callable)

updates parameters of a for_each_index kernel task created from the CUDA graph of *this

Definition for_each.hpp:92

tf::cudaGraphExecBase::base_type

std::unique_ptr< std::remove_pointer_t< cudaGraphExec_t >, Deleter > base_type

base std::unique_ptr type

Definition cuda_graph_exec.hpp:102

tf::cudaGraphExecBase::host

void host(cudaTask task, C &&callable, void *user_data)

updates parameters of a host task

Definition cuda_graph_exec.hpp:268

tf::cudaGraphExecBase::transform

void transform(cudaTask task, I first, I last, O output, C c)

updates parameters of a transform kernel task created from the CUDA graph of *this

Definition transform.hpp:94

tf::cudaGraphExecBase::memset

void memset(cudaTask task, void *dst, int ch, size_t count)

updates parameters of a memset task

Definition cuda_graph_exec.hpp:324

tf::cudaGraphExecBase::single_task

void single_task(cudaTask task, C c)

updates a single-threaded kernel task

tf::cudaGraphExecBase::memcpy

void memcpy(cudaTask task, void *tgt, const void *src, size_t bytes)

updates parameters of a memcpy task

Definition cuda_graph_exec.hpp:311

tf::cudaGraphExecBase::copy

void copy(cudaTask task, T *tgt, const T *src, size_t num)

updates parameters of a memcpy task to a copy task

Definition cuda_graph_exec.hpp:301

tf::cudaGraphExecBase::for_each

void for_each(cudaTask task, I first, I last, C callable)

updates parameters of a for_each kernel task created from the CUDA graph of *this

Definition for_each.hpp:66

tf::cudaGraphExecBase::fill

void fill(cudaTask task, T *dst, T value, size_t count)

updates parameters of a memset task to a fill task

Definition cuda_graph_exec.hpp:337

tf::cudaGraphExecCreator

class to create functors for constructing executable CUDA graphs

Definition cuda_graph_exec.hpp:19

tf::cudaGraphExecCreator::operator()

cudaGraphExec_t operator()() const

returns a null executable CUDA graph

Definition cuda_graph_exec.hpp:26

tf::cudaGraphExecDeleter

class to create a functor for deleting an executable CUDA graph

Definition cuda_graph_exec.hpp:65

tf::cudaGraphExecDeleter::operator()

void operator()(cudaGraphExec_t executable) const

deletes an executable CUDA graph

Definition cuda_graph_exec.hpp:76

tf::cudaStreamBase

class to create a CUDA stream with unique ownership

Definition cuda_stream.hpp:189

tf::cudaStreamBase::cudaStreamBase

cudaStreamBase(ArgsT &&... args)

constructs a cudaStream object by passing the given arguments to the stream creator

Definition cuda_stream.hpp:211

tf::cudaStreamBase::run

cudaStreamBase & run(const cudaGraphExecBase< C, D > &exec)

runs the given executable CUDA graph

tf::cudaTask

class to create a task handle of a CUDA Graph node

Definition cuda_graph.hpp:315

tf

taskflow namespace

Definition small_vector.hpp:20

tf::cuda_get_zero_parms

cudaMemsetParams cuda_get_zero_parms(T *dst, size_t count)

gets the memset node parameter of a zero task (typed)

Definition cuda_graph.hpp:114

tf::cuda_get_memcpy_parms

cudaMemcpy3DParms cuda_get_memcpy_parms(void *tgt, const void *src, size_t bytes)

gets the memcpy node parameter of a memcpy task (untyped)

Definition cuda_graph.hpp:44

tf::cuda_get_memset_parms

cudaMemsetParams cuda_get_memset_parms(void *dst, int ch, size_t count)

gets the memset node parameter of a memset task (untyped)

Definition cuda_graph.hpp:69

tf::cuda_get_fill_parms

cudaMemsetParams cuda_get_fill_parms(T *dst, T value, size_t count)

gets the memset node parameter of a fill task (typed)

Definition cuda_graph.hpp:90

tf::cuda_get_copy_parms

cudaMemcpy3DParms cuda_get_copy_parms(T *tgt, const T *src, size_t num)

gets the memcpy node parameter of a copy task

Definition cuda_graph.hpp:23