Back to Taskflow

Taskflow: A General-purpose Task-parallel Programming System

docs/for__each_8hpp_source.html

4.1.0 · 6.6 KB
Original Source

Taskflow: A General-purpose Task-parallel Programming System

Loading...

Searching...

No Matches

for_each.hpp

1#pragma once

2

3#include "../cudaflow.hpp"

4

9

10namespace tf {

11

12namespace detail {

13

// Kernel: cuda_for_each_kernel
// Applies the callable `c` to dereferenced elements of the range that starts
// at `first`.
//
//   first : iterator to the first element; it is dereferenced in device code
//   count : number of elements in the range
//   c     : callable invoked as c(*(first + k))
//
// E supplies the compile-time launch shape read here (E::nt, E::vt, E::nv).
// Each block asks cuda_get_tile for its slice of [0, count) and every thread
// then walks that slice through cuda_strided_iterate.
// NOTE(review): presumably E::nt is threads per block, E::vt items per thread
// and E::nv items per block - confirm against the execution policy type.
template <typename I, typename C, typename E>
__global__ void cuda_for_each_kernel(I first, unsigned count, C c) {
  auto thread_in_block = threadIdx.x;
  auto block_in_grid = blockIdx.x;

  // Slice of the range owned by this block.
  auto tile = cuda_get_tile(block_in_grid, E::nv, count);

  // `offset` is relative to the start of the tile; the first lambda argument
  // is not needed here.
  auto visit = [=](auto, auto offset) {
    c(*(first + tile.begin + offset));
  };

  cuda_strided_iterate<E::nt, E::vt>(visit, thread_in_block, tile.count());
}

29

// Kernel: cuda_for_each_index_kernel
// Applies the callable `c` to the indices first, first + inc, first + 2*inc,
// ... for `count` iterations.
//
//   first : first index of the range
//   inc   : step between consecutive indices
//   count : number of indices to visit
//   c     : callable invoked as c(index), where index has type I
//
// E supplies the compile-time launch shape read here (E::nt, E::vt, E::nv).
// Each block asks cuda_get_tile for its slice of [0, count) and every thread
// then walks that slice through cuda_strided_iterate.
// NOTE(review): presumably E::nt is threads per block, E::vt items per thread
// and E::nv items per block - confirm against the execution policy type.
template <typename I, typename C, typename E>
__global__ void cuda_for_each_index_kernel(I first, I inc, unsigned count, C c) {
  auto tid = threadIdx.x;
  auto bid = blockIdx.x;
  auto tile = cuda_get_tile(bid, E::nv, count);
  cuda_strided_iterate<E::nt, E::vt>(
    [=]__device__(auto, auto j) {
      // The arithmetic mixes I with the tile offset, so integral promotion and
      // the usual arithmetic conversions can give the expression a type other
      // than I (e.g. int for a short index, or an unsigned type when the
      // offset is unsigned). Convert back to I so that a generic callable
      // (one taking `auto`) always sees the index in the caller's own type.
      c(static_cast<I>(first + inc * (tile.begin + j)));
    },
    tid, tile.count()
  );
}

43

44} // end of namespace detail -------------------------------------------------

45

46// ----------------------------------------------------------------------------

47// cudaFlow: for_each, for_each_index

48// ----------------------------------------------------------------------------

49

// Function: for_each
// Creates a kernel task that applies the callable `c` to each dereferenced
// element in [first, last) and returns the handle of that task.
//
// The launch uses E::num_blocks(n) blocks of E::nt threads and passes 0 as the
// third launch argument.
// The element count is narrowed to `unsigned`, so a negative distance or a
// range longer than an unsigned can represent is not detected here.
// NOTE(review): for an empty range n is 0 - confirm that E::num_blocks(0)
// yields a launch configuration the kernel task accepts.
template <typename Creator, typename Deleter>
template <typename I, typename C, typename E>
cudaTask cudaGraphBase<Creator, Deleter>::for_each(I first, I last, C c) {
  unsigned num_items = static_cast<unsigned>(std::distance(first, last));
  auto kernel_fn = detail::cuda_for_each_kernel<I, C, E>;

  return kernel(E::num_blocks(num_items), E::nt, 0, kernel_fn, first, num_items, c);
}

62

// Function: for_each
// Updates the parameters of an existing for_each kernel task so that it
// applies the callable `c` to each dereferenced element in [first, last).
//
// The launch uses E::num_blocks(n) blocks of E::nt threads and passes 0 as the
// third launch argument.
// The element count is narrowed to `unsigned`, so a negative distance or a
// range longer than an unsigned can represent is not detected here.
// NOTE(review): for an empty range n is 0 - confirm that E::num_blocks(0)
// yields a launch configuration the kernel task accepts.
template <typename Creator, typename Deleter>
template <typename I, typename C, typename E>
void cudaGraphExecBase<Creator, Deleter>::for_each(cudaTask task, I first, I last, C c) {
  unsigned num_items = static_cast<unsigned>(std::distance(first, last));
  auto kernel_fn = detail::cuda_for_each_kernel<I, C, E>;

  kernel(task, E::num_blocks(num_items), E::nt, 0, kernel_fn, first, num_items, c);
}

75

// Function: for_each_index
// Creates a kernel task that applies the callable `c` to each index in the
// range from `first` to `last` with step `inc`, and returns the handle of
// that task.
//
// The iteration count comes from distance(first, last, inc) and is narrowed
// to `unsigned`. The launch uses E::num_blocks(n) blocks of E::nt threads and
// passes 0 as the third launch argument.
// NOTE(review): for an empty range n is 0 - confirm that E::num_blocks(0)
// yields a launch configuration the kernel task accepts.
template <typename Creator, typename Deleter>
template <typename I, typename C, typename E>
cudaTask cudaGraphBase<Creator, Deleter>::for_each_index(I first, I last, I inc, C c) {
  unsigned num_steps = static_cast<unsigned>(distance(first, last, inc));
  auto kernel_fn = detail::cuda_for_each_index_kernel<I, C, E>;

  return kernel(E::num_blocks(num_steps), E::nt, 0, kernel_fn, first, inc, num_steps, c);
}

88

// Function: for_each_index
// Updates the parameters of an existing for_each_index kernel task so that it
// applies the callable `c` to each index in the range from `first` to `last`
// with step `inc`.
//
// The iteration count comes from distance(first, last, inc) and is narrowed
// to `unsigned`. The launch uses E::num_blocks(count) blocks of E::nt threads
// and passes 0 as the third launch argument.
// NOTE(review): for an empty range count is 0 - confirm that E::num_blocks(0)
// yields a launch configuration the kernel task accepts.
template <typename Creator, typename Deleter>
template <typename I, typename C, typename E>
void cudaGraphExecBase<Creator, Deleter>::for_each_index(cudaTask task, I first, I last, I inc, C c) {

  unsigned count = distance(first, last, inc);

  // This function returns void, so the update is issued as a plain statement,
  // the same way the for_each overload of this class does it.
  kernel(task,
    E::num_blocks(count), E::nt, 0,
    detail::cuda_for_each_index_kernel<I, C, E>, first, inc, count, c
  );
}

101

102

103} // end of namespace tf -----------------------------------------------------

104

105

106

107

108

109

tf::cudaGraphBase::for_each

cudaTask for_each(I first, I last, C callable)

applies a callable to each dereferenced element of the data array

Definition for_each.hpp:53

tf::cudaGraphBase::kernel

cudaTask kernel(dim3 g, dim3 b, size_t s, F f, ArgsT... args)

creates a kernel task

Definition cuda_graph.hpp:1010

tf::cudaGraphBase::for_each_index

cudaTask for_each_index(I first, I last, I step, C callable)

applies a callable to each index in the range with the step size

Definition for_each.hpp:79

tf::cudaGraphExecBase::kernel

void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)

updates parameters of a kernel task

Definition cuda_graph_exec.hpp:279

tf::cudaGraphExecBase::for_each_index

void for_each_index(cudaTask task, I first, I last, I step, C callable)

updates parameters of a for_each_index kernel task created from the CUDA graph of *this

Definition for_each.hpp:92

tf::cudaGraphExecBase::for_each

void for_each(cudaTask task, I first, I last, C callable)

updates parameters of a for_each kernel task created from the CUDA graph of *this

Definition for_each.hpp:66

tf::cudaTask

class to create a task handle of a CUDA Graph node

Definition cuda_graph.hpp:315

tf

taskflow namespace

Definition small_vector.hpp:20

tf::distance

constexpr size_t distance(T beg, T end, T step)

calculates the number of iterations in the given index range

Definition iterator.hpp:71