Back to Taskflow

Taskflow: A General-purpose Task-parallel Programming System

docs/transform_8hpp_source.html

4.1.0 · 6.3 KB
Original Source

Taskflow: A General-purpose Task-parallel Programming System

Loading...

Searching...

No Matches

transform.hpp

1#pragma once

2

3#include "../cudaflow.hpp"

4

9

10namespace tf {

11

12// ----------------------------------------------------------------------------

13// transform

14// ----------------------------------------------------------------------------

15

16namespace detail {

17

// Unary transform kernel.
//
// Each block obtains its tile from cuda_get_tile(blockIdx.x, E::nv, count).
// For every index j that cuda_strided_iterate hands to the callback, the
// element at (tile.begin + j) is read from `first`, passed through `op`,
// and the result is stored at the same offset in `output`.
//
// I  - input iterator type
// O  - output iterator type
// C  - unary callable type
// E  - policy type providing nt, vt and nv
//      (presumably threads per block, items per thread and items per block
//       - confirm against the execution policy definition)
template <typename I, typename O, typename C, typename E>
__global__ void cuda_transform_kernel(I first, unsigned count, O output, C op) {

  auto thread_id  = threadIdx.x;
  auto block_id   = blockIdx.x;
  auto block_tile = cuda_get_tile(block_id, E::nv, count);

  // callback invoked for each index j within this block's tile
  auto apply_op = [=] __device__ (auto, auto j) {
    auto pos = j + block_tile.begin;
    *(output + pos) = op(*(first + pos));
  };

  cuda_strided_iterate<E::nt, E::vt>(apply_op, thread_id, block_tile.count());
}

35

// Binary transform kernel.
//
// Each block obtains its tile from cuda_get_tile(blockIdx.x, E::nv, count).
// For every index j that cuda_strided_iterate hands to the callback, the
// elements at (tile.begin + j) in `first1` and `first2` are combined with
// `op`, and the result is stored at the same offset in `output`.
//
// I1 - first input iterator type
// I2 - second input iterator type
// O  - output iterator type
// C  - binary callable type
// E  - policy type providing nt, vt and nv
//      (presumably threads per block, items per thread and items per block
//       - confirm against the execution policy definition)
template <typename I1, typename I2, typename O, typename C, typename E>
__global__ void cuda_transform_kernel(
  I1 first1, I2 first2, unsigned count, O output, C op
) {

  auto thread_id  = threadIdx.x;
  auto block_id   = blockIdx.x;
  auto block_tile = cuda_get_tile(block_id, E::nv, count);

  // callback invoked for each index j within this block's tile
  auto apply_op = [=] __device__ (auto, auto j) {
    auto pos = j + block_tile.begin;
    *(output + pos) = op(*(first1 + pos), *(first2 + pos));
  };

  cuda_strided_iterate<E::nt, E::vt>(apply_op, thread_id, block_tile.count());
}

55

56} // end of namespace detail -------------------------------------------------

57

58// ----------------------------------------------------------------------------

59// cudaFlow

60// ----------------------------------------------------------------------------

61

62// Function: transform

// Creates a kernel task that runs the unary transform kernel over
// [first, last), writing op(*it) for each input element to `output`.
//
// first, last - input range
// output      - beginning of the destination range
// c           - unary callable forwarded to the kernel
//
// Returns the cudaTask handle produced by kernel().
template <typename Creator, typename Deleter>
template <typename I, typename O, typename C, typename E>
cudaTask cudaGraphBase<Creator, Deleter>::transform(I first, I last, O output, C c) {

  // the kernel receives the element count as an unsigned value
  unsigned num_items = static_cast<unsigned>(std::distance(first, last));

  return this->kernel(
    E::num_blocks(num_items), E::nt, 0,
    detail::cuda_transform_kernel<I, O, C, E>,
    first, num_items, output, c
  );
}

75

76// Function: transform

// Creates a kernel task that runs the binary transform kernel over
// [first1, last1) paired element-wise with the range starting at `first2`,
// writing the result of `c` for each pair to `output`.
//
// first1, last1 - first input range (its length determines the item count)
// first2        - beginning of the second input range
// output        - beginning of the destination range
// c             - binary callable forwarded to the kernel
//
// Returns the cudaTask handle produced by kernel().
template <typename Creator, typename Deleter>
template <typename I1, typename I2, typename O, typename C, typename E>
cudaTask cudaGraphBase<Creator, Deleter>::transform(I1 first1, I1 last1, I2 first2, O output, C c) {

  // the kernel receives the element count as an unsigned value
  unsigned num_items = static_cast<unsigned>(std::distance(first1, last1));

  return this->kernel(
    E::num_blocks(num_items), E::nt, 0,
    detail::cuda_transform_kernel<I1, I2, O, C, E>,
    first1, first2, num_items, output, c
  );
}

89

90

91// Function: update transform

// Updates the parameters of an existing unary transform kernel task.
//
// task        - handle of the kernel task to update
// first, last - new input range
// output      - new beginning of the destination range
// c           - new unary callable forwarded to the kernel
template <typename Creator, typename Deleter>
template <typename I, typename O, typename C, typename E>
void cudaGraphExecBase<Creator, Deleter>::transform(cudaTask task, I first, I last, O output, C c) {

  // the kernel receives the element count as an unsigned value
  unsigned num_items = static_cast<unsigned>(std::distance(first, last));

  this->kernel(
    task,
    E::num_blocks(num_items), E::nt, 0,
    detail::cuda_transform_kernel<I, O, C, E>,
    first, num_items, output, c
  );
}

104

105// Function: update transform

// Updates the parameters of an existing binary transform kernel task.
//
// task          - handle of the kernel task to update
// first1, last1 - new first input range (its length determines the item count)
// first2        - new beginning of the second input range
// output        - new beginning of the destination range
// c             - new binary callable forwarded to the kernel
template <typename Creator, typename Deleter>
template <typename I1, typename I2, typename O, typename C, typename E>
void cudaGraphExecBase<Creator, Deleter>::transform(
  cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c
) {

  // the kernel receives the element count as an unsigned value
  unsigned num_items = static_cast<unsigned>(std::distance(first1, last1));

  this->kernel(
    task,
    E::num_blocks(num_items), E::nt, 0,
    detail::cuda_transform_kernel<I1, I2, O, C, E>,
    first1, first2, num_items, output, c
  );
}

119

120} // end of namespace tf -----------------------------------------------------

121

122

123

124

125

126

tf::cudaGraphBase::kernel

cudaTask kernel(dim3 g, dim3 b, size_t s, F f, ArgsT... args)

creates a kernel task

Definition cuda_graph.hpp:1010

tf::cudaGraphBase::transform

cudaTask transform(I first, I last, O output, C op)

applies a callable to a source range and stores the result in a target range

Definition transform.hpp:65

tf::cudaGraphExecBase::kernel

void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)

updates parameters of a kernel task

Definition cuda_graph_exec.hpp:279

tf::cudaGraphExecBase::transform

void transform(cudaTask task, I first, I last, O output, C c)

updates parameters of a transform kernel task created from the CUDA graph of *this

Definition transform.hpp:94

tf::cudaTask

class to create a task handle of a CUDA Graph node

Definition cuda_graph.hpp:315

tf

taskflow namespace

Definition small_vector.hpp:20