Back to Cutlass

CUTLASS: device_dump.h Source File

docs/device__dump_8h_source.html

4.4.28.7 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

device_dump.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

25

26 #pragma once

27

28 #include <stdio.h>

29 #include "cutlass/cutlass.h"

30

37 namespace cutlass {

38 namespace debug {

39

40 /******************************************************************************

41 * Dump the fragments

42 ******************************************************************************/

43

/// Prints the contents of a per-thread fragment, one thread at a time, in
/// increasing linear thread-id order.
///
/// \param frag  Fragment to print. It must provide `size()`, `operator[]` and a
///              nested `value_type` convertible to `float`.
/// \param N     Number of threads per block (counted from linear thread id 0)
///              whose fragment is printed. 0 selects every thread of the block.
/// \param M     Exclusive upper bound on the element indices that are printed.
///              0 selects `frag.size()`.
/// \param S     Step between two printed element indices; must lie in [1, M]
///              after M has been defaulted.
///
/// Every path through this function executes `__syncthreads()`, so all threads
/// of the block have to make the call, and with identical N, M and S.
/// Values are printed with "%.0f", i.e. without fractional digits.
template <typename Fragment>
CUTLASS_DEVICE void dump_fragment(Fragment const& frag, int N = 0, int M = 0,
                                  int S = 1) {
  int const threads_per_block = blockDim.x * blockDim.y * blockDim.z;

  // Linearised block / thread coordinates (x fastest, z slowest).
  int const linear_block =
      (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
  int const linear_thread =
      (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

  // Diagnostics and the banner are emitted once per grid, by the very first
  // thread of the very first block.
  bool const is_reporter = (linear_thread == 0) && (linear_block == 0);

  bool const thread_count_ok = (N >= 0) && (N <= threads_per_block);
  if (!thread_count_ok) {
    if (is_reporter) {
      printf("Thread number N = %d should between [1, %d].\n", N,
             threads_per_block);
    }
    __syncthreads();
    return;
  }

  int const fragment_elements = frag.size();

  bool const element_count_ok = (M >= 0) && (M <= fragment_elements);
  if (!element_count_ok) {
    if (is_reporter) {
      printf("Element number M = %d should between [1, %d].\n", M,
             fragment_elements);
    }
    __syncthreads();
    return;
  }

  // A value of zero stands for "everything".
  int const thread_limit = (N == 0) ? threads_per_block : N;
  int const element_limit = (M == 0) ? fragment_elements : M;

  bool const stride_ok = (S >= 1) && (S <= element_limit);
  if (!stride_ok) {
    if (is_reporter) {
      printf("Stride S = %d should between [1, %d].\n", S, element_limit);
    }
    __syncthreads();
    return;
  }

  if (is_reporter) {
    printf("\n*******************Dumping the fragments*******************\n\n");
  }

  // Take turns: in round `turn` only the thread whose id equals `turn` prints,
  // and the barrier at the end of the round keeps the block in step.
  CUTLASS_PRAGMA_NO_UNROLL
  for (int turn = 0; turn < thread_limit; ++turn) {
    if (linear_thread == turn) {
      int const warp = linear_thread / 32;
      int const lane = linear_thread % 32;
      printf("TB%d W%d T%d: ", linear_block, warp, lane);

      CUTLASS_PRAGMA_UNROLL
      for (int idx = 0; idx < element_limit; idx += S) {
        typename Fragment::value_type const element = frag[idx];
        printf("%.0f ", float(element));
      }

      printf("\n");
    }

    __syncthreads();
  }

  if (is_reporter) {
    printf("\n***********************************************************\n\n");
  }

  __syncthreads();
}

115

116 /******************************************************************************

117 * Dump the shared memory

118 ******************************************************************************/

119

120 #define SHMEM_ROW_SIZE 128

121

/// Prints `size` elements starting at `ptr`, laid out as text rows of
/// SHMEM_ROW_SIZE bytes each. Thread 0 of every block does the printing for
/// its own block.
///
/// \param ptr   First element to print. Judging by the function name this is
///              meant to point into shared memory; the code itself only
///              dereferences it from thread 0.
/// \param size  Number of elements (not bytes) readable at `ptr`.
/// \param S     Step between two printed elements within a row; must lie in
///              [1, SHMEM_ROW_SIZE / sizeof(Element)].
///
/// Every path through this function executes `__syncthreads()`, so all threads
/// of the block have to make the call, and with identical arguments.
/// Values are converted to `float` and printed with "%.0f", i.e. without
/// fractional digits.
template <typename Element>
CUTLASS_DEVICE void dump_shmem(Element const* ptr, size_t size, int S = 1) {
  int block_id =
      blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
  int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
                  (threadIdx.y * blockDim.x) + threadIdx.x;

  if (ptr == nullptr) {
    if (thread_id == 0 && block_id == 0) printf("ptr is null.\n");

    __syncthreads();
    return;
  }

  if (size < 1) {
    if (thread_id == 0 && block_id == 0)
      printf("Element size is less than 1\n");

    __syncthreads();
    return;
  }

  int row_elements = SHMEM_ROW_SIZE / sizeof(Element);

  // Passing this check also guarantees row_elements >= 1, so the row loop
  // below always makes progress.
  if (S < 1 || S > row_elements) {
    if (thread_id == 0 && block_id == 0)
      printf("Stride S = %d should between [1, %d].\n", S, row_elements);

    __syncthreads();
    return;
  }

  // Wait for the whole block before reading, so that stores issued by other
  // threads ahead of this call are visible to the printing thread.
  __syncthreads();

  if (thread_id == 0) {
    printf("\n********Dumping the shared memory of TB %d*******\n\n", block_id);

    // size_t index: matches the type of `size` and avoids a signed/unsigned
    // comparison.
    for (size_t row_start = 0; row_start < size; row_start += row_elements) {
      // The second condition keeps the final, possibly partial, row from
      // reading past ptr + size.
      for (int j = 0; j < row_elements && row_start + j < size; j += S) {
        printf("%.0f ", float(ptr[row_start + j]));
      }

      printf("\n");
    }

    printf("\n***********************************************************\n\n");
  }

  __syncthreads();
}

180 } // namespace debug

181 } // namespace cutlass

cutlass

Definition: aligned_buffer.h:35

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::debug::dump_fragment

CUTLASS_DEVICE void dump_fragment(Fragment const &frag, int N=0, int M=0, int S=1)

Definition: device_dump.h:48

CUTLASS_PRAGMA_NO_UNROLL

#define CUTLASS_PRAGMA_NO_UNROLL

Definition: cutlass.h:111

SHMEM_ROW_SIZE

#define SHMEM_ROW_SIZE

Definition: device_dump.h:120

cutlass.h

Basic include for CUTLASS.

cutlass::debug::dump_shmem

CUTLASS_DEVICE void dump_shmem(Element const *ptr, size_t size, int S=1)

Definition: device_dump.h:125


Generated by 1.8.11