docs/device__dump_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
device_dump.h
Go to the documentation of this file.
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
25
26 #pragma once
27
28 #include <stdio.h>
29 #include "cutlass/cutlass.h"
30
37 namespace cutlass {
38 namespace debug {
39
40 /******************************************************************************
41 * Dump the fragments
42 ******************************************************************************/
43
/// Prints the elements of a per-thread fragment from device code (debugging aid).
///
/// Within a block, threads print one at a time in order of their linear thread
/// index, separated by block-wide barriers. The banner lines are printed only
/// by thread 0 of block 0.
///
/// Every thread of the block must call this with the same N, M and S: each
/// path through the function executes __syncthreads().
///
/// \param frag  fragment to print; must provide size(), operator[] and a nested
///              value_type that is convertible to float
/// \param N     number of leading threads (by linear thread index) that print;
///              0 selects every thread of the block
/// \param M     exclusive upper bound on the element indices printed;
///              0 selects all frag.size() elements
/// \param S     step between printed element indices, in [1, M]
template <typename Fragment>
CUTLASS_DEVICE void dump_fragment(Fragment const& frag, int N = 0, int M = 0,
                                  int S = 1) {
  int total_threads = blockDim.x * blockDim.y * blockDim.z;
  int block_id =
      blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
  int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
                  (threadIdx.y * blockDim.x) + threadIdx.x;

  // Only one thread in the whole grid reports argument errors and banners.
  bool const is_reporter = (thread_id == 0 && block_id == 0);

  if (N < 0 || N > total_threads) {
    if (is_reporter)
      printf(
          "Thread number N = %d should be between [1, %d], or 0 for all "
          "threads.\n",
          N, total_threads);

    __syncthreads();
    return;
  }

  // size() may return an unsigned type; make the narrowing explicit.
  int total_elements = static_cast<int>(frag.size());

  if (M < 0 || M > total_elements) {
    if (is_reporter)
      printf(
          "Element number M = %d should be between [1, %d], or 0 for all "
          "elements.\n",
          M, total_elements);

    __syncthreads();
    return;
  }

  // Zero means "everything" for both the thread and the element count.
  if (N == 0) N = total_threads;
  if (M == 0) M = total_elements;

  if (S < 1 || S > M) {
    if (is_reporter)
      printf("Stride S = %d should be between [1, %d].\n", S, M);

    __syncthreads();
    return;
  }

  if (is_reporter)
    printf("\n*******************Dumping the fragments*******************\n\n");

  // NOTE(review): the listing this block was taken from skips one source line
  // before each of the two loops below; if unroll pragmas lived there, restore
  // them when applying this change.

  // One thread prints per iteration; the barrier keeps each block's output in
  // thread order.
  for (int tid = 0; tid < N; ++tid) {
    if (tid == thread_id) {
      // W = tid / 32, T = tid % 32.
      printf("TB%d W%d T%d: ", block_id, tid / 32, tid & 31);
      for (int i = 0; i < M; i += S) {
        printf("%.0f ", float(typename Fragment::value_type(frag[i])));
      }
      printf("\n");
    }

    __syncthreads();
  }

  if (is_reporter)
    printf("\n***********************************************************\n\n");

  __syncthreads();
}
115
116 /******************************************************************************
117 * Dump the shared memory
118 ******************************************************************************/
119
/// Number of bytes of data printed per output row by dump_shmem.
#define SHMEM_ROW_SIZE 128

/// Prints `size` elements starting at `ptr` from device code (debugging aid;
/// the banner labels the output as the shared memory of the calling block).
///
/// Thread 0 of each block prints the data in rows of
/// SHMEM_ROW_SIZE / sizeof(Element) elements, showing every S-th element of a
/// row. The last row is truncated at `size`, so nothing beyond `ptr + size` is
/// read.
///
/// Every thread of the block must call this with the same arguments: each path
/// through the function executes __syncthreads().
///
/// \param ptr   first element to print; a null pointer is reported and ignored
/// \param size  number of elements available at `ptr`; must be at least 1
/// \param S     step between printed elements within a row, in
///              [1, SHMEM_ROW_SIZE / sizeof(Element)]
template <typename Element>
CUTLASS_DEVICE void dump_shmem(Element const* ptr, size_t size, int S = 1) {
  int block_id =
      blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
  int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
                  (threadIdx.y * blockDim.x) + threadIdx.x;

  // Argument errors are reported once per grid, not once per block.
  bool const is_reporter = (thread_id == 0 && block_id == 0);

  if (ptr == nullptr) {
    if (is_reporter) printf("ptr is null.\n");

    __syncthreads();
    return;
  }

  if (size == 0) {
    if (is_reporter) printf("Element size is less than 1\n");

    __syncthreads();
    return;
  }

  int row_elements = static_cast<int>(SHMEM_ROW_SIZE / sizeof(Element));

  // Also rejects element types wider than a row (row_elements == 0), which
  // would otherwise make the row loop below never advance.
  if (S < 1 || S > row_elements) {
    if (is_reporter)
      printf("Stride S = %d should be between [1, %d].\n", S, row_elements);

    __syncthreads();
    return;
  }

  // Barrier before thread 0 starts reading the buffer.
  __syncthreads();

  if (thread_id == 0) {
    printf("\n********Dumping the shared memory of TB %d*******\n\n", block_id);

    // Index with size_t so the comparison against `size` is not a
    // signed/unsigned mix and cannot overflow an int.
    size_t const row_len = static_cast<size_t>(row_elements);
    size_t const step = static_cast<size_t>(S);

    for (size_t row_start = 0; row_start < size; row_start += row_len) {
      // Clamp the final row so a size that is not a multiple of the row
      // length does not cause reads past the end of the buffer.
      size_t const remaining = size - row_start;
      size_t const row_end = remaining < row_len ? remaining : row_len;

      for (size_t col = 0; col < row_end; col += step) {
        printf("%.0f ", float(ptr[row_start + col]));
      }

      printf("\n");
    }

    printf("\n***********************************************************\n\n");
  }

  __syncthreads();
}
180 } // namespace debug
181 } // namespace cutlass
Definition: aligned_buffer.h:35
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
CUTLASS_DEVICE void dump_fragment(Fragment const &frag, int N=0, int M=0, int S=1)
Definition: device_dump.h:48
#define CUTLASS_PRAGMA_NO_UNROLL
Definition: cutlass.h:111
#define SHMEM_ROW_SIZE
Definition: device_dump.h:120
Basic include for CUTLASS.
CUTLASS_DEVICE void dump_shmem(Element const *ptr, size_t size, int S=1)
Definition: device_dump.h:125
Generated by 1.8.11