docs/cutlass_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
cutlass.h
Go to the documentation of this file.
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
25
30 #pragma once
31
33
34 namespace cutlass {
35
37
40kSuccess,
47kInvalid
48 };
49
// Returns a human-readable description of a cutlass::Status value.
//
// status: the status code to describe.
// Returns: a pointer to a string literal (the caller must not modify or free it).
//          kInvalid, and any value not matched by a case, yields "Invalid status".
51 static inline char const* cutlassGetStatusString(cutlass::Status status) {
52switch (status) {
53case cutlass::Status::kSuccess:
54return "Success";
55case cutlass::Status::kErrorMisalignedOperand:
56return "Error Misaligned Operand";
57case cutlass::Status::kErrorInvalidLayout:
58return "Error Invalid Layout";
59case cutlass::Status::kErrorInvalidProblem:
60return "Error Invalid Problem";
61case cutlass::Status::kErrorNotSupported:
62return "Error Not Supported";
63case cutlass::Status::kErrorWorkspaceNull:
64return "Error Workspace Null";
65case cutlass::Status::kErrorInternal:
66return "Error Internal";
// kInvalid has no message of its own: leave the switch and use the fallback below.
67case cutlass::Status::kInvalid: break;
68 }
69
// Fallback for kInvalid and for any value not matched by a case above.
70return "Invalid status";
71 }
72
74
75 // CUDA 10.1 introduces the mma instruction
// Default CUTLASS_ENABLE_TENSOR_CORE_MMA to 0 unless it was already defined before this
// point (presumably by the build system or an including header -- confirm).
76 #if !defined(CUTLASS_ENABLE_TENSOR_CORE_MMA)
77 #define CUTLASS_ENABLE_TENSOR_CORE_MMA 0
78 #endif
79
81
// Function-qualifier macros, selected by the compiler in use:
//   - NVCC, or clang compiling CUDA: CUTLASS_HOST_DEVICE is forced-inline host+device,
//     CUTLASS_DEVICE is forced-inline device-only.
//   - __CUDACC_RTC__ defined: both macros are forced-inline device-only.
//   - anything else: CUTLASS_HOST_DEVICE is plain `inline`.
// NOTE(review): the final #else branch does not define CUTLASS_DEVICE, so any use of
// CUTLASS_DEVICE that is not itself guarded by a CUDA-compiler check will not compile
// there -- confirm whether that is intended.
82 #if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
83 #define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
84 #define CUTLASS_DEVICE __forceinline__ __device__
85 #elif defined(__CUDACC_RTC__)
86 #define CUTLASS_HOST_DEVICE __forceinline__ __device__
87 #define CUTLASS_DEVICE __forceinline__ __device__
88 #else
89 #define CUTLASS_HOST_DEVICE inline
90 #endif
91
// Forwards directly to assert(x).
// NOTE(review): no include that declares assert is visible in this listing -- confirm it
// is provided before this macro is expanded.
92 #define CUTLASS_ASSERT(x) assert(x)
93
95
96 // CUTLASS_PRAGMA_(UNROLL|NO_UNROLL) optimization directives for the CUDA compiler.
// When __CUDA_ARCH__ is defined:
//   - under __CUDACC_RTC__ or clang compiling CUDA, the macros use the _Pragma operator;
//   - otherwise they expand to a literal `#pragma unroll` / `#pragma unroll 1`.
//     NOTE(review): a `#pragma` line produced by macro expansion is not treated as a
//     directive by a standard preprocessor; this presumably relies on NVCC-specific
//     handling -- confirm.
//   CUTLASS_GEMM_LOOP is an alias for CUTLASS_PRAGMA_NO_UNROLL.
// When __CUDA_ARCH__ is not defined, all three macros expand to nothing.
97 #if defined(__CUDA_ARCH__)
98 #if defined(__CUDACC_RTC__) || (defined(__clang__) && defined(__CUDA__))
99 #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
100 #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
101 #else
102 #define CUTLASS_PRAGMA_UNROLL #pragma unroll
103 #define CUTLASS_PRAGMA_NO_UNROLL #pragma unroll 1
104 #endif
105
106 #define CUTLASS_GEMM_LOOP CUTLASS_PRAGMA_NO_UNROLL
107
108 #else
109
110 #define CUTLASS_PRAGMA_UNROLL
111 #define CUTLASS_PRAGMA_NO_UNROLL
112 #define CUTLASS_GEMM_LOOP
113
114 #endif
115
117
118
// Thread-group sizes as compile-time integer constants:
// warp = 32, half warp = warp / 2, quad = 4, quad pair = quad * 2.
119 static const int NUM_THREADS_PER_WARP = 32;
120 static const int NUM_THREADS_PER_HALF_WARP = NUM_THREADS_PER_WARP / 2;
121 static const int NUM_THREADS_PER_QUAD = 4;
122 static const int NUM_THREADS_PER_QUAD_PAIR = NUM_THREADS_PER_QUAD * 2;
123
// LaneId() is only declared when compiling with NVCC or with clang in CUDA mode.
124 #if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
125
// Reads the PTX special register %laneid with an inline-asm `mov.u32` and returns
// its value as an int. Device-only (CUTLASS_DEVICE).
127 CUTLASS_DEVICE
128 int LaneId() {
129int ret;
// "%%laneid" is the escaped form of %laneid inside the asm template; "=r" binds the
// destination operand %0 to `ret`.
130asm ("mov.u32 %0, %%laneid;" : "=r"(ret));
131return ret;
132 }
133
134 #endif
135
137
138 } // namespace cutlass
139
141
Definition: aligned_buffer.h:35
cutlass::Status::kErrorInvalidProblem
Specified problem size is not supported by operator.
cutlass::Status::kInvalid
Status is unspecified.
cutlass::Status::kErrorNotSupported
Operation is not supported on current device.
cutlass::Status::kErrorMisalignedOperand
Operands fail alignment requirements.
cutlass::Status::kErrorInternal
An error within CUTLASS occurred.
cutlass::Status::kErrorWorkspaceNull
The given workspace is null when it is required to be non-null.
cutlass::Status::kSuccess
Operation was successful.
cutlass::Status::kErrorInvalidLayout
Layout fails alignment requirement.
Status
Status code returned by CUTLASS operations.
Definition: cutlass.h:39
Generated by 1.8.11