CUTLASS: cutlass.h Source File - Cutlass

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

cutlass.h

Go to the documentation of this file.

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

30 #pragma once

namespace cutlass {

/// Status code returned by CUTLASS operations.
enum class Status {
  kSuccess,                 ///< Operation was successful.
  kErrorMisalignedOperand,  ///< Operands fail alignment requirements.
  kErrorInvalidLayout,      ///< Layout fails alignment requirement.
  kErrorInvalidProblem,     ///< Specified problem size is not supported by operator.
  kErrorNotSupported,       ///< Operation is not supported on current device.
  kErrorWorkspaceNull,      ///< The given workspace is null when it is required to be non-null.
  kErrorInternal,           ///< An error within CUTLASS occurred.
  kInvalid                  ///< Status is unspecified.
};

/// Converts a Status code into a human-readable, statically allocated C string.
///
/// \param status  Status value to describe.
/// \return Pointer to a string literal; the caller must not free or modify it.
///         `Status::kInvalid` and any value that is not a named enumerator both
///         yield "Invalid status".
static inline char const* cutlassGetStatusString(cutlass::Status status) {
  switch (status) {
    case cutlass::Status::kSuccess:
      return "Success";
    case cutlass::Status::kErrorMisalignedOperand:
      return "Error Misaligned Operand";
    case cutlass::Status::kErrorInvalidLayout:
      return "Error Invalid Layout";
    case cutlass::Status::kErrorInvalidProblem:
      return "Error Invalid Problem";
    case cutlass::Status::kErrorNotSupported:
      return "Error Not Supported";
    case cutlass::Status::kErrorWorkspaceNull:
      return "Error Workspace Null";
    case cutlass::Status::kErrorInternal:
      return "Error Internal";
    case cutlass::Status::kInvalid:
      break;
  }

  // Reached for kInvalid and for out-of-range values cast to Status.
  return "Invalid status";
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// CUDA 10.1 introduces the mma instruction
// Disabled by default; a build may predefine CUTLASS_ENABLE_TENSOR_CORE_MMA to override.
#if !defined(CUTLASS_ENABLE_TENSOR_CORE_MMA)
#define CUTLASS_ENABLE_TENSOR_CORE_MMA 0
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////

// Function-annotation macros. Note that macros are not scoped by the enclosing namespace.
#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))
#define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
#define CUTLASS_DEVICE __forceinline__ __device__
#elif defined(__CUDACC_RTC__)
#define CUTLASS_HOST_DEVICE __forceinline__ __device__
#define CUTLASS_DEVICE __forceinline__ __device__
#else
#define CUTLASS_HOST_DEVICE inline
// Host-only fallback: without this, CUTLASS_DEVICE would be left undefined when the
// header is consumed by a plain host compiler. Guarded so that a definition supplied
// by the including code is respected rather than redefined.
#if !defined(CUTLASS_DEVICE)
#define CUTLASS_DEVICE inline
#endif
#endif

// Expands to assert(x). This header does not visibly include <cassert>/<assert.h>, so
// the including translation unit must make `assert` available before using the macro.
#define CUTLASS_ASSERT(x) assert(x)

////////////////////////////////////////////////////////////////////////////////////////////////////

// CUTLASS_PRAGMA_(UNROLL|NO_UNROLL) optimization directives for the CUDA compiler.
// They expand to nothing unless __CUDA_ARCH__ is defined.
#if defined(__CUDA_ARCH__)
  #if defined(__CUDACC_RTC__) || (defined(__clang__) && defined(__CUDA__))
    #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
    #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
  #else
    #define CUTLASS_PRAGMA_UNROLL #pragma unroll
    #define CUTLASS_PRAGMA_NO_UNROLL #pragma unroll 1
  #endif

  // GEMM loops use the "no unroll" directive.
  #define CUTLASS_GEMM_LOOP CUTLASS_PRAGMA_NO_UNROLL

#else

  #define CUTLASS_PRAGMA_UNROLL
  #define CUTLASS_PRAGMA_NO_UNROLL
  #define CUTLASS_GEMM_LOOP

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////

// Thread-group size constants; the half-warp and quad-pair values are derived
// from the warp and quad values respectively.
static const int NUM_THREADS_PER_WARP = 32;
static const int NUM_THREADS_PER_HALF_WARP = NUM_THREADS_PER_WARP / 2;
static const int NUM_THREADS_PER_QUAD = 4;
static const int NUM_THREADS_PER_QUAD_PAIR = NUM_THREADS_PER_QUAD * 2;

#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))

/// Returns the value of the PTX `%laneid` special register for the calling thread.
CUTLASS_DEVICE
int LaneId() {
  int ret;
  asm ("mov.u32 %0, %%laneid;" : "=r"(ret));
  return ret;
}

#endif

////////////////////////////////////////////////////////////////////////////////////////////////////

} // namespace cutlass

139

141

cutlass

Definition: aligned_buffer.h:35

cutlass::Status::kErrorInvalidProblem

Specified problem size is not supported by operator.

cutlass::Status::kInvalid

Status is unspecified.

cutlass::Status::kErrorNotSupported

Operation is not supported on current device.

cutlass::Status::kErrorMisalignedOperand

Operands fail alignment requirements.

cutlass::Status::kErrorInternal

An error within CUTLASS occurred.

cutlass::Status::kErrorWorkspaceNull

The given workspace is null when it is required to be non-null.

cutlass::Status::kSuccess

Operation was successful.

cutlass::Status::kErrorInvalidLayout

Layout fails alignment requirement.

cutlass::Status

Status

Status code returned by CUTLASS operations.

Definition: cutlass.h:39

Generated by Doxygen 1.8.11