CUTLASS: mma_sm61.h Source File - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

gemm/thread/mma_sm61.h

Go to the documentation of this file.

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

31 #include "cutlass/cutlass.h"

32 #include "cutlass/tensor_ref.h"

33 #include "cutlass/layout/matrix.h"

34 #include "cutlass/gemm/gemm.h"

35 #include "cutlass/gemm/thread/mma.h"

39 namespace cutlass {

40 namespace gemm {

41 namespace thread {

46 template <

48typename Shape_,

50typename LayoutC_

51 >

[52](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html) struct Mma<

53 Shape_,

54 int8_t,

55 layout::RowMajor,

56 int8_t,

57layout::ColumnMajor,

58 int32_t,

59 LayoutC_,

60 arch::OpMultiplyAdd,

61 bool> {

[64](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a33d6356e20b97badf1a49d384144e411)using [Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a33d6356e20b97badf1a49d384144e411) = Shape_;

[67](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456)using [ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456) = int8_t;

[70](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ae97a267ff5cb731e4c396cf6f7524e01)using LayoutA = layout::RowMajor;

[73](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5)using [ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5) = int8_t;

[76](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a2f2ab8637971d2837beec904d4e1c09f)using LayoutB = layout::ColumnMajor;

[79](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda)using [ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda) = int32_t;

[82](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac)using [LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac) = LayoutC_;

[85](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a86c5359a3fa5251ba38db6851a586a03)using [Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a86c5359a3fa5251ba38db6851a586a03) = arch::OpMultiplyAdd;

[88](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2)using [FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2) = Array<ElementA, Shape::kMK>;

[91](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada)using [FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada) = Array<ElementB, Shape::kKN>;

[94](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7)using [FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7) = Array<ElementC, Shape::kMN>;

96//

97// Methods

98//

101CUTLASS_HOST_DEVICE

[102](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a827c554a04cd668a357b87a20dc8abbe)void [operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a827c554a04cd668a357b87a20dc8abbe)(

103[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7) & D,

104[FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2) const & A,

105[FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada) const & B,

106[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7) const & C) {

107

108TensorRef<ElementC, LayoutC> d(

109 reinterpret_cast<ElementC *>(&D), LayoutC::packed({ Shape::kM, Shape::kN }));

110

111// Copy accumulators

112 D = C;

113

115using Mma = arch::Mma<

116gemm::GemmShape<1,1,4>,

117 1,

118[ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456),

119LayoutA,

120[ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5),

121LayoutB,

122[ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda),

123[LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac),

124 arch::OpMultiplyAdd>;

125

126Mma mma;

127

128// Compute matrix product

129CUTLASS_PRAGMA_UNROLL

130for (int k = 0; k < Shape::kK / Mma::Shape::kK; ++k) {

131

132CUTLASS_PRAGMA_UNROLL

133for (int n = 0; n < Shape::kN; ++n) {

134

135CUTLASS_PRAGMA_UNROLL

136for (int m = 0; m < Shape::kM; ++m) {

137MatrixCoord mn(m, n);

138

139 Array<int8_t, 4> const *ptr_A = reinterpret_cast<Array<int8_t, 4> const *>(&A);

140 Array<int8_t, 4> const *ptr_B = reinterpret_cast<Array<int8_t, 4> const *>(&B);

141

142 Array<int32_t, 1> tmp = reinterpret_cast<Array<int32_t, 1> &>(d.at(mn));

143

144 mma(

145 tmp,

146 ptr_A[m * Shape::kK / Mma::Shape::kK + k],

147 ptr_B[n * Shape::kK / Mma::Shape::kK + k],

148 tmp);

149

150 d.at(mn) = reinterpret_cast<int32_t &>(tmp);

151 }

152 }

153 }

154 }

155 };

156

159 template <

161typename Shape_,

163typename LayoutC_

164 >

[165](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html) struct Mma<

166 Shape_,

167 int8_t,

168 layout::ColumnMajor,

169 int8_t,

170layout::RowMajor,

171 int32_t,

172 LayoutC_,

173 arch::OpMultiplyAdd,

174 int8_t> {

175

[177](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#aad5a7046adce98731840857a4b9f928c)using [Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#aad5a7046adce98731840857a4b9f928c) = Shape_;

178

[180](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b)using [ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b) = int8_t;

181

[183](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a285e7f7a777dcc1b553e3647bcf3833a)using LayoutA = layout::ColumnMajor;

184

[186](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e)using [ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e) = int8_t;

187

[189](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ab4fdf251bf699a1e5972385d66f8220d)using LayoutB = layout::RowMajor;

190

[192](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d)using [ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d) = int32_t;

193

[195](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453)using [LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453) = LayoutC_;

196

[198](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a7e010ad47f102bf4b1c23a6cec257f96)using [Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a7e010ad47f102bf4b1c23a6cec257f96) = arch::OpMultiplyAdd;

199

[201](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894)using [FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894) = Array<ElementA, Shape::kMK>;

202

[204](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7)using [FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7) = Array<ElementB, Shape::kKN>;

205

[207](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8)using [FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8) = Array<ElementC, Shape::kMN>;

208

209//

210// Methods

211//

212

214CUTLASS_HOST_DEVICE

[215](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ae518a9644f8a0842921d78216b5ac952)void [operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ae518a9644f8a0842921d78216b5ac952)(

216[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8) & D,

217[FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894) const & A,

218[FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7) const & B,

219[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8) const & C) {

220

221TensorRef<ElementC, LayoutC> d(

222 reinterpret_cast<ElementC *>(&D), LayoutC::packed({ Shape::kM, Shape::kN }));

223

224// Copy accumulators

225 D = C;

226

228using Mma = arch::Mma<

229gemm::GemmShape<1,1,4>,

230 1,

231[ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b),

232LayoutA,

233[ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e),

234LayoutB,

235[ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d),

236[LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453),

237 arch::OpMultiplyAdd>;

238

239Mma mma;

240 Array<int8_t, 4> const *ptr_A = reinterpret_cast<Array<int8_t, 4> const *>(&A);

241 Array<int8_t, 4> const *ptr_B = reinterpret_cast<Array<int8_t, 4> const *>(&B);

242

243// Compute matrix product

244CUTLASS_PRAGMA_UNROLL

245for (int k = 0; k < Shape::kK / Mma::Shape::kK; ++k) {

246

247CUTLASS_PRAGMA_UNROLL

248for (int n = 0; n < Shape::kN; ++n) {

249

250CUTLASS_PRAGMA_UNROLL

251for (int m = 0; m < Shape::kM; ++m) {

252MatrixCoord mn(m, n);

253

254 Array<int32_t, 1> tmp = reinterpret_cast<Array<int32_t, 1> &>(d.at(mn));

255

256 mma(

257 tmp,

258 ptr_A[m + k * Shape::kM],

259 ptr_B[n + k * Shape::kN],

260 tmp);

261

262 d.at(mn) = reinterpret_cast<int32_t &>(tmp);

263 }

264 }

265 }

266 }

267 };

268

269 } // namespace thread

270 } // namespace gemm

271 } // namespace cutlass

272

cutlass

Definition: aligned_buffer.h:35

tensor_ref.h

Defines a structure containing strides, bounds, and a pointer to tensor data.

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a827c554a04cd668a357b87a20dc8abbe)

CUTLASS_HOST_DEVICE void operator()(FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)

Computes a matrix product D = A * B + C.

Definition: gemm/thread/mma_sm61.h:102

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7)

Array< ElementB, Shape::kKN > FragmentB

B operand storage.

Definition: gemm/thread/mma_sm61.h:204

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a33d6356e20b97badf1a49d384144e411)

Shape_ Shape

Size of the Gemm problem - concept: gemm::GemmShape<>

Definition: gemm/thread/mma_sm61.h:64

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ae518a9644f8a0842921d78216b5ac952)

CUTLASS_HOST_DEVICE void operator()(FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)

Computes a matrix product D = A * B + C.

Definition: gemm/thread/mma_sm61.h:215

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a86c5359a3fa5251ba38db6851a586a03)

arch::OpMultiplyAdd Operator

Underlying mathematical operator.

Definition: gemm/thread/mma_sm61.h:85

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8)

Array< ElementC, Shape::kMN > FragmentC

C operand storage.

Definition: gemm/thread/mma_sm61.h:207

gemm.h

Defines common types used for all GEMM-like operators.

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a7e010ad47f102bf4b1c23a6cec257f96)

arch::OpMultiplyAdd Operator

Underlying mathematical operator.

Definition: gemm/thread/mma_sm61.h:198

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5)

int8_t ElementB

Data type of operand B.

Definition: gemm/thread/mma_sm61.h:73

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2)

Array< ElementA, Shape::kMK > FragmentA

A operand storage.

Definition: gemm/thread/mma_sm61.h:88

cutlass::layout::ColumnMajor

Mapping function for column-major matrices.

Definition: layout/matrix.h:142

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::TensorRef< ElementC, LayoutC >

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e)

int8_t ElementB

Data type of operand B.

Definition: gemm/thread/mma_sm61.h:186

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

mma.h

Templates exposing architecture support for warp-level multiply-add operations.

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894)

Array< ElementA, Shape::kMK > FragmentA

A operand storage.

Definition: gemm/thread/mma_sm61.h:201

cutlass::gemm::GemmShape

Shape of a matrix multiply-add operation.

Definition: include/cutlass/gemm/gemm.h:57

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456)

int8_t ElementA

Data type of operand A.

Definition: gemm/thread/mma_sm61.h:67

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d)

int32_t ElementC

Element type of operand C.

Definition: gemm/thread/mma_sm61.h:192

cutlass::layout::RowMajor

Mapping function for row-major matrices.

Definition: layout/matrix.h:50

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453)

LayoutC_ LayoutC

Layout of C matrix (concept: layout::MapFunc)

Definition: gemm/thread/mma_sm61.h:195

cutlass::TensorRef::at

CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const

Returns a reference to the element at a given Coord.

Definition: tensor_ref.h:307

cutlass::gemm::thread::Mma

Structure to compute the matrix product.

Definition: gemm/thread/mma.h:66

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac)

LayoutC_ LayoutC

Layout of C matrix (concept: layout::MapFunc)

Definition: gemm/thread/mma_sm61.h:82

matrix.h

Defines layout functions used by TensorRef and derived classes.

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda)

int32_t ElementC

Element type of operand C.

Definition: gemm/thread/mma_sm61.h:79

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada)

Array< ElementB, Shape::kKN > FragmentB

B operand storage.

Definition: gemm/thread/mma_sm61.h:91

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7)

Array< ElementC, Shape::kMN > FragmentC

C operand storage.

Definition: gemm/thread/mma_sm61.h:94

cutlass::arch::Mma

Matrix multiply-add operation.

Definition: arch/mma.h:92

cutlass.h

Basic include for CUTLASS.

cutlass::MatrixCoord

Definition: matrix_coord.h:39

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#aad5a7046adce98731840857a4b9f928c)

Shape_ Shape

Size of the Gemm problem - concept: gemm::GemmShape<>

Definition: gemm/thread/mma_sm61.h:177

[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b)

int8_t ElementA

Data type of operand A.

Definition: gemm/thread/mma_sm61.h:180

Generated by 1.8.11