docs/gemm_2thread_2mma__sm61_8h_source.html
CUTLASS: CUDA Templates for Linear Algebra Subroutines and Solvers
gemm/thread/mma_sm61.h
Go to the documentation of this file.
1 /***************************************************************************************************
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
29 #pragma once
30
31 #include "cutlass/cutlass.h"
32 #include "cutlass/tensor_ref.h"
33 #include "cutlass/layout/matrix.h"
34 #include "cutlass/gemm/gemm.h"
35 #include "cutlass/gemm/thread/mma.h"
36
38
39 namespace cutlass {
40 namespace gemm {
41 namespace thread {
42
44
46 template <
48typename Shape_,
50typename LayoutC_
51 >
[52](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html) struct Mma<
53 Shape_,
54 int8_t,
55 layout::RowMajor,
56 int8_t,
57 layout::ColumnMajor,
58 int32_t,
59 LayoutC_,
60 arch::OpMultiplyAdd,
61 bool> {
62
[64](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a33d6356e20b97badf1a49d384144e411)using [Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a33d6356e20b97badf1a49d384144e411) = Shape_;
65
[67](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456)using [ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456) = int8_t;
68
[70](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ae97a267ff5cb731e4c396cf6f7524e01)using LayoutA = layout::RowMajor;
71
[73](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5)using [ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5) = int8_t;
74
[76](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a2f2ab8637971d2837beec904d4e1c09f)using LayoutB = layout::ColumnMajor;
77
[79](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda)using [ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda) = int32_t;
80
[82](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac)using [LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac) = LayoutC_;
83
[85](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a86c5359a3fa5251ba38db6851a586a03)using [Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a86c5359a3fa5251ba38db6851a586a03) = arch::OpMultiplyAdd;
86
[88](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2)using [FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2) = Array<ElementA, Shape::kMK>;
89
[91](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada)using [FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada) = Array<ElementB, Shape::kKN>;
92
[94](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7)using [FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7) = Array<ElementC, Shape::kMN>;
95
96//
97// Methods
98//
99
[102](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a827c554a04cd668a357b87a20dc8abbe)void [operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a827c554a04cd668a357b87a20dc8abbe)(
103[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7) & D,
104[FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2) const & A,
105[FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada) const & B,
106[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7) const & C) {
107
108TensorRef<ElementC, LayoutC> d(
109 reinterpret_cast<ElementC *>(&D), LayoutC::packed({ Shape::kM, Shape::kN }));
110
111// Copy accumulators
112 D = C;
113
115 using Mma = arch::Mma<
116 gemm::GemmShape<1,1,4>,
117 1,
118[ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456),
119LayoutA,
120[ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5),
121LayoutB,
122[ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda),
123[LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac),
124 arch::OpMultiplyAdd>;
125
126Mma mma;
127
128// Compute matrix product
130for (int k = 0; k < Shape::kK / Mma::Shape::kK; ++k) {
131
133for (int n = 0; n < Shape::kN; ++n) {
134
136for (int m = 0; m < Shape::kM; ++m) {
137MatrixCoord mn(m, n);
138
139 Array<int8_t, 4> const *ptr_A = reinterpret_cast<Array<int8_t, 4> const *>(&A);
140 Array<int8_t, 4> const *ptr_B = reinterpret_cast<Array<int8_t, 4> const *>(&B);
141
142 Array<int32_t, 1> tmp = reinterpret_cast<Array<int32_t, 1> &>(d.at(mn));
143
144 mma(
145 tmp,
146 ptr_A[m * Shape::kK / Mma::Shape::kK + k],
147 ptr_B[n * Shape::kK / Mma::Shape::kK + k],
148 tmp);
149
150 d.at(mn) = reinterpret_cast<int32_t &>(tmp);
151 }
152 }
153 }
154 }
155 };
156
159 template <
161typename Shape_,
163typename LayoutC_
164 >
[165](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html) struct Mma<
166 Shape_,
167 int8_t,
168 layout::ColumnMajor,
169 int8_t,
170layout::RowMajor,
171 int32_t,
172 LayoutC_,
173 arch::OpMultiplyAdd,
174 int8_t> {
175
[177](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#aad5a7046adce98731840857a4b9f928c)using [Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#aad5a7046adce98731840857a4b9f928c) = Shape_;
178
[180](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b)using [ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b) = int8_t;
181
[183](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a285e7f7a777dcc1b553e3647bcf3833a)using LayoutA = layout::ColumnMajor;
184
[186](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e)using [ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e) = int8_t;
187
[189](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ab4fdf251bf699a1e5972385d66f8220d)using LayoutB = layout::RowMajor;
190
[192](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d)using [ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d) = int32_t;
193
[195](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453)using [LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453) = LayoutC_;
196
[198](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a7e010ad47f102bf4b1c23a6cec257f96)using [Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a7e010ad47f102bf4b1c23a6cec257f96) = arch::OpMultiplyAdd;
199
[201](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894)using [FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894) = Array<ElementA, Shape::kMK>;
202
[204](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7)using [FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7) = Array<ElementB, Shape::kKN>;
205
[207](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8)using [FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8) = Array<ElementC, Shape::kMN>;
208
209//
210// Methods
211//
212
[215](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ae518a9644f8a0842921d78216b5ac952)void [operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ae518a9644f8a0842921d78216b5ac952)(
216[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8) & D,
217[FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894) const & A,
218[FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7) const & B,
219[FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8) const & C) {
220
221TensorRef<ElementC, LayoutC> d(
222 reinterpret_cast<ElementC *>(&D), LayoutC::packed({ Shape::kM, Shape::kN }));
223
224// Copy accumulators
225 D = C;
226
228 using Mma = arch::Mma<
229 gemm::GemmShape<1,1,4>,
230 1,
231[ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b),
232LayoutA,
233[ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e),
234LayoutB,
235[ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d),
236[LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453),
237 arch::OpMultiplyAdd>;
238
239Mma mma;
240 Array<int8_t, 4> const *ptr_A = reinterpret_cast<Array<int8_t, 4> const *>(&A);
241 Array<int8_t, 4> const *ptr_B = reinterpret_cast<Array<int8_t, 4> const *>(&B);
242
243// Compute matrix product
245for (int k = 0; k < Shape::kK / Mma::Shape::kK; ++k) {
246
248for (int n = 0; n < Shape::kN; ++n) {
249
251for (int m = 0; m < Shape::kM; ++m) {
252MatrixCoord mn(m, n);
253
254 Array<int32_t, 1> tmp = reinterpret_cast<Array<int32_t, 1> &>(d.at(mn));
255
256 mma(
257 tmp,
258 ptr_A[m + k * Shape::kM],
259 ptr_B[n + k * Shape::kN],
260 tmp);
261
262 d.at(mn) = reinterpret_cast<int32_t &>(tmp);
263 }
264 }
265 }
266 }
267 };
268
269 } // namespace thread
270 } // namespace gemm
271 } // namespace cutlass
272
Definition: aligned_buffer.h:35
Defines a structure containing strides, bounds, and a pointer to tensor data.
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a827c554a04cd668a357b87a20dc8abbe)
CUTLASS_HOST_DEVICE void operator()(FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
Computes a matrix product D = A * B + C.
Definition: gemm/thread/mma_sm61.h:102
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a34af38037b5cd7328f8bd9e463bfd4b7)
Array< ElementB, Shape::kKN > FragmentB
B operand storage.
Definition: gemm/thread/mma_sm61.h:204
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a33d6356e20b97badf1a49d384144e411)
Shape_ Shape
Size of the Gemm problem - concept: gemm::GemmShape<>
Definition: gemm/thread/mma_sm61.h:64
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::operator()](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ae518a9644f8a0842921d78216b5ac952)
CUTLASS_HOST_DEVICE void operator()(FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
Computes a matrix product D = A * B + C.
Definition: gemm/thread/mma_sm61.h:215
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a86c5359a3fa5251ba38db6851a586a03)
arch::OpMultiplyAdd Operator
Underlying mathematical operator.
Definition: gemm/thread/mma_sm61.h:85
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#ac70df9d0d37586189c6e3796d2b4bfb8)
Array< ElementC, Shape::kMN > FragmentC
C operand storage.
Definition: gemm/thread/mma_sm61.h:207
Defines common types used for all GEMM-like operators.
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Operator](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a7e010ad47f102bf4b1c23a6cec257f96)
arch::OpMultiplyAdd Operator
Underlying mathematical operator.
Definition: gemm/thread/mma_sm61.h:198
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9709b100b2ee7e54af92ac648c389ca5)
int8_t ElementB
Data type of operand B.
Definition: gemm/thread/mma_sm61.h:73
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#af9811ca34314708c1371e37faf3f6cf2)
Array< ElementA, Shape::kMK > FragmentA
A operand storage.
Definition: gemm/thread/mma_sm61.h:88
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
cutlass::TensorRef< ElementC, LayoutC >
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a39d3c40f8d968acac938cdfb0e99d89e)
int8_t ElementB
Data type of operand B.
Definition: gemm/thread/mma_sm61.h:186
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Templates exposing architecture support for warp-level multiply-add operations.
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a0b6a3262da428537ba826a5abf16b894)
Array< ElementA, Shape::kMK > FragmentA
A operand storage.
Definition: gemm/thread/mma_sm61.h:201
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#ad2345cd2c37e767e54e89079032d2456)
int8_t ElementA
Data type of operand A.
Definition: gemm/thread/mma_sm61.h:67
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a739813c06b0b67f6d7fca1ec00bb301d)
int32_t ElementC
Element type of operand C.
Definition: gemm/thread/mma_sm61.h:192
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a5a8bef91fe385be165980f9e0cd4e453)
LayoutC_ LayoutC
Layout of C matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm61.h:195
CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const
Returns a reference to the element at a given Coord.
Definition: tensor_ref.h:307
Structure to compute the matrix product.
Definition: gemm/thread/mma.h:66
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#abccf798bd97c69887268f62cfabf2aac)
LayoutC_ LayoutC
Layout of C matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm61.h:82
Defines layout functions used by TensorRef and derived classes.
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a4b596aaab0b18e2bf5baf0034cb59fda)
int32_t ElementC
Element type of operand C.
Definition: gemm/thread/mma_sm61.h:79
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentB](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#acc0894e5afdf193553e9205e7ea56ada)
Array< ElementB, Shape::kKN > FragmentB
B operand storage.
Definition: gemm/thread/mma_sm61.h:91
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentC](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1RowMajor_00_01int89c659e7faf47264972bdba6cd80f42b.html#a9fea49fb8cd291c925ed0a52bccff1e7)
Array< ElementC, Shape::kMN > FragmentC
C operand storage.
Definition: gemm/thread/mma_sm61.h:94
Matrix multiply-add operation.
Definition: arch/mma.h:92
Basic include for CUTLASS.
Definition: matrix_coord.h:39
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Shape](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#aad5a7046adce98731840857a4b9f928c)
Shape_ Shape
Size of the Gemm problem - concept: gemm::GemmShape<>
Definition: gemm/thread/mma_sm61.h:177
[cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementA](structcutlass_1_1gemm_1_1thread_1_1Mma_3_01Shape _00_01int8 t_00_01layout_1_1ColumnMajor_00_013f3785e722edc6e9aab6f866309b8623.html#a1429b2efb3fcf27381763368fa49ac4b)
int8_t ElementA
Data type of operand A.
Definition: gemm/thread/mma_sm61.h:180
Generated by Doxygen 1.8.11