docs/numeric__conversion_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
numeric_conversion.h
Go to the documentation of this file.
1 /***************************************************************************************************
2 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
29 #pragma once
30
31 #include "cutlass/cutlass.h"
32 #include "cutlass/numeric_types.h"
33
34 #include "cutlass/array.h"
35 #include "cutlass/half.h"
36
37 namespace cutlass {
38
40
43 enum class FloatRoundStyle {
50 };
51
53
54 template <
55typename T,
56typename S,
57FloatRoundStyle Round = FloatRoundStyle::round_to_nearest
58 >
59 struct NumericConverter {
60
61using result_type = T;
62using source_type = S;
63static FloatRoundStyle const round_style = Round;
64
66static result_type convert(source_type const & s) {
67
68return static_cast<result_type>(s);
69 }
70
72result_type operator()(source_type const &s) {
73return convert(s);
74 }
75 };
76
78 //
79 // Partial specializations for float => int8_t
80 //
82 template <FloatRoundStyle Round>
83 struct NumericConverter<int8_t, float, Round> {
84
85using result_type = int8_t;
86using source_type = float;
87static FloatRoundStyle const round_style = Round;
88
90static result_type convert(source_type const & s) {
91
92result_type result = static_cast<int8_t>(s);
93
94return result;
95 }
96
98result_type operator()(source_type const &s) {
99return convert(s);
100 }
101 };
102
104
106 template <typename T, FloatRoundStyle Round>
107 struct NumericConverter<T, T, Round> {
108
109using result_type = T;
110using source_type = T;
111static FloatRoundStyle const round_style = Round;
112
114static result_type convert(source_type const & s) {
115
116return s;
117 }
118
120result_type operator()(source_type const &s) {
121return convert(s);
122 }
123 };
124
126 //
127 // Partial specializations for float <=> half_t
128 //
130
132 template <FloatRoundStyle Round>
133 struct NumericConverter<float, half_t, Round> {
134
135using result_type = float;
136using source_type = half_t;
137static FloatRoundStyle const round_style = Round;
138
140static result_type convert(source_type const & s) {
141
142result_type result = static_cast<float>(s);
143
144return result;
145 }
146
148result_type operator()(source_type const &s) {
149return convert(s);
150 }
151 };
152
154 template <>
[155](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html) struct NumericConverter<half_t, float, FloatRoundStyle::round_to_nearest> {
156
[157](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a5dc993f38c6eedd917008e6c839c6300)using result_type = half_t;
[158](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce)using [source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce) = float;
[159](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#affd38515f30c26256ff5c06e5a567080)static FloatRoundStyle const round_style = FloatRoundStyle::round_to_nearest;
160
[162](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#aaf16c1dd3bb1fc0566c819146dfd5ab8)static result_type [convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#aaf16c1dd3bb1fc0566c819146dfd5ab8)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce) const & s) {
163
164result_type result = static_cast<half_t>(s);
165
166return result;
167 }
168
[170](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a619f44798aa208650afaf2c584454d4b)result_type [operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a619f44798aa208650afaf2c584454d4b)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce) const &s) {
171return convert(s);
172 }
173 };
174
176 template <>
[177](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html) struct NumericConverter<half_t, float, FloatRoundStyle::round_toward_zero> {
178
[179](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#aa5bfe0288e538f1df94d74fa52aa1e17)using result_type = half_t;
[180](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36)using [source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round toward zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36) = float;
[181](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43fde0bc2ddeeebf1f188c6d1ac7fbe0)static FloatRoundStyle const round_style = FloatRoundStyle::round_toward_zero;
182
[185](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43ab30e5283f39b1defe46b13da9ac1b)static result_type [convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43ab30e5283f39b1defe46b13da9ac1b)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round toward zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36) const & flt) {
186
187 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
188return half_t(__float2half_rz(flt));
189 #else
190// software implementation rounds toward nearest even
191unsigned const& s = reinterpret_cast<unsigned const &>(flt);
192 uint16_t sign = uint16_t((s >> 16) & 0x8000);
193 int16_t exp = uint16_t(((s >> 23) & 0xff) - 127);
194int mantissa = s & 0x7fffff;
195 uint16_t u = 0;
196
197if ((s & 0x7fffffff) == 0) {
198// sign-preserving zero
199return half_t::bitcast(sign);
200 }
201
202if (exp > 15) {
203if (exp == 128 && mantissa) {
204// not a number
205 u = 0x7fff;
206 } else {
207// overflow to infinity
208 u = sign | 0x7c00;
209 }
210return half_t::bitcast(u);
211 }
212
213if (exp >= -14) {
214// normal fp32 to normal fp16
215 exp = uint16_t(exp + uint16_t(15));
216 u = uint16_t(((exp & 0x1f) << 10));
217 u = uint16_t(u | (mantissa >> 13));
218 } else {
219// normal single-precision to subnormal half_t-precision representation
220int rshift = (-14 - exp);
221if (rshift < 32) {
222 mantissa |= (1 << 23);
223 mantissa = (mantissa >> rshift);
224 u = (uint16_t(mantissa >> 13) & 0x3ff);
225 } else {
226 mantissa = 0;
227 u = 0;
228 }
229 }
230
231 u |= sign;
232
233return half_t::bitcast(u);
234
235 #endif // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
236 }
237
[239](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a9ee10f5cf5ff71842a31e305d3a83947)result_type [operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a9ee10f5cf5ff71842a31e305d3a83947)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round toward zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36) const &s) {
240return convert(s);
241 }
242 };
243
245 //
246 // Conversion and Clamp operator for Integers
247 //
249
250 template <
251typename T,
252typename S
253 >
254 struct NumericConverterClamp {
255
256using result_type = T;
257using source_type = S;
258
259static_assert((platform::is_same<result_type, int32_t>::value ||
260platform::is_same<result_type, int8_t>::value ||
261platform::is_same<result_type, cutlass::int4b_t>::value),
262"Clamp is only needed for integer types");
263
265static result_type convert(source_type const & s) {
266NumericConverter<result_type, source_type> convert_op;
267result_type const kClamp_max =
268 (0x1U << (sizeof_bits<result_type>::value - 1)) - 1;
269result_type const kClamp_min = -kClamp_max - 1;
270bool is_int_min = !(s > kClamp_min);
271bool is_int_max = !(s < kClamp_max);
272return is_int_min ? kClamp_min : (is_int_max ? kClamp_max : convert_op(s));
273 }
274
276result_type operator()(source_type const &s) {
277return convert(s);
278 }
279 };
280
282 //
283 // Conversion operator for Array
284 //
286
288 template <
289typename T,
290typename S,
291int N,
292FloatRoundStyle Round = FloatRoundStyle::round_to_nearest
293 >
294 struct NumericArrayConverter {
295
296using result_type = Array<T, N>;
297using source_type = Array<S, N>;
298static FloatRoundStyle const round_style = Round;
299
301static result_type convert(source_type const & s) {
302
303result_type result;
304NumericConverter<T, S, Round> convert_;
305
307for (int i = 0; i < N; ++i) {
308 result[i] = convert_(s[i]);
309 }
310
311return result;
312 }
313
315result_type operator()(source_type const &s) {
316return convert(s);
317 }
318 };
319
321
323 template <>
[324](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html) struct NumericArrayConverter<half_t, float, 2, FloatRoundStyle::round_to_nearest> {
325
[326](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c)using [result_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c) = Array<half_t, 2>;
[327](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e)using [source_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e) = Array<float, 2>;
[328](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3c31373beb0e6a9c649134b21a02125a)static FloatRoundStyle const round_style = FloatRoundStyle::round_to_nearest;
329
[331](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3cc4d59f083555f24288e15490eeb41d)static [result_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c) [convert](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3cc4d59f083555f24288e15490eeb41d)([source_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e) const & source) {
332
333 Array<half_t, 2> result;
334
335 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
336reinterpret_cast<__half2 &>(result) = __float22half2_rn(reinterpret_cast<float2 const &>(source));
337 #else
338NumericConverter<half_t, float, round_style> convert_;
339 result[0] = convert_(source[0]);
340 result[1] = convert_(source[1]);
341 #endif
342
343return result;
344 }
345
[347](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a177cc3209f728a6629754cf2d685a37a)[result_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c) [operator()](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a177cc3209f728a6629754cf2d685a37a)([source_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e) const &s) {
348return convert(s);
349 }
350 };
351
353 template <FloatRoundStyle Round>
354 struct NumericArrayConverter<float, half_t, 2, Round> {
355
356using result_type = Array<float, 2>;
357using source_type = Array<half_t, 2>;
358static FloatRoundStyle const round_style = FloatRoundStyle::round_to_nearest;
359
361static result_type convert(source_type const & source) {
362
363 Array<float, 2> result;
364
365 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
366reinterpret_cast<float2 &>(result) = __half22float2(reinterpret_cast<__half2 const &>(source));
367 #else
368NumericConverter<float, half_t, round_style> convert_;
369 result[0] = convert_(source[0]);
370 result[1] = convert_(source[1]);
371 #endif
372
373return result;
374 }
375
377result_type operator()(source_type const &s) {
378return convert(s);
379 }
380 };
381
383
385 template <
386int N,
387FloatRoundStyle Round
388 >
389 struct NumericArrayConverter<half_t, float, N, Round> {
390
391using result_type = Array<half_t, N>;
392using source_type = Array<float, N>;
393static FloatRoundStyle const round_style = Round;
394
396static result_type convert(source_type const & source) {
397
398NumericArrayConverter<half_t, float, 2, Round> convert_vector_;
399NumericConverter<half_t, float, Round> convert_element_;
400
401result_type result;
402
403 Array<half_t, 2> *result_ptr = reinterpret_cast<Array<half_t, 2> *>(&result);
404 Array<float, 2> const *source_ptr = reinterpret_cast<Array<float, 2> const *>(&source);
405
407for (int i = 0; i < N / 2; ++i) {
408 result_ptr[i] = convert_vector_(source_ptr[i]);
409 }
410
411if (N % 2) {
412 result[N - 1] = convert_element_(source[N - 1]);
413 }
414
415return result;
416 }
417
419result_type operator()(source_type const &s) {
420return convert(s);
421 }
422 };
423
424
426 template <
427int N,
428FloatRoundStyle Round
429 >
430 struct NumericArrayConverter<float, half_t, N, Round> {
431
432using result_type = Array<float, N>;
433using source_type = Array<half_t, N>;
434static FloatRoundStyle const round_style = Round;
435
437static result_type convert(source_type const & source) {
438
439NumericArrayConverter<float, half_t, 2, Round> convert_vector_;
440NumericConverter<float, half_t, Round> convert_element_;
441
442result_type result;
443
444 Array<float, 2> *result_ptr = reinterpret_cast<Array<float, 2> *>(&result);
445 Array<half_t, 2> const *source_ptr = reinterpret_cast<Array<half_t, 2> const *>(&source);
446
448for (int i = 0; i < N / 2; ++i) {
449 result_ptr[i] = convert_vector_(source_ptr[i]);
450 }
451
452if (N % 2) {
453 result[N - 1] = convert_element_(source[N - 1]);
454 }
455
456return result;
457 }
458
460result_type operator()(source_type const &s) {
461return convert(s);
462 }
463 };
464
466
467 // Conditional guards to enable partial specialization for packed integers
468 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 720) && (__CUDACC_VER_MAJOR__ >= 10) && (__CUDACC_VER_MINOR__ >= 2)
469
471 template <
472FloatRoundStyle Round
473 >
474 struct NumericArrayConverter<int8_t, int, 1, Round> {
475
476using result_type = Array<int8_t, 1>;
477using source_type = Array<int, 1>;
478static FloatRoundStyle const round_style = Round;
479
481static result_type convert(source_type const & source) {
482NumericConverter<int8_t, int, Round> convert_element_;
483
484 result_type result;
485
486 result[0] = convert_element_(source[0]);
487
488return result;
489 }
490
492 result_type operator()(source_type const &s) {
493return convert(s);
494 }
495 };
496
498 template <
499FloatRoundStyle Round
500 >
501 struct NumericArrayConverter<int8_t, int, 2, Round> {
502
503using result_type = Array<int8_t, 2>;
504using source_type = Array<int, 2>;
505static FloatRoundStyle const round_style = Round;
506
508static result_type convert(source_type const & source) {
509
510 uint32_t tmp;
511
512asm volatile(
513"cvt.pack.sat.s8.s32.b32 %0, %2, %1, 0;\n"
514 : "=r"(tmp) : "r"(source[0]), "r"(source[1]));
515
516 uint16_t out = (tmp & 0xffff);
517return reinterpret_cast<result_type const &>(out);
518 }
519
521 result_type operator()(source_type const &s) {
522return convert(s);
523 }
524 };
525
527 template <
528FloatRoundStyle Round
529 >
530 struct NumericArrayConverter<int8_t, int, 4, Round> {
531
532using result_type = Array<int8_t, 4>;
533using source_type = Array<int, 4>;
534static FloatRoundStyle const round_style = Round;
535
537static result_type convert(source_type const & source) {
538
539unsigned out;
540
541asm volatile(
542"{ .reg .u32 r4;"
543"cvt.pack.sat.s8.s32.b32 r4, %4, %3, 0;"
544"cvt.pack.sat.s8.s32.b32 %0, %2, %1, r4;"
545"}"
546 : "=r"(out) : "r"(source[0]), "r"(source[1]), "r"(source[2]), "r"(source[3]));
547
548return reinterpret_cast<result_type const &>(out);
549 }
550
552 result_type operator()(source_type const &s) {
553return convert(s);
554 }
555 };
556
558 template <
559int N,
560FloatRoundStyle Round
561 >
562 struct NumericArrayConverter<int8_t, int, N, Round> {
563static_assert(!(N % 4), "N must be multiple of 4.");
564
565using result_type = Array<int8_t, N>;
566using source_type = Array<int, N>;
567static FloatRoundStyle const round_style = Round;
568
570static result_type convert(source_type const & source) {
571
572NumericArrayConverter<int8_t, int, 4, Round> convert_vector_;
573
574result_type result;
575
576 Array<int8_t, 4> *result_ptr = reinterpret_cast<Array<int8_t, 4> *>(&result);
577 Array<int, 4> const *source_ptr = reinterpret_cast<Array<int, 4> const *>(&source);
578
580for (int i = 0; i < N / 4; ++i) {
581 result_ptr[i] = convert_vector_(source_ptr[i]);
582 }
583
584return result;
585 }
586
588result_type operator()(source_type const &s) {
589return convert(s);
590 }
591 };
592
593 #endif // Conditional guards to enable partial specialization for packed integers
594
596
597 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 750) && (__CUDACC_VER_MAJOR__ >= 10) && (__CUDACC_VER_MINOR__ >= 2)
598
600 template <
601FloatRoundStyle Round
602 >
603 struct NumericArrayConverter<int4b_t, int, 8, Round> {
604
605using result_type = Array<int4b_t, 8>;
606using source_type = Array<int, 8>;
607static FloatRoundStyle const round_style = Round;
608
610static result_type convert(source_type const & source) {
611
612unsigned out;
613
614asm volatile(
615"{ .reg .u32 r4;"
616"cvt.pack.sat.s4.s32.b32 r4, %8, %7, 0;"
617"cvt.pack.sat.s4.s32.b32 r4, %6, %5, r4;"
618"cvt.pack.sat.s4.s32.b32 r4, %4, %3, r4;"
619"cvt.pack.sat.s4.s32.b32 %0, %2, %1, r4;"
620"}"
621 : "=r"(out)
622 : "r"(source[0]), "r"(source[1]), "r"(source[2]), "r"(source[3]),
623"r"(source[4]), "r"(source[5]), "r"(source[6]), "r"(source[7]));
624
625return reinterpret_cast<result_type const &>(out);
626 }
627
629 result_type operator()(source_type const &s) {
630return convert(s);
631 }
632 };
633
635 template <
636int N,
637FloatRoundStyle Round
638 >
639 struct NumericArrayConverter<int4b_t, int, N, Round> {
640static_assert(!(N % 8), "N must be multiple of 8.");
641
642using result_type = Array<int4b_t, N>;
643using source_type = Array<int, N>;
644static FloatRoundStyle const round_style = Round;
645
647static result_type convert(source_type const & source) {
648
649NumericArrayConverter<int4b_t, int, 8, Round> convert_vector_;
650
651result_type result;
652
653 Array<int4b_t, 8> *result_ptr = reinterpret_cast<Array<int4b_t, 8> *>(&result);
654 Array<int, 8> const *source_ptr = reinterpret_cast<Array<int, 8> const *>(&source);
655
657for (int i = 0; i < N / 8; ++i) {
658 result_ptr[i] = convert_vector_(source_ptr[i]);
659 }
660
661return result;
662 }
663
665result_type operator()(source_type const &s) {
666return convert(s);
667 }
668 };
669
670 #endif // Conditional guards to enable partial specialization for packed integers
671
673
674 } // namespace cutlass
cutlass::NumericConverterClamp::result_type
T result_type
Definition: numeric_conversion.h:256
[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >::source_type](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36)
float source_type
Definition: numeric_conversion.h:180
cutlass::NumericConverter< float, half_t, Round >
Partial specialization for float <= half_t.
Definition: numeric_conversion.h:133
static CUTLASS_HOST_DEVICE half_t bitcast(uint16_t x)
Constructs from an unsigned short.
Definition: half.h:141
cutlass::NumericConverter< T, T, Round >::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:120
cutlass::NumericConverterClamp::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:276
Definition: aligned_buffer.h:35
cutlass::NumericArrayConverter< float, half_t, 2, Round >::result_type
Array< float, 2 > result_type
Definition: numeric_conversion.h:356
cutlass::NumericConverterClamp
Definition: numeric_conversion.h:254
cutlass::NumericConverter< int8_t, float, Round >::source_type
float source_type
Definition: numeric_conversion.h:86
cutlass::NumericArrayConverter< half_t, float, N, Round >::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)
Definition: numeric_conversion.h:396
[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >::convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43ab30e5283f39b1defe46b13da9ac1b)
static CUTLASS_HOST_DEVICE result_type convert(source_type const &flt)
Round toward zero.
Definition: numeric_conversion.h:185
cutlass::NumericConverter< T, T, Round >::result_type
T result_type
Definition: numeric_conversion.h:109
[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::convert](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3cc4d59f083555f24288e15490eeb41d)
static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)
Definition: numeric_conversion.h:331
std::is_same (false specialization)
Definition: platform.h:394
cutlass::FloatRoundStyle::round_toward_zero
round toward zero
cutlass::NumericConverterClamp::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:265
Defines a class for using IEEE half-precision floating-point types in host or device code...
cutlass::NumericConverter::result_type
T result_type
Definition: numeric_conversion.h:61
IEEE half-precision floating-point type.
Definition: half.h:126
cutlass::NumericArrayConverter< float, half_t, 2, Round >::source_type
Array< half_t, 2 > source_type
Definition: numeric_conversion.h:357
cutlass::NumericArrayConverter< float, half_t, 2, Round >::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:377
cutlass::NumericArrayConverter< float, half_t, N, Round >::result_type
Array< float, N > result_type
Definition: numeric_conversion.h:432
CUTLASS_HOST_DEVICE complex< T > exp(complex< T > const &z)
Computes the complex exponential of z.
Definition: complex.h:375
[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >::source_type](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce)
float source_type
Definition: numeric_conversion.h:158
cutlass::NumericArrayConverter< float, half_t, N, Round >::source_type
Array< half_t, N > source_type
Definition: numeric_conversion.h:433
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
cutlass::NumericArrayConverter< float, half_t, N, Round >::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:460
[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::result_type](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c)
Array< half_t, 2 > result_type
Definition: numeric_conversion.h:326
cutlass::NumericArrayConverter< half_t, float, N, Round >::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:419
cutlass::NumericArrayConverter< float, half_t, N, Round >::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)
Definition: numeric_conversion.h:437
cutlass::NumericArrayConverter< half_t, float, N, Round >::result_type
Array< half_t, N > result_type
Definition: numeric_conversion.h:391
cutlass::FloatRoundStyle::round_toward_neg_infinity
round toward negative infinity
[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >::convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#aaf16c1dd3bb1fc0566c819146dfd5ab8)
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:162
cutlass::NumericConverter< T, T, Round >::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:114
[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >::operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a619f44798aa208650afaf2c584454d4b)
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:170
cutlass::FloatRoundStyle::round_half_ulp_truncate
add 0.5ulp to integer representation then round toward zero
cutlass::NumericConverter< T, T, Round >::source_type
T source_type
Definition: numeric_conversion.h:110
cutlass::NumericArrayConverter< float, half_t, 2, Round >
Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. ...
Definition: numeric_conversion.h:354
[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >::operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a9ee10f5cf5ff71842a31e305d3a83947)
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:239
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
cutlass::NumericArrayConverter::source_type
Array< S, N > source_type
Definition: numeric_conversion.h:297
Top-level include for all CUTLASS numeric types.
#define static_assert(__e, __m)
Definition: platform.h:153
cutlass::NumericConverter::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:66
cutlass::NumericConverterClamp::source_type
S source_type
Definition: numeric_conversion.h:257
[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::source_type](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e)
Array< float, 2 > source_type
Definition: numeric_conversion.h:327
Definition: numeric_conversion.h:59
cutlass::FloatRoundStyle::round_toward_infinity
round toward infinity
cutlass::NumericArrayConverter::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:315
cutlass::FloatRoundStyle::round_to_nearest
round to nearest even
cutlass::NumericArrayConverter< float, half_t, 2, Round >::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)
Definition: numeric_conversion.h:361
FloatRoundStyle
Definition: numeric_conversion.h:43
cutlass::NumericConverter< float, half_t, Round >::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:148
cutlass::NumericConverter< int8_t, float, Round >::result_type
int8_t result_type
Definition: numeric_conversion.h:85
cutlass::FloatRoundStyle::round_indeterminate
rounding mode unknown
[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::operator()](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a177cc3209f728a6629754cf2d685a37a)
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:347
cutlass::NumericConverter::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:72
cutlass::NumericArrayConverter< half_t, float, N, Round >::source_type
Array< float, N > source_type
Definition: numeric_conversion.h:392
cutlass::NumericConverter::source_type
S source_type
Definition: numeric_conversion.h:62
cutlass::NumericConverter< float, half_t, Round >::result_type
float result_type
Definition: numeric_conversion.h:135
cutlass::NumericArrayConverter
Conversion operator for Array.
Definition: numeric_conversion.h:294
cutlass::NumericConverter< int8_t, float, Round >::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:90
cutlass::NumericArrayConverter::result_type
Array< T, N > result_type
Definition: numeric_conversion.h:296
Basic include for CUTLASS.
cutlass::NumericConverter< float, half_t, Round >::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:140
cutlass::NumericConverter< int8_t, float, Round >::operator()
CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)
Definition: numeric_conversion.h:98
cutlass::NumericArrayConverter::convert
static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)
Definition: numeric_conversion.h:301
Generated by 1.8.11