docs/half_8h_source.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
half.h
Go to the documentation of this file.
1 /***************************************************************************************************
2 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted
5 * provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright notice, this list of
7 * conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright notice, this list of
9 * conditions and the following disclaimer in the documentation and/or other materials
10 * provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12 * to endorse or promote products derived from this software without specific prior written
13 * permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 *
24 **************************************************************************************************/
30 #pragma once
31
32 #ifndef CUTLASS_ENABLE_F16C
33 #define CUTLASS_ENABLE_F16C 0
34 #endif
35
36 #if defined(__CUDACC_RTC__)
37 /* All floating-point numbers can be put in one of these categories. */
38 enum
39 {
40 FP_NAN =
41 # define FP_NAN 0
42 FP_NAN,
43 FP_INFINITE =
44 # define FP_INFINITE 1
45 FP_INFINITE,
46 FP_ZERO =
47 # define FP_ZERO 2
48 FP_ZERO,
49 FP_SUBNORMAL =
50 # define FP_SUBNORMAL 3
51 FP_SUBNORMAL,
52 FP_NORMAL =
53 # define FP_NORMAL 4
54 FP_NORMAL
55 };
56
57 // F16C extensions are not meaningful when compiling for NVRTC which only accommodates device code.
58 #undef CUTLASS_ENABLE_F16C
59 #define CUTLASS_ENABLE_F16C 0
60
61 #else
62 #include <cmath>
63 #include <limits>
64 #include <cstdint>
65 #endif
66
68
69 #include <cuda_fp16.h>
70
71 #include "cutlass/cutlass.h"
72
74
75 // Optionally target F16C extentions to accelerate half-precision conversion.
76 #if !defined(__CUDA_ARCH__) && (CUTLASS_ENABLE_F16C)
77 #if defined(_MSC_VER)
78
79 #include <immintrin.h>
80
81 #define F16C_ROUND_NEAREST 0
82
83 #if !defined(__CUDA_ARCH__)
84 extern __inline float _cvtsh_ss (unsigned short __S) {
85 __m128i packed;
86 std::memcpy(&packed, &__S, sizeof(__S));
87
88 __m128 result = _mm_cvtph_ps(packed);
89
90float flt;
91 std::memcpy(&flt, &result, sizeof(flt));
92
93return flt;
94 }
95
96 __inline unsigned short _cvtss_sh (float __F, const int) {
97 __m128 packed;
98 std::memcpy(&packed, &__F, sizeof(__F));
99
100 __m128i result = _mm_cvtps_ph(packed, F16C_ROUND_NEAREST);
101
102unsigned short u;
103 std::memcpy(&u, &result, sizeof(u));
104
105return u;
106 }
107 #endif
108
109 #else
110
111 // Linux
112 #include <x86intrin.h>
113 #define F16C_ROUND_NEAREST (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
114
115 #endif
116 #endif // !defined(__CUDA_ARCH__) && CUTLASS_ENABLE_F16C
117
119
120
121 namespace cutlass {
122
124
126 struct alignas(2) half_t {
127
128//
129// Data members
130//
131
134
135//
136// Static conversion operators
137//
138
141static half_t bitcast(uint16_t x) {
142half_t h;
143 h.storage = x;
144return h;
145 }
146
148 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
149// Avoid inlining in device code if no hardware support
150 __device__ __noinline__
151 #else
153 #endif
154static half_t convert(float const& flt) {
155 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
156return half_t(__float2half_rn(flt));
157 #elif !defined(__CUDA_ARCH__) && CUTLASS_ENABLE_F16C
158unsigned short u = _cvtss_sh(flt, F16C_ROUND_NEAREST);
159return bitcast(u);
160 #else
161// software implementation rounds toward nearest even
162unsigned const& s = reinterpret_cast<unsigned const &>(flt);
163 uint16_t sign = uint16_t((s >> 16) & 0x8000);
164 int16_t exp = uint16_t(((s >> 23) & 0xff) - 127);
165int mantissa = s & 0x7fffff;
166 uint16_t u = 0;
167
168if ((s & 0x7fffffff) == 0) {
169// sign-preserving zero
170return bitcast(sign);
171 }
172
173if (exp > 15) {
174if (exp == 128 && mantissa) {
175// not a number
176 u = 0x7fff;
177 } else {
178// overflow to infinity
179 u = sign | 0x7c00;
180 }
181return bitcast(u);
182 }
183
184int sticky_bit = 0;
185
186if (exp >= -14) {
187// normal fp32 to normal fp16
188 exp = uint16_t(exp + uint16_t(15));
189 u = uint16_t(((exp & 0x1f) << 10));
190 u = uint16_t(u | (mantissa >> 13));
191 } else {
192// normal single-precision to subnormal half_t-precision representation
193int rshift = (-14 - exp);
194if (rshift < 32) {
195 mantissa |= (1 << 23);
196
197 sticky_bit = ((mantissa & ((1 << rshift) - 1)) != 0);
198
199 mantissa = (mantissa >> rshift);
200 u = (uint16_t(mantissa >> 13) & 0x3ff);
201 } else {
202 mantissa = 0;
203 u = 0;
204 }
205 }
206
207// round to nearest even
208int round_bit = ((mantissa >> 12) & 1);
209 sticky_bit |= ((mantissa & ((1 << 12) - 1)) != 0);
210
211if ((round_bit && sticky_bit) || (round_bit && (u & 1))) {
212 u = uint16_t(u + 1);
213 }
214
215 u |= sign;
216
217return bitcast(u);
218 #endif
219 }
220
223static half_t convert(int const& n) {
224 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
225return half_t(__int2half_rn(n));
226 #else
227return convert(float(n));
228 #endif
229 }
230
233static half_t convert(unsigned const& n) {
234 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
235return half_t(__uint2half_rn(n));
236 #else
237return convert(float(n));
238 #endif
239 }
240
242 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
243// Avoid inlining in device code if no hardware support
244 __device__ __noinline__
245 #else
247 #endif
248static float convert(half_t const& x) {
249 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
250return __half2float(x.to_half());
251 #elif !defined(__CUDA_ARCH__) && CUTLASS_ENABLE_F16C
252unsigned short u = x.storage;
253return _cvtsh_ss(u);
254 #else
255 uint16_t const &h = x.storage;
256int sign = ((h >> 15) & 1);
257int exp = ((h >> 10) & 0x1f);
258int mantissa = (h & 0x3ff);
259unsigned f = 0;
260
261if (exp > 0 && exp < 31) {
262// normal
263 exp += 112;
264 f = (sign << 31) | (exp << 23) | (mantissa << 13);
265 } else if (exp == 0) {
266if (mantissa) {
267// subnormal
268 exp += 113;
269while ((mantissa & (1 << 10)) == 0) {
270 mantissa <<= 1;
271 exp--;
272 }
273 mantissa &= 0x3ff;
274 f = (sign << 31) | (exp << 23) | (mantissa << 13);
275 } else {
276// sign-preserving zero
277 f = (sign << 31);
278 }
279 } else if (exp == 31) {
280if (mantissa) {
281 f = 0x7fffffff; // not a number
282 } else {
283 f = (0xff << 23) | (sign << 31); // inf
284 }
285 }
286return reinterpret_cast<float const&>(f);
287 #endif
288 }
289
290//
291// Methods
292//
293
297
300explicit half_t(half const & x): storage(reinterpret_cast<uint16_t const &>(x)) {
301
302 }
303
307 storage = convert(x).storage;
308 }
309
312explicit half_t(double x): half_t(float(x)) {
313
314 }
315
319 storage = convert(x).storage;
320 }
321
324explicit half_t(unsigned x) {
325 storage = convert(x).storage;
326 }
327
330half_t & operator=(half const &x) {
331 storage = reinterpret_cast<uint16_t const &>(x);
332return *this;
333 }
334
337operator float() const {
338return convert(*this);
339 }
340
343operator double() const {
344return double(convert(*this));
345 }
346
349explicit operator int() const {
350return int(convert(*this));
351 }
352
355operator bool() const {
356return (convert(*this) != 0.0f);
357 }
358
362return reinterpret_cast<half const &>(storage);
363 }
364
368return storage;
369 }
370
374return storage;
375 }
376
380return ((storage & 0x8000) != 0);
381 }
382
385int exponent_biased() const {
386return int((storage >> 10) & 0x1f);
387 }
388
392return exponent_biased() - 15;
393 }
394
398return int(storage & 0x3ff);
399 }
400 };
401
403
405 bool signbit(cutlass::half_t const& h) {
406return ((h.raw() & 0x8000) != 0);
407 }
408
410 cutlass::half_t abs(cutlass::half_t const& h) {
411return cutlass::half_t::bitcast(h.raw() & 0x7fff);
412 }
413
415 bool isnan(cutlass::half_t const& h) {
416return (h.exponent_biased() == 0x1f) && h.mantissa();
417 }
418
420 bool isfinite(cutlass::half_t const& h) {
421return (h.exponent_biased() != 0x1f);
422 }
423
425 cutlass::half_t nanh(const char*) {
426// NVIDIA canonical NaN
427return cutlass::half_t::bitcast(0x7fff);
428 }
429
431 bool isinf(cutlass::half_t const& h) {
432return (h.exponent_biased() == 0x1f) && !h.mantissa();
433 }
434
436 bool isnormal(cutlass::half_t const& h) {
437return h.exponent_biased() && h.exponent_biased() != 0x1f;
438 }
439
441 int fpclassify(cutlass::half_t const& h) {
442int exp = h.exponent_biased();
443int mantissa = h.mantissa();
444if (exp == 0x1f) {
445if (mantissa) {
446return FP_NAN;
447 }
448else {
449return FP_INFINITE;
450 }
451 }
452else if (!exp) {
453if (mantissa) {
454return FP_SUBNORMAL;
455 }
456else {
457return FP_ZERO;
458 }
459 }
460return FP_NORMAL;
461 }
462
464 cutlass::half_t sqrt(cutlass::half_t const& h) {
465 #if defined(__CUDACC_RTC__)
466return cutlass::half_t(sqrtf(float(h)));
467 #else
468return cutlass::half_t(std::sqrt(float(h)));
469 #endif
470 }
471
473 half_t copysign(half_t const& a, half_t const& b) {
474
475 uint16_t a_mag = (reinterpret_cast<uint16_t const &>(a) & 0x7fff);
476 uint16_t b_sign = (reinterpret_cast<uint16_t const &>(b) & 0x8000);
477 uint16_t result = (a_mag | b_sign);
478
479return reinterpret_cast<cutlass::half_t const &>(result);
480 }
481
483
484 } // namespace cutlass
485
487 //
488 // Standard Library operations and definitions
489 //
491
492 namespace std {
493
494 #if !defined(__CUDACC_RTC__)
495 template <>
[497](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html) struct numeric_limits<cutlass::half_t> {
[498](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ade4affb586360c5356a7939c1b343a40)static bool const is_specialized = true;
[499](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab3a169117baca2e7fae33846caa5dbfd)static bool const is_signed = true;
[500](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a2a2ec168a6f0e9f55dc42d3b3e5fff25)static bool const is_integer = false;
[501](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a7413e9cd24eb03a86cc5f2d47c49db3e)static bool const is_exact = false;
[502](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a6f7f2fbe6cd7a04803b90b8fa9172098)static bool const has_infinity = true;
[503](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a3d75832e46bc154758e35a03a624ccf8)static bool const has_quiet_NaN = true;
[504](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a68a0d0f6ecc2f3b84f2e71475b2c48bd)static bool const has_signaling_NaN = false;
[505](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#aaf46b5d03403828c1e6633fb714ffd84)static std::float_denorm_style const has_denorm = std::denorm_present;
[506](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a8b371f82151fd0238b7da083fa2b87a9)static bool const has_denorm_loss = true;
[507](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab0af85c1d7c83ca03ed0c083fe22262f)static std::float_round_style const round_style = std::round_to_nearest;
[508](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#afc86b85a1fe209658a50d8c06e54cb77)static bool const is_iec559 = true;
[509](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a59737f49161b87c259683c26737f42c2)static bool const is_bounded = true;
[510](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a640a034527a3577039053113bc1c5e46)static bool const is_modulo = false;
[511](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a92152311525685a53c6a0db4cb74f193)static int const digits = 10;
512
[514](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ad9175b4d7b32fe18cf9c07e4f559b32c)static cutlass::half_t min() { return cutlass::half_t::bitcast(0x0001); }
515
[517](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ac2ae43139037875e38056a675ae1f6c4)static cutlass::half_t lowest() { return cutlass::half_t::bitcast(0xfbff); }
518
[520](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a39a5774583daedbb5ac4aaaaa8034883)static cutlass::half_t max() { return cutlass::half_t::bitcast(0x7bff); }
521
[523](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab9fc3a009eaff0c922307f2780ee3fc0)static cutlass::half_t epsilon() { return cutlass::half_t::bitcast(0x1800); }
524
[526](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab0a036db7a1ad11c65e876020c78b1a5)static cutlass::half_t round_error() { return cutlass::half_t(0.5f); }
527
[529](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab7a40820e64282376a050095d5004b74)static cutlass::half_t infinity() { return cutlass::half_t::bitcast(0x7c00); }
530
[532](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a8c7eafdd3b121353c0914dc6e1c0d108)static cutlass::half_t quiet_NaN() { return cutlass::half_t::bitcast(0x7fff); }
533
[535](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a423fb5b95e6071e832d40918e597f63f)static cutlass::half_t signaling_NaN() { return cutlass::half_t::bitcast(0x7fff); }
536
[538](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a2c05c19022c183e8734ada65c8970af5)static cutlass::half_t denorm_min() { return cutlass::half_t::bitcast(0x0001); }
539 };
540 #endif
541
542 } // namespace std
543
545 //
546 // Arithmetic operators
547 //
549
550 namespace cutlass {
551
553
555 bool operator==(half_t const& lhs, half_t const& rhs) {
556 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
557return __heq(lhs.to_half(), rhs.to_half());
558 #else
559return float(lhs) == float(rhs);
560 #endif
561 }
562
564 bool operator!=(half_t const& lhs, half_t const& rhs) {
565 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
566return __hne(lhs.to_half(), rhs.to_half());
567 #else
568return float(lhs) != float(rhs);
569 #endif
570 }
571
573 bool operator<(half_t const& lhs, half_t const& rhs) {
574 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
575return __hlt(lhs.to_half(), rhs.to_half());
576 #else
577return float(lhs) < float(rhs);
578 #endif
579 }
580
582 bool operator<=(half_t const& lhs, half_t const& rhs) {
583 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
584return __hle(lhs.to_half(), rhs.to_half());
585 #else
586return float(lhs) <= float(rhs);
587 #endif
588 }
589
591 bool operator>(half_t const& lhs, half_t const& rhs) {
592 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
593return __hgt(lhs.to_half(), rhs.to_half());
594 #else
595return float(lhs) > float(rhs);
596 #endif
597 }
598
600 bool operator>=(half_t const& lhs, half_t const& rhs) {
601 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
602return __hge(lhs.to_half(), rhs.to_half());
603 #else
604return float(lhs) >= float(rhs);
605 #endif
606 }
607
609 half_t operator+(half_t const& lhs, half_t const& rhs) {
610 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
611return half_t(__hadd(lhs.to_half(), rhs.to_half()));
612 #else
613return half_t(float(lhs) + float(rhs));
614 #endif
615 }
616
618 half_t operator-(half_t const& lhs) {
619 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
620return half_t(__hneg(lhs.to_half()));
621 #else
622return half_t(-float(lhs));
623 #endif
624 }
625
627 half_t operator-(half_t const& lhs, half_t const& rhs) {
628 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
629return half_t(__hsub(lhs.to_half(), rhs.to_half()));
630 #else
631return half_t(float(lhs) - float(rhs));
632 #endif
633 }
634
636 half_t operator*(half_t const& lhs, half_t const& rhs) {
637 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
638return half_t(__hmul(lhs.to_half(), rhs.to_half()));
639 #else
640return half_t(float(lhs) * float(rhs));
641 #endif
642 }
643
645 half_t operator/(half_t const& lhs, half_t const& rhs) {
646 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
647return half_t(__hdiv(lhs.to_half(), rhs.to_half()));
648 #else
649return half_t(float(lhs) / float(rhs));
650 #endif
651 }
652
654 half_t& operator+=(half_t & lhs, half_t const& rhs) {
655 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
656 lhs = half_t(__hadd(lhs.to_half(), rhs.to_half()));
657 #else
658 lhs = half_t(float(lhs) + float(rhs));
659 #endif
660return lhs;
661 }
662
664 half_t& operator-=(half_t & lhs, half_t const& rhs) {
665 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
666 lhs = half_t(__hsub(lhs.to_half(), rhs.to_half()));
667 #else
668 lhs = half_t(float(lhs) - float(rhs));
669 #endif
670return lhs;
671 }
672
674 half_t& operator*=(half_t & lhs, half_t const& rhs) {
675 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
676 lhs = half_t(__hmul(lhs.to_half(), rhs.to_half()));
677 #else
678 lhs = half_t(float(lhs) * float(rhs));
679 #endif
680return lhs;
681 }
682
684 half_t& operator/=(half_t & lhs, half_t const& rhs) {
685 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
686 lhs = half_t(__hdiv(lhs.to_half(), rhs.to_half()));
687 #else
688 lhs = half_t(float(lhs) / float(rhs));
689 #endif
690return lhs;
691 }
692
694 half_t& operator++(half_t & lhs) {
695 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
696 lhs = half_t(__hadd(lhs.to_half(), half_t(1.0f).to_half()));
697 #else
698float tmp(lhs);
699 ++tmp;
700 lhs = half_t(tmp);
701 #endif
702return lhs;
703 }
704
706 half_t& operator--(half_t & lhs) {
707 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
708 lhs = half_t(__hsub(lhs.to_half(), half_t(1.0f).to_half()));
709 #else
710float tmp(lhs);
711 --tmp;
712 lhs = half_t(tmp);
713 #endif
714return lhs;
715 }
716
718 half_t operator++(half_t & lhs, int) {
719half_t ret(lhs);
720 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
721 lhs = half_t(__hadd(lhs.to_half(), half_t(1.0f).to_half()));
722 #else
723float tmp(lhs);
724 tmp++;
725 lhs = half_t(tmp);
726 #endif
727return ret;
728 }
729
731 half_t operator--(half_t & lhs, int) {
732half_t ret(lhs);
733 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
734 lhs = half_t(__hsub(lhs.to_half(), half_t(1.0f).to_half()));
735 #else
736float tmp(lhs);
737 tmp--;
738 lhs = half_t(tmp);
739 #endif
740return ret;
741 }
742
744
745 } // namespace cutlass
746
748
749 //
750 // User-defined literals
751 //
752
754 cutlass::half_t operator "" _hf(long double x) {
755return cutlass::half_t(float(x));
756 }
757
759 cutlass::half_t operator "" _hf(unsigned long long int x) {
760return cutlass::half_t(int(x));
761 }
762
[std::numeric_limits< cutlass::half_t >::max](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a39a5774583daedbb5ac4aaaaa8034883)
static cutlass::half_t max()
Maximum finite value.
Definition: half.h:520
static CUTLASS_HOST_DEVICE half_t bitcast(uint16_t x)
Constructs from an unsigned short.
Definition: half.h:141
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE half_t(int x)
Integer conversion - round to nearest even.
Definition: half.h:318
CUTLASS_HOST_DEVICE T abs(complex< T > const &z)
Returns the magnitude of the complex number.
Definition: complex.h:313
[std::numeric_limits< cutlass::half_t >::signaling_NaN](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a423fb5b95e6071e832d40918e597f63f)
static cutlass::half_t signaling_NaN()
Returns smallest finite value.
Definition: half.h:535
[std::numeric_limits< cutlass::half_t >::infinity](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab7a40820e64282376a050095d5004b74)
static cutlass::half_t infinity()
Returns smallest finite value.
Definition: half.h:529
CUTLASS_HOST_DEVICE half_t()
Default constructor.
Definition: half.h:296
static CUTLASS_HOST_DEVICE half_t convert(float const &flt)
FP32 -> FP16 conversion - rounds to nearest even.
Definition: half.h:154
CUTLASS_HOST_DEVICE half_t & operator/=(half_t &lhs, half_t const &rhs)
Definition: half.h:684
uint16_t storage
Storage type.
Definition: half.h:133
IEEE half-precision floating-point type.
Definition: half.h:126
static CUTLASS_HOST_DEVICE half_t convert(int const &n)
FP32 -> FP16 conversion - rounds to nearest even.
Definition: half.h:223
CUTLASS_HOST_DEVICE bool isnormal(cutlass::half_t const &h)
Definition: half.h:436
STL namespace.
CUTLASS_HOST_DEVICE bool operator<=(half_t const &lhs, half_t const &rhs)
Definition: half.h:582
[std::numeric_limits< cutlass::half_t >::denorm_min](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a2c05c19022c183e8734ada65c8970af5)
static cutlass::half_t denorm_min()
Returns smallest finite value.
Definition: half.h:538
CUTLASS_HOST_DEVICE complex< T > exp(complex< T > const &z)
Computes the complex exponential of z.
Definition: complex.h:375
static CUTLASS_HOST_DEVICE float convert(half_t const &x)
Converts a half-precision value stored as a uint16_t to a float.
Definition: half.h:248
CUTLASS_HOST_DEVICE half_t & operator+=(half_t &lhs, half_t const &rhs)
Definition: half.h:654
CUTLASS_HOST_DEVICE half_t(unsigned x)
Integer conversion - round toward zero.
Definition: half.h:324
CUTLASS_HOST_DEVICE half_t operator+(half_t const &lhs, half_t const &rhs)
Definition: half.h:609
CUTLASS_HOST_DEVICE half_t & operator-=(half_t &lhs, half_t const &rhs)
Definition: half.h:664
[std::numeric_limits< cutlass::half_t >::round_error](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab0a036db7a1ad11c65e876020c78b1a5)
static cutlass::half_t round_error()
Returns smallest finite value.
Definition: half.h:526
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694
CUTLASS_HOST_DEVICE bool signbit() const
Returns the sign bit.
Definition: half.h:379
CUTLASS_HOST_DEVICE cutlass::half_t sqrt(cutlass::half_t const &h)
Definition: half.h:464
CUTLASS_HOST_DEVICE int fpclassify(cutlass::half_t const &h)
Definition: half.h:441
CUTLASS_HOST_DEVICE bool operator!=(half_t const &lhs, half_t const &rhs)
Definition: half.h:564
CUTLASS_HOST_DEVICE half to_half() const
Bitcasts to CUDA's half type.
Definition: half.h:361
CUTLASS_HOST_DEVICE half_t & operator=(half const &x)
Assignment.
Definition: half.h:330
CUTLASS_HOST_DEVICE half_t(half const &x)
Reinterpret cast from CUDA's half type.
Definition: half.h:300
CUTLASS_HOST_DEVICE uint16_t raw() const
Accesses raw internal state.
Definition: half.h:373
CUTLASS_HOST_DEVICE bool isinf(cutlass::half_t const &h)
Definition: half.h:431
CUTLASS_HOST_DEVICE half_t copysign(half_t const &a, half_t const &b)
Definition: half.h:473
CUTLASS_HOST_DEVICE half_t & operator--(half_t &lhs)
Definition: half.h:706
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE cutlass::half_t nanh(const char *)
Definition: half.h:425
CUTLASS_HOST_DEVICE half_t(float x)
Floating point conversion.
Definition: half.h:306
CUTLASS_HOST_DEVICE bool operator>(half_t const &lhs, half_t const &rhs)
Definition: half.h:591
CUTLASS_HOST_DEVICE half_t operator-(half_t const &lhs)
Definition: half.h:618
CUTLASS_HOST_DEVICE bool isfinite(cutlass::half_t const &h)
Definition: half.h:420
CUTLASS_HOST_DEVICE half_t & operator*=(half_t &lhs, half_t const &rhs)
Definition: half.h:674
[std::numeric_limits< cutlass::half_t >::min](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ad9175b4d7b32fe18cf9c07e4f559b32c)
static cutlass::half_t min()
Least positive value.
Definition: half.h:514
cutlass::FloatRoundStyle::round_to_nearest
round to nearest even
CUTLASS_HOST_DEVICE int mantissa() const
Returns the mantissa.
Definition: half.h:397
[std::numeric_limits< cutlass::half_t >::lowest](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ac2ae43139037875e38056a675ae1f6c4)
static cutlass::half_t lowest()
Minimum finite value.
Definition: half.h:517
[std::numeric_limits< cutlass::half_t >::epsilon](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#ab9fc3a009eaff0c922307f2780ee3fc0)
static cutlass::half_t epsilon()
Returns smallest finite value.
Definition: half.h:523
CUTLASS_HOST_DEVICE bool operator==(half_t const &lhs, half_t const &rhs)
Definition: half.h:555
CUTLASS_HOST_DEVICE Coord< Rank, Index > operator/(Index s, Coord< Rank, Index > coord)
Scalar division.
Definition: coord.h:360
CUTLASS_HOST_DEVICE bool operator>=(half_t const &lhs, half_t const &rhs)
Definition: half.h:600
CUTLASS_HOST_DEVICE half_t(double x)
Floating point conversion.
Definition: half.h:312
static CUTLASS_HOST_DEVICE half_t convert(unsigned const &n)
FP32 -> FP16 conversion - rounds to nearest even.
Definition: half.h:233
CUTLASS_HOST_DEVICE bool operator<(half_t const &lhs, half_t const &rhs)
Definition: half.h:573
Basic include for CUTLASS.
CUTLASS_HOST_DEVICE complex< T > sqrt(complex< T > const &z)
Computes the square root of complex number z.
Definition: complex.h:393
CUTLASS_HOST_DEVICE uint16_t & raw()
Accesses raw internal state.
Definition: half.h:367
[std::numeric_limits< cutlass::half_t >::quiet_NaN](structstd_1_1numeric limits_3_01cutlass_1_1half t_01_4.html#a8c7eafdd3b121353c0914dc6e1c0d108)
static cutlass::half_t quiet_NaN()
Returns smallest finite value.
Definition: half.h:532
CUTLASS_HOST_DEVICE int exponent() const
Returns the unbiased exponent.
Definition: half.h:391
cutlass::half_t::exponent_biased
CUTLASS_HOST_DEVICE int exponent_biased() const
Returns the biased exponent.
Definition: half.h:385
CUTLASS_HOST_DEVICE half_t operator*(half_t const &lhs, half_t const &rhs)
Definition: half.h:636
CUTLASS_HOST_DEVICE bool isnan(cutlass::half_t const &h)
Definition: half.h:415
Generated by 1.8.11