CUTLASS: numeric_conversion.h Source File - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

numeric_conversion.h

Go to the documentation of this file.

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

31 #include "cutlass/cutlass.h"

32 #include "cutlass/numeric_types.h"

34 #include "cutlass/array.h"

35 #include "cutlass/half.h"

37 namespace cutlass {

43 enum class FloatRoundStyle {

44round_indeterminate,

45round_toward_zero,

46round_to_nearest,

47round_toward_infinity,

48round_toward_neg_infinity,

49round_half_ulp_truncate

50 };

54 template <

55typename T,

56typename S,

57FloatRoundStyle Round = FloatRoundStyle::round_to_nearest

58 >

59 struct NumericConverter {

61using result_type = T;

62using source_type = S;

63static FloatRoundStyle const round_style = Round;

65CUTLASS_HOST_DEVICE

66static result_type convert(source_type const & s) {

68return static_cast<result_type>(s);

69 }

71CUTLASS_HOST_DEVICE

72 result_type operator()(source_type const &s) {

73return convert(s);

74 }

75 };

78 //

79 // Partial specializations for float => int8_t

80 //

82 template <FloatRoundStyle Round>

83 struct NumericConverter<int8_t, float, Round> {

85using result_type = int8_t;

86using source_type = float;

87static FloatRoundStyle const round_style = Round;

89CUTLASS_HOST_DEVICE

90static result_type convert(source_type const & s) {

92result_type result = static_cast<int8_t>(s);

94return result;

95 }

97CUTLASS_HOST_DEVICE

98 result_type operator()(source_type const &s) {

99return convert(s);

100 }

101 };

102

104

106 template <typename T, FloatRoundStyle Round>

107 struct NumericConverter<T, T, Round> {

108

109using result_type = T;

110using source_type = T;

111static FloatRoundStyle const round_style = Round;

112

113CUTLASS_HOST_DEVICE

114static result_type convert(source_type const & s) {

115

116return s;

117 }

118

119CUTLASS_HOST_DEVICE

120 result_type operator()(source_type const &s) {

121return convert(s);

122 }

123 };

124

126 //

127 // Partial specializations for float <=> half_t

128 //

130

132 template <FloatRoundStyle Round>

133 struct NumericConverter<float, half_t, Round> {

134

135using result_type = float;

136using source_type = half_t;

137static FloatRoundStyle const round_style = Round;

138

139CUTLASS_HOST_DEVICE

140static result_type convert(source_type const & s) {

141

142result_type result = static_cast<float>(s);

143

144return result;

145 }

146

147CUTLASS_HOST_DEVICE

148 result_type operator()(source_type const &s) {

149return convert(s);

150 }

151 };

152

154 template <>

[155](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html) struct NumericConverter<half_t, float, FloatRoundStyle::round_to_nearest> {

156

[157](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a5dc993f38c6eedd917008e6c839c6300)using result_type = half_t;

[158](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce)using [source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce) = float;

[159](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#affd38515f30c26256ff5c06e5a567080)static FloatRoundStyle const round_style = FloatRoundStyle::round_to_nearest;

160

161CUTLASS_HOST_DEVICE

[162](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#aaf16c1dd3bb1fc0566c819146dfd5ab8)static result_type [convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#aaf16c1dd3bb1fc0566c819146dfd5ab8)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce) const & s) {

163

164result_type result = static_cast<half_t>(s);

165

166return result;

167 }

168

169CUTLASS_HOST_DEVICE

[170](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a619f44798aa208650afaf2c584454d4b)result_type [operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a619f44798aa208650afaf2c584454d4b)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce) const &s) {

171return convert(s);

172 }

173 };

174

176 template <>

[177](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html) struct NumericConverter<half_t, float, FloatRoundStyle::round_toward_zero> {

178

[179](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#aa5bfe0288e538f1df94d74fa52aa1e17)using result_type = half_t;

[180](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36)using [source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round toward zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36) = float;

[181](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43fde0bc2ddeeebf1f188c6d1ac7fbe0)static FloatRoundStyle const round_style = FloatRoundStyle::round_toward_zero;

182

184CUTLASS_HOST_DEVICE

[185](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43ab30e5283f39b1defe46b13da9ac1b)static result_type [convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43ab30e5283f39b1defe46b13da9ac1b)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round toward zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36) const & flt) {

186

187 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)

188return half_t(__float2half_rz(flt));

189 #else

190// software implementation rounds toward nearest even

191unsigned const& s = reinterpret_cast<unsigned const &>(flt);

192 uint16_t sign = uint16_t((s >> 16) & 0x8000);

193 int16_t exp = uint16_t(((s >> 23) & 0xff) - 127);

194int mantissa = s & 0x7fffff;

195 uint16_t u = 0;

196

197if ((s & 0x7fffffff) == 0) {

198// sign-preserving zero

199return half_t::bitcast(sign);

200 }

201

202if (exp > 15) {

203if (exp == 128 && mantissa) {

204// not a number

205 u = 0x7fff;

206 } else {

207// overflow to infinity

208 u = sign | 0x7c00;

209 }

210return half_t::bitcast(u);

211 }

212

213if (exp >= -14) {

214// normal fp32 to normal fp16

215 exp = uint16_t(exp + uint16_t(15));

216 u = uint16_t(((exp & 0x1f) << 10));

217 u = uint16_t(u | (mantissa >> 13));

218 } else {

219// normal single-precision to subnormal half_t-precision representation

220int rshift = (-14 - exp);

221if (rshift < 32) {

222 mantissa |= (1 << 23);

223 mantissa = (mantissa >> rshift);

224 u = (uint16_t(mantissa >> 13) & 0x3ff);

225 } else {

226 mantissa = 0;

227 u = 0;

228 }

229 }

230

231 u |= sign;

232

233return half_t::bitcast(u);

234

235 #endif // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)

236 }

237

238CUTLASS_HOST_DEVICE

[239](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a9ee10f5cf5ff71842a31e305d3a83947)result_type [operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a9ee10f5cf5ff71842a31e305d3a83947)([source_type](structcutlass_1_1NumericConverter_3_01half__t_00_01float_00_01FloatRoundStyle_1_1round toward zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36) const &s) {

240return convert(s);

241 }

242 };

243

245 //

246 // Conversion and Clamp operator for Integers

247 //

249

250 template <

251typename T,

252typename S

253 >

254 struct NumericConverterClamp {

255

256using result_type = T;

257using source_type = S;

258

259static_assert((platform::is_same<result_type, int32_t>::value ||

260platform::is_same<result_type, int8_t>::value ||

261platform::is_same<result_type, cutlass::int4b_t>::value),

262"Clamp is only needed for integer types");

263

264CUTLASS_HOST_DEVICE

265static result_type convert(source_type const & s) {

266NumericConverter<result_type, source_type> convert_op;

267result_type const kClamp_max =

268 (0x1U << (sizeof_bits<result_type>::value - 1)) - 1;

269result_type const kClamp_min = -kClamp_max - 1;

270bool is_int_min = !(s > kClamp_min);

271bool is_int_max = !(s < kClamp_max);

272return is_int_min ? kClamp_min : (is_int_max ? kClamp_max : convert_op(s));

273 }

274

275CUTLASS_HOST_DEVICE

276 result_type operator()(source_type const &s) {

277return convert(s);

278 }

279 };

280

282 //

283 // Conversion operator for Array

284 //

286

288 template <

289typename T,

290typename S,

291int N,

292FloatRoundStyle Round = FloatRoundStyle::round_to_nearest

293 >

294 struct NumericArrayConverter {

295

296using result_type = Array<T, N>;

297using source_type = Array<S, N>;

298static FloatRoundStyle const round_style = Round;

299

300CUTLASS_HOST_DEVICE

301static result_type convert(source_type const & s) {

302

303result_type result;

304NumericConverter<T, S, Round> convert_;

305

306CUTLASS_PRAGMA_UNROLL

307for (int i = 0; i < N; ++i) {

308 result[i] = convert_(s[i]);

309 }

310

311return result;

312 }

313

314CUTLASS_HOST_DEVICE

315 result_type operator()(source_type const &s) {

316return convert(s);

317 }

318 };

319

321

323 template <>

[324](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html) struct NumericArrayConverter<half_t, float, 2, FloatRoundStyle::round_to_nearest> {

325

[326](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c)using [result_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c) = Array<half_t, 2>;

[327](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e)using [source_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e) = Array<float, 2>;

[328](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3c31373beb0e6a9c649134b21a02125a)static FloatRoundStyle const round_style = FloatRoundStyle::round_to_nearest;

329

330CUTLASS_HOST_DEVICE

[331](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3cc4d59f083555f24288e15490eeb41d)static [result_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c) [convert](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3cc4d59f083555f24288e15490eeb41d)([source_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e) const & source) {

332

333 Array<half_t, 2> result;

334

335 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)

336reinterpret_cast<__half2 &>(result) = __float22half2_rn(reinterpret_cast<float2 const &>(source));

337 #else

338NumericConverter<half_t, float, round_style> convert_;

339 result[0] = convert_(source[0]);

340 result[1] = convert_(source[1]);

341 #endif

342

343return result;

344 }

345

346CUTLASS_HOST_DEVICE

[347](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a177cc3209f728a6629754cf2d685a37a)[result_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c) [operator()](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a177cc3209f728a6629754cf2d685a37a)([source_type](structcutlass_1_1NumericArrayConverter_3_01half__t_00_01float_00_012_00_01FloatRoundStyle_1_1round to nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e) const &s) {

348return convert(s);

349 }

350 };

351

353 template <FloatRoundStyle Round>

354 struct NumericArrayConverter<float, half_t, 2, Round> {

355

356using result_type = Array<float, 2>;

357using source_type = Array<half_t, 2>;

358static FloatRoundStyle const round_style = FloatRoundStyle::round_to_nearest;

359

360CUTLASS_HOST_DEVICE

361static result_type convert(source_type const & source) {

362

363 Array<float, 2> result;

364

365 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)

366reinterpret_cast<float2 &>(result) = __half22float2(reinterpret_cast<__half2 const &>(source));

367 #else

368NumericConverter<float, half_t, round_style> convert_;

369 result[0] = convert_(source[0]);

370 result[1] = convert_(source[1]);

371 #endif

372

373return result;

374 }

375

376CUTLASS_HOST_DEVICE

377 result_type operator()(source_type const &s) {

378return convert(s);

379 }

380 };

381

383

385 template <

386int N,

387FloatRoundStyle Round

388 >

389 struct NumericArrayConverter<half_t, float, N, Round> {

390

391using result_type = Array<half_t, N>;

392using source_type = Array<float, N>;

393static FloatRoundStyle const round_style = Round;

394

395CUTLASS_HOST_DEVICE

396static result_type convert(source_type const & source) {

397

398NumericArrayConverter<half_t, float, 2, Round> convert_vector_;

399NumericConverter<half_t, float, Round> convert_element_;

400

401result_type result;

402

403 Array<half_t, 2> *result_ptr = reinterpret_cast<Array<half_t, 2> *>(&result);

404 Array<float, 2> const *source_ptr = reinterpret_cast<Array<float, 2> const *>(&source);

405

406CUTLASS_PRAGMA_UNROLL

407for (int i = 0; i < N / 2; ++i) {

408 result_ptr[i] = convert_vector_(source_ptr[i]);

409 }

410

411if (N % 2) {

412 result[N - 1] = convert_element_(source[N - 1]);

413 }

414

415return result;

416 }

417

418CUTLASS_HOST_DEVICE

419 result_type operator()(source_type const &s) {

420return convert(s);

421 }

422 };

423

424

426 template <

427int N,

428FloatRoundStyle Round

429 >

430 struct NumericArrayConverter<float, half_t, N, Round> {

431

432using result_type = Array<float, N>;

433using source_type = Array<half_t, N>;

434static FloatRoundStyle const round_style = Round;

435

436CUTLASS_HOST_DEVICE

437static result_type convert(source_type const & source) {

438

439NumericArrayConverter<float, half_t, 2, Round> convert_vector_;

440NumericConverter<float, half_t, Round> convert_element_;

441

442result_type result;

443

444 Array<float, 2> *result_ptr = reinterpret_cast<Array<float, 2> *>(&result);

445 Array<half_t, 2> const *source_ptr = reinterpret_cast<Array<half_t, 2> const *>(&source);

446

447CUTLASS_PRAGMA_UNROLL

448for (int i = 0; i < N / 2; ++i) {

449 result_ptr[i] = convert_vector_(source_ptr[i]);

450 }

451

452if (N % 2) {

453 result[N - 1] = convert_element_(source[N - 1]);

454 }

455

456return result;

457 }

458

459CUTLASS_HOST_DEVICE

460 result_type operator()(source_type const &s) {

461return convert(s);

462 }

463 };

464

466

467 // Conditional guards to enable partial specialization for packed integers

468 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 720) && (__CUDACC_VER_MAJOR__ >= 10) && (__CUDACC_VER_MINOR__ >= 2)

469

471 template <

472FloatRoundStyle Round

473 >

474 struct NumericArrayConverter<int8_t, int, 1, Round> {

475

476using result_type = Array<int8_t, 1>;

477using source_type = Array<int, 1>;

478static FloatRoundStyle const round_style = Round;

479

480CUTLASS_HOST_DEVICE

481static result_type convert(source_type const & source) {

482NumericConverter<int8_t, int, Round> convert_element_;

483

484 result_type result;

485

486 result[0] = convert_element_(source[0]);

487

488return result;

489 }

490

491CUTLASS_HOST_DEVICE

492 result_type operator()(source_type const &s) {

493return convert(s);

494 }

495 };

496

498 template <

499FloatRoundStyle Round

500 >

501 struct NumericArrayConverter<int8_t, int, 2, Round> {

502

503using result_type = Array<int8_t, 2>;

504using source_type = Array<int, 2>;

505static FloatRoundStyle const round_style = Round;

506

507CUTLASS_HOST_DEVICE

508static result_type convert(source_type const & source) {

509

510 uint32_t tmp;

511

512asm volatile(

513"cvt.pack.sat.s8.s32.b32 %0, %2, %1, 0;\n"

514 : "=r"(tmp) : "r"(source[0]), "r"(source[1]));

515

516 uint16_t out = (tmp & 0xffff);

517return reinterpret_cast<result_type const &>(out);

518 }

519

520CUTLASS_HOST_DEVICE

521 result_type operator()(source_type const &s) {

522return convert(s);

523 }

524 };

525

527 template <

528FloatRoundStyle Round

529 >

530 struct NumericArrayConverter<int8_t, int, 4, Round> {

531

532using result_type = Array<int8_t, 4>;

533using source_type = Array<int, 4>;

534static FloatRoundStyle const round_style = Round;

535

536CUTLASS_HOST_DEVICE

537static result_type convert(source_type const & source) {

538

539unsigned out;

540

541asm volatile(

542"{ .reg .u32 r4;"

543"cvt.pack.sat.s8.s32.b32 r4, %4, %3, 0;"

544"cvt.pack.sat.s8.s32.b32 %0, %2, %1, r4;"

545"}"

546 : "=r"(out) : "r"(source[0]), "r"(source[1]), "r"(source[2]), "r"(source[3]));

547

548return reinterpret_cast<result_type const &>(out);

549 }

550

551CUTLASS_HOST_DEVICE

552 result_type operator()(source_type const &s) {

553return convert(s);

554 }

555 };

556

558 template <

559int N,

560FloatRoundStyle Round

561 >

562 struct NumericArrayConverter<int8_t, int, N, Round> {

563static_assert(!(N % 4), "N must be multiple of 4.");

564

565using result_type = Array<int8_t, N>;

566using source_type = Array<int, N>;

567static FloatRoundStyle const round_style = Round;

568

569CUTLASS_HOST_DEVICE

570static result_type convert(source_type const & source) {

571

572NumericArrayConverter<int8_t, int, 4, Round> convert_vector_;

573

574result_type result;

575

576 Array<int8_t, 4> *result_ptr = reinterpret_cast<Array<int8_t, 4> *>(&result);

577 Array<int, 4> const *source_ptr = reinterpret_cast<Array<int, 4> const *>(&source);

578

579CUTLASS_PRAGMA_UNROLL

580for (int i = 0; i < N / 4; ++i) {

581 result_ptr[i] = convert_vector_(source_ptr[i]);

582 }

583

584return result;

585 }

586

587CUTLASS_HOST_DEVICE

588result_type operator()(source_type const &s) {

589return convert(s);

590 }

591 };

592

593 #endif // Conditional guards to enable partial specialization for packed integers

594

596

597 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 750) && (__CUDACC_VER_MAJOR__ >= 10) && (__CUDACC_VER_MINOR__ >= 2)

598

600 template <

601FloatRoundStyle Round

602 >

603 struct NumericArrayConverter<int4b_t, int, 8, Round> {

604

605using result_type = Array<int4b_t, 8>;

606using source_type = Array<int, 8>;

607static FloatRoundStyle const round_style = Round;

608

609CUTLASS_HOST_DEVICE

610static result_type convert(source_type const & source) {

611

612unsigned out;

613

614asm volatile(

615"{ .reg .u32 r4;"

616"cvt.pack.sat.s4.s32.b32 r4, %8, %7, 0;"

617"cvt.pack.sat.s4.s32.b32 r4, %6, %5, r4;"

618"cvt.pack.sat.s4.s32.b32 r4, %4, %3, r4;"

619"cvt.pack.sat.s4.s32.b32 %0, %2, %1, r4;"

620"}"

621 : "=r"(out)

622 : "r"(source[0]), "r"(source[1]), "r"(source[2]), "r"(source[3]),

623"r"(source[4]), "r"(source[5]), "r"(source[6]), "r"(source[7]));

624

625return reinterpret_cast<result_type const &>(out);

626 }

627

628CUTLASS_HOST_DEVICE

629 result_type operator()(source_type const &s) {

630return convert(s);

631 }

632 };

633

635 template <

636int N,

637FloatRoundStyle Round

638 >

639 struct NumericArrayConverter<int4b_t, int, N, Round> {

640static_assert(!(N % 8), "N must be multiple of 8.");

641

642using result_type = Array<int4b_t, N>;

643using source_type = Array<int, N>;

644static FloatRoundStyle const round_style = Round;

645

646CUTLASS_HOST_DEVICE

647static result_type convert(source_type const & source) {

648

649NumericArrayConverter<int4b_t, int, 8, Round> convert_vector_;

650

651result_type result;

652

653 Array<int4b_t, 8> *result_ptr = reinterpret_cast<Array<int4b_t, 8> *>(&result);

654 Array<int, 8> const *source_ptr = reinterpret_cast<Array<int, 8> const *>(&source);

655

656CUTLASS_PRAGMA_UNROLL

657for (int i = 0; i < N / 8; ++i) {

658 result_ptr[i] = convert_vector_(source_ptr[i]);

659 }

660

661return result;

662 }

663

664CUTLASS_HOST_DEVICE

665result_type operator()(source_type const &s) {

666return convert(s);

667 }

668 };

669

670 #endif // Conditional guards to enable partial specialization for packed integers

671

673

674 } // namespace cutlass

cutlass::NumericConverterClamp::result_type

T result_type

Definition: numeric_conversion.h:256

[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >::source_type](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a80980dc518dd65f4c3c1533782ba8b36)

float source_type

Definition: numeric_conversion.h:180

cutlass::NumericConverter< float, half_t, Round >

Partial specialization for float <= half_t.

Definition: numeric_conversion.h:133

cutlass::half_t::bitcast

static CUTLASS_HOST_DEVICE half_t bitcast(uint16_t x)

Constructs from an unsigned short.

Definition: half.h:141

cutlass::NumericConverter< T, T, Round >::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:120

cutlass::NumericConverterClamp::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:276

cutlass

Definition: aligned_buffer.h:35

cutlass::NumericArrayConverter< float, half_t, 2, Round >::result_type

Array< float, 2 > result_type

Definition: numeric_conversion.h:356

cutlass::NumericConverterClamp

Definition: numeric_conversion.h:254

cutlass::NumericConverter< int8_t, float, Round >::source_type

float source_type

Definition: numeric_conversion.h:86

cutlass::NumericArrayConverter< half_t, float, N, Round >::convert

static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)

Definition: numeric_conversion.h:396

[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >::convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a43ab30e5283f39b1defe46b13da9ac1b)

static CUTLASS_HOST_DEVICE result_type convert(source_type const &flt)

Round toward zero.

Definition: numeric_conversion.h:185

cutlass::NumericConverter< T, T, Round >::result_type

T result_type

Definition: numeric_conversion.h:109

[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::convert](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a3cc4d59f083555f24288e15490eeb41d)

static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)

Definition: numeric_conversion.h:331

cutlass::platform::is_same

std::is_same (false specialization)

Definition: platform.h:394

cutlass::FloatRoundStyle::round_toward_zero

round toward zero

cutlass::NumericConverterClamp::convert

static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)

Definition: numeric_conversion.h:265

half.h

Defines a class for using IEEE half-precision floating-point types in host or device code...

cutlass::NumericConverter::result_type

T result_type

Definition: numeric_conversion.h:61

cutlass::half_t

IEEE half-precision floating-point type.

Definition: half.h:126

cutlass::NumericArrayConverter< float, half_t, 2, Round >::source_type

Array< half_t, 2 > source_type

Definition: numeric_conversion.h:357

cutlass::NumericArrayConverter< float, half_t, 2, Round >::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:377

cutlass::NumericArrayConverter< float, half_t, N, Round >::result_type

Array< float, N > result_type

Definition: numeric_conversion.h:432

cutlass::exp

CUTLASS_HOST_DEVICE complex< T > exp(complex< T > const &z)

Computes the complex exponential of z.

Definition: complex.h:375

[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >::source_type](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977053fab779dd7519612b6ae0fb53ce)

float source_type

Definition: numeric_conversion.h:158

cutlass::NumericArrayConverter< float, half_t, N, Round >::source_type

Array< half_t, N > source_type

Definition: numeric_conversion.h:433

array.h

Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::NumericArrayConverter< float, half_t, N, Round >::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:460

[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::result_type](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#ad50d5ce8c7047513745c1fab77c3988c)

Array< half_t, 2 > result_type

Definition: numeric_conversion.h:326

cutlass::NumericArrayConverter< half_t, float, N, Round >::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:419

cutlass::NumericArrayConverter< float, half_t, N, Round >::convert

static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)

Definition: numeric_conversion.h:437

cutlass::NumericArrayConverter< half_t, float, N, Round >::result_type

Array< half_t, N > result_type

Definition: numeric_conversion.h:391

cutlass::FloatRoundStyle::round_toward_neg_infinity

round toward negative infinity

[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >::convert](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#aaf16c1dd3bb1fc0566c819146dfd5ab8)

static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)

Definition: numeric_conversion.h:162

cutlass::NumericConverter< T, T, Round >::convert

static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)

Definition: numeric_conversion.h:114

[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >::operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a619f44798aa208650afaf2c584454d4b)

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:170

cutlass::FloatRoundStyle::round_half_ulp_truncate

add 0.5ulp to integer representation then round toward zero

cutlass::NumericConverter< T, T, Round >::source_type

T source_type

Definition: numeric_conversion.h:110

cutlass::NumericArrayConverter< float, half_t, 2, Round >

Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. ...

Definition: numeric_conversion.h:354

[cutlass::NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >::operator()](structcutlass_1_1NumericConverter_3_01half t_00_01float_00_01FloatRoundStyle_1_1round toward__zero_01_4.html#a9ee10f5cf5ff71842a31e305d3a83947)

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:239

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::NumericArrayConverter::source_type

Array< S, N > source_type

Definition: numeric_conversion.h:297

numeric_types.h

Top-level include for all CUTLASS numeric types.

static_assert

#define static_assert(__e, __m)

Definition: platform.h:153

cutlass::NumericConverter::convert

static CUTLASS_HOST_DEVICE result_type convert(source_type const &s)

Definition: numeric_conversion.h:66

cutlass::NumericConverterClamp::source_type

S source_type

Definition: numeric_conversion.h:257

[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::source_type](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a977dc7cb44a006493604fb7805b75f4e)

Array< float, 2 > source_type

Definition: numeric_conversion.h:327

cutlass::NumericConverter

Definition: numeric_conversion.h:59

cutlass::FloatRoundStyle::round_toward_infinity

round toward infinity

cutlass::NumericArrayConverter::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:315

cutlass::FloatRoundStyle::round_to_nearest

round to nearest even

cutlass::NumericArrayConverter< float, half_t, 2, Round >::convert

static CUTLASS_HOST_DEVICE result_type convert(source_type const &source)

Definition: numeric_conversion.h:361

cutlass::FloatRoundStyle

FloatRoundStyle

Definition: numeric_conversion.h:43

cutlass::NumericConverter< float, half_t, Round >::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:148

cutlass::NumericConverter< int8_t, float, Round >::result_type

int8_t result_type

Definition: numeric_conversion.h:85

cutlass::FloatRoundStyle::round_indeterminate

rounding mode unknown

[cutlass::NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >::operator()](structcutlass_1_1NumericArrayConverter_3_01half t_00_01float_00_012_00_01FloatRoundStyle_1_1round to__nearest_01_4.html#a177cc3209f728a6629754cf2d685a37a)

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:347

cutlass::NumericConverter::operator()

CUTLASS_HOST_DEVICE result_type operator()(source_type const &s)

Definition: numeric_conversion.h:72

cutlass::NumericArrayConverter< half_t, float, N, Round >::source_type

Array< float, N > source_type

Definition: numeric_conversion.h:392

cutlass::NumericConverter::source_type

S source_type

Definition: numeric_conversion.h:62

cutlass::NumericConverter< float, half_t, Round >::result_type