Back to Cutlass

CUTLASS: tensor_fill.h Source File

docs/host_2tensor__fill_8h_source.html

4.4.271.0 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

host/tensor_fill.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

25 /* \file

26 \brief Provides several functions for filling tensors with data.

27 */

28

29 #pragma once

30

31 // Standard Library includes

32 #include <utility>

33 #include <cstdlib>

34 #include <cmath>

35

36 // Cutlass includes

37 #include "cutlass/cutlass.h"

38 #include "cutlass/complex.h"

39 #include "cutlass/array.h"

40 #include "cutlass/numeric_types.h"

41

42 #include "cutlass/util/distribution.h"

43 #include "tensor_foreach.h"

44

46

47 namespace cutlass {

48 namespace reference {

49 namespace host {

50

53

54 namespace detail {

55

56 template <

57typename Element,

58typename Layout>

59 struct TensorFillFunc {

60

61using TensorView = TensorView<Element, Layout>;

62

63//

64// Data members

65//

66

67TensorView view;

68 Element value;

69

70//

71// Methods

72//

73

74TensorFillFunc(

75TensorView const &view_ = TensorView(),

76 Element value_ = Element(0)

77 ): view(view_), value(value_) { }

78

79void operator()(Coord<Layout::kRank> const & coord) const {

80 view.at(coord) = value;

81 }

82 };

83

84 } // namespace detail

85

87

89 template <

90typename Element,

91typename Layout>

92 void TensorFill(

93TensorView<Element, Layout> dst,

94 Element val = Element(0)) {

95

96detail::TensorFillFunc<Element, Layout> func(dst, val);

97

98TensorForEach(

99 dst.extent(),

100 func

101 );

102 }

103

106

107 namespace detail {

108

/// Generates normally distributed values from std::rand() via the Box-Muller transform.
///
/// Constructing the functor calls std::srand(), i.e. it reseeds the std::rand() state shared
/// by the whole program. Each call to operator() consumes two std::rand() draws.
template <typename Element>
struct RandomGaussianFunc {

  uint64_t seed;     ///< seed handed to std::srand() (converted to unsigned)
  double mean;       ///< mean of the generated distribution
  double stddev;     ///< standard deviation of the generated distribution
  int int_scale;     ///< if >= 0, results are truncated to multiples of 2^-int_scale
  double pi;         ///< pi, computed once as acos(-1)

  //
  // Methods
  //

  /// Stores the distribution parameters and seeds std::rand().
  RandomGaussianFunc(
    uint64_t seed_ = 0,
    double mean_ = 0,
    double stddev_ = 1,
    int int_scale_ = -1
  ):
    seed(seed_), mean(mean_), stddev(stddev_), int_scale(int_scale_), pi(std::acos(-1)) {
    std::srand(static_cast<unsigned>(seed));
  }

  /// Compute random value and update RNG state.
  Element operator()() const {

    // Box-Muller transform to generate random numbers with Normal distribution.
    // The two draws are separate statements so their order is well defined.
    double u1 = double(std::rand()) / double(RAND_MAX);
    double u2 = double(std::rand()) / double(RAND_MAX);

    // std::rand() may return 0, and log(0) is -infinity, which would turn the sample into
    // inf or NaN. Substitute a value below the smallest non-zero draw (1 / RAND_MAX) so the
    // result stays finite; all other draws are left untouched.
    if (!(u1 > 0)) {
      u1 = 1.0 / (double(RAND_MAX) + 1.0);
    }

    // Compute Gaussian random value
    double rnd = std::sqrt(-2 * std::log(u1)) * std::cos(2 * pi * u2);
    rnd = mean + stddev * rnd;

    // Optionally truncate to a multiple of 2^-int_scale. The scale is built with ldexp
    // rather than `1 << int_scale`, which is undefined once int_scale reaches the width of int.
    if (int_scale >= 0) {
      double const scale = std::ldexp(1.0, int_scale);
      rnd = double(int64_t(rnd * scale)) / scale;
    }

    return static_cast<Element>(rnd);
  }
};

156

158 template <typename Element>

159 struct RandomGaussianFunc<complex<Element> > {

160

161 uint64_t seed;

162double mean;

163double stddev;

164int int_scale;

165double pi;

166

167//

168// Methods

169//

170RandomGaussianFunc(

171 uint64_t seed_ = 0,

172double mean_ = 0,

173double stddev_ = 1,

174int int_scale_ = -1

175 ):

176 seed(seed_), mean(mean_), stddev(stddev_), int_scale(int_scale_), pi(std::acos(-1)) {

177 std::srand((unsigned)seed);

178 }

179

181complex<Element> operator()() const {

182

183 Element reals[2];

184

185for (int i = 0; i < 2; ++i) {

186// Box-Muller transform to generate random numbers with Normal distribution

187double u1 = double(std::rand()) / double(RAND_MAX);

188double u2 = double(std::rand()) / double(RAND_MAX);

189

190// Compute Gaussian random value

191double rnd = std::sqrt(-2 * std::log(u1)) * std::cos(2 * pi * u2);

192 rnd = mean + stddev * rnd;

193

194if (int_scale >= 0) {

195 rnd = double(int(rnd * double(1 << int_scale)));

196 reals[i] = from_real<Element>(rnd / double(1 << int_scale));

197 }

198else {

199 reals[i] = from_real<Element>(rnd);

200 }

201 }

202

203return complex<Element>(reals[0], reals[1]);

204 }

205 };

206

208 template <

209typename Element,

210typename Layout>

211 struct TensorFillGaussianFunc {

212

213using TensorView = TensorView<Element, Layout>;

214

215//

216// Data members

217//

218

219TensorView view;

220RandomGaussianFunc<Element> func;

221

222//

223// Methods

224//

225

227TensorFillGaussianFunc(

228TensorView view_ = TensorView(),

229RandomGaussianFunc<Element> func_ = RandomGaussianFunc<Element>()

230 ):

231 view(view_), func(func_) {

232

233 }

234

236void operator()(Coord<Layout::kRank> const &coord) const {

237 view.at(coord) = func();

238 }

239 };

240

241 } // namespace detail

242

244

246 template <

247typename Element,

248typename Layout>

249 void TensorFillRandomGaussian(

250TensorView<Element, Layout> dst,

251 uint64_t seed,

252double mean = 0,

253double stddev = 1,

254int bits = -1) {

255

258detail::RandomGaussianFunc<Element> random_func(seed, mean, stddev, bits);

259

260detail::TensorFillGaussianFunc<Element, Layout> func(

261 dst,

262 random_func

263 );

264

265TensorForEach(

266 dst.extent(),

267 func

268 );

269 }

270

272

274 template <

275typename Element

276 >

277 void BlockFillRandomGaussian(

278 Element *ptr,

279size_t capacity,

280 uint64_t seed,

281double mean = 0,

282double stddev = 1,

283int bits = -1) {

284

287

288detail::RandomGaussianFunc<Element> random_func(seed, mean, stddev, bits);

289

290for (size_t i = 0; i < capacity; ++i) {

291 ptr[i] = random_func();

292 }

293 }

294

297

298 namespace detail {

299

300 template <typename Element>

301 struct RandomUniformFunc {

302

303using Real = typename RealType<Element>::Type;

304

305 uint64_t seed;

306double range;

307double min;

308int int_scale;

309

310//

311// Methods

312//

313

314RandomUniformFunc(

315 uint64_t seed_ = 0,

316double max = 1,

317double min_ = 0,

318int int_scale_ = -1

319 ):

320 seed(seed_), range(max - min_), min(min_), int_scale(int_scale_) {

321 std::srand((unsigned)seed);

322 }

323

324

326 Element operator()() const {

327

328double rnd = double(std::rand()) / double(RAND_MAX);

329

330 rnd = min + range * rnd;

331

332// Random values are cast to integer after scaling by a power of two to facilitate error

333// testing

334 Element result;

335

336if (int_scale >= 0) {

337 rnd = double(int64_t(rnd * double(1 << int_scale))) / double(1 << int_scale);

338 result = static_cast<Element>(Real(rnd));

339 }

340else {

341 result = static_cast<Element>(Real(rnd));

342 }

343

344return result;

345 }

346 };

347

349 template <typename Element>

350 struct RandomUniformFunc<complex<Element> > {

351

352using Real = typename RealType<Element>::Type;

353

354 uint64_t seed;

355double range;

356double min;

357int int_scale;

358

359//

360// Methods

361//

362

363RandomUniformFunc(

364 uint64_t seed_ = 0,

365double max = 1,

366double min_ = 0,

367int int_scale_ = -1

368 ):

369 seed(seed_), range(max - min_), min(min_), int_scale(int_scale_) {

370 std::srand((unsigned)seed);

371 }

372

373

375 complex<Element> operator()() const {

376

377 Element reals[2];

378

379for (int i = 0; i < 2; ++i) {

380double rnd = double(std::rand()) / double(RAND_MAX);

381

382 rnd = min + range * rnd;

383

384// Random values are cast to integer after scaling by a power of two to facilitate error

385// testing

386

387if (int_scale >= 0) {

388 rnd = double(int(rnd * double(1 << int_scale)));

389 reals[i] = from_real<Element>(Real(rnd / double(1 << int_scale)));

390 }

391else {

392 reals[i] = from_real<Element>(Real(rnd));

393 }

394 }

395

396return complex<Element>(reals[0], reals[1]);

397 }

398 };

399

401 template <

402typename Element,

403typename Layout>

404 struct TensorFillRandomUniformFunc {

405

406using TensorView = TensorView<Element, Layout>;

407

408//

409// Data members

410//

411

412TensorView view;

413RandomUniformFunc<Element> func;

414

415//

416// Methods

417//

418

420TensorFillRandomUniformFunc(

421TensorView view_ = TensorView(),

422RandomUniformFunc<Element> func_ = RandomUniformFunc<Element>()

423 ):

424 view(view_), func(func_) {

425

426 }

427

429void operator()(Coord<Layout::kRank> const &coord) const {

430

431 view.at(coord) = func();

432 }

433 };

434

435 } // namespace detail

436

438

440 template <

441typename Element,

442typename Layout>

443 void TensorFillRandomUniform(

444TensorView<Element, Layout> dst,

445 uint64_t seed,

446double max = 1,

447double min = 0,

448int bits = -1) {

449detail::RandomUniformFunc<Element> random_func(seed, max, min, bits);

452

453detail::TensorFillRandomUniformFunc<Element, Layout> func(

454 dst,

455 random_func

456 );

457

458TensorForEach(

459 dst.extent(),

460 func

461 );

462 }

463

465

467 template <

468typename Element

469 >

470 void BlockFillRandomUniform(

471 Element *ptr,

472size_t capacity,

473 uint64_t seed,

474double max = 1,

475double min = 0,

476int bits = -1) {

477detail::RandomUniformFunc<Element> random_func(seed, max, min, bits);

480

481for (size_t i = 0; i < capacity; ++i) {

482 ptr[i] = random_func();

483 }

484 }

485

488

489 namespace detail {

490

491 template <

492typename Element,

493typename Layout>

494 struct TensorFillDiagonalFunc {

495

496using TensorView = TensorView<Element, Layout>;

497

498//

499// Data members

500//

501

502TensorView view;

503 Element diag;

504 Element other;

505

506//

507// Methods

508//

509

510TensorFillDiagonalFunc(

511TensorView const &view_ = TensorView(),

512 Element diag_ = Element(1),

513 Element other_ = Element(0)

514 ):

515 view(view_), diag(diag_), other(other_) { }

516

517void operator()(Coord<Layout::kRank> const & coord) const {

518bool is_diag = true;

519

520CUTLASS_PRAGMA_UNROLL

521for (int i = 1; i < Layout::kRank; ++i) {

522if (coord[i] != coord[i - 1]) {

523 is_diag = false;

524break;

525 }

526 }

527

528 view.at(coord) = (is_diag ? diag : other);

529 }

530 };

531

532 } // namespace detail

533

535

537 template <

538typename Element,

539typename Layout>

540 void TensorFillDiagonal(

541TensorView<Element, Layout> dst,

542 Element diag = Element(1),

543 Element other = Element(0)) {

544

545detail::TensorFillDiagonalFunc<Element, Layout> func(

546 dst,

547 diag,

548 other

549 );

550

551TensorForEach(

552 dst.extent(),

553 func

554 );

555 }

556

559

561 template <

562typename Element,

563typename Layout>

564 void TensorFillIdentity(

565TensorView<Element, Layout> dst) {

566

567TensorFillDiagonal(dst, Element(1), Element(0));

568 }

569

572

574 template <

575typename Element,

576typename Layout>

577 void TensorUpdateDiagonal(

578TensorView<Element, Layout> dst,

579 Element val = Element(1)) {

580

581typename Layout::Index extent = dst.extent().min();

582

583for (typename Layout::Index i = 0; i < extent; ++i) {

584Coord<Layout::kRank> coord(i);

585 dst.at(coord) = val;

586 }

587 }

588

591

592 namespace detail {

593

594 template <

595typename Element,

596typename Layout>

597 struct TensorUpdateOffDiagonalFunc {

598

599using TensorView = TensorView<Element, Layout>;

600

601//

602// Data members

603//

604

605TensorView view;

606 Element other;

607

608//

609// Methods

610//

611

612TensorUpdateOffDiagonalFunc(

613TensorView const &view_ = TensorView(),

614 Element other_ = Element(0)

615 ):

616 view(view_), other(other_) { }

617

618void operator()(Coord<Layout::kRank> const & coord) const {

619bool is_diag = true;

620

621CUTLASS_PRAGMA_UNROLL

622for (int i = 1; i < Layout::kRank; ++i) {

623if (coord[i] != coord[i - 1]) {

624 is_diag = false;

625break;

626 }

627 }

628

629if (!is_diag) {

630 view.at(coord) = other;

631 }

632 }

633 };

634

635 } // namespace detail

636

638

640 template <

641typename Element,

642typename Layout>

643 void TensorUpdateOffDiagonal(

644TensorView<Element, Layout> dst,

645 Element other = Element(1)) {

646

647detail::TensorUpdateOffDiagonalFunc<Element, Layout> func(

648 dst,

649 other

650 );

651

652TensorForEach(

653 dst.extent(),

654 func

655 );

656 }

657

658

661

662 namespace detail {

663

664 template <

665typename Element,

666typename Layout>

667 struct TensorFillLinearFunc {

668

669using TensorView = TensorView<Element, Layout>;

670

671//

672// Data members

673//

674

675TensorView view;

676 Array<Element, Layout::kRank> v;

677 Element s;

678

679//

680// Methods

681//

682

683TensorFillLinearFunc() { }

684

686TensorFillLinearFunc(

687TensorView const &view_,

688 Array<Element, Layout::kRank> const & v_,

689 Element s_ = Element(0)

690 ):

691 view(view_), v(v_), s(s_) { }

692

694void operator()(Coord<Layout::kRank> const & coord) const {

695

696 Element sum(s);

697

698CUTLASS_PRAGMA_UNROLL

699for (int i = 0; i < Layout::kRank; ++i) {

700 sum += Element(coord[i]) * v[i];

701 }

702

703 view.at(coord) = sum;

704 }

705 };

706

707 } // namespace detail

708

710

712 template <

713typename Element,

714typename Layout>

715 void TensorFillLinear(

716TensorView<Element, Layout> dst,

717 Array<Element, Layout::kRank> const & v,

718 Element s = Element(0)) {

719

720detail::TensorFillLinearFunc<Element, Layout> func(

721 dst,

722 v,

723 s

724 );

725

726TensorForEach(

727 dst.extent(),

728 func

729 );

730 }

731

733

735 template <

736typename Element,

737typename Layout>

738 void TensorFillSequential(

739TensorView<Element, Layout> dst,

740 Element s = Element(0)) {

741

742 Array<Element, Layout::kRank> stride;

743

744 stride[0] = Element(1);

745

746CUTLASS_PRAGMA_UNROLL

747for (int i = 1; i < Layout::kRank; ++i) {

748 stride[i] = stride[i - 1] * Element(dst.extent()[i - 1]);

749 }

750

751TensorFillLinear(dst, stride, s);

752 }

753

756

758 template <

759typename Element

760 >

761 void BlockFillSequential(

762 Element *ptr,

763 int64_t capacity,

764 Element v = Element(1),

765 Element s = Element(0)) {

766int i = 0;

767

768while (i < capacity) {

769cutlass::ReferenceFactory<Element, (cutlass::sizeof_bits<Element>::value <

770 8)>::get(ptr, i) = s;

771

772 s = Element(s + v);

773 ++i;

774 }

775 }

776

779

781 template <

782typename Element

783 >

784 void BlockFillRandom(

785 Element *ptr,

786size_t capacity,

787 uint64_t seed,

788Distribution dist) {

789

790if (dist.kind == Distribution::Gaussian) {

791 BlockFillRandomGaussian<Element>(

792 ptr,

793 capacity,

794 seed,

795 dist.gaussian.mean,

796 dist.gaussian.stddev,

797 dist.int_scale);

798 }

799else if (dist.kind == Distribution::Uniform) {

800 BlockFillRandomUniform<Element>(

801 ptr,

802 capacity,

803 seed,

804 dist.uniform.max,

805 dist.uniform.min,

806 dist.int_scale);

807 }

808 }

809

812

814 template <

815typename Element,

816typename Layout>

817 void TensorCopyDiagonalIn(

818TensorView<Element, Layout> dst,

819 Element const *ptr) {

820

821typename Layout::Index extent = dst.extent().min();

822

823for (typename Layout::Index i = 0; i < extent; ++i) {

824Coord<Layout::kRank> coord(i);

825 dst.at(coord) = ptr[i];

826 }

827 }

828

831

833 template <

834typename Element,

835typename Layout>

836 void TensorCopyDiagonalOut(

837 Element *ptr,

838TensorView<Element, Layout> src) {

839

840typename Layout::Index extent = src.extent().min();

841

842for (typename Layout::Index i = 0; i < extent; ++i) {

843Coord<Layout::kRank> coord(i);

844 ptr[i] = src.at(coord);

845 }

846 }

847

850

851 } // namespace host

852 } // namespace reference

853 } // namespace cutlass

cutlass::reference::host::detail::RandomGaussianFunc::seed

uint64_t seed

Definition: host/tensor_fill.h:112

cutlass::reference::host::detail::TensorFillRandomUniformFunc::operator()

void operator()(Coord< Layout::kRank > const &coord) const

Compute random value and update RNG state.

Definition: host/tensor_fill.h:429

cutlass::cos

CUTLASS_HOST_DEVICE complex< T > cos(complex< T > const &z)

Computes the cosine of complex z.

Definition: complex.h:401

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::min

double min

Definition: host/tensor_fill.h:356

cutlass::platform::max

CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)

std::max

Definition: platform.h:189

cutlass::reference::host::detail::RandomUniformFunc::Real

typename RealType< Element >::Type Real

Definition: host/tensor_fill.h:303

cutlass::reference::host::TensorCopyDiagonalOut

void TensorCopyDiagonalOut(Element *ptr, TensorView< Element, Layout > src)

Copies the diagonal of a tensor into a dense buffer in host memory.

Definition: host/tensor_fill.h:836

cutlass

Definition: aligned_buffer.h:35

cutlass::Distribution::Uniform

Definition: distribution.h:40

cutlass::reference::host::detail::TensorFillDiagonalFunc

Functor that writes one value on the diagonal of a tensor and another value everywhere else.

Definition: host/tensor_fill.h:494

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::RandomUniformFunc

RandomUniformFunc(uint64_t seed_=0, double max=1, double min_=0, int int_scale_=-1)

Definition: host/tensor_fill.h:363

complex.h

cutlass::Distribution::Gaussian

Definition: distribution.h:40

cutlass::Distribution::uniform

struct cutlass::Distribution::@18::@20 uniform

Uniform distribution.

cutlass::reference::host::detail::TensorFillFunc::TensorView

TensorView< Element, Layout > TensorView

Definition: host/tensor_fill.h:61

cutlass::reference::host::detail::TensorFillDiagonalFunc::operator()

void operator()(Coord< Layout::kRank > const &coord) const

Definition: host/tensor_fill.h:517

cutlass::reference::host::detail::TensorFillDiagonalFunc::diag

Element diag

Definition: host/tensor_fill.h:503

cutlass::reference::host::detail::RandomGaussianFunc::operator()

Element operator()() const

Compute random value and update RNG state.

Definition: host/tensor_fill.h:132

cutlass::RealType::Type

T Type

Definition: real.h:32

cutlass::Distribution::kind

Kind kind

Active variant kind.

Definition: distribution.h:64

cutlass::reference::host::TensorFill

void TensorFill(TensorView< Element, Layout > dst, Element val=Element(0))

Fills a tensor with a uniform value.

Definition: host/tensor_fill.h:92

cutlass::TensorView::extent

CUTLASS_HOST_DEVICE TensorCoord const & extent() const

Returns the extent of the view (the size along each logical dimension).

Definition: tensor_view.h:167

cutlass::reference::host::detail::RandomUniformFunc::RandomUniformFunc

RandomUniformFunc(uint64_t seed_=0, double max=1, double min_=0, int int_scale_=-1)

Definition: host/tensor_fill.h:314

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::RandomGaussianFunc

RandomGaussianFunc(uint64_t seed_=0, double mean_=0, double stddev_=1, int int_scale_=-1)

Definition: host/tensor_fill.h:170

cutlass::Distribution::gaussian

struct cutlass::Distribution::@18::@21 gaussian

Gaussian distribution.

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::int_scale

int int_scale

Definition: host/tensor_fill.h:164

cutlass::reference::host::detail::TensorFillFunc::operator()

void operator()(Coord< Layout::kRank > const &coord) const

Definition: host/tensor_fill.h:79

std

STL namespace.

cutlass::reference::host::detail::TensorUpdateOffDiagonalFunc::view

TensorView view

Definition: host/tensor_fill.h:605

cutlass::reference::host::TensorFillDiagonal

void TensorFillDiagonal(TensorView< Element, Layout > dst, Element diag=Element(1), Element other=Element(0))

Fills a tensor everywhere with a unique value for its diagonal.

Definition: host/tensor_fill.h:540

cutlass::reference::host::detail::TensorFillLinearFunc

Functor that writes a linear combination of each coordinate and a weight vector, plus an offset.

Definition: host/tensor_fill.h:667

cutlass::reference::host::detail::RandomGaussianFunc::int_scale

int int_scale

Definition: host/tensor_fill.h:115

cutlass::reference::host::detail::TensorUpdateOffDiagonalFunc

Functor that writes a value to every off-diagonal element and leaves the diagonal unmodified.

Definition: host/tensor_fill.h:597

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::int_scale

int int_scale

Definition: host/tensor_fill.h:357

cutlass::reference::host::TensorFillIdentity

void TensorFillIdentity(TensorView< Element, Layout > dst)

Helper to fill a tensor's diagonal with 1 and 0 everywhere else.

Definition: host/tensor_fill.h:564

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::operator()

complex< Element > operator()() const

Compute random value and update RNG state.

Definition: host/tensor_fill.h:375

cutlass::log

CUTLASS_HOST_DEVICE complex< T > log(complex< T > const &z)

Computes the complex natural logarithm of z.

Definition: complex.h:381

cutlass::reference::host::detail::TensorFillGaussianFunc::operator()

void operator()(Coord< Layout::kRank > const &coord) const

Compute random value and update RNG state.

Definition: host/tensor_fill.h:236

array.h

Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::reference::host::detail::RandomGaussianFunc::mean

double mean

Definition: host/tensor_fill.h:113

cutlass::reference::host::TensorUpdateOffDiagonal

void TensorUpdateOffDiagonal(TensorView< Element, Layout > dst, Element other=Element(1))

Writes a uniform value to all elements in the tensor without modifying diagonal elements.

Definition: host/tensor_fill.h:643

cutlass::reference::host::detail::TensorFillLinearFunc::s

Element s

Definition: host/tensor_fill.h:677

cutlass::reference::host::detail::TensorFillLinearFunc::v

Array< Element, Layout::kRank > v

Definition: host/tensor_fill.h:676

cutlass::TensorView< Element, Layout >

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::Real

typename RealType< Element >::Type Real

Definition: host/tensor_fill.h:352

cutlass::reference::host::detail::TensorFillGaussianFunc::view

TensorView view

Definition: host/tensor_fill.h:219

cutlass::reference::host::TensorFillRandomGaussian

void TensorFillRandomGaussian(TensorView< Element, Layout > dst, uint64_t seed, double mean=0, double stddev=1, int bits=-1)

Fills a tensor with random values with a Gaussian distribution.

Definition: host/tensor_fill.h:249

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::stddev

double stddev

Definition: host/tensor_fill.h:163

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::operator()

complex< Element > operator()() const

Compute random value and update RNG state.

Definition: host/tensor_fill.h:181

cutlass::reference::host::TensorFillLinear

void TensorFillLinear(TensorView< Element, Layout > dst, Array< Element, Layout::kRank > const &v, Element s=Element(0))

Fills tensor with a linear combination of its coordinate and another vector.

Definition: host/tensor_fill.h:715

cutlass::reference::host::detail::TensorFillGaussianFunc

Computes a random Gaussian distribution.

Definition: host/tensor_fill.h:211

cutlass::reference::host::TensorUpdateDiagonal

void TensorUpdateDiagonal(TensorView< Element, Layout > dst, Element val=Element(1))

Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements...

Definition: host/tensor_fill.h:577

cutlass::reference::host::detail::TensorFillRandomUniformFunc::view

TensorView view

Definition: host/tensor_fill.h:412

cutlass::ReferenceFactory

Definition: subbyte_reference.h:557

cutlass::reference::host::detail::TensorFillLinearFunc::operator()

void operator()(Coord< Layout::kRank > const &coord) const

Updates the tensor.

Definition: host/tensor_fill.h:694

cutlass::reference::host::BlockFillRandomGaussian

void BlockFillRandomGaussian(Element *ptr, size_t capacity, uint64_t seed, double mean=0, double stddev=1, int bits=-1)

Fills a block of data with random values with a Gaussian distribution.

Definition: host/tensor_fill.h:277

distribution.h

This header contains a class to parametrize a statistical distribution function.

cutlass::reference::host::detail::TensorFillDiagonalFunc::view

TensorView view

Definition: host/tensor_fill.h:502

cutlass::reference::host::detail::TensorFillFunc::value

Element value

Definition: host/tensor_fill.h:68

cutlass::reference::host::detail::RandomUniformFunc::operator()

Element operator()() const

Compute random value and update RNG state.

Definition: host/tensor_fill.h:326

cutlass::reference::host::detail::RandomUniformFunc

Definition: host/tensor_fill.h:301

numeric_types.h

Top-level include for all CUTLASS numeric types.

cutlass::reference::host::detail::TensorFillLinearFunc::TensorFillLinearFunc

TensorFillLinearFunc(TensorView const &view_, Array< Element, Layout::kRank > const &v_, Element s_=Element(0))

Constructs functor.

Definition: host/tensor_fill.h:686

cutlass::platform::min

CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)

std::min

Definition: platform.h:183

cutlass::reference::host::detail::RandomGaussianFunc::pi

double pi

Definition: host/tensor_fill.h:116

cutlass::reference::host::detail::TensorUpdateOffDiagonalFunc::operator()

void operator()(Coord< Layout::kRank > const &coord) const

Definition: host/tensor_fill.h:618

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::range

double range

Definition: host/tensor_fill.h:355

cutlass::reference::host::BlockFillSequential

void BlockFillSequential(Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))

Fills a block of data with sequential elements.

Definition: host/tensor_fill.h:761

cutlass::reference::host::detail::RandomGaussianFunc::stddev

double stddev

Definition: host/tensor_fill.h:114

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::pi

double pi

Definition: host/tensor_fill.h:165

cutlass::reference::host::detail::TensorUpdateOffDiagonalFunc::TensorUpdateOffDiagonalFunc

TensorUpdateOffDiagonalFunc(TensorView const &view_=TensorView(), Element other_=Element(0))

Definition: host/tensor_fill.h:612

cutlass::reference::host::detail::RandomGaussianFunc

Definition: host/tensor_fill.h:110

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::seed

uint64_t seed

Definition: host/tensor_fill.h:161

cutlass::Coord< Layout::kRank >

cutlass::reference::host::detail::TensorFillGaussianFunc::TensorFillGaussianFunc

TensorFillGaussianFunc(TensorView view_=TensorView(), RandomGaussianFunc< Element > func_=RandomGaussianFunc< Element >())

Construction of Gaussian RNG functor.

Definition: host/tensor_fill.h:227

cutlass::reference::host::detail::TensorUpdateOffDiagonalFunc::other

Element other

Definition: host/tensor_fill.h:606

cutlass::reference::host::detail::TensorFillDiagonalFunc::TensorFillDiagonalFunc

TensorFillDiagonalFunc(TensorView const &view_=TensorView(), Element diag_=Element(1), Element other_=Element(0))

Definition: host/tensor_fill.h:510

cutlass::reference::host::detail::RandomGaussianFunc::RandomGaussianFunc

RandomGaussianFunc(uint64_t seed_=0, double mean_=0, double stddev_=1, int int_scale_=-1)

Definition: host/tensor_fill.h:121

cutlass::reference::host::detail::TensorFillFunc::view

TensorView view

Definition: host/tensor_fill.h:67

cutlass::complex

Definition: complex.h:92

cutlass::reference::host::detail::RandomUniformFunc< complex< Element > >::seed

uint64_t seed

Definition: host/tensor_fill.h:354

cutlass::TensorRef::at

CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const

Returns a reference to the element at a given Coord.

Definition: tensor_ref.h:307

cutlass::reference::host::detail::RandomUniformFunc::range

double range

Definition: host/tensor_fill.h:306

cutlass::reference::host::detail::TensorFillLinearFunc::TensorFillLinearFunc

TensorFillLinearFunc()

Definition: host/tensor_fill.h:683

cutlass::reference::host::BlockFillRandomUniform

void BlockFillRandomUniform(Element *ptr, size_t capacity, uint64_t seed, double max=1, double min=0, int bits=-1)

Fills a block of data with random values with a uniform random distribution.

Definition: host/tensor_fill.h:470

cutlass::reference::host::detail::TensorFillRandomUniformFunc::func

RandomUniformFunc< Element > func

Definition: host/tensor_fill.h:413

cutlass::reference::host::TensorForEach

void TensorForEach(Coord< Rank > extent, Func &func)

Iterates over the index space of a tensor.

Definition: host/tensor_foreach.h:87

cutlass::reference::host::TensorFillRandomUniform

void TensorFillRandomUniform(TensorView< Element, Layout > dst, uint64_t seed, double max=1, double min=0, int bits=-1)

Fills a tensor with random values with a uniform random distribution.

Definition: host/tensor_fill.h:443

cutlass::reference::host::TensorCopyDiagonalIn

void TensorCopyDiagonalIn(TensorView< Element, Layout > dst, Element const *ptr)

Copies a diagonal in from host memory without modifying off-diagonal elements.

Definition: host/tensor_fill.h:817

cutlass::reference::host::detail::RandomUniformFunc::min

double min

Definition: host/tensor_fill.h:307

cutlass::Distribution

Distribution type.

Definition: distribution.h:38

cutlass::reference::host::detail::TensorFillGaussianFunc::func

RandomGaussianFunc< Element > func

Definition: host/tensor_fill.h:220

cutlass::reference::host::TensorFillSequential

void TensorFillSequential(TensorView< Element, Layout > dst, Element s=Element(0))

Fills a tensor with values that increase sequentially with the linearized coordinate, starting at s.

Definition: host/tensor_fill.h:738

cutlass::reference::host::detail::TensorFillFunc

Functor that writes a single uniform value to every element it visits.

Definition: host/tensor_fill.h:59

cutlass::Distribution::int_scale

int int_scale

Random values are cast to integer after scaling by this power of two.

Definition: distribution.h:67

cutlass::reference::host::detail::TensorFillFunc::TensorFillFunc

TensorFillFunc(TensorView const &view_=TensorView(), Element value_=Element(0))

Definition: host/tensor_fill.h:74

cutlass::reference::host::detail::TensorFillRandomUniformFunc

Computes a random uniform distribution.

Definition: host/tensor_fill.h:404

cutlass.h

Basic include for CUTLASS.

cutlass::reference::host::detail::RandomGaussianFunc< complex< Element > >::mean

double mean

Definition: host/tensor_fill.h:162

cutlass::reference::host::detail::TensorFillLinearFunc::view

TensorView view

Definition: host/tensor_fill.h:675

cutlass::reference::host::detail::RandomUniformFunc::seed

uint64_t seed

Definition: host/tensor_fill.h:305

cutlass::reference::host::detail::TensorFillRandomUniformFunc::TensorFillRandomUniformFunc

TensorFillRandomUniformFunc(TensorView view_=TensorView(), RandomUniformFunc< Element > func_=RandomUniformFunc< Element >())

Construction of uniform RNG functor.

Definition: host/tensor_fill.h:420

cutlass::sqrt

CUTLASS_HOST_DEVICE complex< T > sqrt(complex< T > const &z)

Computes the square root of complex number z.

Definition: complex.h:393

cutlass::reference::host::detail::RandomUniformFunc::int_scale

int int_scale

Definition: host/tensor_fill.h:308

cutlass::reference::host::detail::TensorFillDiagonalFunc::other

Element other

Definition: host/tensor_fill.h:504

cutlass::reference::host::BlockFillRandom

void BlockFillRandom(Element *ptr, size_t capacity, uint64_t seed, Distribution dist)

Fills a block of data with random values drawn from the given distribution.

Definition: host/tensor_fill.h:784

tensor_foreach.h


Generated by 1.8.11