Back to Cutlass

CUTLASS: tensor_fill.h Source File

docs/device_2tensor__fill_8h_source.html

4.4.2118.2 KB
Original Source
<!-- do not remove this div, it is closed by doxygen! -->

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

<!-- end header part --><!-- Generated by Doxygen 1.8.11 --> <input type="text" id="MSearchField" value="Search" accesskey="S" onfocus="searchBox.OnSearchFieldFocus(true)" onblur="searchBox.OnSearchFieldFocus(false)" onkeyup="searchBox.OnSearchFieldChange(event)"> <!-- window showing the filter options --> <!-- iframe showing the search results (closed by default) --> <!-- top -->

device/tensor_fill.h

<!--header-->

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

25 /* \file

26 \brief Defines device-side elementwise operations on TensorView. Note, the operations defined

27 in this header are not specialized for any particular data layout and are therefore not

28 intended to offer the best possible performance. Rather, they are intended to be generic

29 reference implementations to support the CUTLASS unit tests.

30 */

31

32 #pragma once

33

34 #if !defined(__CUDACC_RTC__)

35

36 // Standard Library includes

37 #include <utility>

38 #include <cstdlib>

39 #include <cmath>

40 #include <type_traits>

41 #include <cstdint>

42

43 #endif

44

45 // CUDA includes

46 #include <cublas_v2.h>

47 #include <curand_kernel.h>

48

49 // Cutlass includes

50 #include "cutlass/cutlass.h"

51 #include "cutlass/array.h"

52 #include "cutlass/tensor_view.h"

53

54 #include "cutlass/util/reference/device/tensor_foreach.h"

55 #include "cutlass/util/distribution.h"

56

58

59 namespace cutlass {

60 namespace reference {

61 namespace device {

62

65

66 namespace detail {

67

68 template <typename FloatType>

69 CUTLASS_DEVICE

70 FloatType random_normal_float(curandState_t *state) {

71return curand_normal(state);

72 }

73

74 template <>

75 CUTLASS_DEVICE

76 double random_normal_float<double>(curandState_t *state) {

77return curand_normal_double(state);

78 }

79

80 template <typename FloatType>

81 CUTLASS_DEVICE

82 FloatType random_uniform_float(curandState_t *state) {

83return curand_uniform(state);

84 }

85

86 template <>

87 CUTLASS_DEVICE

88 double random_uniform_float<double>(curandState_t *state) {

89return curand_uniform_double(state);

90 }

91

92 template <typename Element>

93 struct RandomGaussianFunc {

94

95using FloatType = typename std::conditional<(sizeof(Element) > 4), double, float>::type;

96using IntType = typename std::conditional<(sizeof(Element) > 4), int64_t, int>::type;

97

99struct Params {

100

101//

102// Data members

103//

104

105 uint64_t seed;

106FloatType mean;

107FloatType stddev;

108int int_scale;

109

110//

111// Methods

112//

113

115Params(

116 uint64_t seed_ = 0,

117 Element mean_ = 0,

118 Element stddev_ = 1,

119int int_scale_ = -1

120 ):

121 seed(seed_),

122 mean(static_cast<FloatType>(mean_)),

123 stddev(static_cast<FloatType>(stddev_)),

124 int_scale(int_scale_) {

125

126 }

127 };

128

129//

130// Data members

131//

132

134Params params;

135

137 curandState_t rng_state;

138

139//

140// Methods

141//

142

144 CUTLASS_DEVICE

145RandomGaussianFunc(Params const &params): params(params) {

146

147 uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x;

148

149 curand_init(params.seed, gtid, 0, &rng_state);

150 }

151

153 CUTLASS_DEVICE

154 Element operator()() {

155

156FloatType rnd = random_normal_float<FloatType>(&rng_state);

157 rnd = params.mean + params.stddev * rnd;

158

159 Element result;

160if (params.int_scale >= 0) {

161 rnd = FloatType(IntType(rnd * FloatType(IntType(1) << params.int_scale)));

162 result = Element(rnd / FloatType(IntType(1) << params.int_scale));

163 }

164else {

165 result = Element(rnd);

166 }

167

168return result;

169 }

170 };

171

173 template <

174typename Element,

175typename Layout>

176 struct TensorFillRandomGaussianFunc {

177

179using TensorView = TensorView<Element, Layout>;

180

182typedef typename TensorView::Element T;

183

185typedef typename TensorView::TensorCoord TensorCoord;

186

187using RandomFunc = RandomGaussianFunc<Element>;

188

190struct Params {

191

192//

193// Data members

194//

195

196TensorView view;

197typename RandomFunc::Params random;

198

199//

200// Methods

201//

202

204Params(

205TensorView view_ = TensorView(),

206typename RandomFunc::Params random_ = typename RandomFunc::Params()

207 ):

208 view(view_), random(random_) {

209

210 }

211 };

212

213//

214// Data members

215//

216

217Params params;

218RandomFunc random;

219

220//

221// Methods

222//

223

225 CUTLASS_DEVICE

226TensorFillRandomGaussianFunc(Params const &params): params(params), random(params.random) {

227

228 }

229

231 CUTLASS_DEVICE

232void operator()(TensorCoord const &coord) {

233

234 params.view.at(coord) = random();

235 }

236 };

237

238 } // namespace detail

239

241

243 template <

244typename Element,

245typename Layout>

246 void TensorFillRandomGaussian(

247TensorView<Element, Layout> view,

248 uint64_t seed,

249 Element mean = Element(0),

250 Element stddev = Element(1),

251int bits = -1) {

252

255using RandomFunc = detail::RandomGaussianFunc<Element>;

256using Func = detail::TensorFillRandomGaussianFunc<Element, Layout>;

257using Params = typename Func::Params;

258

259TensorForEach<Func, Layout::kRank, Params>(

260 view.extent(),

261Params(view, typename RandomFunc::Params(seed, mean, stddev, bits))

262 );

263 }

264

266

268 template <typename Element>

269 void BlockFillRandomGaussian(

270 Element *ptr,

271size_t capacity,

272 uint64_t seed,

273 Element mean = Element(0),

274 Element stddev = Element(1),

275int bits = -1) {

276

279using RandomFunc = detail::RandomGaussianFunc<Element>;

280

281typename RandomFunc::Params params(seed, mean, stddev, bits);

282

283BlockForEach<Element, RandomFunc>(ptr, capacity, params);

284 }

285

288

289 namespace detail {

290

292 template <typename Element>

293 struct RandomUniformFunc {

294

295using FloatType = typename std::conditional<

296 (sizeof(Element) > 4),

297 double,

298float>::type;

299

300using IntType = typename std::conditional<

301 (sizeof(Element) > 4),

302 int64_t,

303int>::type;

304

306struct Params {

307

308//

309// Data members

310//

311

312 uint64_t seed;

313FloatType range;

314FloatType min;

315int int_scale;

316

318CUTLASS_HOST_DEVICE

319Params() { }

320

321//

322// Methods

323//

324

326Params(

327 uint64_t seed_ = 0,

328 Element max = 1,

329 Element min_ = 0,

330int int_scale_ = -1

331 ):

332 seed(seed_),

333 range(static_cast<FloatType>(max - min_)),

334 min(static_cast<FloatType>(min_)),

335 int_scale(int_scale_) {

336

337 }

338 };

339

340//

341// Data members

342//

343

345Params params;

346

348 curandState_t rng_state;

349

350//

351// Methods

352//

353

355 CUTLASS_DEVICE

356RandomUniformFunc(Params const &params): params(params) {

357

358 uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x;

359

360 curand_init(params.seed, gtid, 0, &rng_state);

361 }

362

364 CUTLASS_DEVICE

365 Element operator()() {

366

367FloatType rnd = random_uniform_float<FloatType>(&rng_state);

368 rnd = params.min + params.range * rnd;

369

370// Random values are cast to integer after scaling by a power of two to facilitate error

371// testing

372 Element result;

373

374if (params.int_scale >= 0) {

375 rnd = FloatType(IntType(rnd * FloatType(IntType(1) << params.int_scale)));

376 result = Element(rnd / FloatType(IntType(1) << params.int_scale));

377 }

378else {

379 result = Element(rnd);

380 }

381

382return result;

383 }

384 };

385

387 template <

388typename Element,

389typename Layout>

390 struct TensorFillRandomUniformFunc {

391

393using TensorView = TensorView<Element, Layout>;

394

396typedef typename TensorView::Element T;

397

399typedef typename TensorView::TensorCoord TensorCoord;

400

401using RandomFunc = RandomUniformFunc<Element>;

402

404struct Params {

405

406//

407// Data members

408//

409

410TensorView view;

411typename RandomFunc::Params random;

412

414CUTLASS_HOST_DEVICE

415Params() { }

416

417//

418// Methods

419//

420

422Params(

423TensorView view_ = TensorView(),

424typename RandomFunc::Params random_ = RandomFunc::Params()

425 ):

426 view(view_), random(random_) {

427

428 }

429 };

430

431//

432// Data members

433//

434

435Params params;

436RandomFunc random;

437

438//

439// Methods

440//

441

443 CUTLASS_DEVICE

444TensorFillRandomUniformFunc(Params const &params): params(params), random(params.random) {

445 }

446

448 CUTLASS_DEVICE

449void operator()(TensorCoord const &coord) {

450

451 params.view.at(coord) = random();

452 }

453 };

454

455 } // namespace detail

456

458

460 template <

461typename Element,

462typename Layout>

463 void TensorFillRandomUniform(

464TensorView<Element, Layout> view,

465 uint64_t seed,

466 Element max = Element(1),

467 Element min = Element(0),

468int bits = -1) {

469

472using RandomFunc = detail::RandomUniformFunc<Element>;

473using Func = detail::TensorFillRandomUniformFunc<Element, Layout>;

474using Params = typename Func::Params;

475

476typename RandomFunc::Params random(seed, max, min, bits);

477

478TensorForEach<Func, Layout::kRank, Params>(

479 view.size(),

480Params(view, random)

481 );

482 }

483

485

487 template <typename Element>

488 void BlockFillRandomUniform(

489 Element *ptr,

490size_t capacity,

491 uint64_t seed,

492 Element max = Element(1),

493 Element min = Element(0),

494int bits = -1) {

495

498using RandomFunc = detail::RandomUniformFunc<Element>;

499typename RandomFunc::Params params(seed, max, min, bits);

500

501BlockForEach<Element, RandomFunc>(ptr, capacity, params);

502 }

503

506

507 namespace detail {

508

510 template <

511typename Element,

512typename Layout>

513 struct TensorFillDiagonalFunc {

514

516using TensorView = TensorView<Element, Layout>;

517

519typedef typename TensorView::Element T;

520

522typedef typename TensorView::TensorCoord TensorCoord;

523

525struct Params {

526

527//

528// Data members

529//

530

531TensorView view;

532 Element diag;

533 Element other;

534

536CUTLASS_HOST_DEVICE

537Params() { }

538

539//

540// Methods

541//

542

544Params(

545TensorView view_ = TensorView(),

546 Element diag_ = Element(1),

547 Element other_ = Element(0)

548 ):

549 view(view_), diag(diag_), other(other_) {

550

551 }

552 };

553

554//

555// Data members

556//

557

559Params params;

560

561//

562// Methods

563//

564

566 CUTLASS_DEVICE

567TensorFillDiagonalFunc(Params const &params): params(params) {

568

569 }

570

572 CUTLASS_DEVICE

573void operator()(TensorCoord const &coord) {

574

575bool is_diag = true;

576

577CUTLASS_PRAGMA_UNROLL

578for (int i = 1; i < Layout::kRank; ++i) {

579if (coord[i] != coord[i - 1]) {

580 is_diag = false;

581break;

582 }

583 }

584

585 params.view.at(coord) = (is_diag ? params.diag : params.other);

586 }

587 };

588

589 } // namespace detail

590

592

594 template <

595typename Element,

596typename Layout>

597 void TensorFillDiagonal(

598TensorView<Element, Layout> view,

599 Element diag = Element(1),

600 Element other = Element(0)) {

601

602typedef detail::TensorFillDiagonalFunc<Element, Layout> Func;

603typedef typename Func::Params Params;

604

605TensorForEach<Func, Layout::kRank, Params>(

606 view.size(),

607Params(view, diag, other)

608 );

609 }

610

612

614 template <

615typename Element,

616typename Layout>

617 void TensorFill(

618TensorView<Element, Layout> view,

619 Element val = Element(0)) {

620

621TensorFillDiagonal(view, val, val);

622 }

623

625

627 template <

628typename Element,

629typename Layout>

630 void TensorFillIdentity(

631TensorView<Element, Layout> view) {

632

633TensorFillDiagonal(view, Element(1), Element(0));

634 }

635

638

639 namespace detail {

640

642 template <

643typename Element,

644typename Layout>

645 struct TensorUpdateDiagonalFunc {

646

648using TensorView = TensorView<Element, Layout>;

649

651typedef typename TensorView::Element T;

652

654typedef typename TensorView::TensorCoord TensorCoord;

655

657struct Params {

658

659//

660// Data members

661//

662

663TensorView view;

664 Element diag;

665

667CUTLASS_HOST_DEVICE

668Params() { }

669

670//

671// Methods

672//

673

675Params(

676TensorView view_ = TensorView(),

677 Element diag_ = Element(1)

678 ):

679 view(view_), diag(diag_) {

680

681 }

682 };

683

684//

685// Data members

686//

687

689Params params;

690

691//

692// Methods

693//

694

696 CUTLASS_DEVICE

697TensorUpdateDiagonalFunc(Params const &params): params(params) {

698

699 }

700

702 CUTLASS_DEVICE

703void operator()(TensorCoord const &coord) {

704

705bool is_diag = true;

706

707CUTLASS_PRAGMA_UNROLL

708for (int i = 1; i < Layout::kRank; ++i) {

709if (coord[i] != coord[i - 1]) {

710 is_diag = false;

711break;

712 }

713 }

714

715if (is_diag) {

716 params.view.at(coord) = params.diag;

717 }

718 }

719 };

720

721 } // namespace detail

722

724

726 template <

727typename Element,

728typename Layout>

729 void TensorUpdateDiagonal(

730TensorView<Element, Layout> view,

731 Element diag = Element(1)) {

732

733typedef detail::TensorUpdateDiagonalFunc<Element, Layout> Func;

734typedef typename Func::Params Params;

735

736TensorForEach<Func, Layout::kRank, Params>(

737 view.size(),

738Params(view, diag)

739 );

740 }

741

744

745 namespace detail {

746

748 template <

749typename Element,

750typename Layout>

751 struct TensorUpdateOffDiagonalFunc {

752

754using TensorView = TensorView<Element, Layout>;

755

757typedef typename TensorView::Element T;

758

760typedef typename TensorView::TensorCoord TensorCoord;

761

763struct Params {

764

765//

766// Data members

767//

768

769TensorView view;

770 Element other;

771

773CUTLASS_HOST_DEVICE

774Params() { }

775

776//

777// Methods

778//

779

781Params(

782TensorView view_ = TensorView(),

783 Element other_ = Element(0)

784 ):

785 view(view_), other(other_) {

786

787 }

788 };

789

790//

791// Data members

792//

793

795Params params;

796

797//

798// Methods

799//

800

802 CUTLASS_DEVICE

803TensorUpdateOffDiagonalFunc(Params const &params): params(params) {

804

805 }

806

808 CUTLASS_DEVICE

809void operator()(TensorCoord const &coord) {

810

811bool is_diag = true;

812

813CUTLASS_PRAGMA_UNROLL

814for (int i = 1; i < Layout::kRank; ++i) {

815if (coord[i] != coord[i - 1]) {

816 is_diag = false;

817break;

818 }

819 }

820

821if (!is_diag) {

822 params.view.at(coord) = params.other;

823 }

824 }

825 };

826

827 } // namespace detail

828

830

832 template <

833typename Element,

834typename Layout>

835 void TensorUpdateOffDiagonal(

836TensorView<Element, Layout> view,

837 Element other = Element(1)) {

838

839typedef detail::TensorUpdateOffDiagonalFunc<Element, Layout> Func;

840typedef typename Func::Params Params;

841

842TensorForEach<Func, Layout::kRank, Params>(

843 view.size(),

844Params(view, other)

845 );

846 }

847

850

851 namespace detail {

852

854 template <

855typename Element,

856typename Layout>

857 struct TensorFillLinearFunc {

858

860using TensorView = TensorView<Element, Layout>;

861

863typedef typename TensorView::Element T;

864

866typedef typename TensorView::TensorCoord TensorCoord;

867

869struct Params {

870

871//

872// Data members

873//

874

875TensorView view;

876 Array<Element, Layout::kRank> v;

877 Element s;

878

880CUTLASS_HOST_DEVICE

881Params() { }

882

883//

884// Methods

885//

886

888Params(

889TensorView view_,

890 Array<Element, Layout::kRank> const & v_,

891 Element s_ = Element(0)

892 ):

893 view(view_), v(v_), s(s_) {

894

895 }

896 };

897

898//

899// Data members

900//

901

903Params params;

904

905//

906// Methods

907//

908

910 CUTLASS_DEVICE

911TensorFillLinearFunc(Params const &params): params(params) {

912

913 }

914

916 CUTLASS_DEVICE

917void operator()(TensorCoord const &coord) {

918 Element sum = params.s;

919

920CUTLASS_PRAGMA_UNROLL

921for (int i = 0; i < Layout::kRank; ++i) {

922 sum += params.v[i] * Element(coord[i]);

923 }

924

925 params.view.at(coord) = sum;

926 }

927 };

928

929 } // namespace detail

930

932

934 template <

935typename Element,

936typename Layout>

937 void TensorFillLinear(

938TensorView<Element, Layout> view,

939 Array<Element, Layout::kRank> const & v,

940 Element s = Element(0)) {

941

942using Func = detail::TensorFillLinearFunc<Element, Layout>;

943using Params = typename Func::Params;

944

945TensorForEach<Func, Layout::kRank, Params>(

946 view.size(),

947Params(view, v, s)

948 );

949 }

950

953

/// Declared to fill a block of data with sequential elements.
///
/// NOTE(review): the body is empty, so calling this function leaves the block pointed to by
/// `ptr` unchanged. The parameters are accepted but not used.
template <
  typename Element
>
void BlockFillSequential(
  Element *ptr,
  int64_t capacity,
  Element v = Element(1),
  Element s = Element(0)) {

  // Intentionally no work is performed; the casts only mark the parameters as unused.
  (void)ptr;
  (void)capacity;
  (void)v;
  (void)s;
}

965

968

970 template <

971typename Element

972 >

973 void BlockFillRandom(

974 Element *ptr,

975size_t capacity,

976 uint64_t seed,

977Distribution dist) {

978

979if (dist.kind == Distribution::Gaussian) {

980 BlockFillRandomGaussian<Element>(

981 ptr,

982 capacity,

983seed,

984static_cast<Element>(dist.gaussian.mean),

985 static_cast<Element>(dist.gaussian.stddev),

986 dist.int_scale);

987 }

988else if (dist.kind == Distribution::Uniform) {

989 BlockFillRandomUniform<Element>(

990 ptr,

991 capacity,

992seed,

993static_cast<Element>(dist.uniform.max),

994 static_cast<Element>(dist.uniform.min),

995 dist.int_scale);

996 }

997 }

998

1001

1002 namespace detail {

1003

1005 template <

1006typename Element,

1007typename Layout>

1008 struct TensorCopyDiagonalInFunc {

1009

1011using TensorView = TensorView<Element, Layout>;

1012

1014typedef typename TensorView::Element T;

1015

1017typedef typename TensorView::TensorCoord TensorCoord;

1018

1020struct Params {

1021

1022//

1023// Data members

1024//

1025

1026TensorView view;

1027 Element const *ptr;

1028

1030CUTLASS_HOST_DEVICE

1031Params() { }

1032

1033//

1034// Methods

1035//

1036

1038Params(

1039TensorView view_,

1040 Element const *ptr_

1041 ):

1042 view(view_), ptr(ptr_) {

1043

1044 }

1045 };

1046

1047//

1048// Data members

1049//

1050

1052Params params;

1053

1054//

1055// Methods

1056//

1057

1059 CUTLASS_DEVICE

1060TensorCopyDiagonalInFunc(Params const &params): params(params) {

1061

1062 }

1063

1065 CUTLASS_DEVICE

1066void operator()(TensorCoord const &coord) {

1067bool is_diagonal = true;

1068

1069CUTLASS_PRAGMA_UNROLL

1070for (int i = 1; i < Layout::kRank; ++i) {

1071if (coord[i] != coord[0]) {

1072 is_diagonal = false;

1073 }

1074 }

1075if (is_diagonal) {

1076 params.view.at(coord) = params.ptr[coord[0]];

1077 }

1078 }

1079 };

1080

1081 } // namespace detail

1082

1084

1086 template <

1087typename Element,

1088typename Layout>

1089 void TensorCopyDiagonalIn(

1090TensorView<Element, Layout> view,

1091 Element const *ptr) {

1092

1093using Func = detail::TensorCopyDiagonalInFunc<Element, Layout>;

1094using Params = typename Func::Params;

1095

1096TensorForEach<Func, Layout::kRank, Params>(

1097 view.size(),

1098Params(view, ptr)

1099 );

1100 }

1101

1104

1105

1106 namespace detail {

1107

1109 template <

1110typename Element,

1111typename Layout>

1112 struct TensorCopyDiagonalOutFunc {

1113

1115using TensorView = TensorView<Element, Layout>;

1116

1118typedef typename TensorView::Element T;

1119

1121typedef typename TensorView::TensorCoord TensorCoord;

1122

1124struct Params {

1125

1126//

1127// Data members

1128//

1129

1130TensorView view;

1131 Element *ptr;

1132

1134CUTLASS_HOST_DEVICE

1135Params() { }

1136

1137//

1138// Methods

1139//

1140

1142Params(

1143TensorView view_,

1144 Element *ptr_

1145 ):

1146 view(view_), ptr(ptr_) {

1147

1148 }

1149 };

1150

1151//

1152// Data members

1153//

1154

1156Params params;

1157

1158//

1159// Methods

1160//

1161

1163 CUTLASS_DEVICE

1164TensorCopyDiagonalOutFunc(Params const &params): params(params) {

1165

1166 }

1167

1169 CUTLASS_DEVICE

1170void operator()(TensorCoord const &coord) {

1171bool is_diagonal = true;

1172

1173CUTLASS_PRAGMA_UNROLL

1174for (int i = 1; i < Layout::kRank; ++i) {

1175if (coord[i] != coord[0]) {

1176 is_diagonal = false;

1177 }

1178 }

1179if (is_diagonal) {

1180 params.ptr[coord[0]] = params.view.at(coord);

1181 }

1182 }

1183 };

1184

1185 } // namespace detail

1186

1188

1190 template <

1191typename Element,

1192typename Layout>

1193 void TensorCopyDiagonalOut(

1194 Element *ptr,

1195TensorView<Element, Layout> view) {

1196

1197using Func = detail::TensorCopyDiagonalOutFunc<Element, Layout>;

1198using Params = typename Func::Params;

1199

1200TensorForEach<Func, Layout::kRank, Params>(

1201 view.size(),

1202Params(view, ptr)

1203 );

1204 }

1205

1208

1209 } // namespace device

1210 } // namespace reference

1211 } // namespace cutlass

cutlass::reference::device::detail::TensorFillLinearFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:866

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc

Functor that writes a single value to every off-diagonal element of a tensor.

Definition: device/tensor_fill.h:751

cutlass::reference::device::detail::TensorFillLinearFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:881

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::Params::Params

Params(TensorView view_=TensorView(), Element diag_=Element(1))

Constructs the parameters from a tensor view and the diagonal value.

Definition: device/tensor_fill.h:675

cutlass::platform::max

CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)

std::max

Definition: platform.h:189

cutlass::reference::device::detail::RandomGaussianFunc::RandomGaussianFunc

CUTLASS_DEVICE RandomGaussianFunc(Params const &params)

Device-side initialization of RNG.

Definition: device/tensor_fill.h:145

cutlass::reference::device::detail::TensorFillRandomUniformFunc::TensorFillRandomUniformFunc

CUTLASS_DEVICE TensorFillRandomUniformFunc(Params const &params)

Device-side initialization of RNG.

Definition: device/tensor_fill.h:444

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:1031

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:760

cutlass

Definition: aligned_buffer.h:35

cutlass::Distribution::Uniform

Definition: distribution.h:40

cutlass::reference::device::detail::TensorFillRandomUniformFunc::random

RandomFunc random

Definition: device/tensor_fill.h:436

cutlass::reference::device::TensorCopyDiagonalOut

void TensorCopyDiagonalOut(Element *ptr, TensorView< Element, Layout > view)

Copies the diagonal of a tensor into a dense buffer in host memory.

Definition: device/tensor_fill.h:1193

cutlass::reference::device::detail::RandomUniformFunc::IntType

typename std::conditional< (sizeof(Element) > 4), int64_t, int >::type IntType

Definition: device/tensor_fill.h:303

cutlass::reference::device::detail::RandomUniformFunc::operator()

CUTLASS_DEVICE Element operator()()

Compute random value and update RNG state.

Definition: device/tensor_fill.h:365

cutlass::reference::device::detail::TensorFillRandomUniformFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:415

cutlass::Distribution::Gaussian

Definition: distribution.h:40

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:663

cutlass::Distribution::uniform

struct cutlass::Distribution::@18::@20 uniform

Uniform distribution.

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:1020

cutlass::reference::device::detail::TensorFillLinearFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:863

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::Params::ptr

Element const * ptr

Definition: device/tensor_fill.h:1027

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:196

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:1017

cutlass::reference::device::detail::RandomGaussianFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:99

cutlass::Distribution::kind

Kind kind

Active variant kind.

Definition: distribution.h:64

cutlass::reference::device::detail::TensorFillRandomUniformFunc::Params::Params

Params(TensorView view_=TensorView(), typename RandomFunc::Params random_=RandomFunc::Params())

Constructs the parameters from a tensor view and the uniform RNG parameters.

Definition: device/tensor_fill.h:422

cutlass::reference::device::TensorFillIdentity

void TensorFillIdentity(TensorView< Element, Layout > view)

Fills a tensor's diagonal with 1 and 0 everywhere else.

Definition: device/tensor_fill.h:630

cutlass::TensorView::extent

CUTLASS_HOST_DEVICE TensorCoord const & extent() const

Returns the extent of the view (the size along each logical dimension).

Definition: tensor_view.h:167

cutlass::reference::device::detail::TensorUpdateDiagonalFunc

Functor that writes a single value to every diagonal element of a tensor.

Definition: device/tensor_fill.h:645

cutlass::reference::device::detail::RandomUniformFunc::Params::int_scale

int int_scale

Definition: device/tensor_fill.h:315

cutlass::reference::device::detail::TensorFillRandomUniformFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:396

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::Params::Params

Params(TensorView view_, Element *ptr_)

Constructs the parameters from a tensor view and the destination pointer.

Definition: device/tensor_fill.h:1142

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:1052

cutlass::reference::device::detail::TensorFillRandomUniformFunc::Params::random

RandomFunc::Params random

Definition: device/tensor_fill.h:411

cutlass::Distribution::gaussian

struct cutlass::Distribution::@18::@21 gaussian

Gaussian distribution.

cutlass::reference::device::detail::RandomUniformFunc::Params::min

FloatType min

Definition: device/tensor_fill.h:314

cutlass::reference::device::detail::RandomGaussianFunc

Definition: device/tensor_fill.h:93

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:651

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::TensorUpdateDiagonalFunc

CUTLASS_DEVICE TensorUpdateDiagonalFunc(Params const &params)

Constructs the functor from its parameters.

Definition: device/tensor_fill.h:697

cutlass::reference::device::detail::TensorFillLinearFunc::TensorFillLinearFunc

CUTLASS_DEVICE TensorFillLinearFunc(Params const &params)

Constructs the functor from its parameters.

Definition: device/tensor_fill.h:911

cutlass::reference::device::TensorCopyDiagonalIn

void TensorCopyDiagonalIn(TensorView< Element, Layout > view, Element const *ptr)

Copies a diagonal in from host memory without modifying off-diagonal elements.

Definition: device/tensor_fill.h:1089

cutlass::reference::device::detail::RandomGaussianFunc::rng_state

curandState_t rng_state

RNG state object.

Definition: device/tensor_fill.h:137

cutlass::reference::device::detail::TensorFillLinearFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Computes the linear function of the coordinate and writes the result to the tensor.

Definition: device/tensor_fill.h:917

tensor_foreach.h

cutlass::reference::device::detail::RandomUniformFunc::Params::Params

Params(uint64_t seed_=0, Element max=1, Element min_=0, int int_scale_=-1)

Constructs the uniform RNG parameters from a seed, a maximum, a minimum, and an integer scale.

Definition: device/tensor_fill.h:326

tensor_view.h

Defines a structure containing strides and a pointer to tensor data.

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::random

RandomFunc random

Definition: device/tensor_fill.h:218

cutlass::reference::device::detail::RandomGaussianFunc::Params::seed

uint64_t seed

Definition: device/tensor_fill.h:105

cutlass::reference::device::detail::random_normal_float< double >

CUTLASS_DEVICE double random_normal_float< double >(curandState_t *state)

Definition: device/tensor_fill.h:76

cutlass::FloatType

Defines a floating-point type based on the number of exponent and mantissa bits.

Definition: numeric_types.h:144

cutlass::reference::device::detail::RandomGaussianFunc::FloatType

typename std::conditional<(sizeof(Element) > 4), double, float >::type FloatType

Definition: device/tensor_fill.h:95

cutlass::reference::device::detail::TensorFillLinearFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:875

cutlass::TensorView< Element, Layout >::Element

Element Element

Data type of individual access.

Definition: tensor_view.h:72

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::Params::Params

Params(TensorView view_, Element const *ptr_)

Constructs the parameters from a tensor view and the source pointer.

Definition: device/tensor_fill.h:1038

cutlass::reference::device::detail::TensorFillDiagonalFunc::Params::Params

Params(TensorView view_=TensorView(), Element diag_=Element(1), Element other_=Element(0))

Constructs the parameters from a tensor view, the diagonal value, and the off-diagonal value.

Definition: device/tensor_fill.h:544

cutlass::reference::device::detail::RandomUniformFunc::Params::seed

uint64_t seed

Definition: device/tensor_fill.h:312

cutlass::reference::device::BlockFillSequential

void BlockFillSequential(Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))

Fills a block of data with sequential elements.

Definition: device/tensor_fill.h:958

array.h

Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::reference::device::detail::RandomUniformFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:306

cutlass::reference::device::detail::TensorFillLinearFunc

Fills a tensor with a linear combination of its coordinate and another vector.

Definition: device/tensor_fill.h:857

cutlass::reference::device::detail::RandomGaussianFunc::Params::int_scale

int int_scale

Definition: device/tensor_fill.h:108

cutlass::reference::device::TensorFillRandomGaussian

void TensorFillRandomGaussian(TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)

Fills a tensor with random values with a Gaussian distribution.

Definition: device/tensor_fill.h:246

cutlass::reference::device::detail::TensorFillDiagonalFunc::TensorFillDiagonalFunc

CUTLASS_DEVICE TensorFillDiagonalFunc(Params const &params)

Device-side construction of the functor from its parameters.

Definition: device/tensor_fill.h:567

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::params

Params params

Definition: device/tensor_fill.h:217

cutlass::reference::device::BlockFillRandomUniform

void BlockFillRandomUniform(Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)

Fills a block of data with random values with a uniform random distribution.

Definition: device/tensor_fill.h:488

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:190

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:1026

cutlass::reference::device::detail::TensorFillLinearFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:869

cutlass::reference::device::detail::TensorFillLinearFunc::Params::Params

Params(TensorView view_, Array< Element, Layout::kRank > const &v_, Element s_=Element(0))

Constructs the parameters from a tensor view, a vector of per-rank coefficients, and a scalar offset.

Definition: device/tensor_fill.h:888

cutlass::TensorView< Element, Layout >

cutlass::reference::device::detail::TensorFillRandomUniformFunc::params

Params params

Definition: device/tensor_fill.h:435

cutlass::reference::device::TensorFillDiagonal

void TensorFillDiagonal(TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0))

Fills a tensor everywhere with a unique value for its diagonal.

Definition: device/tensor_fill.h:597

cutlass::TensorView< Element, Layout >::TensorCoord

typename Layout::TensorCoord TensorCoord

Coordinate in logical tensor space.

Definition: tensor_view.h:87

cutlass::reference::device::detail::TensorFillLinearFunc::Params::s

Element s

Definition: device/tensor_fill.h:877

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:1118

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:795

cutlass::reference::device::detail::TensorFillDiagonalFunc

Fills a tensor everywhere with a unique value for its diagonal.

Definition: device/tensor_fill.h:513

cutlass::reference::device::detail::TensorFillDiagonalFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:525

cutlass::reference::device::detail::TensorCopyDiagonalInFunc

Copies values from a source pointer into the diagonal of a tensor.

Definition: device/tensor_fill.h:1008

cutlass::reference::device::detail::RandomGaussianFunc::Params::mean

FloatType mean

Definition: device/tensor_fill.h:106

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::Params::diag

Element diag

Definition: device/tensor_fill.h:664

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::TensorCopyDiagonalInFunc

CUTLASS_DEVICE TensorCopyDiagonalInFunc(Params const &params)

Device-side construction of the functor from its parameters.

Definition: device/tensor_fill.h:1060

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:654

cutlass::reference::device::detail::TensorFillDiagonalFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Writes the diagonal value or the off-diagonal value to the element at the given coordinate.

Definition: device/tensor_fill.h:573

cutlass::reference::device::TensorFill

void TensorFill(TensorView< Element, Layout > view, Element val=Element(0))

Fills a tensor with a uniform value.

Definition: device/tensor_fill.h:617

distribution.h

This header contains a class to parametrize a statistical distribution function.

cutlass::reference::device::detail::RandomUniformFunc

Computes a random uniform distribution.

Definition: device/tensor_fill.h:293

cutlass::reference::device::detail::RandomGaussianFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:134

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Writes the diagonal value to the element at the given coordinate only if it lies on the diagonal.

Definition: device/tensor_fill.h:703

cutlass::reference::device::detail::TensorFillRandomUniformFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Compute random value and update RNG state.

Definition: device/tensor_fill.h:449

cutlass::reference::device::BlockFillRandomGaussian

void BlockFillRandomGaussian(Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)

Fills a block of data with random values with a Gaussian distribution.

Definition: device/tensor_fill.h:269

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:1121

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:1014

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:185

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::Params::other

Element other

Definition: device/tensor_fill.h:770

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Compute random value and update RNG state.

Definition: device/tensor_fill.h:232

cutlass::reference::device::detail::TensorCopyDiagonalInFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Only update the diagonal element.

Definition: device/tensor_fill.h:1066

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc

Copies the diagonal of a tensor out to a destination pointer.

Definition: device/tensor_fill.h:1112

cutlass::platform::min

CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)

std::min

Definition: platform.h:183

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:1130

cutlass::reference::device::detail::TensorFillDiagonalFunc::Params::other

Element other

Definition: device/tensor_fill.h:533

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:763

cutlass::reference::device::detail::RandomUniformFunc::FloatType

typename std::conditional< (sizeof(Element) > 4), double, float >::type FloatType

Definition: device/tensor_fill.h:298

cutlass::reference::device::TensorForEach

Launches a kernel calling a functor for each element in a tensor's index space.

Definition: device/tensor_foreach.h:39

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:657

cutlass::reference::device::detail::TensorFillLinearFunc::Params::v

Array< Element, Layout::kRank > v

Definition: device/tensor_fill.h:876

cutlass::reference::device::TensorUpdateDiagonal

void TensorUpdateDiagonal(TensorView< Element, Layout > view, Element diag=Element(1))

Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements...

Definition: device/tensor_fill.h:729

cutlass::reference::device::detail::TensorFillRandomUniformFunc

Fills a tensor with random values drawn from a uniform distribution.

Definition: device/tensor_fill.h:390

cutlass::reference::device::detail::random_uniform_float< double >

CUTLASS_DEVICE double random_uniform_float< double >(curandState_t *state)

Definition: device/tensor_fill.h:88

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:769

cutlass::reference::device::detail::random_normal_float

CUTLASS_DEVICE FloatType random_normal_float(curandState_t *state)

Definition: device/tensor_fill.h:70

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::TensorUpdateOffDiagonalFunc

CUTLASS_DEVICE TensorUpdateOffDiagonalFunc(Params const &params)

Device-side construction of the functor from its parameters.

Definition: device/tensor_fill.h:803

cutlass::reference::device::detail::RandomUniformFunc::Params::range

FloatType range

Definition: device/tensor_fill.h:313

cutlass::reference::device::BlockFillRandom

void BlockFillRandom(Element *ptr, size_t capacity, uint64_t seed, Distribution dist)

Fills a block of data with random elements drawn from the given distribution.

Definition: device/tensor_fill.h:973

cutlass::reference::device::detail::RandomGaussianFunc::Params::Params

Params(uint64_t seed_=0, Element mean_=0, Element stddev_=1, int int_scale_=-1)

Construction of Gaussian RNG functor.

Definition: device/tensor_fill.h:115

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::TensorCopyDiagonalOutFunc

CUTLASS_DEVICE TensorCopyDiagonalOutFunc(Params const &params)

Device-side construction of the functor from its parameters.

Definition: device/tensor_fill.h:1164

cutlass::reference::device::TensorFillLinear

void TensorFillLinear(TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0))

Fills tensor with a linear combination of its coordinate and another vector.

Definition: device/tensor_fill.h:937

cutlass::reference::device::detail::RandomUniformFunc::RandomUniformFunc

CUTLASS_DEVICE RandomUniformFunc(Params const &params)

Device-side initialization of RNG.

Definition: device/tensor_fill.h:356

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::Params::ptr

Element * ptr

Definition: device/tensor_fill.h:1131

cutlass::reference::device::detail::TensorFillDiagonalFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:559

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:689

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:1124

cutlass::reference::device::TensorUpdateOffDiagonal

void TensorUpdateOffDiagonal(TensorView< Element, Layout > view, Element other=Element(1))

Writes a uniform value to all elements in the tensor without modifying diagonal elements.

Definition: device/tensor_fill.h:835

cutlass::reference::device::detail::RandomUniformFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:345

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:757

cutlass::reference::device::detail::TensorFillRandomUniformFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:399

cutlass::reference::device::detail::TensorFillDiagonalFunc::Params::diag

Element diag

Definition: device/tensor_fill.h:532

cutlass::reference::device::detail::RandomGaussianFunc::IntType

typename std::conditional<(sizeof(Element) > 4), int64_t, int >::type IntType

Definition: device/tensor_fill.h:96

cutlass::reference::device::detail::RandomUniformFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:319

cutlass::TensorRef::at

CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const

Returns a reference to the element at a given Coord.

Definition: tensor_ref.h:307

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:1135

cutlass::reference::device::detail::TensorFillDiagonalFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:531

cutlass::reference::device::detail::random_uniform_float

CUTLASS_DEVICE FloatType random_uniform_float(curandState_t *state)

Definition: device/tensor_fill.h:82

cutlass::reference::device::detail::TensorUpdateDiagonalFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:668

cutlass::reference::device::TensorFillRandomUniform

void TensorFillRandomUniform(TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)

Fills a tensor with random values with a uniform random distribution.

Definition: device/tensor_fill.h:463

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:1156

cutlass::reference::device::detail::TensorFillDiagonalFunc::TensorCoord

TensorView::TensorCoord TensorCoord

Coordinate in tensor's index space.

Definition: device/tensor_fill.h:522

cutlass::reference::device::detail::TensorFillRandomUniformFunc::Params::view

TensorView view

Definition: device/tensor_fill.h:410

cutlass::reference::device::detail::RandomGaussianFunc::operator()

CUTLASS_DEVICE Element operator()()

Compute random value and update RNG state.

Definition: device/tensor_fill.h:154

cutlass::reference::device::BlockForEach

Definition: device/tensor_foreach.h:92

cutlass::reference::device::detail::TensorCopyDiagonalOutFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Copies the element at the given coordinate out to the destination pointer if it lies on the diagonal.

Definition: device/tensor_fill.h:1170

cutlass::Distribution

Distribution type.

Definition: distribution.h:38

cutlass::reference::device::detail::RandomUniformFunc::rng_state

curandState_t rng_state

RNG state object.

Definition: device/tensor_fill.h:348

cutlass::reference::device::detail::TensorFillRandomGaussianFunc

Computes a random Gaussian distribution.

Definition: device/tensor_fill.h:176

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:774

cutlass::reference::device::detail::TensorFillDiagonalFunc::Params::Params

CUTLASS_HOST_DEVICE Params()

Default ctor.

Definition: device/tensor_fill.h:537

cutlass::Distribution::int_scale

int int_scale

Random values are cast to integer after scaling by this power of two.

Definition: distribution.h:67

cutlass::reference::device::detail::TensorFillDiagonalFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:519

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::T

TensorView::Element T

Scalar type.

Definition: device/tensor_fill.h:182

cutlass.h

Basic include for CUTLASS.

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::Params::Params

Params(TensorView view_=TensorView(), typename RandomFunc::Params random_=typename RandomFunc::Params())

Construction of Gaussian RNG functor.

Definition: device/tensor_fill.h:204

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::TensorFillRandomGaussianFunc

CUTLASS_DEVICE TensorFillRandomGaussianFunc(Params const &params)

Device-side initialization of RNG.

Definition: device/tensor_fill.h:226

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::operator()

CUTLASS_DEVICE void operator()(TensorCoord const &coord)

Writes the off-diagonal value to the element at the given coordinate only if it does not lie on the diagonal.

Definition: device/tensor_fill.h:809

cutlass::reference::device::detail::TensorFillLinearFunc::params

Params params

Parameters object.

Definition: device/tensor_fill.h:903

cutlass::reference::device::detail::TensorUpdateOffDiagonalFunc::Params::Params

Params(TensorView view_=TensorView(), Element other_=Element(0))

Constructs the parameters from a tensor view and an off-diagonal value.

Definition: device/tensor_fill.h:781

cutlass::reference::device::detail::TensorFillRandomUniformFunc::Params

Parameters structure.

Definition: device/tensor_fill.h:404

cutlass::reference::device::detail::TensorFillRandomGaussianFunc::Params::random

RandomFunc::Params random

Definition: device/tensor_fill.h:197

cutlass::reference::device::detail::RandomGaussianFunc::Params::stddev

FloatType stddev

Definition: device/tensor_fill.h:107

<!-- fragment --> <!-- contents --><!-- start footer part -->
<address class="footer"><small> Generated by Doxygen 1.8.11 </small></address>