Back to Cutlass

CUTLASS: threadblock_swizzle.h Source File

docs/reduction_2threadblock__swizzle_8h_source.html

4.4.26.7 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

reduction/threadblock_swizzle.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

28 #pragma once

29 #include "cutlass/coord.h"

30

31 namespace cutlass {

32 namespace reduction {

33 struct DefaultBlockSwizzle {

35CUTLASS_HOST_DEVICE DefaultBlockSwizzle() {}

36

38 CUTLASS_DEVICE dim3 swizzle() { return blockIdx; }

39

41CUTLASS_HOST_DEVICE dim3 get_grid_layout(Coord<3> const &problem_size,

42Coord<3> const &OutputTile) {

43 assert(OutputTile[0] == 1 && OutputTile[1] == 1);

44 assert((problem_size[0] * problem_size[1] * problem_size[2]) % OutputTile[2] == 0);

45 dim3 grid;

46 grid.x = problem_size[0] * problem_size[1] * problem_size[2]

47 / OutputTile[2] ;

48return grid;

49 }

50

52 CUTLASS_DEVICE Coord<3> get_threadblock_offset(Coord<3> const &SubTile) {

53 assert(SubTile[0] == 1 && SubTile[1] == 1);

54 dim3 block = swizzle();

55Coord<3> threadblock_offset =

56make_Coord(0, 0, block.x * SubTile[2]);

57return threadblock_offset;

58 }

59 };

60 } // namespace reduction

61 } // namespace cutlass

cutlass::reduction::DefaultBlockSwizzle

Definition: reduction/threadblock_swizzle.h:33

cutlass

Definition: aligned_buffer.h:35

cutlass::reduction::DefaultBlockSwizzle::DefaultBlockSwizzle

CUTLASS_HOST_DEVICE DefaultBlockSwizzle()

Ctor.

Definition: reduction/threadblock_swizzle.h:35

coord.h

A Coord is a coordinate of arbitrary rank into a tensor or matrix.

cutlass::make_Coord

CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)

Helper to make a 1-element coordinate.

Definition: coord.h:387

cutlass::reduction::DefaultBlockSwizzle::swizzle

CUTLASS_DEVICE dim3 swizzle()

Swizzle the block index.

Definition: reduction/threadblock_swizzle.h:38

cutlass::reduction::DefaultBlockSwizzle::get_grid_layout

CUTLASS_HOST_DEVICE dim3 get_grid_layout(Coord< 3 > const &problem_size, Coord< 3 > const &OutputTile)

Definition: reduction/threadblock_swizzle.h:41

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::Coord< 3 >

cutlass::reduction::DefaultBlockSwizzle::get_threadblock_offset

CUTLASS_DEVICE Coord< 3 > get_threadblock_offset(Coord< 3 > const &SubTile)

Definition: reduction/threadblock_swizzle.h:52


Generated by Doxygen 1.8.11