Back to Cutlass

CUTLASS: cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment > Class Template Reference

docs/classcutlass_1_1epilogue_1_1threadblock_1_1SharedLoadIterator.html

4.4.2 · 18.3 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

Public Types | Public Member Functions | Static Public Attributes | List of all members

cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment > Class Template Reference

#include <shared_load_iterator.h>

|

Public Types

| | using | ThreadMap = ThreadMap_ | | | | using | Shape = typename ThreadMap::Shape | | | | using | Element = Element_ | | | | using | Layout = layout::RowMajor | | | | using | TensorRef = TensorRef< Element, Layout > | | | | using | ConstTensorRef = typename TensorRef::ConstTensorRef | | | | using | Index = typename Layout::Index | | | | using | LongIndex = typename Layout::LongIndex | | | | using | TensorCoord = MatrixCoord | | | | using | Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess > | | | Fragment object. More...
| | | | using | AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment > | | | Memory access size. More...
| | |

|

Public Member Functions

| | CUTLASS_DEVICE | SharedLoadIterator (TensorRef ref, int thread_idx) | | | Constructor. More...
| | | | CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) | | | Adds a pointer offset in units of Element. More...
| | | | CUTLASS_DEVICE void | add_tile_offset (TensorCoord const &offset) | | | | CUTLASS_DEVICE void | load_with_pointer_offset (Fragment &frag, Index pointer_offset) | | | Loads a fragment from memory. More...
| | | | CUTLASS_DEVICE void | load (Fragment &frag) | | | Loads a fragment. More...
| | |

|

Static Public Attributes

| | static int const | kElementsPerAccess = ThreadMap::kElementsPerAccess | | | | static int const | kMinAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8 | | | | static int const | kAlignment = (MaxAlignment < kMinAlignment ? MaxAlignment : kMinAlignment) | | | | static int const | kThreads = ThreadMap::kThreads | | |

Detailed Description

template<typename ThreadMap_, typename Element_, int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8> class cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >

Tile iterator used to load output tile from shared memory in epilogue.

Satisfies: ReadableTileIterator

Member Typedef Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment> |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::ConstTensorRef = typename TensorRef::ConstTensorRef |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Element = Element_ |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess> |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Index = typename Layout::Index |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Layout = layout::RowMajor |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::LongIndex = typename Layout::LongIndex |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Shape = typename ThreadMap::Shape |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::TensorCoord = MatrixCoord |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::TensorRef = TensorRef<Element, Layout> |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::ThreadMap = ThreadMap_ |

Constructor & Destructor Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| CUTLASS_DEVICE cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::SharedLoadIterator | ( | TensorRef | ref, | | | | int | thread_idx | | | ) | | |

| inline |

Member Function Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::add_pointer_offset | ( | LongIndex | pointer_offset | ) | |

| inline |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| CUTLASS_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::add_tile_offset | ( | TensorCoord const & | offset | ) | |

| inline |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| CUTLASS_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::load | ( | Fragment & | frag | ) | |

| inline |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| CUTLASS_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::load_with_pointer_offset | ( | Fragment & | frag, | | | | Index | pointer_offset | | | ) | | |

| inline |

Member Data Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kAlignment = (MaxAlignment < kMinAlignment ? MaxAlignment : kMinAlignment) |

| static |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kElementsPerAccess = ThreadMap::kElementsPerAccess |

| static |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kMinAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8 |

| static |

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>

|

| int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kThreads = ThreadMap::kThreads |

| static |


The documentation for this class was generated from the following file:

  • [shared_load_iterator.h](shared__load__iterator_8h_source.html)

Generated by Doxygen 1.8.11