docs/classcutlass_1_1epilogue_1_1threadblock_1_1InterleavedPredicatedTileIterator.html
| | CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers |
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK > Class Template Reference
#include <predicated_tile_iterator.h>
|
|
| struct | Mask |
| | Mask object. More...
|
| |
| struct | Params |
| |
|
|
| using | ThreadMap = ThreadMap_ |
| |
| using | Element = Element_ |
| |
| using | Layout = layout::ColumnMajorInterleaved< InterleavedK > |
| |
| using | TensorRef = TensorRef< Element, Layout > |
| |
| using | ConstTensorRef = typename TensorRef::ConstTensorRef |
| |
| using | Index = typename Layout::Index |
| |
| using | LongIndex = typename Layout::LongIndex |
| |
| using | TensorCoord = layout::PitchLinearCoord |
| |
| using | Fragment = Array< Element, ThreadMap::kElementsPerAccess > |
| | Fragment object. More...
|
| |
| using | AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess > |
| | Memory access size. More...
|
| |
|
|
| CUTLASS_DEVICE | InterleavedPredicatedTileIterator (Params const &params, Element *pointer, TensorCoord extent, int thread_idx, TensorCoord threadblock_offset) |
| | Constructor. More...
|
| |
| CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) |
| | Adds a pointer offset in units of Element. More...
|
| |
| CUTLASS_DEVICE void | load (Fragment &frag) |
| | Loads a fragment from memory. More...
|
| |
| CUTLASS_DEVICE void | store (Fragment const &frag) |
| | Stores a fragment to memory. More...
|
| |
| CUTLASS_HOST_DEVICE void | set_iteration_index (int iteration) |
| | Overrides the internal iteration index. More...
|
| |
| CUTLASS_HOST_DEVICE InterleavedPredicatedTileIterator & | operator++ () |
| | Advances to the next position to load or store. More...
|
| |
| CUTLASS_DEVICE void | clear_mask () |
| | Efficiently disables all accesses guarded by mask. More...
|
| |
| CUTLASS_DEVICE void | enable_mask () |
| | Efficiently enables all accesses guarded by mask. More...
|
| |
| CUTLASS_DEVICE void | get_mask (Mask &mask) |
| | Gets the mask. More...
|
| |
| CUTLASS_DEVICE void | set_mask (Mask const &mask) |
| |
|
| static int const | kElementsPerAccess = ThreadMap::kElementsPerAccess |
| |
| static int const | kThreads = ThreadMap::kThreads |
| |
| static int const | kIterations = ThreadMap::Iterations::kCount |
| |
Tile iterator used to load output tile from shared memory in epilogue.
Satisfies: ReadableTileIterator | InterleavedPredicatedTileIterator | ForwardTileIterator
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess> |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::ConstTensorRef = typename TensorRef::ConstTensorRef |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Element = Element_ |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Fragment = Array<Element, ThreadMap::kElementsPerAccess> |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Index = typename Layout::Index |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Layout = layout::ColumnMajorInterleaved<InterleavedK> |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::LongIndex = typename Layout::LongIndex |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::TensorCoord = layout::PitchLinearCoord |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::TensorRef = TensorRef<Element, Layout> |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
| using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::ThreadMap = ThreadMap_ |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::InterleavedPredicatedTileIterator | ( | Params const & | params, | | | | Element * | pointer, | | | | TensorCoord | extent, | | | | int | thread_idx, | | | | TensorCoord | threadblock_offset | | | ) | | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::add_pointer_offset | ( | LongIndex | pointer_offset | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::clear_mask | ( | | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::enable_mask | ( | | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::get_mask | ( | Mask & | mask | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::load | ( | Fragment & | frag | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_HOST_DEVICE InterleavedPredicatedTileIterator& cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::operator++ | ( | | ) | |
| inline |
Efficiently disables all accesses guarded by mask
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::set_iteration_index | ( | int | iteration | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::set_mask | ( | Mask const & | mask | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::store | ( | Fragment const & | frag | ) | |
| inline |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| int const cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::kElementsPerAccess = ThreadMap::kElementsPerAccess |
| static |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| int const cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::kIterations = ThreadMap::Iterations::kCount |
| static |
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
|
| int const cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::kThreads = ThreadMap::kThreads |
| static |
The documentation for this class was generated from the following file: predicated_tile_iterator.h
Generated by Doxygen 1.8.11