Back to Cutlass

CUTLASS: simd.h Source File

docs/simd_8h_source.html

4.4.27.3 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

simd.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

30

31 #include "../array.h"

32 #include "../numeric_types.h"

33

34 namespace cutlass {

35 namespace arch {

36

38

39 //

40 // Element-wise operators

41 //

42

43 CUTLASS_HOST_DEVICE

44 template <typename T, int N>

45 Array<T, N> operator*(Array<T, N> const &a, Array<T, N> const &b) {

46 Array<T, N> d;

47CUTLASS_PRAGMA_UNROLL

48for (int i = 0; i < N; ++i) {

49 d[i] = a[i] * b[i];

50 }

51return d;

52 }

53

54 CUTLASS_HOST_DEVICE

55 template <typename T, int N>

56 Array<T, N> operator+(Array<T, N> const &a, Array<T, N> const &b) {

57 Array<T, N> d;

58CUTLASS_PRAGMA_UNROLL

59for (int i = 0; i < N; ++i) {

60 d[i] = a[i] + b[i];

61 }

62return d;

63 }

64

65 CUTLASS_HOST_DEVICE

66 template <typename T, int N>

67 Array<T, N> operator-(Array<T, N> const &a, Array<T, N> const &b) {

68 Array<T, N> d;

69CUTLASS_PRAGMA_UNROLL

70for (int i = 0; i < N; ++i) {

71 d[i] = a[i] - b[i];

72 }

73return d;

74 }

75

77

78 //

79 // Multiply-accumulate operators

80 //

81

82 CUTLASS_HOST_DEVICE

83 template <typename T, int N>

84 Array<T, N> mac(Array<T, N> const &a, Array<T, N> const &b, Array<T, N> const &c) {

85 Array<T, N> d;

86CUTLASS_PRAGMA_UNROLL

87for (int i = 0; i < N; ++i) {

88 d[i] = a[i] * b[i] + c;

89 }

90return d;

91 }

92

94

95 //

96 // Dot product operator

97 //

98

99 CUTLASS_HOST_DEVICE

100 template <typename Element, typename Accumulator, int N>

101 Accumulator dot(Array<T, N> const &a, Array<T, N> const &b, Accumulator accum) {

102CUTLASS_PRAGMA_UNROLL

103for (int i = 0; i < N; ++i) {

104 accum += a[i] * b[i];

105 }

106return accum;

107 }

108

110

111 } // namespace arch

112 } // namespace cutlass

113

115

116 #include "simd_sm60.h"

117 #include "simd_sm61.h"

118

cutlass

Definition: aligned_buffer.h:35

simd_sm61.h

Templates exposing SIMD operators for SM61.

CUTLASS_PRAGMA_UNROLL

#define CUTLASS_PRAGMA_UNROLL

Definition: cutlass.h:110

cutlass::arch::operator-

CUTLASS_HOST_DEVICE Array< T, N > operator-(Array< T, N > const &a, Array< T, N > const &b)

Definition: simd.h:67

cutlass::arch::operator*

CUTLASS_HOST_DEVICE Array< T, N > operator*(Array< T, N > const &a, Array< T, N > const &b)

Definition: simd.h:45

cutlass::arch::dot

CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)

Definition: simd.h:101

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::arch::operator+

CUTLASS_HOST_DEVICE Array< T, N > operator+(Array< T, N > const &a, Array< T, N > const &b)

Definition: simd.h:56

simd_sm60.h

Templates exposing SIMD operators for SM60.

cutlass::arch::mac

CUTLASS_HOST_DEVICE Array< T, N > mac(Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c)

Definition: simd.h:84


Generated by Doxygen 1.8.11