Back to Cutlass

CUTLASS: simd_sm61.h Source File

docs/simd__sm61_8h_source.html

4.4.27.6 KB
Original Source

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

simd_sm61.h

Go to the documentation of this file.

1 /***************************************************************************************************

2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

30

31 #include "simd.h"

32

33 namespace cutlass {

34 namespace arch {

35

37

39 CUTLASS_HOST_DEVICE

40 template <>

41 int32_t dot(Array<int8_t, 4> const &a, Array<int8_t, 4> const &b, int32_t accum) {

42

43return accum;

44 }

45

47 CUTLASS_HOST_DEVICE

48 template <>

49 int32_t dot(Array<uint8_t, 4> const &a, Array<int8_t, 4> const &b, int32_t accum) {

50

51return accum;

52 }

53

55 CUTLASS_HOST_DEVICE

56 template <>

57 int32_t dot(Array<int8_t, 4> const &a, Array<uint8_t, 4> const &b, int32_t accum) {

58

59return accum;

60 }

61

63 CUTLASS_HOST_DEVICE

64 template <>

65 int32_t dot(Array<uint8_t, 4> const &a, Array<uint8_t, 4> const &b, int32_t accum) {

66

67return accum;

68 }

69

71

73 CUTLASS_HOST_DEVICE

74 template <>

75 int32_t dot(Array<int16_t, 2> const &a, Array<int8_t, 2> const &b, int32_t accum) {

76

77return accum;

78 }

79

81 CUTLASS_HOST_DEVICE

82 template <>

83 int32_t dot(Array<uint16_t, 2> const &a, Array<int8_t, 2> const &b, int32_t accum) {

84

85return accum;

86 }

87

89 CUTLASS_HOST_DEVICE

90 template <>

91 int32_t dot(Array<int16_t, 2> const &a, Array<uint8_t, 2> const &b, int32_t accum) {

92

93return accum;

94 }

95

97 CUTLASS_HOST_DEVICE

98 template <>

99 int32_t dot(Array<uint16_t, 2> const &a, Array<uint8_t, 2> const &b, int32_t accum) {

100

101return accum;

102 }

103

105

107 CUTLASS_HOST_DEVICE

108 template <>

109 int32_t dot(Array<int16_t, 2> const &a, Array<int16_t, 2> const &b, int32_t accum) {

110

111return accum;

112 }

113

115 CUTLASS_HOST_DEVICE

116 template <>

117 int32_t dot(Array<uint16_t, 2> const &a, Array<int16_t, 2> const &b, int32_t accum) {

118

119return accum;

120 }

121

123 CUTLASS_HOST_DEVICE

124 template <>

125 int32_t dot(Array<int16_t, 2> const &a, Array<uint16_t, 2> const &b, int32_t accum) {

126

127return accum;

128 }

129

131 CUTLASS_HOST_DEVICE

132 template <>

133 int32_t dot(Array<uint16_t, 2> const &a, Array<uint16_t, 2> const &b, int32_t accum) {

134

135return accum;

136 }

137

139

140 } // namespace arch

141 } // namespace cutlass

cutlass

Definition: aligned_buffer.h:35

cutlass::arch::dot

CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)

Definition: simd.h:101

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

simd.h

Templates exposing SIMD operators.


Generated by 1.8.11