CUTLASS: simd_sm60.h Source File - Cutlass

| | CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers |

simd_sm60.h

Go to the documentation of this file.

1 /***************************************************************************************************

3 *

4 * Redistribution and use in source and binary forms, with or without modification, are permitted

5 * provided that the following conditions are met:

6 * * Redistributions of source code must retain the above copyright notice, this list of

7 * conditions and the following disclaimer.

8 * * Redistributions in binary form must reproduce the above copyright notice, this list of

9 * conditions and the following disclaimer in the documentation and/or other materials

10 * provided with the distribution.

11 * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used

12 * to endorse or promote products derived from this software without specific prior written

13 * permission.

14 *

15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR

16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE

18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

19 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,

21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23 *

24 **************************************************************************************************/

29 #pragma once

31 #include "simd.h"

33 namespace cutlass {

34 namespace arch {

38 //

39 // Element-wise operators - specialized for half_t x 2

40 //

42 CUTLASS_HOST_DEVICE

43 template <>

44 Array<half_t, 2> operator*(Array<half_t, 2> const &a, Array<half_t, 2> const &b) {

45 Array<half_t, 2> d;

47// TODO

49return d;

50 }

52 CUTLASS_HOST_DEVICE

53 template <>

54 Array<half_t, 2> operator+(AArray<half_t, 2> const &a, Array<half_t, 2> const &b) {

55 Array<half_t, 2> d;

57// TODO

59return d;

60 }

62 CUTLASS_HOST_DEVICE

63 template <>

64 Array<half_t, 2> operator-(Array<half_t, 2> const &a, Array<half_t, 2> const &b) {

65 Array<T, N> d;

67// TODO

69return d;

70 }

75 CUTLASS_HOST_DEVICE

76 template <>

77 Array<half_t, 2> mac(Array<half_t, 2> const &a, Array<half_t, 2> const &b, Array<half_t, 2> const &c) {

78 Array<half_t, 2> d;

80// TODO

82return d;

83 }

88 CUTLASS_HOST_DEVICE

89 template <>

90 half_t dot(Array<half_t, 2> const &a, Array<half_t, 2> const &b, half_t accum) {

92// TODO

94return accum;

95 }

98 CUTLASS_HOST_DEVICE

99 template <>

100 float dot(Array<half_t, 2> const &a, Array<half_t, 2> const &b, float accum) {

101

102// TODO

103

104return accum;

105 }

106

108

109 } // namespace arch

110 } // namespace cutlass

cutlass

Definition: aligned_buffer.h:35

cutlass::half_t

IEEE half-precision floating-point type.

Definition: half.h:126

cutlass::arch::operator-

CUTLASS_HOST_DEVICE Array< T, N > operator-(Array< T, N > const &a, Array< T, N > const &b)

Definition: simd.h:67

cutlass::arch::operator*

CUTLASS_HOST_DEVICE Array< T, N > operator*(Array< T, N > const &a, Array< T, N > const &b)

Definition: simd.h:45

cutlass::arch::dot

CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)

Definition: simd.h:101

CUTLASS_HOST_DEVICE

#define CUTLASS_HOST_DEVICE

Definition: cutlass.h:89

cutlass::arch::operator+

CUTLASS_HOST_DEVICE Array< T, N > operator+(Array< T, N > const &a, Array< T, N > const &b)

Definition: simd.h:56

simd.h

Templates exposing SIMD operators.

cutlass::arch::mac

CUTLASS_HOST_DEVICE Array< T, N > mac(Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c)

Definition: simd.h:84

Generated by 1.8.11