# Changeset View

# Standalone View

# cfe/trunk/lib/Headers/f16cintrin.h

Show All 15 Lines | |||||

16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||

17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||

18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||

19 | * THE SOFTWARE. | 19 | * THE SOFTWARE. | ||

20 | * | 20 | * | ||

21 | *===-----------------------------------------------------------------------=== | 21 | *===-----------------------------------------------------------------------=== | ||

22 | */ | 22 | */ | ||

23 | 23 | | |||

24 | #if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H | 24 | #if !defined __IMMINTRIN_H | ||

25 | #error "Never use <f16cintrin.h> directly; include <emmintrin.h> instead." | 25 | #error "Never use <f16cintrin.h> directly; include <immintrin.h> instead." | ||

26 | #endif | 26 | #endif | ||

27 | 27 | | |||

28 | #ifndef __F16CINTRIN_H | 28 | #ifndef __F16CINTRIN_H | ||

29 | #define __F16CINTRIN_H | 29 | #define __F16CINTRIN_H | ||

30 | 30 | | |||

31 | /* Define the default attributes for the functions in this file. */ | 31 | /* Define the default attributes for the functions in this file. */ | ||

32 | #define __DEFAULT_FN_ATTRS \ | 32 | #define __DEFAULT_FN_ATTRS \ | ||

33 | __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) | 33 | __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) | ||

34 | 34 | | |||

35 | /// Converts a 16-bit half-precision float value into a 32-bit float | 35 | /* The 256-bit versions of functions in f16cintrin.h. | ||

36 | /// value. | 36 | Intel documents these as being in immintrin.h, and | ||

37 | /// | 37 | they depend on typedefs from avxintrin.h. */ | ||

38 | /// \headerfile <x86intrin.h> | | |||

39 | /// | | |||

40 | /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. | | |||

41 | /// | | |||

42 | /// \param __a | | |||

43 | /// A 16-bit half-precision float value. | | |||

44 | /// \returns The converted 32-bit float value. | | |||

45 | static __inline float __DEFAULT_FN_ATTRS | | |||

46 | _cvtsh_ss(unsigned short __a) | | |||

47 | { | | |||

48 | __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; | | |||

49 | __v4sf r = __builtin_ia32_vcvtph2ps(v); | | |||

50 | return r[0]; | | |||

51 | } | | |||

52 | | ||||

53 | /// Converts a 32-bit single-precision float value to a 16-bit | | |||

54 | /// half-precision float value. | | |||

55 | /// | | |||

56 | /// \headerfile <x86intrin.h> | | |||

57 | /// | | |||

58 | /// \code | | |||

59 | /// unsigned short _cvtss_sh(float a, const int imm); | | |||

60 | /// \endcode | | |||

61 | /// | | |||

62 | /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. | | |||

63 | /// | | |||

64 | /// \param a | | |||

65 | /// A 32-bit single-precision float value to be converted to a 16-bit | | |||

66 | /// half-precision float value. | | |||

67 | /// \param imm | | |||

68 | /// An immediate value controlling rounding using bits [2:0]: \n | | |||

69 | /// 000: Nearest \n | | |||

70 | /// 001: Down \n | | |||

71 | /// 010: Up \n | | |||

72 | /// 011: Truncate \n | | |||

73 | /// 1XX: Use MXCSR.RC for rounding | | |||

74 | /// \returns The converted 16-bit half-precision float value. | | |||

75 | #define _cvtss_sh(a, imm) __extension__ ({ \ | | |||

76 | (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ | | |||

77 | (imm)))[0]); }) | | |||

78 | 38 | | |||

79 | /// Converts a 128-bit vector containing 32-bit float values into a | 39 | /// Converts a 256-bit vector of [8 x float] into a 128-bit vector | ||

80 | /// 128-bit vector containing 16-bit half-precision float values. | 40 | /// containing 16-bit half-precision float values. | ||

81 | /// | 41 | /// | ||

82 | /// \headerfile <x86intrin.h> | 42 | /// \headerfile <x86intrin.h> | ||

83 | /// | 43 | /// | ||

84 | /// \code | 44 | /// \code | ||

85 | /// __m128i _mm_cvtps_ph(__m128 a, const int imm); | 45 | /// __m128i _mm256_cvtps_ph(__m256 a, const int imm); | ||

86 | /// \endcode | 46 | /// \endcode | ||

87 | /// | 47 | /// | ||

88 | /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. | 48 | /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction. | ||

89 | /// | 49 | /// | ||

90 | /// \param a | 50 | /// \param a | ||

91 | /// A 128-bit vector containing 32-bit float values. | 51 | /// A 256-bit vector containing 32-bit single-precision float values to be | ||

52 | /// converted to 16-bit half-precision float values. | ||||

92 | /// \param imm | 53 | /// \param imm | ||

93 | /// An immediate value controlling rounding using bits [2:0]: \n | 54 | /// An immediate value controlling rounding using bits [2:0]: \n | ||

94 | /// 000: Nearest \n | 55 | /// 000: Nearest \n | ||

95 | /// 001: Down \n | 56 | /// 001: Down \n | ||

96 | /// 010: Up \n | 57 | /// 010: Up \n | ||

97 | /// 011: Truncate \n | 58 | /// 011: Truncate \n | ||

98 | /// 1XX: Use MXCSR.RC for rounding | 59 | /// 1XX: Use MXCSR.RC for rounding | ||

99 | /// \returns A 128-bit vector containing converted 16-bit half-precision float | 60 | /// \returns A 128-bit vector containing the converted 16-bit half-precision | ||

100 | /// values. The lower 64 bits are used to store the converted 16-bit | 61 | /// float values. | ||

101 | /// half-precision floating-point values. | 62 | #define _mm256_cvtps_ph(a, imm) __extension__ ({ \ | ||

102 | #define _mm_cvtps_ph(a, imm) __extension__ ({ \ | 63 | (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); }) | ||

103 | (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); }) | | |||

104 | 64 | | |||

105 | /// Converts a 128-bit vector containing 16-bit half-precision float | 65 | /// Converts a 128-bit vector containing 16-bit half-precision float | ||

106 | /// values into a 128-bit vector containing 32-bit float values. | 66 | /// values into a 256-bit vector of [8 x float]. | ||

107 | /// | 67 | /// | ||

108 | /// \headerfile <x86intrin.h> | 68 | /// \headerfile <x86intrin.h> | ||

109 | /// | 69 | /// | ||

110 | /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. | 70 | /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction. | ||

111 | /// | 71 | /// | ||

112 | /// \param __a | 72 | /// \param __a | ||

113 | /// A 128-bit vector containing 16-bit half-precision float values. The lower | 73 | /// A 128-bit vector containing 16-bit half-precision float values to be | ||

114 | /// 64 bits are used in the conversion. | 74 | /// converted to 32-bit single-precision float values. | ||

115 | /// \returns A 128-bit vector of [4 x float] containing converted float values. | 75 | /// \returns A vector of [8 x float] containing the converted 32-bit | ||

116 | static __inline __m128 __DEFAULT_FN_ATTRS | 76 | /// single-precision float values. | ||

117 | _mm_cvtph_ps(__m128i __a) | 77 | static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) | ||

78 | _mm256_cvtph_ps(__m128i __a) | ||||

118 | { | 79 | { | ||

119 | return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); | 80 | return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); | ||

120 | } | 81 | } | ||

121 | 82 | | |||

122 | #undef __DEFAULT_FN_ATTRS | 83 | #undef __DEFAULT_FN_ATTRS | ||

123 | 84 | | |||

124 | #endif /* __F16CINTRIN_H */ | 85 | #endif /* __F16CINTRIN_H */ |