Skip to content

Commit ce8746d

Browse files
author
Abderrazek Zaafrani
committed Jan 19, 2018
[AArch64] Add ARMv8.2-A FP16 scalar intrinsics
https://reviews.llvm.org/D41792 llvm-svn: 323006
1 parent e93c63d commit ce8746d

File tree

14 files changed

+1482
-305
lines changed

14 files changed

+1482
-305
lines changed
 

‎clang/include/clang/Basic/BuiltinsNEON.def

+1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616

1717
#define GET_NEON_BUILTINS
1818
#include "clang/Basic/arm_neon.inc"
19+
#include "clang/Basic/arm_fp16.inc"
1920
#undef GET_NEON_BUILTINS
2021

2122
#undef BUILTIN

‎clang/include/clang/Basic/CMakeLists.txt

+4
Original file line number | Diff line number | Diff line change
@@ -46,3 +46,7 @@ clang_tablegen(arm_neon.inc -gen-arm-neon-sema
4646
-I ${CMAKE_CURRENT_SOURCE_DIR}/../../
4747
SOURCE arm_neon.td
4848
TARGET ClangARMNeon)
49+
# Generate arm_fp16.inc from arm_fp16.td, using the same -gen-arm-neon-sema
# option as the arm_neon.inc rule above; the rule is exposed as target
# ClangARMFP16.
clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
50+
-I ${CMAKE_CURRENT_SOURCE_DIR}/../../
51+
SOURCE arm_fp16.td
52+
TARGET ClangARMFP16)

‎clang/include/clang/Basic/arm_fp16.td

+131
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,131 @@
1+
//===--- arm_fp16.td - ARM FP16 compiler interface ------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file defines the TableGen definitions from which the ARM FP16 header
11+
// file will be generated.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
include "arm_neon_incl.td"
16+
17+
// ARMv8.2-A FP16 intrinsics.
// Each record below derives from SInst or IInst and passes three strings:
// the intrinsic name, a prototype string and a type-spec string. The two
// classes are not defined in this file (presumably they come from the included
// arm_neon_incl.td -- confirm there for the exact meaning of each prototype
// and type-spec character).
18+
// The let-block applies this ArchGuard string to every record up to the
// closing brace at the end of the file.
let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {
19+
20+
// Negate
21+
def VNEGSH : SInst<"vneg", "ss", "Sh">;
22+
23+
// Reciprocal/Sqrt
24+
def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">;
25+
def FSQRTSH : SInst<"vsqrt", "ss", "Sh">;
26+
def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
27+
28+
// Reciprocal Estimate
29+
def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">;
30+
31+
// Reciprocal Exponent
32+
def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">;
33+
34+
// Reciprocal Square Root Estimate
35+
def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
36+
37+
// Rounding
// NOTE(review): these record names carry an "S64" infix although every entry
// uses the "Sh" type spec like the rest of the file; looks like a naming
// leftover -- confirm whether record names are significant before renaming.
38+
def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">;
39+
def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">;
40+
def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">;
41+
def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">;
42+
def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">;
43+
def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">;
44+
def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">;
45+
46+
// Conversion
// The first prototype character tracks the result type named in the
// intrinsic: "$"/"I"/"L" pair with the _s16/_s32/_s64 names and "b"/"U"/"O"
// with _u16/_u32/_u64 (presumably the signed/unsigned scalar return codes --
// confirm against the prototype legend).
// vcvth_f16 is the only entry in this group with an integer type list
// ("silUsUiUl") and a "Y" return code (presumably integer -> half -- confirm).
47+
def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
48+
def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">;
49+
def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
50+
def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
51+
def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">;
52+
def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
53+
def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
54+
def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">;
55+
def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
56+
def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
57+
def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">;
58+
def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
59+
def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
60+
def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">;
61+
def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
62+
def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
63+
def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">;
64+
def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
65+
def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
66+
def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">;
67+
def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
68+
def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
69+
def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">;
70+
def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
71+
def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
72+
def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">;
73+
def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
74+
def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
75+
def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">;
76+
def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
77+
def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
78+
79+
// Conversion, _n_ variants: same prototypes as the matching vcvth_f16 / vcvt_*
// entries above plus a trailing "i" operand (presumably an immediate -- confirm).
def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
80+
def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
81+
def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
82+
def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
83+
def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
84+
def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
85+
def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
86+
87+
// Comparison
// Every comparison uses return code "b", the same code the _u16 conversions
// above use; the "z" variants (vceqz, vcgez, ...) take one operand, not two.
88+
def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">;
89+
def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">;
90+
def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">;
91+
def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">;
92+
def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">;
93+
def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">;
94+
def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">;
95+
def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">;
96+
def SCALAR_CMLTH : SInst<"vclt", "bss", "Sh">;
97+
def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">;
98+
99+
// Absolute Compare Mask Greater Than Or Equal / Less Than Or Equal
100+
def SCALAR_FACGEH : IInst<"vcage", "bss", "Sh">;
101+
def SCALAR_FACLEH : IInst<"vcale", "bss", "Sh">;
102+
103+
// Absolute Compare Mask Greater Than / Less Than
104+
def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">;
105+
def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">;
106+
107+
// Scalar Absolute Value
108+
def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">;
109+
110+
// Scalar Absolute Difference
111+
def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
112+
113+
// Add/Sub
// NOTE(review): record-name suffixes are inconsistent from here on (VADDSH
// ends in "SH", VSUBHS and the records below end in "HS"); confirm whether
// record names are significant before normalizing them.
114+
def VADDSH : SInst<"vadd", "sss", "Sh">;
115+
def VSUBHS : SInst<"vsub", "sss", "Sh">;
116+
117+
// Max/Min
118+
def VMAXHS : SInst<"vmax", "sss", "Sh">;
119+
def VMINHS : SInst<"vmin", "sss", "Sh">;
120+
def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">;
121+
def FMINNMHS : SInst<"vminnm", "sss", "Sh">;
122+
123+
// Multiplication/Division
124+
def VMULHS : SInst<"vmul", "sss", "Sh">;
125+
def MULXHS : SInst<"vmulx", "sss", "Sh">;
126+
def FDIVHS : SInst<"vdiv", "sss", "Sh">;
127+
128+
// Scalar fused multiply-add/subtract operations (same "Sh" type spec as the
// other scalar entries in this block; three operands, prototype "ssss")
129+
def VFMAHS : SInst<"vfma", "ssss", "Sh">;
130+
def VFMSHS : SInst<"vfms", "ssss", "Sh">;
131+
}

0 commit comments

Comments
 (0)