Skip to content

Commit 745615c

Browse files
committed Apr 23, 2015
[AArch64] Add nvcast patterns for v4f16 and v8f16
Summary: Constant stores of f16 vectors can create NvCast nodes from various operand types to v4f16 or v8f16, depending on the patterns in the stored constants. This patch adds nvcast rules with v4f16 and v8f16 result values. AArch64TargetLowering::LowerBUILD_VECTOR (in AArch64ISelLowering.cpp) has the details on which constant patterns generate the nvcast nodes. Reviewers: jmolloy, srhines, ab | Subscribers: rengolin, aemerson, llvm-commits | Differential Revision: http://reviews.llvm.org/D9201 | llvm-svn: 235610
1 parent b188153 commit 745615c

File tree

2 files changed

+97
-0
lines changed

2 files changed

+97
-0
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstrInfo.td

+8
Original file line number | Diff line number | Diff line change
@@ -5128,22 +5128,26 @@ def : Pat<(trap), (BRK 1)>;
51285128
// Natural vector casts (64 bit)
// Each pattern below only retypes the value: the result dag is the same FPR64
// register as the source, with no instruction named in the output.
// The v4f16 result rows cover the v2i32, v4i16, v8i8 and f64 source types.
51295129
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
51305130
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
5131+
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
51315132
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
51325133
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
51335134
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
51345135

51355136
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
51365137
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
5138+
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
51375139
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
51385140
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
51395141

51405142
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
51415143
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
5144+
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
51425145
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
51435146
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
51445147

51455148
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
51465149
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
5150+
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
51475151
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
51485152
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
51495153
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -5158,22 +5162,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
51585162
// Natural vector casts (128 bit)
// Each pattern below only retypes the value: the result dag is the same FPR128
// register as the source, with no instruction named in the output.
// The v8f16 result rows cover the v4i32, v8i16, v16i8 and v2i64 source types.
51595163
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
51605164
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
5165+
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
51615166
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
51625167
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
51635168
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
51645169

51655170
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
51665171
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
5172+
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
51675173
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
51685174
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
51695175

51705176
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
51715177
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
5178+
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
51725179
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
51735180
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
51745181

51755182
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
51765183
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
5184+
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
51775185
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
51785186
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
51795187
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
2+
3+
; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
; The stored constant repeats the half pair (0x0000, 0x00AB). Read as 32-bit
; lanes (assuming the little-endian layout of this aarch64 triple) each lane is
; 0x00AB0000, i.e. 0xab shifted left by 16, which the expected movi .2s encodes.
4+
define void @nvcast_v2i32(<4 x half>* %a) #0 {
5+
; CHECK-LABEL: nvcast_v2i32:
6+
; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
7+
; CHECK-NEXT: str d[[REG]], [x0]
8+
; CHECK-NEXT: ret
9+
store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
10+
ret void
11+
}
12+
13+
14+
; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
; All four halves are 0x00AB, so the constant is a splat of the 16-bit value
; 0xab, which is the immediate of the expected movi .4h.
15+
define void @nvcast_v4i16(<4 x half>* %a) #0 {
16+
; CHECK-LABEL: nvcast_v4i16:
17+
; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
18+
; CHECK-NEXT: str d[[REG]], [x0]
19+
; CHECK-NEXT: ret
20+
store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
21+
ret void
22+
}
23+
24+
25+
; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
; Every half is 0xABAB, so every byte of the 64-bit constant is 0xab, which is
; the immediate of the expected byte-wise movi .8b.
26+
define void @nvcast_v8i8(<4 x half>* %a) #0 {
27+
; CHECK-LABEL: nvcast_v8i8:
28+
; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
29+
; CHECK-NEXT: str d[[REG]], [x0]
30+
; CHECK-NEXT: ret
31+
store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
32+
ret void
33+
}
34+
35+
36+
; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src)))
; The stored constant is all zeros; the expected code materializes it with a
; movi of zero into a d register and stores that register.
37+
define void @nvcast_f64(<4 x half>* %a) #0 {
38+
; CHECK-LABEL: nvcast_f64:
39+
; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
40+
; CHECK-NEXT: str d[[REG]], [x0]
41+
; CHECK-NEXT: ret
42+
store volatile <4 x half> zeroinitializer, <4 x half>* %a
43+
ret void
44+
}
45+
46+
; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
; The stored constant repeats the half pair (0x0000, 0x00AB). Read as 32-bit
; lanes (assuming the little-endian layout of this aarch64 triple) each lane is
; 0x00AB0000, i.e. 0xab shifted left by 16, which the expected movi .4s encodes.
47+
define void @nvcast_v4i32(<8 x half>* %a) #0 {
48+
; CHECK-LABEL: nvcast_v4i32:
49+
; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
50+
; CHECK-NEXT: str q[[REG]], [x0]
51+
; CHECK-NEXT: ret
52+
store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
53+
ret void
54+
}
55+
56+
57+
; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
; All eight halves are 0x00AB, so the constant is a splat of the 16-bit value
; 0xab, which is the immediate of the expected movi .8h.
58+
define void @nvcast_v8i16(<8 x half>* %a) #0 {
59+
; CHECK-LABEL: nvcast_v8i16:
60+
; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
61+
; CHECK-NEXT: str q[[REG]], [x0]
62+
; CHECK-NEXT: ret
63+
store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
64+
ret void
65+
}
66+
67+
68+
; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
; Every half is 0xABAB, so every byte of the 128-bit constant is 0xab, which is
; the immediate of the expected byte-wise movi .16b.
69+
define void @nvcast_v16i8(<8 x half>* %a) #0 {
70+
; CHECK-LABEL: nvcast_v16i8:
71+
; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
72+
; CHECK-NEXT: str q[[REG]], [x0]
73+
; CHECK-NEXT: ret
74+
store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
75+
ret void
76+
}
77+
78+
79+
; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src)))
; The stored constant is all zeros; the expected code materializes it with a
; movi .2d of zero and stores the resulting q register.
80+
define void @nvcast_v2i64(<8 x half>* %a) #0 {
81+
; CHECK-LABEL: nvcast_v2i64:
82+
; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
83+
; CHECK-NEXT: str q[[REG]], [x0]
84+
; CHECK-NEXT: ret
85+
store volatile <8 x half> zeroinitializer, <8 x half>* %a
86+
ret void
87+
}
88+
89+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)
Please sign in to comment.