Skip to content

Commit e0ccdc6

Browse files
committed Oct 28, 2015
ARM: add backend support for the ABI used in WatchOS
At the LLVM level this ABI is essentially a minimal modification of AAPCS to support 16-byte alignment for vector types and the stack. llvm-svn: 251570
1 parent 2d4d161 commit e0ccdc6

9 files changed

+188
-16
lines changed
 

‎llvm/lib/Target/ARM/ARMCallingConv.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,9 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
199199

200200
// Try to allocate a contiguous block of registers, each of the correct
201201
// size to hold one member.
202-
unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
202+
auto &DL = State.getMachineFunction().getDataLayout();
203+
unsigned StackAlign = DL.getStackAlignment();
204+
unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
203205

204206
ArrayRef<uint16_t> RegList;
205207
switch (LocVT.SimpleTy) {

‎llvm/lib/Target/ARM/ARMCallingConv.td

+2
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
125125
CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
126126
CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
127127
CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
128+
CCIfType<[v2f64], CCIfAlign<"16",
129+
CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
128130
CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
129131
]>;
130132

‎llvm/lib/Target/ARM/ARMFrameLowering.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/CodeGen/MachineModuleInfo.h"
2424
#include "llvm/CodeGen/MachineRegisterInfo.h"
2525
#include "llvm/CodeGen/RegisterScavenging.h"
26+
#include "llvm/MC/MCAsmInfo.h"
2627
#include "llvm/IR/CallingConv.h"
2728
#include "llvm/IR/Function.h"
2829
#include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
5859
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
5960

6061
// iOS requires FP not to be clobbered for backtracing purpose.
61-
if (STI.isTargetIOS())
62+
if (STI.isTargetIOS() || STI.isTargetWatchOS())
6263
return true;
6364

6465
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1073,7 +1074,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
10731074
// slot offsets can be wrong. The offset for d8 will always be correct.
10741075
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
10751076
unsigned DNum = CSI[i].getReg() - ARM::D8;
1076-
if (DNum >= 8)
1077+
if (DNum > NumAlignedDPRCS2Regs - 1)
10771078
continue;
10781079
int FI = CSI[i].getFrameIdx();
10791080
// The even-numbered registers will be 16-byte aligned, the odd-numbered

‎llvm/lib/Target/ARM/ARMSubtarget.cpp

+20-7
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,18 @@ void ARMSubtarget::initializeEnvironment() {
155155

156156
void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
157157
if (CPUString.empty()) {
158-
if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
159-
// Default to the Swift CPU when targeting armv7s/thumbv7s.
160-
CPUString = "swift";
161-
else
162-
CPUString = "generic";
158+
CPUString = "generic";
159+
160+
if (isTargetDarwin()) {
161+
StringRef ArchName = TargetTriple.getArchName();
162+
if (ArchName.endswith("v7s"))
163+
// Default to the Swift CPU when targeting armv7s/thumbv7s.
164+
CPUString = "swift";
165+
else if (ArchName.endswith("v7k"))
166+
// Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
167+
// ARMv7k does not use SjLj exception handling.
168+
CPUString = "cortex-a7";
169+
}
163170
}
164171

165172
// Insert the architecture feature derived from the target triple into the
@@ -190,7 +197,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
190197

191198
if (isAAPCS_ABI())
192199
stackAlignment = 8;
193-
if (isTargetNaCl())
200+
if (isTargetNaCl() || isAAPCS16_ABI())
194201
stackAlignment = 16;
195202

196203
// FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
@@ -241,8 +248,14 @@ bool ARMSubtarget::isAPCS_ABI() const {
241248
}
242249
bool ARMSubtarget::isAAPCS_ABI() const {
243250
assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
244-
return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
251+
return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
252+
TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
245253
}
254+
bool ARMSubtarget::isAAPCS16_ABI() const {
255+
assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
256+
return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
257+
}
258+
246259

247260
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
248261
bool

‎llvm/lib/Target/ARM/ARMSubtarget.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
354354

355355
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
356356
bool isTargetIOS() const { return TargetTriple.isiOS(); }
357+
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
357358
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
358359
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
359360
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -391,12 +392,13 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
391392
// FIXME: this is invalid for WindowsCE
392393
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
393394
TargetTriple.getEnvironment() == Triple::EABIHF ||
394-
isTargetWindows();
395+
isTargetWindows() || isAAPCS16_ABI();
395396
}
396397
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
397398

398399
bool isAPCS_ABI() const;
399400
bool isAAPCS_ABI() const;
401+
bool isAAPCS16_ABI() const;
400402

401403
bool useSoftFloat() const { return UseSoftFloat; }
402404
bool isThumb() const { return InThumbMode; }

‎llvm/lib/Target/ARM/ARMTargetMachine.cpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
6666
static ARMBaseTargetMachine::ARMABI
6767
computeTargetABI(const Triple &TT, StringRef CPU,
6868
const TargetOptions &Options) {
69-
if (Options.MCOptions.getABIName().startswith("aapcs"))
69+
if (Options.MCOptions.getABIName() == "aapcs16")
70+
return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
71+
else if (Options.MCOptions.getABIName().startswith("aapcs"))
7072
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
7173
else if (Options.MCOptions.getABIName().startswith("apcs"))
7274
return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
8385
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
8486
CPU.startswith("cortex-m")) {
8587
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
88+
} else if (TT.isWatchOS()) {
89+
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
8690
} else {
8791
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
8892
}
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
145149
// to 64. We always try to give them natural alignment.
146150
if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
147151
Ret += "-v64:32:64-v128:32:128";
148-
else
152+
else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
149153
Ret += "-v128:64:128";
150154

151155
// Try to align aggregates to 32 bits (the default is 64 bits, which has no
@@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
157161

158162
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
159163
// aligned everywhere else.
160-
if (TT.isOSNaCl())
164+
if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
161165
Ret += "-S128";
162166
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
163167
Ret += "-S64";

‎llvm/lib/Target/ARM/ARMTargetMachine.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
2626
enum ARMABI {
2727
ARM_ABI_UNKNOWN,
2828
ARM_ABI_APCS,
29-
ARM_ABI_AAPCS // ARM EABI
29+
ARM_ABI_AAPCS, // ARM EABI
30+
ARM_ABI_AAPCS16
3031
} TargetABI;
3132

3233
protected:

‎llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
196196
else
197197
// Use CPU to figure out the exact features.
198198
ARMArchFeature = "+v7";
199-
break; case Triple::ARMSubArch_v7:
199+
break;
200+
case Triple::ARMSubArch_v7:
200201
// v7 CPUs have lots of different feature sets. If no CPU is specified,
201202
// then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
202203
// the "minimum" feature set and use CPU string to figure out the exact
+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
2+
3+
%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
4+
5+
define i32 @test_i64_align() {
6+
; CHECK-LABEL: test_i64_align:
7+
; CHECK: movs r0, #8
8+
ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
9+
}
10+
11+
define i32 @test_f64_align() {
12+
; CHECK-LABEL: test_f64_align:
13+
; CHECK: movs r0, #24
14+
ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
15+
}
16+
17+
define i32 @test_v2f32_align() {
18+
; CHECK-LABEL: test_v2f32_align:
19+
; CHECK: movs r0, #40
20+
ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
21+
}
22+
23+
define i32 @test_v4f32_align() {
24+
; CHECK-LABEL: test_v4f32_align:
25+
; CHECK: movs r0, #64
26+
ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
27+
}
28+
29+
; Key point here is than an extra register has to be saved so that the DPRs end
30+
; up in an aligned location (as prologue/epilogue inserter had calculated).
31+
define void @test_dpr_unwind_align() {
32+
; CHECK-LABEL: test_dpr_unwind_align:
33+
; CHECK: push {r5, r6, r7, lr}
34+
; CHECK-NOT: sub sp
35+
; CHECK: vpush {d8, d9}
36+
; [...]
37+
; CHECK: bl _test_i64_align
38+
; CHECK-NOT: add sp,
39+
; CHECK: vpop {d8, d9}
40+
; CHECK-NOT: add sp,
41+
; CHECK: pop {r5, r6, r7, pc}
42+
43+
call void asm sideeffect "", "~{r6},~{d8},~{d9}"()
44+
45+
; Whatever
46+
call i32 @test_i64_align()
47+
ret void
48+
}
49+
50+
; This time, there's no viable way to tack CS-registers onto the list: a real SP
51+
; adjustment needs to be performed to put d8 and d9 where they should be.
52+
define void @test_dpr_unwind_align_manually() {
53+
; CHECK-LABEL: test_dpr_unwind_align_manually:
54+
; CHECK: push {r4, r5, r6, r7, lr}
55+
; CHECK-NOT: sub sp
56+
; CHECK: push.w {r8, r11}
57+
; CHECK: sub sp, #4
58+
; CHECK: vpush {d8, d9}
59+
; [...]
60+
; CHECK: bl _test_i64_align
61+
; CHECK-NOT: add sp,
62+
; CHECK: vpop {d8, d9}
63+
; CHECK: add sp, #4
64+
; CHECK: pop.w {r8, r11}
65+
; CHECK: pop {r4, r5, r6, r7, pc}
66+
67+
call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()
68+
69+
; Whatever
70+
call i32 @test_i64_align()
71+
ret void
72+
}
73+
74+
; If there's only a CS1 area, the sub should be in the right place:
75+
define void @test_dpr_unwind_align_just_cs1() {
76+
; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
77+
; CHECK: push {r4, r5, r6, r7, lr}
78+
; CHECK: sub sp, #4
79+
; CHECK: vpush {d8, d9}
80+
; CHECK: sub sp, #8
81+
; [...]
82+
; CHECK: bl _test_i64_align
83+
; CHECK: add sp, #8
84+
; CHECK: vpop {d8, d9}
85+
; CHECK: add sp, #4
86+
; CHECK: pop {r4, r5, r6, r7, pc}
87+
88+
call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()
89+
90+
; Whatever
91+
call i32 @test_i64_align()
92+
ret void
93+
}
94+
95+
; If there are no DPRs, we shouldn't try to align the stack in stages anyway
96+
define void @test_dpr_unwind_align_no_dprs() {
97+
; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
98+
; CHECK: push {r4, r5, r6, r7, lr}
99+
; CHECK: sub sp, #12
100+
; [...]
101+
; CHECK: bl _test_i64_align
102+
; CHECK: add sp, #12
103+
; CHECK: pop {r4, r5, r6, r7, pc}
104+
105+
call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()
106+
107+
; Whatever
108+
call i32 @test_i64_align()
109+
ret void
110+
}
111+
112+
; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
113+
; the stack.
114+
define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
115+
; CHECK-LABEL: test_v128_stack_pass:
116+
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
117+
; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]
118+
119+
ret <4 x float> %in
120+
}
121+
122+
declare void @varargs(i32, ...)
123+
124+
; When varargs are enabled, we go down a different route. Still want 128-bit
125+
; alignment though.
126+
define void @test_v128_stack_pass_varargs(<4 x float> %in) {
127+
; CHECK-LABEL: test_v128_stack_pass_varargs:
128+
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
129+
; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]
130+
131+
call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
132+
ret void
133+
}
134+
135+
; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
136+
; a single pointer), 64-bit quantities must be passed in an even-odd register
; pair (r2/r3 here) or an 8-byte aligned stack slot.
137+
define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
138+
; CHECK-LABEL: test_64bit_gpr_align:
139+
; CHECK: ldr [[RHS:r[0-9]+]], [sp]
140+
; CHECK: adds r0, [[RHS]], r2
141+
; CHECK: adc r1, r3, #0
142+
143+
%ext = zext i32 %sp to i64
144+
%sum = add i64 %ext, %r2_r3
145+
ret i64 %sum
146+
}

0 commit comments

Comments
 (0)
Please sign in to comment.