Skip to content

Commit 74a0bd3

Browse files
committedApr 13, 2016
AArch64: Use a callee-saved register for swiftself parameters
It is very likely that the swiftself parameter is alive throughout most functions, so putting it into a callee-saved register should avoid spills for the callers with only a minimum amount of extra spills in the callees. Currently the generated code is correct but unnecessarily spills and reloads arguments passed in callee-saved registers; I will address this in upcoming patches. This also adds a missing check that, for tail calls, the preserved value of the caller must be the same as the callee's parameter. Differential Revision: http://reviews.llvm.org/D19007 llvm-svn: 266251
1 parent b10e893 commit 74a0bd3

File tree

4 files changed

+103
-36
lines changed

4 files changed

+103
-36
lines changed
 

‎llvm/lib/Target/AArch64/AArch64CallingConvention.td

+5-2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ def CC_AArch64_AAPCS : CallingConv<[
4545
// supported there.
4646
CCIfNest<CCAssignToReg<[X18]>>,
4747

48+
// Pass SwiftSelf in a callee saved register.
49+
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
50+
4851
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
4952

5053
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
@@ -128,8 +131,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
128131
// slot is 64-bit.
129132
CCIfByVal<CCPassByVal<8, 8>>,
130133

131-
// A SwiftSelf is passed in X9.
132-
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X9], [W9]>>>,
134+
// Pass SwiftSelf in a callee saved register.
135+
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
133136

134137
// A SwiftError is passed in X19.
135138
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,

‎llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -697,13 +697,13 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
697697
}
698698

699699
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
700-
if (Reg != AArch64::LR)
701-
return getKillRegState(true);
702-
703-
// LR maybe referred to later by an @llvm.returnaddress intrinsic.
704-
bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
705-
bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
706-
return getKillRegState(LRKill);
700+
// Do not set a kill flag on values that are also marked as live-in. This
701+
// happens with the @llvm.returnaddress intrinsic and with arguments passed in
702+
// callee saved registers.
703+
// Omitting the kill flags is conservatively correct even if the live-in
704+
// is not used after all.
705+
bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
706+
return getKillRegState(!IsLiveIn);
707707
}
708708

709709
static bool produceCompactUnwindFrame(MachineFunction &MF) {

‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+32-6
Original file line numberDiff line numberDiff line change
@@ -2875,10 +2875,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
28752875
CCAssignFnForCall(CallerCC, isVarArg)))
28762876
return false;
28772877
// The callee has to preserve all registers the caller needs to preserve.
2878+
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
2879+
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
28782880
if (!CCMatch) {
2879-
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
2880-
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
2881-
TRI->getCallPreservedMask(MF, CalleeCC)))
2881+
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2882+
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
28822883
return false;
28832884
}
28842885

@@ -2893,9 +2894,34 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
28932894

28942895
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
28952896

2896-
// If the stack arguments for this call would fit into our own save area then
2897-
// the call can be made tail.
2898-
return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
2897+
// If the stack arguments for this call do not fit into our own save area then
2898+
// the call cannot be made tail.
2899+
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
2900+
return false;
2901+
2902+
// Parameters passed in callee saved registers must have the same value in
2903+
// caller and callee.
2904+
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2905+
const CCValAssign &ArgLoc = ArgLocs[I];
2906+
if (!ArgLoc.isRegLoc())
2907+
continue;
2908+
unsigned Reg = ArgLoc.getLocReg();
2909+
// Only look at callee saved registers.
2910+
if (MachineOperand::clobbersPhysReg(CallerPreserved, Reg))
2911+
continue;
2912+
// Check that we pass the value used for the caller.
2913+
// (We look for a CopyFromReg reading a virtual register that is used
2914+
// for the function live-in value of register Reg)
2915+
SDValue Value = OutVals[I];
2916+
if (Value->getOpcode() != ISD::CopyFromReg)
2917+
return false;
2918+
unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
2919+
const MachineRegisterInfo &MRI = MF.getRegInfo();
2920+
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
2921+
return false;
2922+
}
2923+
2924+
return true;
28992925
}
29002926

29012927
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
+59-21
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,67 @@
1-
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-APPLE %s
2-
; RUN: llc -O0 -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-O0 %s
1+
; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
2+
; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
3+
; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
34

4-
; Parameter with swiftself should be allocated to x9.
5-
define void @check_swiftself(i32* swiftself %addr0) {
6-
; CHECK-APPLE-LABEL: check_swiftself:
7-
; CHECK-O0-LABEL: check_swiftself:
5+
; Parameter with swiftself should be allocated to x20.
6+
; CHECK-LABEL: swiftself_param:
7+
; CHECK: mov x0, x20
8+
; CHECK-NEXT: ret
9+
define i8* @swiftself_param(i8* swiftself %addr0) {
10+
ret i8 *%addr0
11+
}
812

9-
%val0 = load volatile i32, i32* %addr0
10-
; CHECK-APPLE: ldr w{{.*}}, [x9]
11-
; CHECK-O0: ldr w{{.*}}, [x9]
12-
ret void
13+
; Check that x20 is used to pass a swiftself argument.
14+
; CHECK-LABEL: call_swiftself:
15+
; CHECK: mov x20, x0
16+
; CHECK: bl {{_?}}swiftself_param
17+
; CHECK: ret
18+
define i8 *@call_swiftself(i8* %arg) {
19+
%res = call i8 *@swiftself_param(i8* swiftself %arg)
20+
ret i8 *%res
1321
}
1422

15-
@var8_3 = global i8 0
16-
declare void @take_swiftself(i8* swiftself %addr0)
23+
; x20 should be saved by the callee even if used for swiftself
24+
; CHECK-LABEL: swiftself_clobber:
25+
; CHECK: {{stp|str}} {{.*}}x20{{.*}}sp
26+
; ...
27+
; CHECK: {{ldp|ldr}} {{.*}}x20{{.*}}sp
28+
; CHECK: ret
29+
define i8 *@swiftself_clobber(i8* swiftself %addr0) {
30+
call void asm sideeffect "", "~{x20}"()
31+
ret i8 *%addr0
32+
}
1733

18-
define void @simple_args() {
19-
; CHECK-APPLE-LABEL: simple_args:
20-
; CHECK-O0-LABEL: simple_args:
34+
; Demonstrate that we do not need any movs when calling multiple functions
35+
; with swiftself argument.
36+
; CHECK-LABEL: swiftself_passthrough:
37+
; OPT-NOT: mov{{.*}}x20
38+
; OPT: bl {{_?}}swiftself_param
39+
; OPT-NOT: mov{{.*}}x20
40+
; OPT-NEXT: bl {{_?}}swiftself_param
41+
; OPT: ret
42+
define void @swiftself_passthrough(i8* swiftself %addr0) {
43+
call i8 *@swiftself_param(i8* swiftself %addr0)
44+
call i8 *@swiftself_param(i8* swiftself %addr0)
45+
ret void
46+
}
2147

22-
call void @take_swiftself(i8* @var8_3)
23-
; CHECK-APPLE: add x9,
24-
; CHECK-APPLE: bl {{_?}}take_swiftself
25-
; CHECK-O0: add x9,
26-
; CHECK-O0: bl {{_?}}take_swiftself
48+
; We can use a tail call if the callee swiftself is the same as the caller one.
49+
; CHECK-LABEL: swiftself_tail:
50+
; OPT: b {{_?}}swiftself_param
51+
; OPT-NOT: ret
52+
define i8* @swiftself_tail(i8* swiftself %addr0) {
53+
call void asm sideeffect "", "~{x20}"()
54+
%res = tail call i8* @swiftself_param(i8* swiftself %addr0)
55+
ret i8* %res
56+
}
2757

28-
ret void
58+
; We cannot use a tail call if the callee swiftself is not the same as the
59+
; caller one.
60+
; CHECK-LABEL: swiftself_notail:
61+
; CHECK: mov x20, x0
62+
; CHECK: bl {{_?}}swiftself_param
63+
; CHECK: ret
64+
define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind {
65+
%res = tail call i8* @swiftself_param(i8* swiftself %addr1)
66+
ret i8* %res
2967
}

0 commit comments

Comments
 (0)
Please sign in to comment.