Index: llvm/trunk/lib/Target/AArch64/AArch64CallingConvention.td
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallingConvention.td
+++ llvm/trunk/lib/Target/AArch64/AArch64CallingConvention.td
@@ -45,6 +45,9 @@
   // supported there.
   CCIfNest<CCAssignToReg<[X18]>>,

+  // Pass SwiftSelf in a callee saved register.
+  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+
   CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,

   // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
@@ -128,8 +131,8 @@
   // slot is 64-bit.
   CCIfByVal<CCPassByVal<8, 8>>,

-  // A SwiftSelf is passed in X9.
-  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X9], [W9]>>>,
+  // Pass SwiftSelf in a callee saved register.
+  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,

   // A SwiftError is passed in X19.
   CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
Index: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -697,13 +697,13 @@
 }

 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
-  if (Reg != AArch64::LR)
-    return getKillRegState(true);
-
-  // LR maybe referred to later by an @llvm.returnaddress intrinsic.
-  bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
-  bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
-  return getKillRegState(LRKill);
+  // Do not set a kill flag on values that are also marked as live-in. This
+  // happens with the @llvm.returnaddress intrinsic and with arguments passed
+  // in callee saved registers.
+  // Omitting the kill flags is conservatively correct even if the live-in
+  // is not used after all.
+  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
+  return getKillRegState(!IsLiveIn);
 }

 static bool produceCompactUnwindFrame(MachineFunction &MF) {
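The rewritten getPrologueDeath generalizes the old LR-only special case: any register that the prologue spills while it is also a function live-in must not receive a kill flag, which now covers swiftself arguments arriving in the callee-saved x20 as well as LR. A minimal IR sketch of the pre-existing LR case (the function name is hypothetical; @llvm.returnaddress is the real intrinsic that makes LR a live-in):

declare i8* @llvm.returnaddress(i32)

; LR is a live-in here because @llvm.returnaddress reads the return address;
; the prologue's store of LR therefore must not mark the register as killed.
define i8* @uses_return_address() {
  %ra = call i8* @llvm.returnaddress(i32 0)
  ret i8* %ra
}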
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2875,10 +2875,11 @@
                                   CCAssignFnForCall(CallerCC, isVarArg)))
     return false;

   // The callee has to preserve all registers the caller needs to preserve.
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
   if (!CCMatch) {
-    const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
-    if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
-                                 TRI->getCallPreservedMask(MF, CalleeCC)))
+    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
       return false;
   }

@@ -2893,9 +2894,34 @@

   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

-  // If the stack arguments for this call would fit into our own save area then
-  // the call can be made tail.
-  return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
+  // If the stack arguments for this call do not fit into our own save area
+  // then the call cannot be made tail.
+  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
+    return false;
+
+  // Parameters passed in callee saved registers must have the same value in
+  // caller and callee.
+  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+    const CCValAssign &ArgLoc = ArgLocs[I];
+    if (!ArgLoc.isRegLoc())
+      continue;
+    unsigned Reg = ArgLoc.getLocReg();
+    // Only look at callee saved registers.
+    if (MachineOperand::clobbersPhysReg(CallerPreserved, Reg))
+      continue;
+    // Check that we pass the value used for the caller.
+    // (We look for a CopyFromReg reading a virtual register that is used
+    //  for the function live-in value of register Reg)
+    SDValue Value = OutVals[I];
+    if (Value->getOpcode() != ISD::CopyFromReg)
+      return false;
+    unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
+      return false;
+  }
+
+  return true;
 }

 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
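The loop added above only admits a tail call when every argument landing in a callee-saved register carries the caller's own live-in value: the outgoing DAG node must be a CopyFromReg of the virtual register holding that live-in. For swiftself this means only the caller's own swiftself parameter can be forwarded through a tail call. A minimal IR sketch with hypothetical function names (the swiftself.ll changes below exercise the same behavior):

declare i8* @callee(i8* swiftself)

define i8* @forwards_other_value(i8* swiftself %self, i8* %other) {
  ; %other is not the caller's x20 live-in, so the tail marker is dropped
  ; and llc emits a normal bl followed by ret instead of a b.
  %r = tail call i8* @callee(i8* swiftself %other)
  ret i8* %r
}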
Index: llvm/trunk/test/CodeGen/AArch64/swiftself.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/swiftself.ll
+++ llvm/trunk/test/CodeGen/AArch64/swiftself.ll
@@ -1,29 +1,67 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-APPLE %s
-; RUN: llc -O0 -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
+
+; Parameter with swiftself should be allocated to x20.
+; CHECK-LABEL: swiftself_param:
+; CHECK: mov x0, x20
+; CHECK-NEXT: ret
+define i8* @swiftself_param(i8* swiftself %addr0) {
+  ret i8 *%addr0
+}
+
+; Check that x20 is used to pass a swiftself argument.
+; CHECK-LABEL: call_swiftself:
+; CHECK: mov x20, x0
+; CHECK: bl {{_?}}swiftself_param
+; CHECK: ret
+define i8 *@call_swiftself(i8* %arg) {
+  %res = call i8 *@swiftself_param(i8* swiftself %arg)
+  ret i8 *%res
+}

-; Parameter with swiftself should be allocated to x9.
-define void @check_swiftself(i32* swiftself %addr0) {
-; CHECK-APPLE-LABEL: check_swiftself:
-; CHECK-O0-LABEL: check_swiftself:
-
-  %val0 = load volatile i32, i32* %addr0
-; CHECK-APPLE: ldr w{{.*}}, [x9]
-; CHECK-O0: ldr w{{.*}}, [x9]
-  ret void
-}
-
-@var8_3 = global i8 0
-declare void @take_swiftself(i8* swiftself %addr0)
-
-define void @simple_args() {
-; CHECK-APPLE-LABEL: simple_args:
-; CHECK-O0-LABEL: simple_args:
-
-  call void @take_swiftself(i8* @var8_3)
-; CHECK-APPLE: add x9,
-; CHECK-APPLE: bl {{_?}}take_swiftself
-; CHECK-O0: add x9,
-; CHECK-O0: bl {{_?}}take_swiftself
+; x20 should be saved by the callee even if used for swiftself
+; CHECK-LABEL: swiftself_clobber:
+; CHECK: {{stp|str}} {{.*}}x20{{.*}}sp
+; ...
+; CHECK: {{ldp|ldr}} {{.*}}x20{{.*}}sp
+; CHECK: ret
+define i8 *@swiftself_clobber(i8* swiftself %addr0) {
+  call void asm sideeffect "", "~{x20}"()
+  ret i8 *%addr0
+}
+
+; Demonstrate that we do not need any movs when calling multiple functions
+; with swiftself argument.
+; CHECK-LABEL: swiftself_passthrough:
+; OPT-NOT: mov{{.*}}x20
+; OPT: bl {{_?}}swiftself_param
+; OPT-NOT: mov{{.*}}x20
+; OPT-NEXT: bl {{_?}}swiftself_param
+; OPT: ret
+define void @swiftself_passthrough(i8* swiftself %addr0) {
+  call i8 *@swiftself_param(i8* swiftself %addr0)
+  call i8 *@swiftself_param(i8* swiftself %addr0)
   ret void
 }
+
+; We can use a tail call if the callee swiftself is the same as the caller one.
+; CHECK-LABEL: swiftself_tail:
+; OPT: b {{_?}}swiftself_param
+; OPT-NOT: ret
+define i8* @swiftself_tail(i8* swiftself %addr0) {
+  call void asm sideeffect "", "~{x20}"()
+  %res = tail call i8* @swiftself_param(i8* swiftself %addr0)
+  ret i8* %res
+}
+
+; We can not use a tail call if the callee swiftself is not the same as the
+; caller one.
+; CHECK-LABEL: swiftself_notail:
+; CHECK: mov x20, x0
+; CHECK: bl {{_?}}swiftself_param
+; CHECK: ret
+define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind {
+  %res = tail call i8* @swiftself_param(i8* swiftself %addr1)
+  ret i8* %res
+}
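For local verification, the RUN lines above can be invoked outside of lit. A sketch assuming llc and FileCheck built from this revision, run from the llvm source directory (substitute your own test path; the command is the first RUN line with %s expanded):

llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - test/CodeGen/AArch64/swiftself.ll | FileCheck --check-prefix=CHECK --check-prefix=OPT test/CodeGen/AArch64/swiftself.ll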