Index: llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -83,6 +83,13 @@
 
     /// True if the call must be tail call optimized.
     bool IsMustTailCall = false;
+
+    /// True if the call passes all target-independent checks for tail call
+    /// optimization.
+    bool IsTailCall = false;
+
+    /// True if the call is to a vararg function.
+    bool IsVarArg = false;
   };
 
   /// Argument handling is mostly uniform between the four places that
Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -63,7 +63,9 @@
   Info.CallConv = CS.getCallingConv();
   Info.SwiftErrorVReg = SwiftErrorVReg;
   Info.IsMustTailCall = CS.isMustTailCall();
-
+  Info.IsTailCall = CS.isTailCall() &&
+                    isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+  Info.IsVarArg = CS.getFunctionType()->isVarArg();
   return lowerCall(MIRBuilder, Info);
 }
 
Index: llvm/lib/Target/AArch64/AArch64CallLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -43,6 +43,10 @@
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
 
+  /// Returns true if the call can be lowered as a tail call.
+  bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
+                                         CallLoweringInfo &Info) const;
+
   bool supportSwiftError() const override { return true; }
 
 private:
Index: llvm/lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -232,6 +232,17 @@
                                       const Value *Val,
                                       ArrayRef<Register> VRegs,
                                       Register SwiftErrorVReg) const {
+
+  // Check if a tail call was lowered in this block. If so, we already handled
+  // the terminator.
+  MachineFunction &MF = MIRBuilder.getMF();
+  if (MF.getFrameInfo().hasTailCall()) {
+    MachineBasicBlock &MBB = MIRBuilder.getMBB();
+    auto FirstTerm = MBB.getFirstTerminator();
+    if (FirstTerm != MBB.end() && FirstTerm->isCall())
+      return true;
+  }
+
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
@@ -402,6 +413,131 @@
   return true;
 }
 
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+  return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::C:
+  case CallingConv::PreserveMost:
+  case CallingConv::Swift:
+    return true;
+  default:
+    return canGuaranteeTCO(CC);
+  }
+}
+
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
+  CallingConv::ID CalleeCC = Info.CallConv;
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &CallerF = MF.getFunction();
+  CallingConv::ID CallerCC = CallerF.getCallingConv();
+  bool CCMatch = CallerCC == CalleeCC;
+
+  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+  if (!mayTailCallThisCC(CalleeCC)) {
+    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+    return false;
+  }
+
+  if (Info.IsVarArg) {
+    LLVM_DEBUG(dbgs() << "... Tail calling varargs not supported yet.\n");
+    return false;
+  }
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call. Working around this *is* possible (see
+  // X86).
+  //
+  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+  // it?
+  //
+  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+  // In this case, it is necessary to save/restore X0 in the callee. Tail
+  // call opt interferes with this. So we disable tail call opt when the
+  // caller has an argument with "inreg" attribute.
+  //
+  // FIXME: Check whether the callee also has an "inreg" argument.
+  if (any_of(CallerF.args(), [](const Argument &A) {
+        return A.hasByValAttr() || A.hasInRegAttr();
+      })) {
+    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval or "
+                         "inreg arguments.\n");
+    return false;
+  }
+
+  // Externally-defined functions with weak linkage should not be
+  // tail-called on AArch64 when the OS does not support dynamic
+  // pre-emption of symbols, as the AAELF spec requires normal calls
+  // to undefined weak functions to be replaced with a NOP or jump to the
+  // next instruction. The behaviour of branch instructions in this
+  // situation (as used for tail calls) is implementation-defined, so we
+  // cannot rely on the linker replacing the tail call with a return.
+  if (Info.Callee.isGlobal()) {
+    const GlobalValue *GV = Info.Callee.getGlobal();
+    const Triple &TT = MF.getTarget().getTargetTriple();
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+         TT.isOSBinFormatMachO())) {
+      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+                           "with weak linkage for this OS.\n");
+      return false;
+    }
+  }
+
+  // If we have -tailcallopt and matching CCs, at this point, we could return
+  // true. However, we don't have full tail call support yet. So, continue
+  // checking. We want to emit a sibling call.
+
+  // I want anyone implementing a new calling convention to think long and hard
+  // about this assert.
+  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+         "Unexpected variadic calling convention");
+
+  // For now, only support the case where the calling conventions match.
+  if (!CCMatch) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call with mismatched calling conventions yet.\n");
+    return false;
+  }
+
+  // For now, only handle callees that take no arguments.
+  if (!Info.OrigArgs.empty()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call callees with outgoing arguments yet.\n");
+    return false;
+  }
+
+  LLVM_DEBUG(
+      dbgs() << "... Call is eligible for tail call optimization.\n");
+  return true;
+}
+
+/// Pick the call opcode: BL/BLR for an ordinary direct/indirect call, or the
+/// matching TCRETURN pseudo when the call is being lowered as a tail call.
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+                              bool IsTailCall) {
+  if (!IsTailCall)
+    return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+  if (!IsIndirect)
+    return AArch64::TCRETURNdi;
+
+  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+  // x16 or x17.
+  if (CallerF.hasFnAttribute("branch-target-enforcement"))
+    return AArch64::TCRETURNriBTI;
+
+  return AArch64::TCRETURNri;
+}
+
 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                     CallLoweringInfo &Info) const {
   MachineFunction &MF = MIRBuilder.getMF();
@@ -410,6 +546,7 @@
   auto &DL = F.getParent()->getDataLayout();
 
   if (Info.IsMustTailCall) {
+    // TODO: Until we lower all tail calls, we should fall back on this.
     LLVM_DEBUG(dbgs() << "Cannot lower musttail calls yet.\n");
     return false;
   }
@@ -422,6 +559,11 @@
       SplitArgs.back().Flags.setZExt();
   }
 
+  bool IsSibCall =
+      Info.IsTailCall && isEligibleForTailCallOptimization(MIRBuilder, Info);
+  if (IsSibCall)
+    MF.getFrameInfo().setHasTailCall();
+
   // Find out which ABI gets to decide where things go.
   const AArch64TargetLowering &TLI = *getTLI();
   CCAssignFn *AssignFnFixed =
@@ -429,14 +571,33 @@
   CCAssignFn *AssignFnVarArg =
       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
 
-  auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+  // If we have a sibling call, then we don't have to adjust the stack.
+  // Otherwise, we need to adjust it.
+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
-  auto MIB = MIRBuilder.buildInstrNoInsert(Info.Callee.isReg() ? AArch64::BLR
-                                                               : AArch64::BL);
+  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), IsSibCall);
+
+  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+  // register class. Until we can do that, we should fall back here.
+  if (Opc == AArch64::TCRETURNriBTI) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+    return false;
+  }
+
+  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
+  // Add the byte offset for the tail call. We only have sibling calls, so this
+  // is always 0.
+  // TODO: Handle tail calls where we will have a different value here.
+  if (IsSibCall)
+    MIB.addImm(0);
+
   // Tell the call which registers are clobbered.
   auto TRI = MF.getSubtarget().getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
@@ -485,10 +646,13 @@
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }
 
-  CallSeqStart.addImm(Handler.StackSize).addImm(0);
-  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
-      .addImm(Handler.StackSize)
-      .addImm(0);
+  if (!IsSibCall) {
+    // If we aren't sibcalling, we need to move the stack.
+    CallSeqStart.addImm(Handler.StackSize).addImm(0);
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+        .addImm(Handler.StackSize)
+        .addImm(0);
+  }
 
   return true;
 }
Index: llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=DARWIN,COMMON
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=WINDOWS,COMMON
+
+declare void @simple_fn()
+define void @tail_call() {
+  ; COMMON-LABEL: name: tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: TCRETURNdi @simple_fn, csr_aarch64_aapcs, implicit $sp
+  tail call void @simple_fn()
+  ret void
+}
+
+; We should get a TCRETURNri here.
+; FIXME: We don't need the COPY.
+define void @indirect_tail_call(void()* %func) {
+  ; COMMON-LABEL: name: indirect_tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $x0
+  ; COMMON: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; COMMON: TCRETURNri [[COPY]](p0), csr_aarch64_aapcs, implicit $sp
+  tail call void %func()
+  ret void
+}
+
+declare void @outgoing_args_fn(i32)
+; Right now, callees with outgoing arguments should not be tail called.
+; TODO: Support this.
+define void @test_outgoing_args(i32 %a) {
+  ; COMMON-LABEL: name: test_outgoing_args
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $w0
+  ; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $w0 = COPY [[COPY]](s32)
+  ; COMMON: BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void @outgoing_args_fn(i32 %a)
+  ret void
+}
+
+; Right now, this should not be tail called.
+; TODO: Support this.
+declare void @varargs(i32, double, i64, ...)
+define void @test_varargs() {
+  ; COMMON-LABEL: name: test_varargs
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; COMMON: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; COMMON: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: $w0 = COPY [[C]](s32)
+  ; COMMON: $d0 = COPY [[C1]](s64)
+  ; COMMON: $x1 = COPY [[C2]](s64)
+  ; COMMON: BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+  ret void
+}
+
+; Unsupported calling convention for tail calls. Make sure we never tail call
+; it.
+declare ghccc void @bad_call_conv_fn()
+define void @test_bad_call_conv() {
+  ; COMMON-LABEL: name: test_bad_call_conv
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @bad_call_conv_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call ghccc void @bad_call_conv_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has byval arguments.
+define void @test_byval(i8* byval %ptr) {
+  ; COMMON-LABEL: name: test_byval
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; COMMON: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has inreg arguments.
+define void @test_inreg(i8* inreg %ptr) {
+  ; COMMON-LABEL: name: test_inreg
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: liveins: $x0
+  ; COMMON: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the OS doesn't support it. Windows supports this,
+; so we should be able to tail call there.
+declare extern_weak void @extern_weak_fn()
+define void @test_extern_weak() {
+  ; DARWIN-LABEL: name: test_extern_weak
+  ; DARWIN: bb.1 (%ir-block.0):
+  ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN: BL @extern_weak_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN: RET_ReallyLR
+  ; WINDOWS-LABEL: name: test_extern_weak
+  ; WINDOWS: bb.1 (%ir-block.0):
+  ; WINDOWS: TCRETURNdi @extern_weak_fn, csr_aarch64_aapcs, implicit $sp
+  tail call void @extern_weak_fn()
+  ret void
+}
+
+; Right now, mismatched calling conventions should not be tail called.
+; TODO: Support this.
+declare fastcc void @fast_fn()
+define void @test_mismatched_caller() {
+  ; COMMON-LABEL: name: test_mismatched_caller
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: BL @fast_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON: RET_ReallyLR
+  tail call fastcc void @fast_fn()
+  ret void
+}
Index: llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
===================================================================
--- llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
+++ llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -mtriple aarch64--none-eabi -mattr=+bti < %s | FileCheck %s
+; RUN: llc -mtriple aarch64--none-eabi -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mattr=+bti %s -verify-machineinstrs -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK
+
+; FALLBACK: remark: <unknown>:0:0: unable to translate instruction: call: ' tail call void %p()' (in function: bti_enabled)
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-arm-none-eabi"
Index: llvm/test/CodeGen/AArch64/dllimport.ll
===================================================================
--- llvm/test/CodeGen/AArch64/dllimport.ll
+++ llvm/test/CodeGen/AArch64/dllimport.ll
@@ -59,4 +59,4 @@
 ; CHECK-LABEL: call_internal
 ; DAG-ISEL: b internal
 ; FAST-ISEL: b internal
-; GLOBAL-ISEL: bl internal
+; GLOBAL-ISEL: b internal