Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -95,6 +95,10 @@
     /// optimization.
     bool IsTailCall = false;
 
+    /// True if the call was lowered as a tail call. This is consumed by the
+    /// legalizer. This allows the legalizer to lower libcalls as tail calls.
+    bool LoweredTailCall = false;
+
     /// True if the call is to a vararg function.
     bool IsVarArg = false;
   };
Index: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -327,6 +327,35 @@
   llvm_unreachable("Unknown libcall function");
 }
 
+/// True if an instruction is in tail position in its caller. Intended for
+/// legalizing libcalls as tail calls when possible.
+static bool isLibCallInTailPosition(MachineInstr &MI) {
+  const Function &F = MI.getParent()->getParent()->getFunction();
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore NoAlias and NonNull because they don't affect the
+  // call sequence.
+  AttributeList CallerAttrs = F.getAttributes();
+  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+          .removeAttribute(Attribute::NoAlias)
+          .removeAttribute(Attribute::NonNull)
+          .hasAttributes())
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+    return false;
+
+  // Only tail call if the following instruction is a standard return.
+  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+  MachineInstr *Next = MI.getNextNode();
+  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
+    return false;
+
+  return true;
+}
+
 LegalizerHelper::LegalizeResult
 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                     const CallLowering::ArgInfo &Result,
@@ -407,10 +436,24 @@
   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
   Info.Callee = MachineOperand::CreateES(Name);
   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
+  Info.IsTailCall = isLibCallInTailPosition(MI);
+
   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
   if (!CLI.lowerCall(MIRBuilder, Info))
     return LegalizerHelper::UnableToLegalize;
 
+  if (Info.LoweredTailCall) {
+    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+    // We must have a return following the call to get past
+    // isLibCallInTailPosition.
+    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
+           "Expected instr following MI to be a return?");
+
+    // We lowered a tail call, so the call is now the return from the block.
+    // Delete the old return.
+    MI.getNextNode()->eraseFromParent();
+  }
+
   return LegalizerHelper::Legalized;
 }
Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -788,8 +788,10 @@
 
   // If we're tail calling, then we're the return from the block. So, we don't
   // want to copy anything.
-  if (IsSibCall)
+  if (IsSibCall) {
+    Info.LoweredTailCall = true;
     return true;
+  }
 
   // Finally we can copy the returned value back into its virtual-register. In
   // symmetry with the arguments, the physical register must be an
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
@@ -13,13 +13,14 @@
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: $x0 = COPY [[COPY]](p0)
     ; CHECK: $x1 = COPY [[COPY1]](p0)
     ; CHECK: $x2 = COPY [[ZEXT]](s64)
-    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: RET_ReallyLR
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: TCRETURNdi &memcpy, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s32) = COPY $w2
@@ -42,13 +43,14 @@
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: $x0 = COPY [[COPY]](p0)
     ; CHECK: $x1 = COPY [[COPY1]](p0)
     ; CHECK: $x2 = COPY [[ZEXT]](s64)
-    ; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: RET_ReallyLR
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: TCRETURNdi &memmove, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s32) = COPY $w2
@@ -71,14 +73,15 @@
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: $x0 = COPY [[COPY]](p0)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; CHECK: $w1 = COPY [[COPY3]](s32)
     ; CHECK: $x2 = COPY [[ZEXT]](s64)
-    ; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $w1, implicit $x2
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: RET_ReallyLR
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $w1, implicit $x2, implicit $w3
     %0:_(p0) = COPY $x0
     %1:_(s32) = COPY $w1
     %2:_(s32) = COPY $w2
@@ -89,3 +92,68 @@
     RET_ReallyLR
 
 ...
+---
+name: no_tail_call
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $w2, $x0, $x1
+
+    ; CHECK-LABEL: name: no_tail_call
+    ; CHECK: liveins: $w2, $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY]](p0)
+    ; CHECK: $x1 = COPY [[COPY1]](p0)
+    ; CHECK: $x2 = COPY [[ZEXT]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[ZEXT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s32) = COPY $w2
+    %4:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_ZEXT %2(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %3(s64), %4(s1)
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_tc_twice
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $w2, $x0, $x1
+    ; CHECK-LABEL: name: dont_tc_twice
+    ; CHECK: liveins: $w2, $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY]](p0)
+    ; CHECK: $x1 = COPY [[COPY1]](p0)
+    ; CHECK: $x2 = COPY [[ZEXT]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s32) = COPY $w2
+    %4:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_ZEXT %2(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %3(s64), %4(s1)
+    TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp
Index: llvm/trunk/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
+++ llvm/trunk/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -global-isel-abort=1 -verify-machineinstrs -mtriple=aarch64-unknown-unknown -global-isel < %s | FileCheck %s
 
 ; CHECK-LABEL: tail_memcpy:
 ; CHECK: b memcpy
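Note (not part of the patch): a minimal sketch of the kind of input this change targets, mirroring the existing functions in tailcall-mem-intrinsics.ll; the exact attributes and intrinsic signature below are illustrative. A memset intrinsic call in tail position like the following is legalized to a libcall, and with LoweredTailCall the GlobalISel legalizer can emit it as a tail call (b memset) instead of bl memset followed by ret.

; A memset call in tail position; the legalizer turns the intrinsic into a
; libcall and, on AArch64, can now lower that libcall as a tail call.
define void @tail_memset(i8* %p, i8 %c, i32 %n) {
entry:
  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)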