diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -480,8 +480,9 @@ /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. -static bool isLibCallInTailPosition(const TargetInstrInfo &TII, - MachineInstr &MI) { +static bool isLibCallInTailPosition(MachineInstr &MI, + const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) { MachineBasicBlock &MBB = *MI.getParent(); const Function &F = MBB.getParent()->getFunction(); @@ -500,8 +501,47 @@ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) return false; - // Only tail call if the following instruction is a standard return. + // Only tail call if the following instruction is a standard return or if we + // have a `thisreturn` callee, and a sequence like: + // + // G_MEMCPY %0, %1, %2 + // $x0 = COPY %0 + // RET_ReallyLR implicit $x0 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); + if (Next != MBB.instr_end() && Next->isCopy()) { + switch (MI.getOpcode()) { + default: + llvm_unreachable("unsupported opcode"); + case TargetOpcode::G_BZERO: + return false; + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + break; + } + + Register VReg = MI.getOperand(0).getReg(); + if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg()) + return false; + + Register PReg = Next->getOperand(0).getReg(); + if (!PReg.isPhysical()) + return false; + + auto Ret = next_nodbg(Next, MBB.instr_end()); + if (Ret == MBB.instr_end() || !Ret->isReturn()) + return false; + + if (Ret->getNumImplicitOperands() != 1) + return false; + + if (PReg != Ret->getOperand(0).getReg()) + return false; + + // Skip over the COPY that we just validated. + Next = Ret; + } + if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn()) return false; @@ -607,7 +647,7 @@ Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0); Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MIRBuilder.getTII(), MI); + isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -623,7 +663,8 @@ // isLibCallInTailPosition. do { MachineInstr *Next = MI.getNextNode(); - assert(Next && (Next->isReturn() || Next->isDebugInstr()) && + assert(Next && + (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) && "Expected instr following MI to be return or debug inst?"); // We lowered a tail call, so the call is now the return from the block. // Delete the old return. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir @@ -121,8 +121,8 @@ ; CHECK-LABEL: name: no_tail_call ; CHECK: liveins: $w2, $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp @@ -133,8 +133,8 @@ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: $x0 = COPY [[ZEXT]](s64) ; CHECK: RET_ReallyLR implicit $x0 - %0:_(p0) = COPY $x0 - %1:_(p0) = COPY $x1 + %0:_(p0) = COPY $x1 + %1:_(p0) = COPY $x0 %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32) G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size) @@ -168,3 +168,201 @@ %3:_(s64) = G_ZEXT %2(s32) G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size) TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp + +... +--- +name: tail_with_copy_ret +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: tail_with_copy_ret + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: $x1 = COPY [[COPY1]](p0) + ; CHECK: $x2 = COPY [[COPY2]](s64) + ; CHECK: TCRETURNdi &memcpy, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = COPY $x2 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8)) + $x0 = COPY %0(p0) + RET_ReallyLR implicit $x0 + +... +--- +name: dont_tc_mismatched_copies +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: dont_tc_mismatched_copies + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: $x1 = COPY [[COPY1]](p0) + ; CHECK: $x2 = COPY [[COPY2]](s64) + ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x1 = COPY [[COPY]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = COPY $x2 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8)) + $x1 = COPY %0(p0) + RET_ReallyLR implicit $x0 + +... +--- +name: dont_tc_extra_copy +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: dont_tc_extra_copy + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: $x1 = COPY [[COPY1]](p0) + ; CHECK: $x2 = COPY [[COPY2]](s64) + ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = COPY $x2 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8)) + $x0 = COPY %0(p0) + $x0 = COPY %0(p0) + RET_ReallyLR implicit $x0 + +... +--- +name: dont_tc_mismatched_ret +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: dont_tc_mismatched_ret + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: $x1 = COPY [[COPY1]](p0) + ; CHECK: $x2 = COPY [[COPY2]](s64) + ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x1 = COPY [[COPY]](p0) + ; CHECK: RET_ReallyLR implicit $x0 + %0:_(p0) = COPY $x1 + %1:_(p0) = COPY $x0 + %2:_(s64) = COPY $x2 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8)) + $x1 = COPY %0(p0) + RET_ReallyLR implicit $x0 + +... +--- +name: dont_tc_ret_void_copy +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: dont_tc_ret_void_copy + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: $x1 = COPY [[COPY1]](p0) + ; CHECK: $x2 = COPY [[COPY2]](s64) + ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[COPY]](p0) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = COPY $x2 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8)) + $x0 = COPY %0(p0) + RET_ReallyLR