Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -763,15 +763,13 @@ /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. - bool IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const; + bool IsEligibleForTailCallOptimization( + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, + bool isCalleeStructRet, bool isCallerStructRet, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG &DAG, + const bool PreferIndirect) const; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -1832,29 +1832,40 @@ bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); - bool isThisReturn = false; - bool isSibCall = false; + bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool isThisReturn = false; auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); + bool PreferIndirect = false; // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") isTailCall = false; + if (isa(Callee)) { + // If we're optimizing for minimum size and the function is called three or + // more times in this block, we can improve codesize by calling indirectly + // as BLXr has a 16-bit encoding. + auto *GV = cast(Callee)->getGlobal(); + auto *BB = CLI.CS.getParent(); + PreferIndirect = + Subtarget->isThumb() && Subtarget->hasMinSize() && + count_if(GV->users(), [&BB](const User *U) { + return isa(U) && cast(U)->getParent() == BB; + }) > 2; + } if (isTailCall) { // Check if it's really possible to do a tail call. - isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(), - Outs, OutVals, Ins, DAG); + isTailCall = IsEligibleForTailCallOptimization( + Callee, CallConv, isVarArg, isStructRet, + MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG, + PreferIndirect); if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. - if (isTailCall) { + if (isTailCall) ++NumTailCalls; - isSibCall = true; - } } // Analyze operands of the call, assigning locations to each operand. @@ -1866,14 +1877,14 @@ // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - // For tail calls, memory operands are available in our caller's stack. - if (isSibCall) + if (isTailCall) { + // For tail calls, memory operands are available in our caller's stack. NumBytes = 0; - - // Adjust the stack pointer for the new arguments... - // These operations are automatically eliminated by the prolog/epilog pass - if (!isSibCall) + } else { + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); + } SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); @@ -1995,7 +2006,7 @@ MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops)); } - } else if (!isSibCall) { + } else if (!isTailCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -2067,17 +2078,6 @@ MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } else if (isa(Callee)) { - // If we're optimizing for minimum size and the function is called three or - // more times in this block, we can improve codesize by calling indirectly - // as BLXr has a 16-bit encoding. - auto *GV = cast(Callee)->getGlobal(); - auto *BB = CLI.CS.getParent(); - bool PreferIndirect = - Subtarget->isThumb() && Subtarget->hasMinSize() && - count_if(GV->users(), [&BB](const User *U) { - return isa(U) && cast(U)->getParent() == BB; - }) > 2; - if (!PreferIndirect) { isDirect = true; bool isDef = GV->isStrongDefinitionForLinker(); @@ -2309,28 +2309,25 @@ /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. -bool -ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const { +bool ARMTargetLowering::IsEligibleForTailCallOptimization( + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, + bool isCalleeStructRet, bool isCallerStructRet, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG &DAG, + const bool PreferIndirect) const { MachineFunction &MF = DAG.getMachineFunction(); const Function &CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF.getCallingConv(); assert(Subtarget->supportsTailCall()); - // Tail calls to function pointers cannot be optimized for Thumb1 if the args + // Indirect tail calls cannot be optimized for Thumb1 if the args // to the call take up r0-r3. The reason is that there are no legal registers // left to hold the pointer to the function to be called. if (Subtarget->isThumb1Only() && Outs.size() >= 4 && - !isa(Callee.getNode())) - return false; + (!isa(Callee.getNode()) || PreferIndirect)) + return false; // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. Index: test/CodeGen/ARM/pr42062.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/pr42062.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s 2>&1 | FileCheck %s --implicit-check-not=error +target triple = "thumbv8m.base-arm-none-eabi" +@foo = external global i8 +declare i32 @bar(i8* nocapture, i32, i32, i8* nocapture) + +define void @food(i8* %a) #0 { +; CHECK-LABEL: food: +; CHECK: mov [[ARG0:r[4-7]]], r0 +; CHECK-NEXT: movs r1, #8 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: ldr [[FOO_R:r[4-7]]], [[FOO_ADDR:\..*]] +; CHECK-NEXT: ldr [[BAR_R:r[4-7]]], [[BAR_ADDR:\..*]] +; CHECK-NEXT: mov r3, [[FOO_R]] +; CHECK-NEXT: blx [[BAR_R]] +; CHECK-NEXT: movs r1, #9 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mov r0, [[ARG0]] +; CHECK-NEXT: mov r3, [[FOO_R]] +; CHECK-NEXT: blx [[BAR_R]] +; CHECK-NEXT: movs r1, #7 +; CHECK-NEXT: movs r2, #2 +; CHECK-NEXT: mov r0, [[ARG0]] +; CHECK-NEXT: mov r3, [[FOO_R]] +; CHECK-NEXT: blx [[BAR_R]] +; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK: [[FOO_ADDR]]: +; CHECK-NEXT: .long foo +; CHECK: [[BAR_ADDR]]: +; CHECK-NEXT: .long bar +entry: + %0 = tail call i32 @bar(i8* %a, i32 8, i32 1, i8* nonnull @foo) + %1 = tail call i32 @bar(i8* %a, i32 9, i32 0, i8* nonnull @foo) + %2 = tail call i32 @bar(i8* %a, i32 7, i32 2, i8* nonnull @foo) + ret void +} +attributes #0 = { minsize "target-cpu"="cortex-m23" } +