Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4700,44 +4700,28 @@ static bool isFunctionGlobalAddress(SDValue Callee); -static bool -callsShareTOCBase(const Function *Caller, SDValue Callee, - const TargetMachine &TM) { - // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols - // don't have enough information to determine if the caller and calle share - // the same TOC base, so we have to pessimistically assume they don't for - // correctness. - GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if (!G) - return false; - - const GlobalValue *GV = G->getGlobal(); - // The medium and large code models are expected to provide a sufficiently - // large TOC to provide all data addressing needs of a module with a - // single TOC. Since each module will be addressed with a single TOC then we - // only need to check that caller and callee don't cross dso boundaries. - if (CodeModel::Medium == TM.getCodeModel() || - CodeModel::Large == TM.getCodeModel()) - return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV); - - // Otherwise we need to ensure callee and caller are in the same section, - // since the linker may allocate multiple TOCs, and we don't know which - // sections will belong to the same TOC base. +static bool callsShareTOCBase(const Function *Caller, SDValue Callee, + const TargetMachine &TM) { + // It does not make sense to call callsShareTOCBase() with a caller that + // is PC Relative since PC Relative callers do not have a TOC. +#ifndef NDEBUG + const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller); + assert(!STICaller->isUsingPCRelativeCalls() && + "PC Relative callers do not have a TOC and cannot share a TOC Base"); +#endif - if (!GV->isStrongDefinitionForLinker()) + // Callee is either a GlobalAddress or an ExternalSymbol.
ExternalSymbols + // don't have enough information to determine if the caller and callee share + // the same TOC base, so we have to pessimistically assume they don't for + // correctness. + GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); + if (!G) return false; - // Any explicitly-specified sections and section prefixes must also match. - // Also, if we're using -ffunction-sections, then each function is always in - // a different section (the same is true for COMDAT functions). - if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || - GV->getSection() != Caller->getSection()) - return false; - if (const auto *F = dyn_cast<Function>(GV)) { - if (F->getSectionPrefix() != Caller->getSectionPrefix()) - return false; - } + const GlobalValue *GV = G->getGlobal(); + // If the callee is not dso-local then the caller and callee might not share + // toc-bases. // If the callee might be interposed, then we can't assume the ultimate call // target will be in the same section. Even in cases where we can assume that // interposition won't happen, in any case where the linker might insert a @@ -4755,9 +4739,67 @@ // whether a nop is needed after the call instruction in b, because the linker // will insert a stub, it might complain about a missing nop if we omit it // (although many don't complain in this case). + // IFuncs will fail here and return false. if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) return false; + // Functions with PC Relative enabled may clobber the TOC in the same DSO. + // We may need a TOC restore in the situation where the caller requires a + // valid TOC but the callee is PC Relative and does not. + const Function *F = dyn_cast<Function>(GV); + const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV); + + // Either the function or the alias needs to have a valid pointer. + // Otherwise, there is no way to check the callee to see if it uses + // PC Relative addressing. + // Without enough information we must assume that the callee clobbers + // the TOC.
+ if (!F && !Alias) + return false; + + // If we have a valid Alias we can try to get the function from there. + if (Alias) { + const GlobalObject *GlobalObj = Alias->getBaseObject(); + F = dyn_cast<Function>(GlobalObj); + } + + // If we still have no valid function pointer we do not have enough + // information to determine if the callee uses PC Relative calls so we must + // assume that it does. + if (!F) + return false; + + // If the callee uses PC Relative we cannot guarantee that the callee won't + // clobber the TOC of the caller and so we must assume that the two + // functions do not share a TOC base. + const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F); + if (STICallee->isUsingPCRelativeCalls()) + return false; + + // The medium and large code models are expected to provide a sufficiently + // large TOC to provide all data addressing needs of a module with a + // single TOC. + if (CodeModel::Medium == TM.getCodeModel() || + CodeModel::Large == TM.getCodeModel()) + return true; + + // Otherwise we need to ensure callee and caller are in the same section, + // since the linker may allocate multiple TOCs, and we don't know which + // sections will belong to the same TOC base. + if (!GV->isStrongDefinitionForLinker()) + return false; + + // Any explicitly-specified sections and section prefixes must also match. + // Also, if we're using -ffunction-sections, then each function is always in + // a different section (the same is true for COMDAT functions). + if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || + GV->getSection() != Caller->getSection()) + return false; + if (const auto *F = dyn_cast<Function>(GV)) { + if (F->getSectionPrefix() != Caller->getSectionPrefix()) + return false; + } + return true; } @@ -5277,8 +5319,8 @@ // will rewrite the nop to be a load of the TOC pointer from the linkage area // into gpr2. if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) - return callsShareTOCBase(&Caller, Callee, TM) ?
PPCISD::CALL - : PPCISD::CALL_NOP; + return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL + : PPCISD::CALL_NOP; return PPCISD::CALL; } Index: llvm/test/CodeGen/PowerPC/func-alias.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/func-alias.ll @@ -0,0 +1,56 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P8 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P9 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P10 + +@newname = dso_local alias i32 (...), bitcast (i32 ()* @oldname to i32 (...)*) + +; Function Attrs: noinline nounwind optnone +define dso_local signext i32 @oldname() #0 { +entry: + ret i32 55 +} + +; Function Attrs: noinline nounwind optnone +define dso_local signext i32 @caller() #0 { +; #P8-LABEL: caller +; #P8: bl newname +; #P8-NOT: nop +; #P8: blr +; #P9-LABEL: caller +; #P9: bl newname +; #P9-NOT: nop +; #P9: blr +; #P10-LABEL: caller +; #P10: bl newname@notoc +; #P10-NOT: nop +; #P10: blr +entry: + %call = call signext i32 bitcast (i32 (...)* @newname to i32 ()*)() + ret i32 %call +} + +; Function Attrs: noinline nounwind optnone -pcrelative-memops +; This caller does not use PC Relative memops +define dso_local signext i32 @caller_nopcrel() #1 { +; #P8-LABEL: caller_nopcrel +; #P8: bl newname +; #P8-NOT: nop +; #P8: blr +; #P9-LABEL: caller_nopcrel +; #P9: bl newname +; #P9-NOT: nop +; #P9: blr +; #P10-LABEL: caller_nopcrel +; #P10: bl newname +; #P10-NEXT: nop +; #P10: blr +entry: + %call = call signext i32 bitcast (i32 (...)* @newname to i32 ()*)() + ret i32 %call +} + +attributes #0 = { noinline nounwind optnone } +attributes #1 = { noinline nounwind optnone 
"target-features"="-pcrelative-memops" } Index: llvm/test/CodeGen/PowerPC/ifunc.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ifunc.ll +++ llvm/test/CodeGen/PowerPC/ifunc.ll @@ -2,6 +2,12 @@ ; RUN: llc %s -o - -mtriple=powerpc -relocation-model=pic | FileCheck --check-prefix=PLTREL %s ; RUN: llc %s -o - -mtriple=powerpc64 | FileCheck --check-prefix=REL %s ; RUN: llc %s -o - -mtriple=powerpc64 -relocation-model=pic | FileCheck --check-prefix=REL %s +; RUN: llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \ +; RUN: -verify-machineinstrs | FileCheck --check-prefix=LEP8 %s +; RUN: llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 \ +; RUN: -verify-machineinstrs | FileCheck --check-prefix=LEP9 %s +; RUN: llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \ +; RUN: -verify-machineinstrs | FileCheck --check-prefix=LEP10 %s @ifunc1 = dso_local ifunc void(), i8*()* @resolver @ifunc2 = ifunc void(), i8*()* @resolver @@ -9,10 +15,29 @@ define i8* @resolver() { ret i8* null } define void @foo() #0 { - ; REL: bl ifunc1{{$}} - ; REL: bl ifunc2{{$}} - ; PLTREL: bl ifunc1@PLT+32768 - ; PLTREL: bl ifunc2@PLT+32768 + ; REL-LABEL: foo + ; REL: bl ifunc1{{$}} + ; REL: bl ifunc2{{$}} + ; PLTREL-LABEL: foo + ; PLTREL: bl ifunc1@PLT+32768 + ; PLTREL: bl ifunc2@PLT+32768 + ; LEP8-LABEL: foo + ; LEP8: bl ifunc1 + ; LEP8-NEXT: nop + ; LEP8-NEXT: bl ifunc2 + ; LEP8-NEXT: nop + ; LEP8: blr + ; LEP9-LABEL: foo + ; LEP9: bl ifunc1 + ; LEP9-NEXT: nop + ; LEP9-NEXT: bl ifunc2 + ; LEP9-NEXT: nop + ; LEP9: blr + ; LEP10-LABEL: foo + ; LEP10: bl ifunc1@notoc + ; LEP10-NEXT: bl ifunc2@notoc + ; LEP10-NOT: nop + ; LEP10: blr call void @ifunc1() call void @ifunc2() ret void Index: llvm/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names < %s | FileCheck %s + +; The purpose of this test is to check the call protocols for the situation +; where the caller has PC Relative disabled, the callee has PC Relative +; enabled and both functions are in the same file. +; Note that the callee does not know if it clobbers the TOC because it +; contains an external call to @externalFunc. + +@global = external local_unnamed_addr global i32, align 4 + +define dso_local signext i32 @callee(i32 signext %a) local_unnamed_addr #0 { +; CHECK-LABEL: callee: +; CHECK: .localentry callee, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl externalFunc@notoc +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call signext i32 @externalFunc(i32 signext %a) #3 + %add = add nsw i32 %call, %a + ret i32 %add +} + +declare signext i32 @externalFunc(i32 signext) local_unnamed_addr #1 + +define dso_local void @caller(i32 signext %a) local_unnamed_addr #2 { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: ld r30, .LC0@toc@l(r4) +; CHECK-NEXT: lwz r4, 0(r30) +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: bl callee +; CHECK-NEXT: nop +; CHECK-NEXT: mullw r3, r3, r3 +; CHECK-NEXT: stw r3, 0(r30) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @global, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @callee(i32 signext %add) + %mul = mul nsw i32 %call, %call + store i32 %mul, i32* @global, align 4 + ret void +} + +define dso_local signext i32 @tail_caller(i32 signext %a) local_unnamed_addr #2 { +; CHECK-LABEL: tail_caller: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: lwz r4, 0(r4) +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: bl callee +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @global, align 4 + %add = add nsw i32 %0, %a + %call = tail call signext i32 @callee(i32 signext %add) + ret i32 %call +} + + +; Left the target features in this test because it is important that caller has +; -pcrelative-memops while callee has +pcrelative-memops +attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" } +attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" } +attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-qpx,-spe" } +attributes #3 = { nounwind }