Index: include/llvm/IR/GlobalValue.h =================================================================== --- include/llvm/IR/GlobalValue.h +++ include/llvm/IR/GlobalValue.h @@ -260,6 +260,62 @@ Linkage == CommonLinkage || Linkage == ExternalWeakLinkage; } + /// Returns true if the definition of this global may be replaced by a + /// differently optimized variant of the same source level function at link + /// time. + /// + /// These linkage types inhibits most non-inlining IPO, since a differently + /// optimized variant of the same function can have different observable or + /// undefined behavior than in the variant currently visible. For instance, + /// we could have started with + /// + /// void foo(int *v) { + /// int t = 5 / v[0]; + /// (void) t; + /// } + /// + /// and "refined" it to + /// + /// void foo(int *v) { } + /// + /// However, we cannot infer readnone for `foo`, since that would justify + /// DSE'ing a store to `v[0]` across a call to `foo`, which can cause + /// undefined behavior if the linker replaces the actual call destination with + /// the unoptimized `foo`. + /// + /// Inlining is okay across these linkage types, since the currently visible + /// variant is *a* correct implementation of the original source function; it + /// just isn't the *only* correct implementation. + bool mayBeDerefined() const { + switch (getLinkage()) { + case WeakODRLinkage: + case LinkOnceODRLinkage: + case AvailableExternallyLinkage: + return true; + + default: + return false; + } + + llvm_unreachable("Fully covered switch above!"); + } + + /// Return true if the currently visible definition of this global (if any) is + /// exactly the definition we will see at runtime. + bool isDefinitionExact() const { + return !mayBeDerefined() && !mayBeOverridden(); + } + + /// Return true if this global has an exact defintion. + bool hasExactDefinition() const { + // While this computes exactly the same thing as + // isStrongDefinitionForLinker, the intended uses are different. This + // function is intended to help decide if specific inter-procedural + // transforms are correct, while isStrongDefinitionForLinker's intended use + // is in low level code generation. + return !isDeclaration() && isDefinitionExact(); + } + bool hasExternalLinkage() const { return isExternalLinkage(getLinkage()); } bool hasAvailableExternallyLinkage() const { return isAvailableExternallyLinkage(getLinkage()); @@ -360,6 +416,10 @@ /// Returns true if this global's definition will be the one chosen by the /// linker. + /// + /// NB! Ideally this should not be used at the IR level at all. If you're + /// interested in optimization constraints implied by the linker's ability to + /// choose an implementation, prefer using \c hasExactDefinition. bool isStrongDefinitionForLinker() const { return !(isDeclarationForLinker() || isWeakForLinker()); } Index: lib/Analysis/GlobalsModRef.cpp =================================================================== --- lib/Analysis/GlobalsModRef.cpp +++ lib/Analysis/GlobalsModRef.cpp @@ -471,9 +471,10 @@ const std::vector &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); - if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) { - // Calls externally or is weak - can't say anything useful. Remove any existing - // function records (may have been created when scanning globals). + if (!SCC[0]->getFunction() || !SCC[0]->getFunction()->isDefinitionExact()) { + // Calls externally or is derefinable - can't say anything useful. Remove + // any existing function records (may have been created when scanning + // globals). for (auto *Node : SCC) FunctionInfos.erase(Node->getFunction()); continue; Index: lib/Transforms/IPO/DeadArgumentElimination.cpp =================================================================== --- lib/Transforms/IPO/DeadArgumentElimination.cpp +++ lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -329,7 +329,7 @@ // %v = load i32 %p // ret void // } - if (!Fn.isStrongDefinitionForLinker()) + if (!Fn.hasExactDefinition()) return false; // Functions with local linkage should already have been handled, except the Index: lib/Transforms/IPO/FunctionAttrs.cpp =================================================================== --- lib/Transforms/IPO/FunctionAttrs.cpp +++ lib/Transforms/IPO/FunctionAttrs.cpp @@ -69,9 +69,10 @@ // Already perfect! return MAK_ReadNone; - // Definitions with weak linkage may be overridden at linktime with - // something that writes memory, so treat them like declarations. - if (F.isDeclaration() || F.mayBeOverridden()) { + // Non-exact function definitions (see GlobalValue::mayBeDerefined) may be + // overridden at linktime with something that writes memory, so treat them + // like declarations. + if (!F.hasExactDefinition()) { if (AliasAnalysis::onlyReadsMemory(MRB)) return MAK_ReadOnly; @@ -284,8 +285,7 @@ } Function *F = CS.getCalledFunction(); - if (!F || F->isDeclaration() || F->mayBeOverridden() || - !SCCNodes.count(F)) { + if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { Captured = true; return true; } @@ -490,9 +490,9 @@ // Check each function in turn, determining which pointer arguments are not // captured. for (Function *F : SCCNodes) { - // Definitions with weak linkage may be overridden at linktime with + // Definitions with non-exact definitions may be overridden at linktime with // something that captures pointers, so treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) + if (!F->hasExactDefinition()) continue; // Functions that are readonly (or readnone) and nounwind and don't return @@ -745,9 +745,9 @@ if (F->doesNotAlias(0)) continue; - // Definitions with weak linkage may be overridden at linktime, so + // Definitions with non-exact definitions may be overridden at linktime, so // treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) + if (!F->hasExactDefinition()) return false; // We annotate noalias return values, which are only applicable to @@ -859,9 +859,9 @@ Attribute::NonNull)) continue; - // Definitions with weak linkage may be overridden at linktime, so + // Definitions with non-exact definitions may be overridden at linktime, so // treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) + if (!F->hasExactDefinition()) return false; // We annotate nonnull return values, which are only applicable to Index: lib/Transforms/IPO/IPConstantPropagation.cpp =================================================================== --- lib/Transforms/IPO/IPConstantPropagation.cpp +++ lib/Transforms/IPO/IPConstantPropagation.cpp @@ -161,9 +161,9 @@ if (F.getReturnType()->isVoidTy()) return false; // No return value. - // If this function could be overridden later in the link stage, we can't + // If this function could be derefined later in the link stage, we can't // propagate information about its results into callers. - if (F.mayBeOverridden()) + if (!F.isDefinitionExact()) return false; // Check to see if this function returns a constant. Index: lib/Transforms/IPO/PruneEH.cpp =================================================================== --- lib/Transforms/IPO/PruneEH.cpp +++ lib/Transforms/IPO/PruneEH.cpp @@ -94,6 +94,9 @@ SCCMightUnwind = true; SCCMightReturn = true; } else if (F->isDeclaration() || F->mayBeOverridden()) { + // Note: mayBeOverridden is fine above, since we're not inferring new + // attributes here, but only using existing, assumed to be correct, + // function attributes. SCCMightUnwind |= !F->doesNotThrow(); SCCMightReturn |= !F->doesNotReturn(); } else { Index: lib/Transforms/ObjCARC/ObjCARCAPElim.cpp =================================================================== --- lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -70,7 +70,7 @@ /// possibly produce autoreleases. bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { if (const Function *Callee = CS.getCalledFunction()) { - if (Callee->isDeclaration() || Callee->mayBeOverridden()) + if (!Callee->hasExactDefinition()) return true; for (const BasicBlock &BB : *Callee) { for (const Instruction &I : BB) Index: lib/Transforms/Scalar/SCCP.cpp =================================================================== --- lib/Transforms/Scalar/SCCP.cpp +++ lib/Transforms/Scalar/SCCP.cpp @@ -1724,9 +1724,9 @@ if (F->isDeclaration()) continue; - // If this is a strong or ODR definition of this function, then we can - // propagate information about its result into callsites of it. - if (!F->mayBeOverridden()) + // If this is an exact definition of this function, then we can propagate + // information about its result into callsites of it. + if (F->hasExactDefinition()) Solver.AddTrackedFunction(&*F); // If this function only has direct calls that we can see, we can track its Index: test/Analysis/GlobalsModRef/comdat-ipo.ll =================================================================== --- /dev/null +++ test/Analysis/GlobalsModRef/comdat-ipo.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -basicaa -globals-aa -gvn -S | FileCheck %s + +; See PR26774 + +@X = internal global i32 4 + +define i32 @test(i32* %P) { +; CHECK: @test +; CHECK-NEXT: store i32 12, i32* @X +; CHECK-NEXT: call void @doesnotmodX() +; CHECK-NEXT: %V = load i32, i32* @X +; CHECK-NEXT: ret i32 %V + store i32 12, i32* @X + call void @doesnotmodX( ) + %V = load i32, i32* @X + ret i32 %V +} + +define linkonce_odr void @doesnotmodX() { + ret void +} Index: test/Transforms/FunctionAttrs/comdat-ipo.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrs/comdat-ipo.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -functionattrs -S | FileCheck %s + +; See PR26774 + +; CHECK-LABEL: define void @bar(i8* readonly) { +define void @bar(i8* readonly) { + call void @foo(i8* %0) + ret void +} + + +; CHECK-LABEL: define linkonce_odr void @foo(i8* readonly) { +define linkonce_odr void @foo(i8* readonly) { + call void @bar(i8* %0) + ret void +} Index: test/Transforms/IPConstantProp/comdat-ipo.ll =================================================================== --- /dev/null +++ test/Transforms/IPConstantProp/comdat-ipo.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -ipconstprop -S | FileCheck %s + +; See PR26774 + +define i32 @baz() { + ret i32 10 +} + +; We can const-prop @baz's return value *into* @foo, but cannot +; constprop @foo's return value into bar. + +define linkonce_odr i32 @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: %val = call i32 @baz() +; CHECK-NEXT: ret i32 10 + + %val = call i32 @baz() + ret i32 %val +} + +define i32 @bar() { +; CHECK-LABEL: @bar( +; CHECK-NEXT: %val = call i32 @foo() +; CHECK-NEXT: ret i32 %val + + %val = call i32 @foo() + ret i32 %val +} Index: test/Transforms/Inline/comdat-ipo.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/comdat-ipo.ll @@ -0,0 +1,19 @@ +; RUN: opt -inline -S < %s | FileCheck %s + +define i32 @caller() { +; CHECK-LABEL: @caller( +; CHECK-NEXT: %val2 = call i32 @linkonce_callee(i32 42) +; CHECK-NEXT: ret i32 %val2 + + %val = call i32 @odr_callee() + %val2 = call i32 @linkonce_callee(i32 %val); + ret i32 %val2 +} + +define linkonce_odr i32 @odr_callee() { + ret i32 42 +} + +define linkonce i32 @linkonce_callee(i32 %val) { + ret i32 %val +} Index: test/Transforms/ObjCARC/comdat-ipo.ll =================================================================== --- /dev/null +++ test/Transforms/ObjCARC/comdat-ipo.ll @@ -0,0 +1,53 @@ +; RUN: opt -S -objc-arc-apelim < %s | FileCheck %s + +; See PR26774 + +@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_x }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_y }] + +@x = global i32 0 + +declare i32 @bar() nounwind + +define linkonce_odr i32 @foo() nounwind { +entry: + ret i32 5 +} + +define internal void @__cxx_global_var_init() { +entry: + %call = call i32 @foo() + store i32 %call, i32* @x, align 4 + ret void +} + +define internal void @__dxx_global_var_init() { +entry: + %call = call i32 @bar() + store i32 %call, i32* @x, align 4 + ret void +} + +; CHECK-LABEL: define internal void @_GLOBAL__I_x() { +define internal void @_GLOBAL__I_x() { +entry: +; CHECK: call i8* @objc_autoreleasePoolPush() +; CHECK-NEXT: call void @__cxx_global_var_init() +; CHECK-NEXT: call void @objc_autoreleasePoolPop(i8* %0) +; CHECK-NEXT: ret void + + %0 = call i8* @objc_autoreleasePoolPush() nounwind + call void @__cxx_global_var_init() + call void @objc_autoreleasePoolPop(i8* %0) nounwind + ret void +} + +define internal void @_GLOBAL__I_y() { +entry: + %0 = call i8* @objc_autoreleasePoolPush() nounwind + call void @__dxx_global_var_init() + call void @objc_autoreleasePoolPop(i8* %0) nounwind + ret void +} + +declare i8* @objc_autoreleasePoolPush() +declare void @objc_autoreleasePoolPop(i8*) Index: test/Transforms/SCCP/comdat-ipo.ll =================================================================== --- /dev/null +++ test/Transforms/SCCP/comdat-ipo.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -ipsccp -S | FileCheck %s + +; See PR26774 + +define i32 @baz() { + ret i32 10 +} + +; We can const-prop @baz's return value *into* @foo, but cannot +; constprop @foo's return value into bar. + +define linkonce_odr i32 @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: %val = call i32 @baz() +; CHECK-NEXT: ret i32 10 + + %val = call i32 @baz() + ret i32 %val +} + +define i32 @bar() { +; CHECK-LABEL: @bar( +; CHECK-NEXT: %val = call i32 @foo() +; CHECK-NEXT: ret i32 %val + + %val = call i32 @foo() + ret i32 %val +}