diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -198,6 +198,12 @@ /// hanging off of the globals. void removeDeadConstantUsers() const; + /// Return true if the constant has exactly one live use. + /// + /// This returns the same result as calling Value::hasOneUse after + /// Constant::removeDeadConstantUsers, but doesn't remove dead constants. + bool hasOneLiveUse() const; + const Constant *stripPointerCasts() const { return cast(Value::stripPointerCasts()); } diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1859,8 +1859,8 @@ SingleBBBonus = Threshold * SingleBBBonusPercent / 100; VectorBonus = Threshold * VectorBonusPercent / 100; - bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction(); + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() && + &F == Call.getCalledFunction(); // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. It may seem odd to update // Cost in updateThreshold, but the bonus depends on the logic in this method. @@ -2665,7 +2665,7 @@ onBlockAnalyzed(BB); } - bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() && &F == CandidateCall.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -714,29 +714,41 @@ return Result; } -/// If the specified constantexpr is dead, remove it. This involves recursively -/// eliminating any dead users of the constantexpr. -static bool removeDeadUsersOfConstant(const Constant *C) { +/// Return true if the specified constantexpr is dead. This involves +/// recursively traversing users of the constantexpr. +/// If RemoveDeadUsers is true, also remove dead users at the same time. +static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) { if (isa(C)) return false; // Cannot remove this - while (!C->use_empty()) { - const Constant *User = dyn_cast(C->user_back()); + Value::const_user_iterator I = C->user_begin(), E = C->user_end(); + while (I != E) { + const Constant *User = dyn_cast(*I); if (!User) return false; // Non-constant usage; - if (!removeDeadUsersOfConstant(User)) + if (!constantIsDead(User, RemoveDeadUsers)) return false; // Constant wasn't dead + + // Just removed User, so the iterator was invalidated. + // Since we return immediately upon finding a live user, we can always + // restart from user_begin(). + if (RemoveDeadUsers) + I = C->user_begin(); + else + ++I; } - // If C is only used by metadata, it should not be preserved but should have - // its uses replaced. - if (C->isUsedByMetadata()) { - const_cast(C)->replaceAllUsesWith( - UndefValue::get(C->getType())); + if (RemoveDeadUsers) { + // If C is only used by metadata, it should not be preserved but should + // have its uses replaced. + if (C->isUsedByMetadata()) { + const_cast(C)->replaceAllUsesWith( + UndefValue::get(C->getType())); + } + const_cast(C)->destroyConstant(); } - const_cast(C)->destroyConstant(); + return true; } - void Constant::removeDeadConstantUsers() const { Value::const_user_iterator I = user_begin(), E = user_end(); Value::const_user_iterator LastNonDeadUser = E; @@ -748,7 +760,7 @@ continue; } - if (!removeDeadUsersOfConstant(User)) { + if (!constantIsDead(User, /* RemoveDeadUsers= */ true)) { // If the constant wasn't dead, remember that this was the last live use // and move on to the next constant. LastNonDeadUser = I; @@ -764,6 +776,20 @@ } } +bool Constant::hasOneLiveUse() const { + unsigned NumUses = 0; + for (const Use &use : uses()) { + const Constant *User = dyn_cast(use.getUser()); + if (!User || !constantIsDead(User, /* RemoveDeadUsers= */ false)) { + ++NumUses; + + if (NumUses > 1) + return false; + } + } + return NumUses == 1; +} + Constant *Constant::replaceUndefsWith(Constant *C, Constant *Replacement) { assert(C && Replacement && "Expected non-nullptr constant arguments"); Type *Ty = C->getType(); diff --git a/llvm/test/Transforms/Inline/inline-cost-dead-users.ll b/llvm/test/Transforms/Inline/inline-cost-dead-users.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-cost-dead-users.ll @@ -0,0 +1,32 @@ +; Test to ensure that the inlining cost model isn't tripped up by dead constant users. +; In this case, the call to g via a bitcasted function pointer is canonicalized to +; a direct call to g with bitcasted arguments, leaving the original bitcast +; as a dead use of g. + +; RUN: opt < %s -instcombine -inline -pass-remarks=inline -S 2>&1 \ +; RUN: | FileCheck %s + +; Inline costs of f and g should be the same. + +; CHECK: 'f' inlined into 'h' with (cost=[[EXPECTED_COST:.+]], threshold={{.+}}) +; CHECK: 'g' inlined into 'h' with (cost=[[EXPECTED_COST]], threshold={{.+}}) + +%0 = type { i64, i64, i64 } +%1 = type { i64, i64, i64 } + +define internal void @f(%0* align 8 %a) unnamed_addr { +start: + ret void +} + +define internal void @g(%0* align 8 %a) unnamed_addr { +start: + ret void +} + +define void @h(%0* align 8 %a, %1* align 8 %b) unnamed_addr { +start: + call void @f(%0* align 8 %a) + call void bitcast (void (%0*)* @g to void (%1*)*)(%1* align 8 %b) + ret void +} diff --git a/llvm/test/Transforms/Inline/last-callsite.ll b/llvm/test/Transforms/Inline/last-callsite.ll --- a/llvm/test/Transforms/Inline/last-callsite.ll +++ b/llvm/test/Transforms/Inline/last-callsite.ll @@ -260,10 +260,10 @@ ; constant expression cannot be inlined because the constant expression forms ; a second use. If this part starts failing we need to use more complex ; constant expressions to reference a particular function with them. - %sink = alloca i1 - store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i1* %sink + %sink = alloca i64 + store volatile i64 mul (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i64* %sink call void @test4_g(i1 true) -; CHECK: store volatile i1 false +; CHECK: store volatile i64 mul (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint (void (i1)* @test4_g to i64)), i64* %sink ; CHECK: call void @test4_g(i1 true) ret void