Index: llvm/include/llvm/Analysis/CodeMetrics.h =================================================================== --- llvm/include/llvm/Analysis/CodeMetrics.h +++ llvm/include/llvm/Analysis/CodeMetrics.h @@ -23,6 +23,7 @@ class Loop; class Function; template class SmallPtrSetImpl; +class TargetLibraryInfo; class TargetTransformInfo; class Value; @@ -82,12 +83,14 @@ /// Collect a loop's ephemeral values (those used only by an assume /// or similar intrinsics in the loop). static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, - SmallPtrSetImpl &EphValues); + SmallPtrSetImpl &EphValues, + const TargetLibraryInfo *TLI = nullptr); /// Collect a functions's ephemeral values (those used only by an /// assume or similar intrinsics in the function). static void collectEphemeralValues(const Function *L, AssumptionCache *AC, - SmallPtrSetImpl &EphValues); + SmallPtrSetImpl &EphValues, + const TargetLibraryInfo *TLI = nullptr); }; } Index: llvm/lib/Analysis/CodeMetrics.cpp =================================================================== --- llvm/lib/Analysis/CodeMetrics.cpp +++ llvm/lib/Analysis/CodeMetrics.cpp @@ -18,60 +18,87 @@ #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstructionCost.h" +#include "llvm/Transforms/Utils/Local.h" #define DEBUG_TYPE "code-metrics" using namespace llvm; -static void -appendSpeculatableOperands(const Value *V, - SmallPtrSetImpl &Visited, - SmallVectorImpl &Worklist) { - const User *U = dyn_cast(V); +static void appendSpeculatableOperands(Value *V, + SmallPtrSetImpl &Visited, + SmallVectorImpl &Worklist, + const TargetLibraryInfo *TLI) { + User *U = dyn_cast(V); if (!U) return; - for (const Value *Operand : U->operands()) + for (Value *Operand : U->operands()) if (Visited.insert(Operand).second) - if (const auto *I = dyn_cast(Operand)) - if (!I->mayHaveSideEffects() && !I->isTerminator()) + if (auto *I = dyn_cast(Operand)) + if (wouldInstructionBeTriviallyDead(I, TLI)) Worklist.push_back(I); } -static void completeEphemeralValues(SmallPtrSetImpl &Visited, - SmallVectorImpl &Worklist, - SmallPtrSetImpl &EphValues) { - // Note: We don't speculate PHIs here, so we'll miss instruction chains kept - // alive only by ephemeral values. +static bool collectUsersOfEphemeralCandidate( + Value *V, SmallPtrSetImpl &EphValues, + SmallPtrSetImpl &UsersAndV, const TargetLibraryInfo *TLI) { + UsersAndV.insert(V); + + SmallVector Worklist; + Worklist.push_back(V); + while (!Worklist.empty()) { + Value *Curr = Worklist.pop_back_val(); + for (auto *U : Curr->users()) { + if (EphValues.count(U) || !UsersAndV.insert(U).second) + continue; + auto *I = dyn_cast(U); + if (!I || !wouldInstructionBeTriviallyDead(I, TLI)) + return false; + Worklist.push_back(I); + } + } + return true; +} +static void completeEphemeralValues(SmallPtrSetImpl &Visited, + SmallVectorImpl &Worklist, + SmallPtrSetImpl &EphValues, + const TargetLibraryInfo *TLI) { // Walk the worklist using an index but without caching the size so we can // append more entries as we process the worklist. This forms a queue without // quadratic behavior by just leaving processed nodes at the head of the // worklist forever. + SmallPtrSet Users; for (int i = 0; i < (int)Worklist.size(); ++i) { - const Value *V = Worklist[i]; + Value *V = Worklist[i]; + if (EphValues.count(V)) + continue; assert(Visited.count(V) && "Failed to add a worklist entry to our visited set!"); // If all uses of this value are ephemeral, then so is this value. - if (!all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) - continue; - - EphValues.insert(V); - LLVM_DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n"); - - // Append any more operands to consider. - appendSpeculatableOperands(V, Visited, Worklist); + Users.clear(); + if (collectUsersOfEphemeralCandidate(V, EphValues, Users, TLI)) { + for (auto *EphV : Users) { + EphValues.insert(EphV); + LLVM_DEBUG(dbgs() << "Ephemeral Value: " << *EphV << "\n"); + Visited.insert(EphV); + + // Append any more operands to consider. + appendSpeculatableOperands(EphV, Visited, Worklist, TLI); + } + } } } // Find all ephemeral values. -void CodeMetrics::collectEphemeralValues( - const Loop *L, AssumptionCache *AC, - SmallPtrSetImpl &EphValues) { +void +CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionCache *AC, + SmallPtrSetImpl &EphValues, + const TargetLibraryInfo *TLI) { SmallPtrSet Visited; - SmallVector Worklist; + SmallVector Worklist; for (auto &AssumeVH : AC->assumptions()) { if (!AssumeVH) @@ -85,17 +112,18 @@ continue; if (EphValues.insert(I).second) - appendSpeculatableOperands(I, Visited, Worklist); + appendSpeculatableOperands(I, Visited, Worklist, TLI); } - completeEphemeralValues(Visited, Worklist, EphValues); + completeEphemeralValues(Visited, Worklist, EphValues, TLI); } -void CodeMetrics::collectEphemeralValues( - const Function *F, AssumptionCache *AC, - SmallPtrSetImpl &EphValues) { +void +CodeMetrics::collectEphemeralValues(const Function *F, AssumptionCache *AC, + SmallPtrSetImpl &EphValues, + const TargetLibraryInfo *TLI) { SmallPtrSet Visited; - SmallVector Worklist; + SmallVector Worklist; for (auto &AssumeVH : AC->assumptions()) { if (!AssumeVH) @@ -105,10 +133,10 @@ "Found assumption for the wrong function!"); if (EphValues.insert(I).second) - appendSpeculatableOperands(I, Visited, Worklist); + appendSpeculatableOperands(I, Visited, Worklist, TLI); } - completeEphemeralValues(Visited, Worklist, EphValues); + completeEphemeralValues(Visited, Worklist, EphValues, TLI); } /// Fill in the current structure with information gleaned from the specified Index: llvm/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -92,6 +92,8 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Utils/SizeOpts.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/AssumptionCache.h" #include #include #include @@ -390,6 +392,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -425,7 +428,7 @@ } void removeAllAssertingVHReferences(Value *V); - bool eliminateAssumptions(Function &F); + bool eliminateAssumptions(Function &F, AssumptionCache &AC); bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr); bool eliminateMostlyEmptyBlocks(Function &F); BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); @@ -489,6 +492,7 @@ INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) @@ -564,7 +568,8 @@ // Get rid of @llvm.assume builtins before attempting to eliminate empty // blocks, since there might be blocks that only contain @llvm.assume calls // (plus arguments that we can get rid of). - EverMadeChange |= eliminateAssumptions(F); + auto &AC = getAnalysis().getAssumptionCache(F); + EverMadeChange |= eliminateAssumptions(F, AC); // Eliminate blocks that contain only PHI nodes and an // unconditional branch. @@ -718,23 +723,25 @@ return EverMadeChange; } -bool CodeGenPrepare::eliminateAssumptions(Function &F) { +bool CodeGenPrepare::eliminateAssumptions(Function &F, AssumptionCache &AC) { bool MadeChange = false; - for (BasicBlock &BB : F) { - CurInstIterator = BB.begin(); - while (CurInstIterator != BB.end()) { - Instruction *I = &*(CurInstIterator++); - if (auto *Assume = dyn_cast(I)) { - MadeChange = true; - Value *Operand = Assume->getOperand(0); - Assume->eraseFromParent(); - - resetIteratorIfInvalidatedWhileCalling(&BB, [&]() { - RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr); - }); - } + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(&F, &AC, EphValues, TLInfo); + + for (const auto *V : EphValues) { + if (auto *CI = dyn_cast(V)) { + // const_cast is not a good idea, but here we know that Value is not + // constant but just an API collectEphemeralValues provides. + // Alternative would be to traverse all instructions and check whether it + // is in set but it is a redundant compiler time overhead. + auto *I = const_cast(CI); + salvageDebugInfo(*I); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); + I->eraseFromParent(); + MadeChange = true; } } + return MadeChange; } Index: llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll =================================================================== --- llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll +++ llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll @@ -32,9 +32,7 @@ ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[HEADER]] ] -; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV2_INC:%.*]], [[HEADER]] ] ; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[IV2_INC]] = add i32 [[IV2]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[HEADER]] ; CHECK: exit: @@ -63,9 +61,7 @@ ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[HEADER]] ] -; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV2_INC:%.*]], [[HEADER]] ] ; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[IV2_INC]] = add i32 [[IV2]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[HEADER]] ; CHECK: exit: @@ -95,9 +91,7 @@ ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[HEADER]] ] -; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV2_INC:%.*]], [[HEADER]] ] ; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[IV2_INC]] = add i32 [[IV2]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[HEADER]] ; CHECK: exit: