diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -175,7 +175,8 @@ bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU = nullptr, const SimplifyCFGOptions &Options = {}, - ArrayRef LoopHeaders = {}); + ArrayRef LoopHeaders = {}, + SmallPtrSetImpl *EphValues = nullptr); /// This function is used to flatten a CFG. For example, it uses parallel-and /// and parallel-or mode to collapse if-conditions and merge if-regions with diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -212,6 +213,10 @@ SmallVector LoopHeaders(UniqueLoopHeaders.begin(), UniqueLoopHeaders.end()); + SmallPtrSet EphValues; + if (Options.AC) + CodeMetrics::collectEphemeralValues(&F, Options.AC, EphValues); + while (LocalChange) { LocalChange = false; @@ -227,7 +232,7 @@ while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) ++BBIt; } - if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { + if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &EphValues)) { LocalChange = true; ++NumSimpl; } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -224,6 +224,7 @@ ArrayRef LoopHeaders; const SimplifyCFGOptions &Options; bool Resimplify; + SmallPtrSetImpl *EphValues; Value *isValueEqualityComparison(Instruction *TI); BasicBlock *GetValueEqualityComparisonCases( @@ -269,8 +270,10 @@ public: SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL, ArrayRef LoopHeaders, - const SimplifyCFGOptions &Opts) - : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { + const SimplifyCFGOptions &Opts, + SmallPtrSetImpl *EphValues) + : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts), + EphValues(EphValues) { assert((!DTU || !DTU->hasPostDomTree()) && "SimplifyCFG is not yet capable of maintaining validity of a " "PostDomTree, so don't ask for it."); @@ -2427,7 +2430,9 @@ } /// Return true if we can thread a branch across this block. -static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { +static bool +BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB, + SmallPtrSetImpl *EphValues) { int Size = 0; for (Instruction &I : BB->instructionsWithoutDebug()) { @@ -2440,8 +2445,8 @@ return false; // We will delete Phis while threading, so Phis should not be accounted in - // block's size - if (!isa(I)) + // block's size. Ditto for ephemeral values which will also be deleted. + if (!isa(I) && !(EphValues && EphValues->count(&I))) ++Size; // We can only support instructions that do not define values that are @@ -2462,7 +2467,8 @@ /// same block as the branch and if any PHI entries are constants, thread edges /// corresponding to that entry to be branches to their ultimate destination. static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, - const DataLayout &DL, AssumptionCache *AC) { + const DataLayout &DL, AssumptionCache *AC, + SmallPtrSetImpl *EphValues) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -2477,7 +2483,7 @@ } // Now we know that this block has multiple preds and two succs. - if (!BlockIsSimpleEnoughToThreadThrough(BB)) + if (!BlockIsSimpleEnoughToThreadThrough(BB, EphValues)) return false; // Okay, this is a simple enough basic block. See if any phi values are @@ -2577,7 +2583,7 @@ } // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; + return FoldCondBranchOnPHI(BI, DTU, DL, AC, EphValues) || true; } return false; @@ -3544,10 +3550,9 @@ /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. -static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - DomTreeUpdater *DTU, - const DataLayout &DL, - const TargetTransformInfo &TTI) { +static bool SimplifyCondBranchToCondBranch( + BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, + const TargetTransformInfo &TTI, SmallPtrSetImpl *EphValues) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -3569,7 +3574,7 @@ // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. - if (BlockIsSimpleEnoughToThreadThrough(BB)) { + if (BlockIsSimpleEnoughToThreadThrough(BB, EphValues)) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); PHINode *NewPN = PHINode::Create( Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), @@ -6489,14 +6494,14 @@ // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) + if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC, EphValues)) return requestResimplify(); // Scan predecessor blocks for conditional branches. for (BasicBlock *Pred : predecessors(BB)) if (BranchInst *PBI = dyn_cast(Pred->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI, EphValues)) return requestResimplify(); // Look for diamond patterns. @@ -6717,8 +6722,9 @@ bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, - ArrayRef LoopHeaders) { + ArrayRef LoopHeaders, + SmallPtrSetImpl *EphValues) { return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders, - Options) + Options, EphValues) .run(BB); } diff --git a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll --- a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll +++ b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll @@ -1,10 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s -; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s +; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s +; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s target datalayout = "e-p:64:64-p5:32:32-A5" declare void @llvm.assume(i1) +declare i1 @llvm.type.test(i8*, metadata) nounwind readnone define void @test_01(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 { ; CHECK-LABEL: @test_01( @@ -165,3 +166,61 @@ store volatile i64 3, i64* %ptr, align 8 ret void } + +; Try the max block size for PRE again but with the bitcast/type test/assume +; sequence used for whole program devirt. +define void @test_04(i1 %c, i64* align 1 %ptr, [3 x i8*]* %vtable) local_unnamed_addr #0 { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] +; CHECK: false1: +; CHECK-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8* +; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo") +; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 3, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; CHECK: true2.critedge: +; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8* +; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo") +; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 2, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; + br i1 %c, label %true1, label %false1 + +true1: ; preds = %false1, %0 + %vtablei8 = bitcast [3 x i8*]* %vtable to i8* + %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"foo") + tail call void @llvm.assume(i1 %p) + store volatile i64 0, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + br i1 %c, label %true2, label %false2 + +false1: ; preds = %0 + store volatile i64 1, i64* %ptr, align 4 + br label %true1 + +true2: ; preds = %true1 + store volatile i64 2, i64* %ptr, align 8 + ret void + +false2: ; preds = %true1 + store volatile i64 3, i64* %ptr, align 8 + ret void +}