Index: llvm/lib/Transforms/Scalar/GuardWidening.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -43,6 +43,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -134,6 +135,7 @@ DominatorTree &DT; PostDominatorTree *PDT; LoopInfo &LI; + BlockFrequencyInfo *BFI; AssumptionCache ∾ MemorySSAUpdater *MSSAU; @@ -292,10 +294,11 @@ public: explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, - LoopInfo &LI, AssumptionCache &AC, - MemorySSAUpdater *MSSAU, DomTreeNode *Root, + LoopInfo &LI, BlockFrequencyInfo *BFI, + AssumptionCache &AC, MemorySSAUpdater *MSSAU, + DomTreeNode *Root, std::function BlockFilter) - : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root), + : DT(DT), PDT(PDT), LI(LI), BFI(BFI), AC(AC), MSSAU(MSSAU), Root(Root), BlockFilter(BlockFilter) {} /// The entry point for this pass. @@ -425,6 +428,12 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, Instruction *DominatingGuard, bool InvertCond) { + if (BFI) + // Rough heuristic: if the dominating guard is at least twice hotter than + // dominated instruction, bail. + if (BFI->getBlockFreq(DominatingGuard->getParent()).getFrequency() / 2 > + BFI->getBlockFreq(DominatedInstr->getParent()).getFrequency()) + return WS_IllegalOrNegative; Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent()); bool HoistingOutOfLoop = false; @@ -829,15 +838,16 @@ FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &LI = AM.getResult(F); + auto &BFI = AM.getResult(F); auto &PDT = AM.getResult(F); auto &AC = AM.getResult(F); auto *MSSAA = AM.getCachedResult(F); std::unique_ptr MSSAU; if (MSSAA) MSSAU = std::make_unique(&MSSAA->getMSSA()); - if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, - DT.getRootNode(), [](BasicBlock *) { return true; }) - .run()) + if (!GuardWideningImpl(DT, &PDT, LI, &BFI, AC, MSSAU ? MSSAU.get() : nullptr, + DT.getRootNode(), + [](BasicBlock *) { return true; }).run()) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -858,10 +868,9 @@ std::unique_ptr MSSAU; if (AR.MSSA) MSSAU = std::make_unique(AR.MSSA); - if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC, + if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.BFI, AR.AC, MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB), - BlockFilter) - .run()) + BlockFilter).run()) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -883,16 +892,16 @@ return false; auto &DT = getAnalysis().getDomTree(); auto &LI = getAnalysis().getLoopInfo(); + auto *BFI = &getAnalysis().getBFI(); auto &AC = getAnalysis().getAssumptionCache(F); auto &PDT = getAnalysis().getPostDomTree(); auto *MSSAWP = getAnalysisIfAvailable(); std::unique_ptr MSSAU; if (MSSAWP) MSSAU = std::make_unique(&MSSAWP->getMSSA()); - return GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, - DT.getRootNode(), - [](BasicBlock *) { return true; }) - .run(); + return GuardWideningImpl(DT, &PDT, LI, BFI, AC, + MSSAU ? MSSAU.get() : nullptr, DT.getRootNode(), + [](BasicBlock *) { return true; }).run(); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -918,6 +927,7 @@ return false; auto &DT = getAnalysis().getDomTree(); auto &LI = getAnalysis().getLoopInfo(); + auto *BFI = &getAnalysis().getBFI(); auto &AC = getAnalysis().getAssumptionCache( *L->getHeader()->getParent()); auto *PDTWP = getAnalysisIfAvailable(); @@ -933,9 +943,9 @@ auto BlockFilter = [&](BasicBlock *BB) { return BB == RootBB || L->contains(BB); }; - return GuardWideningImpl(DT, PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, - DT.getNode(RootBB), BlockFilter) - .run(); + return GuardWideningImpl(DT, PDT, LI, BFI, AC, + MSSAU ? MSSAU.get() : nullptr, DT.getNode(RootBB), + BlockFilter).run(); } void getAnalysisUsage(AnalysisUsage &AU) const override { Index: llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll =================================================================== --- llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll +++ llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll @@ -289,9 +289,8 @@ ; CHECK-LABEL: @f_5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_0:%.*]] = icmp ugt i32 [[A:%.*]], 7 -; CHECK-NEXT: [[WIDE_CHK:%.*]] = icmp uge i32 [[A]], 11 ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] @@ -302,7 +301,7 @@ ; CHECK-NEXT: [[COND_1:%.*]] = icmp ugt i32 [[A]], 10 ; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND3]] -; CHECK-NEXT: br i1 true, label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] ; CHECK-NEXT: ret void @@ -830,9 +829,8 @@ ; CHECK-LABEL: @f_13( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 14 -; CHECK-NEXT: [[WIDE_CHK:%.*]] = icmp ult i32 [[A]], 10 ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] @@ -843,7 +841,7 @@ ; CHECK-NEXT: [[COND_1:%.*]] = icmp slt i32 [[A]], 10 ; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND3]] -; CHECK-NEXT: br i1 true, label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] ; CHECK-NEXT: ret void Index: llvm/test/Transforms/GuardWidening/profile-based-profitability-intrinsics.ll =================================================================== --- llvm/test/Transforms/GuardWidening/profile-based-profitability-intrinsics.ll +++ llvm/test/Transforms/GuardWidening/profile-based-profitability-intrinsics.ll @@ -120,17 +120,17 @@ ret i32 -1 } -; FIXME: This loop is so rarely entered, that we don't want to widen here. +; This loop is so rarely entered, that we don't want to widen here. define i32 @test_intrinsic_very_unprofitable(i32 %n, i1 %cond.1, i1 %cond.2) { ; CHECK-LABEL: define i32 @test_intrinsic_very_unprofitable ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND_1:%.*]], i1 [[COND_2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_2]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND_1]]) [ "deopt"() ] ; CHECK-NEXT: [[LOOP_PRECONDITION:%.*]] = icmp uge i32 [[N]], 100 ; CHECK-NEXT: br i1 [[LOOP_PRECONDITION]], label [[LOOP:%.*]], label [[FAILED:%.*]], !prof [[PROF4:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND_2]]) [ "deopt"() ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 100 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]], !prof [[PROF1]] @@ -158,17 +158,17 @@ ret i32 -1 } -; FIXME: This loop is so rarely entered, that we don't want to widen here. +; This loop is so rarely entered, that we don't want to widen here. define i32 @test_intrinsic_unprofitable(i32 %n, i1 %cond.1, i1 %cond.2) { ; CHECK-LABEL: define i32 @test_intrinsic_unprofitable ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND_1:%.*]], i1 [[COND_2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_2]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND_1]]) [ "deopt"() ] ; CHECK-NEXT: [[LOOP_PRECONDITION:%.*]] = icmp uge i32 [[N]], 100 ; CHECK-NEXT: br i1 [[LOOP_PRECONDITION]], label [[LOOP:%.*]], label [[FAILED:%.*]], !prof [[PROF5:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND_2]]) [ "deopt"() ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 100 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]], !prof [[PROF1]] Index: llvm/test/Transforms/GuardWidening/profile-based-profitability_explicit.ll =================================================================== --- llvm/test/Transforms/GuardWidening/profile-based-profitability_explicit.ll +++ llvm/test/Transforms/GuardWidening/profile-based-profitability_explicit.ll @@ -204,14 +204,13 @@ ret i32 -1 } -; FIXME: This loop is so rarely entered, that we don't want to widen here. +; This loop is so rarely entered, that we don't want to widen here. define i32 @test_intrinsic_very_unprofitable(i32 %n, i1 %cond.1, i1 %cond.2) { ; CHECK-LABEL: define i32 @test_intrinsic_very_unprofitable ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND_1:%.*]], i1 [[COND_2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_2]] ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -223,7 +222,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED]] ], [ [[IV_NEXT:%.*]], [[GUARDED1:%.*]] ] ; CHECK-NEXT: [[WIDENABLE_COND4:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND5:%.*]] = and i1 [[COND_2]], [[WIDENABLE_COND4]] -; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: [[DEOPTCALL3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTCALL3]] @@ -271,14 +270,13 @@ ret i32 -1 } -; FIXME: This loop is so rarely entered, that we don't want to widen here. +; This loop is so rarely entered, that we don't want to widen here. define i32 @test_intrinsic_unprofitable(i32 %n, i1 %cond.1, i1 %cond.2) { ; CHECK-LABEL: define i32 @test_intrinsic_unprofitable ; CHECK-SAME: (i32 [[N:%.*]], i1 [[COND_1:%.*]], i1 [[COND_2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_2]] ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -290,7 +288,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED]] ], [ [[IV_NEXT:%.*]], [[GUARDED1:%.*]] ] ; CHECK-NEXT: [[WIDENABLE_COND4:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND5:%.*]] = and i1 [[COND_2]], [[WIDENABLE_COND4]] -; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND5]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: [[DEOPTCALL3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTCALL3]]