Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -444,6 +444,27 @@ return FindLIVLoopCondition(Cond, L, Changed, Cache); } +static BasicBlock *FindNextSuccessorBlock(TerminatorInst *TI) { + BasicBlock *BB = nullptr; + LLVMContext &Context = TI->getContext(); + + if (TI->getNumSuccessors() == 1) + BB = TI->getSuccessor(0); + else if (BranchInst *BI = dyn_cast(TI)) { + if (BI->getCondition() == ConstantInt::getTrue(Context)) + BB = BI->getSuccessor(0); + else if (BI->getCondition() == ConstantInt::getFalse(Context)) + BB = BI->getSuccessor(1); + } else if (SwitchInst *SI = dyn_cast(TI)) { + if (ConstantInt *CI = dyn_cast(SI->getCondition())) { + SwitchInst::CaseIt CItr = SI->findCaseValue(CI); + BB = CItr.getCaseSuccessor(); + } + } + + return BB; +} + bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { if (skipLoop(L)) return false; @@ -607,6 +628,45 @@ } } + // Given a terminator TI, if TI is in header, and all instructions + // between the beginning of the loop header and TI are guaranteed to + // execute, then no freeze is needed in unswitching it. + // ReachableTI is a set of terminator instructions which does not + // require freeze when being unswitched. + DenseSet ReachableTIs; + { + BasicBlock *BB = currentLoop->getHeader(); + while (BB) { + TerminatorInst *BBTI = BB->getTerminator(); + bool AlwaysReachableTI = true; + + // Check whether all instructions in BB transfers execution + // to successors. + for (auto &I : *BB) { + Instruction *Inst = &I; + if (!isGuaranteedToTransferExecutionToSuccessor(Inst)) { + AlwaysReachableTI = false; + break; + } + } + + if (!AlwaysReachableTI) + break; + + // BBTI is always reachable from the header of the loop. + ReachableTIs.insert(BBTI); + + // Now proceed to the next basic block, or stop here? + BB = FindNextSuccessorBlock(BBTI); + if (BB == nullptr) + break; + + // Check whether the block is already visited. + if (ReachableTIs.count(BB->getTerminator()) > 0) + break; + } + } + // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this // loop. @@ -624,6 +684,9 @@ !isGuaranteedToExecute(*TI, DT, currentLoop, &SafetyInfo)) continue; + // Is freeze needed when unswitching this branch? + bool NeedFreeze = ReachableTIs.count(TI) == 0; + if (BranchInst *BI = dyn_cast(TI)) { // Some branches may be rendered unreachable because of previous // unswitching. @@ -638,8 +701,10 @@ // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); + // Determine whether we should freeze the condition. if (LoopCond && - UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), true, TI)) { + UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), + NeedFreeze, TI)) { ++NumBranches; return true; } @@ -669,7 +734,7 @@ if (!UnswitchVal) continue; - if (UnswitchIfProfitable(LoopCond, UnswitchVal, true)) { + if (UnswitchIfProfitable(LoopCond, UnswitchVal, NeedFreeze)) { ++NumSwitches; return true; } @@ -915,6 +980,11 @@ // this scenario could be very common in practice. SmallSet Visited; + // If there exists an instruction I which is (1) between the beginning of + // the loop preheader and the branch to unswitch, and (2) not guaranteed to + // transfer execution to successor, then we need to freeze the condition. + bool NeedFreeze = false; + while (true) { // If we exit loop or reach a previous visited block, then // we can not reach any trivial condition candidates (unfoldable @@ -926,26 +996,21 @@ // Check if this loop will execute any side-effecting instructions (e.g. // stores, calls, volatile loads) in the part of the loop that the code // *would* execute. Check the header first. - for (Instruction &I : *CurrentBB) + for (Instruction &I : *CurrentBB) { if (I.mayHaveSideEffects()) return false; - - // FIXME: add check for constant foldable switch instructions. - if (BranchInst *BI = dyn_cast(CurrentTerm)) { - if (BI->isUnconditional()) { - CurrentBB = BI->getSuccessor(0); - } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { - CurrentBB = BI->getSuccessor(0); - } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { - CurrentBB = BI->getSuccessor(1); - } else { - // Found a trivial condition candidate: non-foldable conditional branch. - break; + if (!NeedFreeze) { + // There exists an instruction which can exit from the loop. + // Hoisting branch must not have undef/poison value as its condition. + NeedFreeze |= !isGuaranteedToTransferExecutionToSuccessor(&I); } - } else { - break; } + CurrentBB = FindNextSuccessorBlock(CurrentTerm); + if (CurrentBB == nullptr) + // Found a trivial condition candidate: non-foldable conditional branch. + break; + CurrentTerm = CurrentBB->getTerminator(); } @@ -985,7 +1050,7 @@ return false; // Can't handle this. UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, - true, CurrentTerm); + NeedFreeze, CurrentTerm); ++NumBranches; return true; } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { @@ -1028,7 +1093,7 @@ return false; // Can't handle this. UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, - true, nullptr); + NeedFreeze, nullptr); ++NumSwitches; return true; } Index: test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll =================================================================== --- test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll +++ test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll @@ -5,8 +5,7 @@ ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted ; STATS: 2 loop-unswitch - Number of switches unswitched -; CHECK: %c.fr = freeze i32 %c -; CHECK-NEXT: %1 = icmp eq i32 %c.fr, 1 +; CHECK: %1 = icmp eq i32 %c, 1 ; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 @@ -25,7 +24,7 @@ ; CHECK-NEXT: br label %loop_begin.backedge.us ; CHECK: .split: ; preds = %..split_crit_edge -; CHECK-NEXT: %2 = icmp eq i32 %c.fr, 2 +; CHECK-NEXT: %2 = icmp eq i32 %c, 2 ; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge ; CHECK: .split..split.split_crit_edge: ; preds = %.split @@ -50,7 +49,7 @@ ; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split ; CHECK-NEXT: %var_val = load i32, i32* %var -; CHECK-NEXT: switch i32 %c.fr, label %default.us-lcssa.us-lcssa [ +; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [ ; CHECK-NEXT: i32 1, label %inc ; CHECK-NEXT: i32 2, label %dec ; CHECK-NEXT: ] Index: test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll =================================================================== --- test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll +++ test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll @@ -7,9 +7,7 @@ ; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll' -; CHECK: %c.fr = freeze i32 %c - -; CHECK: %1 = icmp eq i32 %c.fr, 1 +; CHECK: %1 = icmp eq i32 %c, 1 ; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 @@ -35,7 +33,7 @@ ; CHECK-NEXT: br label %loop_begin ; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split -; CHECK: switch i32 %c.fr, label %second_switch [ +; CHECK: switch i32 %c, label %second_switch [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge ; CHECK-NEXT: ] Index: test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll =================================================================== --- test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll +++ test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll @@ -5,10 +5,8 @@ ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted ; STATS: 3 loop-unswitch - Number of switches unswitched -; CHECK: %c.fr = freeze i32 %c - ; CHECK: %d.fr = freeze i32 %d -; CHECK-NEXT: %1 = icmp eq i32 %c.fr, 1 +; CHECK-NEXT: %1 = icmp eq i32 %c, 1 ; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 @@ -69,7 +67,7 @@ ; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us ; CHECK-NEXT: %var_val.us2 = load i32, i32* %var -; CHECK-NEXT: switch i32 %c.fr, label %second_switch.us3 [ +; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us ; CHECK-NEXT: ] @@ -90,7 +88,7 @@ ; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split ; CHECK-NEXT: %var_val = load i32, i32* %var -; CHECK-NEXT: switch i32 %c.fr, label %second_switch [ +; CHECK-NEXT: switch i32 %c, label %second_switch [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge ; CHECK-NEXT: ] Index: test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,8 +16,7 @@ %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -; CHECK-NEXT: br i1 %cmp1.fr, label %for.body.us, label %for.body, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 if.then: ; preds = %for.body ; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 @@ -54,8 +53,7 @@ br label %for.body ;CHECK: entry: ;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2 -;CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -;CHECK-NEXT: br i1 %cmp1.fr, label %for.body, label %for.cond.cleanup.split, !prof !1 +;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1 ;CHECK: for.body: for.body: ; preds = %for.inc, %entry %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ] Index: test/Transforms/LoopUnswitch/copy-metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/copy-metadata.ll +++ test/Transforms/LoopUnswitch/copy-metadata.ll @@ -3,8 +3,7 @@ ; This test checks if unswitched condition preserve make.implicit metadata. define i32 @test(i1 %cond) { -; CHECK: %cond.fr = freeze i1 %cond -; CHECK: br i1 %cond.fr, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0 +; CHECK: br i1 %cond, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0 br label %loop_begin loop_begin: Index: test/Transforms/LoopUnswitch/freeze.ll =================================================================== --- test/Transforms/LoopUnswitch/freeze.ll +++ test/Transforms/LoopUnswitch/freeze.ll @@ -1,4 +1,5 @@ -; 6 test cases which must introduce a freeze instruction after loop unswitch +; 4 Test cases which must introduce a freeze instruction after loop unswitch +; , and 2 test cases which must not introduce a freeze instruction ; RUN: opt < %s -loop-simplify -loop-rotate -loop-unswitch -S | FileCheck %s @x = common global i32 0, align 4 @@ -259,8 +260,7 @@ br label %for.cond ; CHECK: %cmp1 = icmp eq i32 %b, 0 -; CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -; CHECK-NEXT: br i1 %cmp1.fr, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge +; CHECK-NEXT: br i1 %cmp1, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge for.cond: ; preds = %for.inc, %entry %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ] @@ -309,8 +309,7 @@ br label %for.cond ; CHECK: %cmp1 = icmp eq i32 %b, 0 -; CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -; CHECK-NEXT: br i1 %cmp1.fr, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge +; CHECK-NEXT: br i1 %cmp1, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge for.cond: ; preds = %for.inc, %entry %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ] Index: test/Transforms/LoopUnswitch/infinite-loop.ll =================================================================== --- test/Transforms/LoopUnswitch/infinite-loop.ll +++ test/Transforms/LoopUnswitch/infinite-loop.ll @@ -13,12 +13,10 @@ ; CHECK-LABEL: @func_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: %b.fr = freeze i1 %b -; CHECK-NEXT: %a.fr = freeze i1 %a -; CHECK-NEXT: br i1 %a.fr, label %entry.split, label %abort0.split +; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b.fr, label %for.body, label %abort1.split +; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split ; CHECK: for.body: ; CHECK-NEXT: br label %for.body Index: test/Transforms/LoopUnswitch/msan.ll =================================================================== --- test/Transforms/LoopUnswitch/msan.ll +++ test/Transforms/LoopUnswitch/msan.ll @@ -122,8 +122,7 @@ entry: ; CHECK: %[[Y:.*]] = load i64, i64* @y, align 8 ; CHECK-NEXT: %[[YB:.*]] = icmp eq i64 %[[Y]], 0 -; CHECK-NEXT: %[[YB]].fr = freeze i1 %[[YB]] -; CHECK-NEXT: br i1 %[[YB]].fr, +; CHECK-NEXT: br i1 %[[YB]], %x2 = alloca i8, align 1 %frombool1 = zext i1 %x to i8 Index: test/Transforms/LoopUnswitch/trivial-unswitch.ll =================================================================== --- test/Transforms/LoopUnswitch/trivial-unswitch.ll +++ test/Transforms/LoopUnswitch/trivial-unswitch.ll @@ -5,15 +5,13 @@ ; after unswitching the first one. -; CHECK: %cond2.fr = freeze i1 %cond2 -; CHECK: %cond1.fr = freeze i1 %cond1 -; CHECK: br i1 %cond1.fr, label %..split_crit_edge, label %.loop_exit.split_crit_edge +; CHECK: br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 ; CHECK: br label %.split ; CHECK: .split: ; preds = %..split_crit_edge -; CHECK: br i1 %cond2.fr, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge +; CHECK: br i1 %cond2, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge ; CHECK: .split..split.split_crit_edge: ; preds = %.split ; CHECK: br label %.split.split