Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -498,6 +498,27 @@ return {FCond, OpChain}; } +static BasicBlock *FindNextSuccessorBlock(TerminatorInst *TI) { + BasicBlock *BB = nullptr; + LLVMContext &Context = TI->getContext(); + + if (TI->getNumSuccessors() == 1) + BB = TI->getSuccessor(0); + else if (BranchInst *BI = dyn_cast(TI)) { + if (BI->getCondition() == ConstantInt::getTrue(Context)) + BB = BI->getSuccessor(0); + else if (BI->getCondition() == ConstantInt::getFalse(Context)) + BB = BI->getSuccessor(1); + } else if (SwitchInst *SI = dyn_cast(TI)) { + if (ConstantInt *CI = dyn_cast(SI->getCondition())) { + SwitchInst::CaseIt CItr = SI->findCaseValue(CI); + BB = CItr->getCaseSuccessor(); + } + } + + return BB; +} + bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { if (skipLoop(L)) return false; @@ -638,6 +659,45 @@ } } + // Given a terminator TI, if TI is in header, and all instructions + // between the beginning of the loop header and TI are guaranteed to + // execute, then no freeze is needed in unswitching it. + // ReachableTI is a set of terminator instructions which does not + // require freeze when being unswitched. + DenseSet ReachableTIs; + { + BasicBlock *BB = currentLoop->getHeader(); + while (BB) { + TerminatorInst *BBTI = BB->getTerminator(); + bool AlwaysReachableTI = true; + + // Check whether all instructions in BB transfers execution + // to successors. + for (auto &I : *BB) { + Instruction *Inst = &I; + if (!isGuaranteedToTransferExecutionToSuccessor(Inst)) { + AlwaysReachableTI = false; + break; + } + } + + if (!AlwaysReachableTI) + break; + + // BBTI is always reachable from the header of the loop. + ReachableTIs.insert(BBTI); + + // Now proceed to the next basic block, or stop here? + BB = FindNextSuccessorBlock(BBTI); + if (BB == nullptr) + break; + + // Check whether the block is already visited. + if (ReachableTIs.count(BB->getTerminator()) > 0) + break; + } + } + // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this // loop. @@ -655,6 +715,9 @@ !isGuaranteedToExecute(*TI, DT, currentLoop, &SafetyInfo)) continue; + // Is freeze needed when unswitching this branch? + bool NeedFreeze = ReachableTIs.count(TI) == 0; + if (BranchInst *BI = dyn_cast(TI)) { // Some branches may be rendered unreachable because of previous // unswitching. @@ -669,8 +732,10 @@ // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed).first; + // Determine whether we should freeze the condition. if (LoopCond && - UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), true, TI)) { + UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), + NeedFreeze, TI)) { ++NumBranches; return true; } @@ -724,7 +789,7 @@ if (!UnswitchVal) continue; - if (UnswitchIfProfitable(LoopCond, UnswitchVal, true)) { + if (UnswitchIfProfitable(LoopCond, UnswitchVal, NeedFreeze)) { ++NumSwitches; // In case of a full LIV, UnswitchVal is the value we unswitched out. // In case of a partial LIV, we only unswitch when its an AND-chain @@ -989,6 +1054,11 @@ // this scenario could be very common in practice. SmallSet Visited; + // If there exists an instruction I which is (1) between the beginning of + // the loop preheader and the branch to unswitch, and (2) not guaranteed to + // transfer execution to successor, then we need to freeze the condition. + bool NeedFreeze = false; + while (true) { // If we exit loop or reach a previous visited block, then // we can not reach any trivial condition candidates (unfoldable @@ -1000,34 +1070,22 @@ // Check if this loop will execute any side-effecting instructions (e.g. // stores, calls, volatile loads) in the part of the loop that the code // *would* execute. Check the header first. - for (Instruction &I : *CurrentBB) + for (Instruction &I : *CurrentBB) { if (I.mayHaveSideEffects()) return false; - if (BranchInst *BI = dyn_cast(CurrentTerm)) { - if (BI->isUnconditional()) { - CurrentBB = BI->getSuccessor(0); - } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { - CurrentBB = BI->getSuccessor(0); - } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { - CurrentBB = BI->getSuccessor(1); - } else { - // Found a trivial condition candidate: non-foldable conditional branch. - break; + if (!NeedFreeze) { + // There exists an instruction which can exit from the loop. + // Hoisting branch must not have undef/poison value as its condition. + NeedFreeze |= !isGuaranteedToTransferExecutionToSuccessor(&I); } - } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { - // At this point, any constant-foldable instructions should have probably - // been folded. - ConstantInt *Cond = dyn_cast(SI->getCondition()); - if (!Cond) - break; - // Find the target block we are definitely going to. - CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor(); - } else { - // We do not understand these terminator instructions. - break; } + CurrentBB = FindNextSuccessorBlock(CurrentTerm); + if (CurrentBB == nullptr) + // Found a trivial condition candidate: non-foldable conditional branch. + break; + CurrentTerm = CurrentBB->getTerminator(); } @@ -1067,7 +1125,7 @@ return false; // Can't handle this. UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, - true, CurrentTerm); + NeedFreeze, CurrentTerm); ++NumBranches; return true; } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { @@ -1109,7 +1167,7 @@ return false; // Can't handle this. UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, - true, nullptr); + NeedFreeze, nullptr); // We are only unswitching full LIV. BranchesInfo.setUnswitched(SI, CondVal); Index: test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll =================================================================== --- test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll +++ test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll @@ -4,8 +4,7 @@ ; STATS: 2 loop-unswitch - Number of switches unswitched -; CHECK: %c.fr = freeze i32 %c -; CHECK-NEXT: %1 = icmp eq i32 %c.fr, 1 +; CHECK: %1 = icmp eq i32 %c, 1 ; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 @@ -24,7 +23,7 @@ ; CHECK-NEXT: br label %loop_begin.backedge.us ; CHECK: .split: ; preds = %..split_crit_edge -; CHECK-NEXT: %2 = icmp eq i32 %c.fr, 2 +; CHECK-NEXT: %2 = icmp eq i32 %c, 2 ; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge ; CHECK: .split..split.split_crit_edge: ; preds = %.split @@ -49,7 +48,7 @@ ; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split ; CHECK-NEXT: %var_val = load i32, i32* %var -; CHECK-NEXT: switch i32 %c.fr, label %default.us-lcssa.us-lcssa [ +; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [ ; CHECK-NEXT: i32 1, label %inc ; CHECK-NEXT: i32 2, label %dec ; CHECK-NEXT: ] Index: test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll =================================================================== --- test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll +++ test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll @@ -6,9 +6,7 @@ ; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll' -; CHECK: %c.fr = freeze i32 %c - -; CHECK: %1 = icmp eq i32 %c.fr, 1 +; CHECK: %1 = icmp eq i32 %c, 1 ; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 @@ -34,7 +32,7 @@ ; CHECK-NEXT: br label %loop_begin ; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split -; CHECK: switch i32 %c.fr, label %second_switch [ +; CHECK: switch i32 %c, label %second_switch [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge ; CHECK-NEXT: ] Index: test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll =================================================================== --- test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll +++ test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll @@ -4,10 +4,8 @@ ; STATS: 3 loop-unswitch - Number of switches unswitched -; CHECK: %c.fr = freeze i32 %c - ; CHECK: %d.fr = freeze i32 %d -; CHECK-NEXT: %1 = icmp eq i32 %c.fr, 1 +; CHECK-NEXT: %1 = icmp eq i32 %c, 1 ; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 @@ -68,7 +66,7 @@ ; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us ; CHECK-NEXT: %var_val.us2 = load i32, i32* %var -; CHECK-NEXT: switch i32 %c.fr, label %second_switch.us3 [ +; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us ; CHECK-NEXT: ] @@ -89,7 +87,7 @@ ; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split ; CHECK-NEXT: %var_val = load i32, i32* %var -; CHECK-NEXT: switch i32 %c.fr, label %second_switch [ +; CHECK-NEXT: switch i32 %c, label %second_switch [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge ; CHECK-NEXT: ] Index: test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,8 +16,7 @@ %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -; CHECK-NEXT: br i1 %cmp1.fr, label %for.body.us, label %for.body, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 if.then: ; preds = %for.body ; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 @@ -54,8 +53,7 @@ br label %for.body ;CHECK: entry: ;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2 -;CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -;CHECK-NEXT: br i1 %cmp1.fr, label %for.body, label %for.cond.cleanup.split, !prof !1 +;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1 ;CHECK: for.body: for.body: ; preds = %for.inc, %entry %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ] Index: test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll =================================================================== --- test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll +++ test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll @@ -6,11 +6,9 @@ ; CHECK-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch ; CHECK: entry: ; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp -; CHECK-NEXT: br i1 [[LOOP_COND]] -; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456 -; CHECK-NEXT: [[IF_COND]].fr = freeze i1 [[IF_COND]] -; CHECK-NEXT: br i1 [[IF_COND]] +; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]] +; CHECK-NEXT: br i1 define amdgpu_kernel void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) { entry: Index: test/Transforms/LoopUnswitch/basictest.ll =================================================================== --- test/Transforms/LoopUnswitch/basictest.ll +++ test/Transforms/LoopUnswitch/basictest.ll @@ -106,8 +106,7 @@ ; CHECK: define void @and_i2_as_switch_input(i2 ; CHECK: entry: ; This is an indication that the loop has been unswitched. -; CHECK: freeze i2 %a -; CHECK: icmp eq i2 %a.fr, 0 +; CHECK: icmp eq i2 %a, 0 ; CHECK: br ; There should be no more unswitching after the 1st unswitch. ; CHECK-NOT: icmp eq @@ -151,8 +150,7 @@ ; CHECK: define void @or_i2_as_switch_input(i2 ; CHECK: entry: ; This is an indication that the loop has been unswitched. -; CHECK: freeze i2 %a -; CHECK: icmp eq i2 %a.fr, -1 +; CHECK: icmp eq i2 %a, -1 ; CHECK: br ; There should be no more unswitching after the 1st unswitch. ; CHECK-NOT: icmp eq @@ -198,8 +196,7 @@ ; CHECK: define void @or_i2_as_switch_input_unswitch_default(i2 ; CHECK: entry: ; This is an indication that the loop has been unswitched. -; CHECK: freeze i2 %a -; CHECK: icmp eq i2 %a.fr, -1 +; CHECK: icmp eq i2 %a, -1 ; CHECK: br ; There should be no more unswitching after the 1st unswitch. ; CHECK-NOT: icmp eq Index: test/Transforms/LoopUnswitch/copy-metadata.ll =================================================================== --- test/Transforms/LoopUnswitch/copy-metadata.ll +++ test/Transforms/LoopUnswitch/copy-metadata.ll @@ -4,8 +4,7 @@ define i32 @test(i1 %cond) { ; CHECK-LABEL: @test( -; CHECK: %cond.fr = freeze i1 %cond -; CHECK: br i1 %cond.fr, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0 +; CHECK: br i1 %cond, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0 br label %loop_begin loop_begin: Index: test/Transforms/LoopUnswitch/freeze.ll =================================================================== --- test/Transforms/LoopUnswitch/freeze.ll +++ test/Transforms/LoopUnswitch/freeze.ll @@ -1,4 +1,5 @@ -; 6 test cases which must introduce a freeze instruction after loop unswitch +; 4 Test cases which must introduce a freeze instruction after loop unswitch +; , and 2 test cases which must not introduce a freeze instruction ; RUN: opt < %s -loop-simplify -loop-rotate -loop-unswitch -S | FileCheck %s @x = common global i32 0, align 4 @@ -259,8 +260,7 @@ br label %for.cond ; CHECK: %cmp1 = icmp eq i32 %b, 0 -; CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -; CHECK-NEXT: br i1 %cmp1.fr, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge +; CHECK-NEXT: br i1 %cmp1, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge for.cond: ; preds = %for.inc, %entry %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ] @@ -309,8 +309,7 @@ br label %for.cond ; CHECK: %cmp1 = icmp eq i32 %b, 0 -; CHECK-NEXT: %cmp1.fr = freeze i1 %cmp1 -; CHECK-NEXT: br i1 %cmp1.fr, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge +; CHECK-NEXT: br i1 %cmp1, label %for.body.lr.ph.split.us, label %for.body.lr.ph.for.body.lr.ph.split_crit_edge for.cond: ; preds = %for.inc, %entry %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ] Index: test/Transforms/LoopUnswitch/infinite-loop.ll =================================================================== --- test/Transforms/LoopUnswitch/infinite-loop.ll +++ test/Transforms/LoopUnswitch/infinite-loop.ll @@ -13,12 +13,10 @@ ; CHECK-LABEL: @func_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: %b.fr = freeze i1 %b -; CHECK-NEXT: %a.fr = freeze i1 %a -; CHECK-NEXT: br i1 %a.fr, label %entry.split, label %abort0.split +; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b.fr, label %for.body, label %abort1.split +; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split ; CHECK: for.body: ; CHECK-NEXT: br label %for.body Index: test/Transforms/LoopUnswitch/msan.ll =================================================================== --- test/Transforms/LoopUnswitch/msan.ll +++ test/Transforms/LoopUnswitch/msan.ll @@ -122,8 +122,7 @@ entry: ; CHECK: %[[Y:.*]] = load i64, i64* @y, align 8 ; CHECK-NEXT: %[[YB:.*]] = icmp eq i64 %[[Y]], 0 -; CHECK-NEXT: %[[YB]].fr = freeze i1 %[[YB]] -; CHECK-NEXT: br i1 %[[YB]].fr, +; CHECK-NEXT: br i1 %[[YB]], %x2 = alloca i8, align 1 %frombool1 = zext i1 %x to i8 Index: test/Transforms/LoopUnswitch/trivial-unswitch.ll =================================================================== --- test/Transforms/LoopUnswitch/trivial-unswitch.ll +++ test/Transforms/LoopUnswitch/trivial-unswitch.ll @@ -5,15 +5,13 @@ ; after unswitching the first one. -; CHECK: %cond2.fr = freeze i1 %cond2 -; CHECK: %cond1.fr = freeze i1 %cond1 -; CHECK: br i1 %cond1.fr, label %..split_crit_edge, label %.loop_exit.split_crit_edge +; CHECK: br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 ; CHECK: br label %.split ; CHECK: .split: ; preds = %..split_crit_edge -; CHECK: br i1 %cond2.fr, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge +; CHECK: br i1 %cond2, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge ; CHECK: .split..split.split_crit_edge: ; preds = %.split ; CHECK: br label %.split.split @@ -54,8 +52,7 @@ ; ; CHECK: define i32 @test2( ; This is an indication that the loop has been unswitched on %cond1. -; CHECK: %cond1.fr = freeze i1 %cond -; CHECK: br i1 %cond1.fr, label %..split_crit_edge, label %.loop_exit.split_crit_edge +; CHECK: br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge ; CHECK: ..split_crit_edge: ; preds = %0 ; CHECK: br label %.split