Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -84,6 +84,7 @@
 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
+STATISTIC(NumHoistedBranches, "Number of hoisted branches");
 
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
@@ -875,6 +876,48 @@
         continue;
       }
 
+      // Try to hoist a guard expressed as a widenable condition.
+      if (isGuardAsWidenableBranch(&I) && MustExecuteWithoutWritesBefore(I)) {
+        Value *Cond, *WC;
+        BasicBlock *Guarded, *Deopt;
+        parseWidenableBranch(&I, Cond, WC, Guarded, Deopt);
+        // The prerequisites are the following:
+        // 1. The guard should have an invariant condition. Note that we only
+        //    check Cond; WC's side effects are modeled as if it were not
+        //    loop-invariant, but we can still hoist it.
+        // 2. All instructions in the deopt block should depend on
+        //    loop-invariant operands only. TODO: We could also allow operands
+        //    from the deopt block itself.
+        if (CurLoop->isLoopInvariant(Cond) &&
+            all_of(*Deopt, [&](Instruction &I) {
+              return CurLoop->hasLoopInvariantOperands(&I);
+            })) {
+          BranchInst &Guard = cast<BranchInst>(I);
+          Instruction *WCI = cast<Instruction>(WC);
+          Instruction *GuardCond = cast<Instruction>(Guard.getCondition());
+          BasicBlock *HoistBlock = CFH.getOrCreateHoistedBlock(BB);
+
+          // First, hoist WC and (Cond & WC) out of the loop.
+          hoist(*WCI, DT, CurLoop, HoistBlock, SafetyInfo, MSSAU, ORE);
+          hoist(*GuardCond, DT, CurLoop, HoistBlock, SafetyInfo, MSSAU, ORE);
+
+          // Create a guard in the preheader equivalent to the initial one.
+          SplitBlockAndInsertIfThen(
+              GuardCond, HoistBlock->getTerminator(), true,
+              Guard.getMetadata(LLVMContext::MD_prof), DT, LI, Deopt);
+          BranchInst *Term = cast<BranchInst>(HoistBlock->getTerminator());
+          Term->swapSuccessors();
+          Term->getSuccessor(0)->setName("guarded");
+
+          // Replace the old guard with an unconditional branch.
+          BranchInst::Create(Guarded, &Guard);
+          Guard.eraseFromParent();
+          NumHoistedBranches++;
+          Changed = true;
+          continue;
+        }
+      }
+
       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
         if (CFH.canHoistPHI(PN)) {
           // Redirect incoming blocks first to ensure that we create hoisted
Index: llvm/test/Transforms/LICM/guards_as_widenable_branches.ll
===================================================================
--- llvm/test/Transforms/LICM/guards_as_widenable_branches.ll
+++ llvm/test/Transforms/LICM/guards_as_widenable_branches.ll
@@ -0,0 +1,800 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: asserts
+; RUN: opt -licm -basicaa -ipt-expensive-asserts=true < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -ipt-expensive-asserts=true < %s -S | FileCheck %s
+
+; Hoist the guard and the load.
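+; The guard condition %cond is loop-invariant and the deopt state ("deopt"(i32 0))
+; does not depend on anything defined in the loop, so the whole widenable branch
+; is hoisted out of the loop, and the load %val follows it into the new preheader.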
+define void @test1(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[X_INC:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %x = phi i32 [ 0, %entry ], [ %x.inc, %guarded ] + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] + ret void + +guarded: ; preds = %loop + %val = load i32, i32* %ptr + %x.inc = add i32 %x, %val + br label %loop +} + +; Do not hoist the guard because the deopt block is dependent on in-loop value. +define void @test1_neg(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @test1_neg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !1 +; CHECK: deopt: +; CHECK-NEXT: [[X_LCSSA:%.*]] = phi i32 [ [[X]], [[LOOP]] ] +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0, i32 [[X_LCSSA]]) ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR:%.*]] +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %x = phi i32 [ 0, %entry ], [ %x.inc, %guarded ] + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0, i32 %x) ] + ret void + +guarded: ; preds = %loop + %val = load i32, i32* %ptr + %x.inc = add i32 %x, %val + br label %loop +} + +; Hoist the guard, eliminate store, promote load to Phi. 
+; NOTE: In intrinsic form LICM can't hoist over a side effect +define void @test2(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !1 +; CHECK: deopt: +; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], 0 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %x = phi i32 [ 0, %entry ], [ %x.inc, %guarded ] + store i32 0, i32* %ptr + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] + ret void + +guarded: ; preds = %loop + %val = load i32, i32* %ptr + %x.inc = add i32 %x, %val + br label %loop +} + +; Hoist store, guard and load. +; NOTE: In intrinsic form LICM can't hoist over a side effect +define void @test2b(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @test2b( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1 +; CHECK-NEXT: store i32 0, i32* [[P2]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[X_INC:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %x = phi i32 [ 0, %entry ], [ %x.inc, %guarded ] + %p2 = getelementptr i32, i32* %ptr, i32 1 + store i32 0, i32* %p2 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] + ret void + +guarded: ; preds = %loop + %val = load i32, i32* %ptr + %x.inc = add i32 %x, %val + br label %loop +} + +; Hoist guard. Eliminate store. Promote load to a Phi. +; NOTE: In intrinsic form, LICM cannot hoist load because of aliasing. 
+define void @test3(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[GUARDED1]] ], [ 0, [[GUARDED:%.*]] ] +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[X_INC:%.*]], [[GUARDED]] ] +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[TMP0]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %x = phi i32 [ 0, %entry ], [ %x.inc, %guarded ] + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0) ] + ret void + +guarded: ; preds = %loop + %val = load i32, i32* %ptr + store i32 0, i32* %ptr + %x.inc = add i32 %x, %val + br label %loop +} + +; Hoist load and guard. +define void @test4(i1 %c, i32* %p) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: br label [[BACKEDGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %guarded ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %if.true + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %backedge + call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %backedge + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %guarded + ret void +} + +; Do not hoist across a conditionally executed side effect. +define void @test4a(i1 %c, i32* %p, i32* %q) { +; CHECK-LABEL: @test4a( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: store i32 123, i32* [[Q:%.*]] +; CHECK-NEXT: br label [[BACKEDGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !1 +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %guarded ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + store i32 123, i32* %q + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %if.true + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %backedge + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %backedge + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %guarded + ret void +} + +; Do not hoist a conditionally executed guard. +define void @test4b(i1 %c, i32* %p, i32* %q) { +; CHECK-LABEL: @test4b( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !1 +; CHECK: deopt: +; CHECK-NEXT: call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %if.true + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %if.true + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %guarded + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret void +} + +; Check that we don't hoist across a store in the header. +define void @test4c(i1 %c, i32* %p, i8* noalias %s) { +; CHECK-LABEL: @test4c( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i8 0, i8* [[S:%.*]] +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: br label [[BACKEDGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %guarded ] + %iv.next = add i32 %iv, 1 + store i8 0, i8* %s + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %if.true + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %backedge + call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %backedge + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %guarded + ret void +} + +; Check that we don't hoist across a store in a conditionally execute block. +define void @test4d(i1 %c, i32* %p, i8* noalias %s) { +; CHECK-LABEL: @test4d( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: store i8 0, i8* [[S:%.*]] +; CHECK-NEXT: br label [[BACKEDGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !1 +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %guarded ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + store i8 0, i8* %s + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %if.true + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %backedge + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %backedge + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %guarded + ret void +} + +; Check that we don't hoist across a store before the guard in the backedge. 
+define void @test4e(i1 %c, i32* %p, i8* noalias %s) { +; CHECK-LABEL: @test4e( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: store i8 0, i8* [[S:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: br label [[BACKEDGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %guarded ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %if.true + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + store i8 0, i8* %s + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %backedge + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %backedge + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %guarded + ret void +} + +; Check that we can hoist the guard in spite of store which happens after. +define void @test4f(i1 %c, i32* %p, i8* noalias %s) { +; CHECK-LABEL: @test4f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED1:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded1: +; CHECK-NEXT: store i8 0, i8* [[S:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED1]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: br label [[BACKEDGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: br label [[GUARDED]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %guarded, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %guarded ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: ; preds = %loop + br label %backedge + +if.false: ; preds = %loop + br label %backedge + +backedge: ; preds = %if.false, %if.true + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %backedge + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %backedge + store i8 0, i8* %s + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %guarded + ret void +} + +; Do not hoist an invariant guard across a variant guard. +define void @test5(i1 %c, i32* %p, i32* %q) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[VARIANT_COND:%.*]] = icmp ne i32 [[A]], [[IV]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[VARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !1 +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND3]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !1 +; CHECK: deopt2: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded1: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %variant_cond = icmp ne i32 %a, %iv + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %variant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %loop + %widenable_cond3 = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond4 = and i1 %invariant_cond, %widenable_cond3 + br i1 %exiplicit_guard_cond4, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded1: ; preds = %guarded + br label %backedge + +backedge: ; preds = %guarded1 + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret void +} + +; Hoist an invariant guard, leave the following variant guard in the loop. +define void @test5a(i1 %c, i32* %p, i32* %q) { +; CHECK-LABEL: @test5a( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[INVARIANT_COND]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED2:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: guarded2: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[GUARDED2]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[VARIANT_COND:%.*]] = icmp ne i32 [[A]], [[IV]] +; CHECK-NEXT: br label [[GUARDED:%.*]] +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[VARIANT_COND]], [[WIDENABLE_COND3]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !1 +; CHECK: deopt2: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded1: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %variant_cond = icmp ne i32 %a, %iv + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %invariant_cond, %widenable_cond + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %loop + %widenable_cond3 = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond4 = and i1 %variant_cond, %widenable_cond3 + br i1 %exiplicit_guard_cond4, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded1: ; preds = %guarded + br label %backedge + +backedge: ; preds = %guarded1 + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret void +} + +declare void @llvm.experimental.deoptimize.isVoid(...) 
+
+; Function Attrs: inaccessiblememonly nounwind
+declare i1 @llvm.experimental.widenable.condition() #0
+
+attributes #0 = { inaccessiblememonly nounwind }
+
+!0 = !{!"branch_weights", i32 1048576, i32 1}
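
For reference, the rewrite performed by the new LICM code has the following shape, distilled from the CHECK lines of test1 above. This is only an illustrative sketch (the value and block names here, such as %wc and %guard.cond, are placeholders, not part of the patch); the exact output is what the CHECK lines in the tests assert. Before the transform, the widenable guard sits inside the loop:

    loop:
      ...
      %wc = call i1 @llvm.experimental.widenable.condition()
      %guard.cond = and i1 %cond, %wc        ; %cond is loop-invariant
      br i1 %guard.cond, label %guarded, label %deopt, !prof !0

After hoisting, an equivalent guard is emitted in the hoisted preheader block and the in-loop branch becomes unconditional:

    entry:
      %wc = call i1 @llvm.experimental.widenable.condition()
      %guard.cond = and i1 %cond, %wc
      br i1 %guard.cond, label %guarded.preheader, label %deopt, !prof !0

    loop:
      ...
      br label %guarded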