Index: lib/Transforms/Scalar/GuardWidening.cpp
===================================================================
--- lib/Transforms/Scalar/GuardWidening.cpp
+++ lib/Transforms/Scalar/GuardWidening.cpp
@@ -83,6 +83,13 @@
     cl::init(1000));
 
 
+static cl::opt<bool> InsertFictiveGuardsToPreheader(
+    "loop-guard-widening-insert-fictive-guards-to-preheader", cl::Hidden,
+    cl::desc("When turned on, loop guard widening will attempt to insert "
+             "fictive guards to loop preheader when it thinks that it may "
+             "enable more widening opportunities"),
+    cl::init(false));
+
 namespace {
 
 // Get the condition of \p I. It can either be a guard or a conditional branch.
@@ -844,6 +851,46 @@
     initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
+  /// Try to place a guard with a constant "true" condition into the preheader
+  /// of \p L, after the last guard found there and before the first following
+  /// instruction that may write to memory (or the block terminator).
+  /// \returns the inserted guard, or nullptr if nothing was inserted.
+  Instruction *insertFictiveGuardToPreheader(const Loop *L) {
+    // If the option is disabled, bail.
+    if (!InsertFictiveGuardsToPreheader)
+      return nullptr;
+    // Make sure that the loop has a preheader.
+    BasicBlock *Preheader = L->getLoopPreheader();
+    if (!Preheader)
+      return nullptr;
+
+    // Find last guard instruction in preheader. Insert a fictive guard after it
+    // but before any instruction that can write memory and invalidate the deopt
+    // bundle.
+    Instruction *InsertionPoint = nullptr;
+    IntrinsicInst *LastSeenGuard = nullptr;
+
+    for (Instruction &I : *Preheader)
+      if (isa<IntrinsicInst>(I) && isGuard(&I)) {
+        LastSeenGuard = cast<IntrinsicInst>(&I);
+        InsertionPoint = nullptr;
+      } else if (!InsertionPoint && (I.mayWriteToMemory() || I.isTerminator()))
+        InsertionPoint = &I;
+
+    // If there are no instructions between the guard and the insertion point,
+    // it makes no sense to insert the fictive guard.
+    if (!LastSeenGuard || LastSeenGuard->getNextNode() == InsertionPoint)
+      return nullptr;
+
+    assert(InsertionPoint && "Should be!");
+    // If we think that inserting a fictive guard with "true" condition here
+    // might be profitable, do it.
+    auto *NewGuard = LastSeenGuard->clone();
+    setCondition(NewGuard, ConstantInt::getTrue(LastSeenGuard->getContext()));
+    NewGuard->insertBefore(InsertionPoint);
+    return NewGuard;
+  }
+
   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
     if (skipLoop(L))
       return false;
@@ -860,8 +907,16 @@
     BranchProbabilityInfo *BPI = nullptr;
     if (WidenFrequentBranches)
       BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
-    return GuardWideningImpl(DT, PDT, LI, BPI,
-                             DT.getNode(RootBB), BlockFilter).run();
+    Instruction *FictiveGuard = insertFictiveGuardToPreheader(L);
+    bool Changed = GuardWideningImpl(DT, PDT, LI, BPI,
+                                     DT.getNode(RootBB), BlockFilter).run();
+    // If the fictive guard wasn't useful for widening, erase it.
+    if (FictiveGuard && FictiveGuard->getParent() &&
+        getCondition(FictiveGuard) ==
+            ConstantInt::getTrue(FictiveGuard->getContext()))
+      // Do not use eliminateGuard: we don't want to mess up the statistics.
+      FictiveGuard->eraseFromParent();
+    return Changed;
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
Index: test/Transforms/GuardWidening/fictive-guards.ll
===================================================================
--- /dev/null
+++ test/Transforms/GuardWidening/fictive-guards.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-guard-widening-insert-fictive-guards-to-preheader=true -S -loop-guard-widening < %s | FileCheck %s
+
+; Test fictive guards insertion mechanism.
+
+declare void @llvm.experimental.guard(i1,...)
+
+; Check that we can insert another guard and widen into it if widening into the
+; existing guard is impossible.
+define void @test_01(i1 %c, i32* %p) {
+; CHECK-LABEL: @test_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[C:%.*]]) [ "deopt"() ]
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT:    [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 true, [[INVARIANT_COND]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+  %a = load i32, i32* %p
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Make sure that the fictive guard does not exist after widening.
+define void @test_02(i1 %c, i1 %c2, i32* %p) {
+; CHECK-LABEL: @test_02(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[C:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
+; CHECK-NEXT:    [[UNNEEDED_COND:%.*]] = or i1 [[C]], [[C2]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+  %unneeded_cond = or i1 %c, %c2
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  call void (i1, ...) @llvm.experimental.guard(i1 %c2) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Make sure that we don't insert a fictive guard after the memory-writing
+; instruction.
+define void @test_03(i1 %c, i32* %p, i32* %s) {
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[C:%.*]]) [ "deopt"() ]
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT:    [[INVARIANT_COND:%.*]] = icmp ne i32 [[A]], 100
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 true, [[INVARIANT_COND]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
+; CHECK-NEXT:    store i32 0, i32* [[S:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+  %a = load i32, i32* %p
+  store i32 0, i32* %s
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %invariant_cond = icmp ne i32 %a, 100
+  call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ]
+  %loop_cond = icmp slt i32 %iv.next, 1000
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}