Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -456,6 +457,11 @@ // that the next instruction visited is guaranteed to execute if the loop // is entered. bool IsMustExecute = CurLoop->getHeader() == BB; + // Keep track of whether the prefix instructions could have written memory. + // TODO: This and IsMustExecute may be done smarter if we keep track of all + // throwing and mem-writing operations in every block, e.g. using something + // similar to isGuaranteedToExecute. + bool IsMemoryNotModified = CurLoop->getHeader() == BB; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { Instruction &I = *II++; @@ -513,8 +519,23 @@ continue; } + using namespace PatternMatch; + if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()) && + IsMustExecute && IsMemoryNotModified && + CurLoop->hasLoopInvariantOperands(&I)) { + bool Hoisted = hoist(I, DT, CurLoop, SafetyInfo, ORE); + Changed |= Hoisted; + // If we haven't hoisted the guard, we need to update IsMustExecute. + // TODO: in current implementation, hoist always returns true. Consider + // making it void and inserting an unconditional continue here. 
+ if (Hoisted) + continue; + } + if (IsMustExecute) IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I); + if (IsMemoryNotModified) + IsMemoryNotModified = !I.mayWriteToMemory(); } } Index: test/Transforms/GuardWidening/loop-schedule.ll =================================================================== --- test/Transforms/GuardWidening/loop-schedule.ll +++ test/Transforms/GuardWidening/loop-schedule.ll @@ -41,8 +41,8 @@ ; CHECK: %cond_0 = icmp ult i32 %a, 10 ; CHECK: %cond_1 = icmp ult i32 %b, 10 ; CHECK: %wide.chk = and i1 %cond_0, %cond_1 -; CHECK-LABEL: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] +; CHECK-LABEL: loop: entry: br label %loop Index: test/Transforms/LICM/guards.ll =================================================================== --- test/Transforms/LICM/guards.ll +++ test/Transforms/LICM/guards.ll @@ -1,13 +1,13 @@ ; RUN: opt -licm -basicaa < %s -S | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remarks-emit>,loop(licm)' < %s -S | FileCheck %s -; TODO: should be able to hoist both guard and load +; Hoist guard and load. define void @test1(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test1( ; CHECK-LABEL: entry: -; CHECK-LABEL: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond) ; CHECK: %val = load i32, i32* %ptr +; CHECK-LABEL: loop: entry: br label %loop @@ -62,13 +62,13 @@ } -; Could hoist, but don't +; Hoist guard and load. define void @test3(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test3( ; CHECK-LABEL: entry: -; CHECK-LABEL: loop: ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond) -; CHECK: %val = load i32, i32* %ptr +; CHECK: %ptr.promoted = load i32, i32* %ptr +; CHECK-LABEL: loop: entry: br label %loop @@ -82,4 +82,183 @@ br label %loop } + +; TODO: We can also hoist this guard from a mustexec non-header block. 
+define void @test4(i1 %c, i32* %p) { + +; CHECK-LABEL: @test4( +; CHECK-LABEL: entry: +; CHECK: %a = load i32, i32* %p +; CHECK: %invariant_cond = icmp ne i32 %a, 100 +; CHECK-LABEL: loop: +; CHECK-LABEL: backedge: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) + +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: + br label %backedge + +if.false: + br label %backedge + +backedge: + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ] + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: + ret void +} + +; Do not hoist across a conditionally executed side effect. +define void @test4a(i1 %c, i32* %p, i32* %q) { + +; CHECK-LABEL: @test4a( +; CHECK-LABEL: entry: +; CHECK-LABEL: loop: +; CHECK-LABEL: if.true: +; CHECK: store +; CHECK-LABEL: backedge: +; CHECK: %a = load i32, i32* %p +; CHECK: %invariant_cond = icmp ne i32 %a, 100 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) + +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: + store i32 123, i32* %q + br label %backedge + +if.false: + br label %backedge + +backedge: + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ] + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: + ret void +} + +; Do not hoist a conditionally executed guard. 
+define void @test4b(i1 %c, i32* %p, i32* %q) { + +; CHECK-LABEL: @test4b( +; CHECK-LABEL: entry: +; CHECK-LABEL: loop: +; CHECK-LABEL: if.true: +; CHECK: %a = load i32, i32* %p +; CHECK: %invariant_cond = icmp ne i32 %a, 100 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) +; CHECK-LABEL: backedge: + +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %if.true, label %if.false + +if.true: + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ] + br label %backedge + +if.false: + br label %backedge + +backedge: + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: + ret void +} + +; Do not hoist an invariant guard across a variant guard. +define void @test5(i1 %c, i32* %p, i32* %q) { + +; CHECK-LABEL: @test5( +; CHECK-LABEL: entry: +; CHECK: %a = load i32, i32* %p +; CHECK: %invariant_cond = icmp ne i32 %a, 100 +; CHECK-LABEL: loop: +; CHECK: %variant_cond = icmp ne i32 %a, %iv +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %variant_cond) +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) + +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %variant_cond = icmp ne i32 %a, %iv + call void (i1, ...) @llvm.experimental.guard(i1 %variant_cond) [ "deopt"() ] + call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ] + br label %backedge + +backedge: + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: + ret void +} + +; Hoist an invariant guard, leave the following variant guard in the loop. 
+define void @test5a(i1 %c, i32* %p, i32* %q) { + +; CHECK-LABEL: @test5a( +; CHECK-LABEL: entry: +; CHECK: %a = load i32, i32* %p +; CHECK: %invariant_cond = icmp ne i32 %a, 100 +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) +; CHECK-LABEL: loop: +; CHECK: %variant_cond = icmp ne i32 %a, %iv +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %variant_cond) + +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %iv.next = add i32 %iv, 1 + %a = load i32, i32* %p + %invariant_cond = icmp ne i32 %a, 100 + %variant_cond = icmp ne i32 %a, %iv + call void (i1, ...) @llvm.experimental.guard(i1 %invariant_cond) [ "deopt"() ] + call void (i1, ...) @llvm.experimental.guard(i1 %variant_cond) [ "deopt"() ] + br label %backedge + +backedge: + %loop_cond = icmp slt i32 %iv.next, 1000 + br i1 %loop_cond, label %loop, label %exit + +exit: + ret void +} + declare void @llvm.experimental.guard(i1, ...)