Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -1515,6 +1516,37 @@ SafetyInfo->BlockColors = colorEHFunclets(*Fn); } +/// Return true if we can prove that the given ExitBlock is not reached on the +/// first iteration of the given loop. That is, the backedge of the loop must +/// be executed before the ExitBlock is executed in any dynamic execution trace. +static BasicBlock* ProveDirectionOnFirstIteration(BranchInst *BI, + const DominatorTree *DT, + const Loop *CurLoop) { + assert(CurLoop->contains(BI->getParent()) && "query is meaningless"); + if (!BI->isConditional()) + return BI->getSuccessor(0); + // todo: handle fcmp someday + // todo: this would be a lot more powerful if we used scev, but all the + // plumbing is currently missing + auto *ICI = dyn_cast(BI->getCondition()); + if (!ICI) + return nullptr; + // Check for cmp (phi [x, preheader] ...), y where (pred x, y is known + auto *LHS = dyn_cast(ICI->getOperand(0)); + auto *RHS = ICI->getOperand(1); + if (!LHS || LHS->getParent() != CurLoop->getHeader()) + return nullptr; + auto DL = BI->getParent()->getParent()->getParent()->getDataLayout(); + auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader()); + auto *SimpleValOrNull = SimplifyICmpInst(ICI->getPredicate(), + IVStart, RHS, + {DL, nullptr, DT, nullptr, BI}); + auto *SimpleCst = dyn_cast_or_null(SimpleValOrNull); + if (!SimpleCst) + return nullptr; + return BI->getSuccessor(SimpleCst->isZeroValue()); +} + /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool llvm::isGuaranteedToExecute(const Instruction &Inst, @@ -1537,14 +1569,48 @@ if (SafetyInfo->MayThrow) return false; + // Note: There are two styles of reasoning intermixed below for + // implementation efficiency reasons. They are: + // 1) If we can prove that the instruction dominates all exit blocks, then we + // know the instruction must have executed on *some* iteration before we + // exit. We do not prove *which* iteration the instruction must execute on. + // 2) If we can prove that the instruction dominates the latch and any exit + // which might be taken on the first iteration, we know the instruction must + // execute on the first iteration. This second style allows a conditional + // exit before the instruction of interest which is provably not taken on the + // first iteration. This is a quite common case for range check like + // patterns. + // Get the exit blocks for the current loop. SmallVector ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); // Verify that the block dominates each of the exit blocks of the loop. for (BasicBlock *ExitBlock : ExitBlocks) - if (!DT->dominates(Inst.getParent(), ExitBlock)) + if (!DT->dominates(Inst.getParent(), ExitBlock)) { + // Last ditch check! + BasicBlock *CurBlock = CurLoop->getHeader(); + while (true) { + BranchInst *TermBI = dyn_cast(CurBlock->getTerminator()); + if (!TermBI) + break; + BasicBlock* Next = ProveDirectionOnFirstIteration(TermBI, DT, CurLoop); + if (!Next || !CurLoop->contains(Next)) + break; + if (Next == CurLoop->getHeader()) + // Hit cycle - todo: handle inner cycles! + break; + // TODO: This is expensive, must cache! + if (!isGuaranteedToTransferExecutionToSuccessor(Next)) + break; + if (Inst.getParent() == Next) + // We've proven there's a guaranteed to execute path to Inst on first + // iteration of the loop + return true; + CurBlock = Next; + } return false; + } // As a degenerate case, if the loop is statically infinite then we haven't // proven anything since there are no exit blocks. Index: test/Transforms/LICM/hoist-mustexec.ll =================================================================== --- test/Transforms/LICM/hoist-mustexec.ll +++ test/Transforms/LICM/hoist-mustexec.ll @@ -0,0 +1,218 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require,loop(licm)' -S %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @f() nounwind + +; constant fold on first ieration +define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test1( +entry: +; CHECK: %i1 = load i32, i32* %a, align 4 +; CHECK-NEXT: br label %for.body + br label %for.body + +for.body: + %iv = phi i32 [ 0, %entry ], [ %inc, %continue ] + %acc = phi i32 [ 0, %entry ], [ %add, %continue ] + %r.chk = icmp ult i32 %iv, 2000 + br i1 %r.chk, label %continue, label %fail +continue: + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +; Count down from a.length w/entry guard +; TODO: currently unable to prove the following: +; ule i32 (add nsw i32 %len, -1), %len where len is [0, 512] +define i32 @test2(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test2( +entry: + %len = load i32, i32* %a, align 4, !range !{i32 0, i32 512} + %is.non.pos = icmp eq i32 %len, 0 + br i1 %is.non.pos, label %fail, label %preheader +preheader: + %lenminusone = add nsw i32 %len, -1 + br label %for.body +for.body: + %iv = phi i32 [ %lenminusone, %preheader ], [ %dec, %continue ] + %acc = phi i32 [ 0, %preheader ], [ %add, %continue ] + %r.chk = icmp ule i32 %iv, %len + br i1 %r.chk, label %continue, label %fail +continue: +; CHECK-LABEL: continue +; CHECK: %i1 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %dec = add nsw i32 %iv, -1 + %exitcond = icmp eq i32 %dec, 0 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +; trivially true for zero +define i32 @test3(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test3( +entry: + %len = load i32, i32* %a, align 4, !range !{i32 0, i32 512} + %is.zero = icmp eq i32 %len, 0 + br i1 %is.zero, label %fail, label %preheader +preheader: +; CHECK: %i1 = load i32, i32* %a, align 4 +; CHECK-NEXT: br label %for.body + br label %for.body +for.body: + %iv = phi i32 [ 0, %preheader ], [ %inc, %continue ] + %acc = phi i32 [ 0, %preheader ], [ %add, %continue ] + %r.chk = icmp ule i32 %iv, %len + br i1 %r.chk, label %continue, label %fail +continue: + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +; requires fact length is non-zero +; TODO: IsKnownNonNullFromDominatingConditions is currently only be done for +; pointers; should handle integers too +define i32 @test4(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test4( +entry: + %len = load i32, i32* %a, align 4, !range !{i32 0, i32 512} + %is.zero = icmp eq i32 %len, 0 + br i1 %is.zero, label %fail, label %preheader +preheader: + br label %for.body +for.body: + %iv = phi i32 [ 0, %preheader ], [ %inc, %continue ] + %acc = phi i32 [ 0, %preheader ], [ %add, %continue ] + %r.chk = icmp ult i32 %iv, %len + br i1 %r.chk, label %continue, label %fail +continue: +; CHECK-LABEL: continue +; CHECK: %i1 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +; variation on test1 with branch swapped +define i32 @test-brswap(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test-brswap( +entry: +; CHECK: %i1 = load i32, i32* %a, align 4 +; CHECK-NEXT: br label %for.body + br label %for.body + +for.body: + %iv = phi i32 [ 0, %entry ], [ %inc, %continue ] + %acc = phi i32 [ 0, %entry ], [ %add, %continue ] + %r.chk = icmp ugt i32 %iv, 2000 + br i1 %r.chk, label %fail, label %continue +continue: + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +define i32 @test-nonphi(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test-nonphi( +entry: + br label %for.body + +for.body: +; CHECK-LABEL: continue +; CHECK: %i1 = load i32, i32* %a, align 4 + %iv = phi i32 [ 0, %entry ], [ %inc, %continue ] + %acc = phi i32 [ 0, %entry ], [ %add, %continue ] + %xor = xor i32 %iv, 72 + %r.chk = icmp ugt i32 %xor, 2000 + br i1 %r.chk, label %fail, label %continue +continue: + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +} + +define i32 @test-wrongphi(i32* noalias nocapture readonly %a) nounwind uwtable { +; CHECK-LABEL: @test-wrongphi( +entry: + br label %for.body + +for.body: + %iv = phi i32 [ 0, %entry ], [ %inc, %continue ] + %acc = phi i32 [ 0, %entry ], [ %add, %continue ] + br label %dummy_block +dummy_block: + %wrongphi = phi i32 [12, %for.body] + %r.chk = icmp ugt i32 %wrongphi, 2000 + br i1 %r.chk, label %fail, label %continue +continue: +; CHECK-LABEL: continue +; CHECK: %i1 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %a, align 4 + %add = add nsw i32 %i1, %acc + %inc = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %inc, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret i32 %add + +fail: + call void @f() + ret i32 -1 +}