Index: lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- lib/Transforms/Utils/SimplifyIndVar.cpp +++ lib/Transforms/Utils/SimplifyIndVar.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -183,11 +184,92 @@ Pred = ICmpInst::getSwappedPredicate(Pred); } + Value* Other = ICmp->getOperand(1 - IVOperIdx); + + // Pattern match the form + // loop_exit_br (icmp ne/eq (gep inbounds p, (0,+,Stride)) nullptr) + // and convert it into a loop invariant null check on 'p' since we know the + // address can't wrap and the allocation is of size==0 (i.e. p can't validly + // be beyond null to start with if it's ever inbounds at null). + // + // Note: We handle this explicitly in terms of values as SCEV looses the + // inbounds facts on the GEP when forming the SCEV expressions. If we don't + // special case this here, we'll LFTR the expression into a form where due + // to the lost inbounds, we'll loose the ability to discharge the check. + if (auto *GEP = dyn_cast(IVOperand)) + if (ICmpInst::isEquality(Pred) && + isa(Other) && cast(Other)->isNullValue() && + !NullPointerIsDefined(ICmp->getFunction(), + Other->getType()->getPointerAddressSpace()) && + GEP->isInBounds() && GEP->getNumOperands() == 2 && + L->isLoopInvariant(GEP->getPointerOperand()) && + GEP->getType()->isPointerTy()) { + IRBuilder<> B(ICmp); + + // TODO: factor our GEP expression generation logic (i.e. split + // getGEPExpr into indexing and base addition portions) + auto &DL = ICmp->getModule()->getDataLayout(); + auto *ImplicitType = + B.getIntNTy(DL.getPointerSizeInBits(GEP->getPointerAddressSpace())); + const SCEV *Idx = + SE->getTruncateOrSignExtend(SE->getSCEV(GEP->getOperand(1)), + ImplicitType); + + // With inbounds, p + PosNum != nullptr; + // whereas p + NegNum == nullptr would imply p is out of bounds + if (SE->isKnownNonZero(Idx)) { + if (Pred == ICmpInst::ICMP_NE) + ICmp->replaceAllUsesWith(B.getTrue()); + else + ICmp->replaceAllUsesWith(B.getFalse()); + DeadInsts.emplace_back(ICmp); + return true; + } + + + // If this condition controls an exiting branch which dominates the + // latch, return it. Else, return null. + auto getControlledExitBr = [this](Loop *L, Value *Cond) -> BranchInst* { + if (Cond->user_begin() == Cond->user_end()) + return nullptr; + auto I = Cond->user_begin(); + if (std::next(I) != Cond->user_end()) + return nullptr; + if (BasicBlock *Latch = L->getLoopLatch()) + if (BranchInst *BI = dyn_cast(*I)) + if (DT->dominates(BI->getParent(), Latch)) { + unsigned ContainedCount = 0; + for (BasicBlock *BB : successors(BI->getParent())) + if (L->contains(BB)) + ContainedCount++; + if (ContainedCount == 1) + return BI; + } + return nullptr; + }; + + if (isa(Idx) && + cast(Idx)->getStart()->isZero()) + if (getControlledExitBr(L, ICmp)) { + Value *Base = GEP->getPointerOperand(); + Value *Null = + ConstantPointerNull::get(cast(Base->getType())); + ICmp->replaceAllUsesWith(B.CreateICmp(Pred, Base, Null)); + DeadInsts.emplace_back(ICmp); + return true; + } + + // We could emit two conditions (one loop invariant null check, one + // loop varying check on Idx) if desired, but choose not since + // profitability is non-obvious. + } + + // Get the SCEVs for the ICmp operands (in the specific context of the // current loop) const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); - const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); + const SCEV *S = SE->getSCEVAtScope(IVOperand, ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(Other, ICmpLoop); ICmpInst::Predicate InvariantPredicate; const SCEV *InvariantLHS, *InvariantRHS; Index: test/Transforms/IndVarSimplify/inbounds-gep-null.ll =================================================================== --- test/Transforms/IndVarSimplify/inbounds-gep-null.ll +++ test/Transforms/IndVarSimplify/inbounds-gep-null.ll @@ -0,0 +1,203 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S < %s -indvars | FileCheck %s + +@G = external global i32 + +define void @test_ne(i8* %base) { +; CHECK-LABEL: @test_ne( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i8* [[BASE:%.*]], null +; CHECK-NEXT: br i1 [[TMP0]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: store volatile i32 0, i32* @G +; CHECK-NEXT: [[CND2:%.*]] = icmp ult i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[CND2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [0, %entry], [%iv.next, %latch] + %iv.next = add nsw nuw i64 %iv, 1 + %gep = getelementptr inbounds i8, i8* %base, i64 %iv + %cnd1 = icmp ne i8* %gep, null + br i1 %cnd1, label %latch, label %exit +latch: + store volatile i32 0, i32* @G + %cnd2 = icmp ult i64 %iv, 200 + br i1 %cnd2, label %loop, label %exit + +exit: + ret void +} + +define void @test_ne_non_zero_idx(i8* %base) { +; CHECK-LABEL: @test_ne_non_zero_idx( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: br i1 true, label [[LATCH]], label [[EXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: store volatile i32 0, i32* @G +; CHECK-NEXT: [[CND2:%.*]] = icmp ult i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[CND2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [1, %entry], [%iv.next, %latch] + %iv.next = add nsw nuw i64 %iv, 1 + %gep = getelementptr inbounds i8, i8* %base, i64 %iv + %cnd1 = icmp ne i8* %gep, null + br i1 %cnd1, label %latch, label %exit +latch: + store volatile i32 0, i32* @G + %cnd2 = icmp ult i64 %iv, 200 + br i1 %cnd2, label %loop, label %exit + +exit: + ret void +} + +define void @test_ne_unknown_start(i8* %base, i64 %start) { +; CHECK-LABEL: @test_ne_unknown_start( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[IV]] +; CHECK-NEXT: [[CND1:%.*]] = icmp ne i8* [[GEP]], null +; CHECK-NEXT: br i1 [[CND1]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: store volatile i32 0, i32* @G +; CHECK-NEXT: [[CND2:%.*]] = icmp ult i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[CND2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [%start, %entry], [%iv.next, %latch] + %iv.next = add nsw nuw i64 %iv, 1 + %gep = getelementptr inbounds i8, i8* %base, i64 %iv + %cnd1 = icmp ne i8* %gep, null + br i1 %cnd1, label %latch, label %exit +latch: + store volatile i32 0, i32* @G + %cnd2 = icmp ult i64 %iv, 200 + br i1 %cnd2, label %loop, label %exit + +exit: + ret void +} + +define void @test_eq(i8* %base) { +; CHECK-LABEL: @test_eq( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i8* [[BASE:%.*]], null +; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: store volatile i32 0, i32* @G +; CHECK-NEXT: [[CND2:%.*]] = icmp ult i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[CND2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [0, %entry], [%iv.next, %latch] + %iv.next = add nsw nuw i64 %iv, 1 + %gep = getelementptr inbounds i8, i8* %base, i64 %iv + %cnd1 = icmp eq i8* %gep, null + br i1 %cnd1, label %exit, label %latch +latch: + store volatile i32 0, i32* @G + %cnd2 = icmp ult i64 %iv, 200 + br i1 %cnd2, label %loop, label %exit + +exit: + ret void +} + +define void @test_eq_noloop(i8* %base) { +; CHECK-LABEL: @test_eq_noloop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i8* [[BASE:%.*]], null +; CHECK-NEXT: br i1 [[TMP0]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: store volatile i32 0, i32* @G +; CHECK-NEXT: [[CND2:%.*]] = icmp ult i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[CND2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [0, %entry], [%iv.next, %latch] + %iv.next = add nsw nuw i64 %iv, 1 + %gep = getelementptr inbounds i8, i8* %base, i64 %iv + %cnd1 = icmp eq i8* %gep, null + br i1 %cnd1, label %latch, label %exit +latch: + store volatile i32 0, i32* @G + %cnd2 = icmp ult i64 %iv, 200 + br i1 %cnd2, label %loop, label %exit + +exit: + ret void +} + + +define void @test_ne_swapped_cmp_operands(i8* %base) { +; CHECK-LABEL: @test_ne_swapped_cmp_operands( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i8* [[BASE:%.*]], null +; CHECK-NEXT: br i1 [[TMP0]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: store volatile i32 0, i32* @G +; CHECK-NEXT: [[CND2:%.*]] = icmp ult i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[CND2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [0, %entry], [%iv.next, %latch] + %iv.next = add nsw nuw i64 %iv, 1 + %gep = getelementptr inbounds i8, i8* %base, i64 %iv + %cnd1 = icmp ne i8* null, %gep + br i1 %cnd1, label %latch, label %exit +latch: + store volatile i32 0, i32* @G + %cnd2 = icmp ult i64 %iv, 200 + br i1 %cnd2, label %loop, label %exit + +exit: + ret void +}