Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -565,6 +565,9 @@
     /// forgetMemoizedResults - Drop memoized information computed for S.
     void forgetMemoizedResults(const SCEV *S);
 
+    /// Return an existing SCEV for V if there is one, otherwise return nullptr.
+    const SCEV *getExistingSCEV(Value *V);
+
     /// Return false iff given SCEV contains a SCEVUnknown with NULL value-
     /// pointer.
     bool checkValidity(const SCEV *S) const;
@@ -579,6 +582,11 @@
     bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
                                    const Loop *L);
 
+    // Return SCEV no-wrap flags that can be proven based on reasoning
+    // about how poison produced from no-wrap flags on this value
+    // (e.g. a nuw add) would trigger undefined behavior on overflow.
+    SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
+
   public:
     static char ID; // Pass identification, replacement for typeid
     ScalarEvolution();
Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -30,6 +30,7 @@
   class DominatorTree;
   class TargetLibraryInfo;
   class LoopInfo;
+  class Loop;
 
   /// Determine which bits of V are known to be either zero or one and return
   /// them in the KnownZero/KnownOne bit sets.
@@ -288,7 +289,53 @@
                                                AssumptionCache *AC,
                                                const Instruction *CxtI,
                                                const DominatorTree *DT);
-  
+
+  /// Return true if this function can prove that the instruction I will
+  /// always transfer execution to one of its successors (including the next
+  /// instruction that follows within a basic block). E.g. this is not
+  /// guaranteed for function calls that could loop infinitely.
+  ///
+  /// In other words, this function returns false for instructions that may
+  /// transfer execution or fail to transfer execution in a way that is not
+  /// captured in the CFG nor in the sequence of instructions within a basic
+  /// block.
+  ///
+  /// Undefined behavior is assumed not to happen, so e.g. division is
+  /// guaranteed to transfer execution to the following instruction even
+  /// though division by zero might cause undefined behavior.
+  bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I);
+
+  /// Return true if this function can prove that the instruction I
+  /// is executed for every iteration of the loop L.
+  ///
+  /// Note that this currently only considers the loop header.
+  bool isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+                                              const Loop *L);
+
+  /// Return true if this function can prove that I is guaranteed to yield
+  /// full-poison (all bits poison) if at least one of its operands is
+  /// full-poison (all bits poison).
+  ///
+  /// The exact rules for how poison propagates through instructions have
+  /// not been settled as of 2015-07-10, so this function is conservative
+  /// and only considers poison to be propagated in uncontroversial
+  /// cases. There is no attempt to track values that may be only partially
+  /// poison.
+  bool propagatesFullPoison(const Instruction *I);
+
+  /// Return either nullptr or an operand of I such that I will trigger
+  /// undefined behavior if I is executed and that operand has a full-poison
+  /// value (all bits poison).
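+  ///
+  /// For example, for a store this is the pointer operand: storing through
+  /// a full-poison pointer triggers undefined behavior.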
+  const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
+
+  /// Return true if this function can prove that if PoisonI is executed
+  /// and yields a full-poison value (all bits poison), then that will
+  /// trigger undefined behavior.
+  ///
+  /// Note that this currently only considers the basic block that is
+  /// the parent of PoisonI.
+  bool isKnownNotFullPoison(const Instruction *PoisonI);
+
   /// \brief Specific patterns of select instructions we can match.
   enum SelectPatternFlavor {
     SPF_UNKNOWN = 0,
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -2936,7 +2936,8 @@
   // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
   // instruction to its SCEV, because the Instruction may be guarded by control
   // flow and the no-overflow bits may not be valid for the expression in any
-  // context.
+  // context. This can be fixed similarly to how these flags are handled for
+  // adds.
   SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
 
   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
@@ -3315,22 +3316,25 @@
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
 
+  const SCEV *S = getExistingSCEV(V);
+  if (S == nullptr) {
+    S = createSCEV(V);
+    ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+  }
+  return S;
+}
+
+const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
+  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
   ValueExprMapType::iterator I = ValueExprMap.find_as(V);
   if (I != ValueExprMap.end()) {
     const SCEV *S = I->second;
     if (checkValidity(S))
       return S;
-    else
-      ValueExprMap.erase(I);
+    ValueExprMap.erase(I);
   }
-  const SCEV *S = createSCEV(V);
-
-  // The process of creating a SCEV for V may have caused other SCEVs
-  // to have been created, so it's necessary to insert the new entry
-  // from scratch, rather than trying to remember the insert position
-  // above.
-  ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
-  return S;
+  return nullptr;
 }
 
 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
@@ -4089,8 +4093,63 @@
   return setRange(S, SignHint, ConservativeResult);
 }
 
-/// createSCEV - We know that there is no SCEV for the specified value.
-/// Analyze the expression.
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+  const BinaryOperator *BinOp = cast<BinaryOperator>(V);
+
+  // Return early if there are no flags to propagate to the SCEV.
+  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+  if (BinOp->hasNoUnsignedWrap())
+    Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+  if (BinOp->hasNoSignedWrap())
+    Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+  if (Flags == SCEV::FlagAnyWrap) {
+    return SCEV::FlagAnyWrap;
+  }
+
+  // Here we check that BinOp is in the header of the innermost loop
+  // containing BinOp, since we only deal with instructions in the loop
+  // header. The actual loop we need to check later will come from an add
+  // recurrence, but getting that requires computing the SCEV of the operands,
+  // which can be expensive. This check we can do cheaply to rule out some
+  // cases early.
+  Loop *innermostContainingLoop = LI->getLoopFor(BinOp->getParent());
+  if (innermostContainingLoop == nullptr ||
+      innermostContainingLoop->getHeader() != BinOp->getParent())
+    return SCEV::FlagAnyWrap;
+
+  // Only proceed if we can prove that BinOp does not yield poison.
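+  // If BinOp could be executed and yield poison without triggering undefined
+  // behavior, its nsw/nuw flags would prove nothing about the arithmetic:
+  // producing poison on overflow is exactly what those flags permit.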
+  if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
+
+  // At this point we know that if V is executed, then it does not wrap
+  // according to at least one of NSW or NUW. If V is not executed, then we do
+  // not know if the calculation that V represents would wrap. Multiple
+  // instructions can map to the same SCEV. If we apply NSW or NUW from V to
+  // the SCEV, we must guarantee no wrapping for that SCEV also when it is
+  // derived from other instructions that map to the same SCEV. We cannot make
+  // that guarantee for cases where V is not executed. So we need to find the
+  // loop that V is considered in relation to and prove that V is executed for
+  // every iteration of that loop. That implies that the value that V
+  // calculates does not wrap anywhere in the loop, so then we can apply the
+  // flags to the SCEV.
+  //
+  // We check isLoopInvariant to disambiguate in case we are adding two
+  // recurrences from different loops, so that we know which loop to prove
+  // that V is executed in.
+  for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
+    const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
+    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+      const int OtherOpIndex = 1 - OpIndex;
+      const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
+      if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
+          isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
+        return Flags;
+    }
+  }
+  return SCEV::FlagAnyWrap;
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value. Analyze
+/// the expression.
 ///
 const SCEV *ScalarEvolution::createSCEV(Value *V) {
   if (!isSCEVable(V->getType()))
@@ -4127,29 +4186,52 @@
     // Instead, gather up all the operands and make a single getAddExpr call.
     // LLVM IR canonical form means we need only traverse the left operands.
    //
-    // Don't apply this instruction's NSW or NUW flags to the new
-    // expression. The instruction may be guarded by control flow that the
-    // no-wrap behavior depends on. Non-control-equivalent instructions can be
-    // mapped to the same SCEV expression, and it would be incorrect to transfer
-    // NSW/NUW semantics to those operations.
+    // FIXME: Expand this handling of NSW and NUW to other instructions, like
+    // sub and mul.
     SmallVector<const SCEV *, 4> AddOps;
-    AddOps.push_back(getSCEV(U->getOperand(1)));
-    for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
-      unsigned Opcode = Op->getValueID() - Value::InstructionVal;
-      if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+    for (Value *Op = U;; Op = U->getOperand(0)) {
+      U = dyn_cast<Operator>(Op);
+      unsigned Opcode = U ? U->getOpcode() : 0;
+      if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
+        assert(Op != V && "V should be an add");
+        AddOps.push_back(getSCEV(Op));
         break;
-      U = cast<Operator>(Op);
+      }
+
+      if (auto *OpSCEV = getExistingSCEV(Op)) {
+        AddOps.push_back(OpSCEV);
+        break;
+      }
+
+      // If a NUW or NSW flag can be applied to the SCEV for this
+      // addition, then compute the SCEV for this addition by itself
+      // with a separate call to getAddExpr. We need to do that
+      // instead of pushing the operands of the addition onto AddOps,
+      // since the flags are only known to apply to this particular
+      // addition - they may not apply to other additions that can be
+      // formed with operands from AddOps.
+      //
+      // FIXME: Expand this to sub instructions.
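+      //
+      // For example, in (a +nsw b) + c the nsw applies only to the inner
+      // a + b, so the inner addition must become its own SCEV rather than
+      // contributing its operands to AddOps alongside c.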
+      if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
+        SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+        if (Flags != SCEV::FlagAnyWrap) {
+          AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
+                                      getSCEV(U->getOperand(1)), Flags));
+          break;
+        }
+      }
+
       const SCEV *Op1 = getSCEV(U->getOperand(1));
       if (Opcode == Instruction::Sub)
         AddOps.push_back(getNegativeSCEV(Op1));
       else
         AddOps.push_back(Op1);
     }
-    AddOps.push_back(getSCEV(U->getOperand(0)));
     return getAddExpr(AddOps);
   }
+
   case Instruction::Mul: {
-    // Don't transfer NSW/NUW for the same reason as AddExpr.
+    // FIXME: Transfer NSW/NUW as in AddExpr.
     SmallVector<const SCEV *, 4> MulOps;
     MulOps.push_back(getSCEV(U->getOperand(1)));
     for (Value *Op = U->getOperand(0);
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -3316,6 +3316,167 @@
   return OverflowResult::MayOverflow;
 }
 
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
+  // FIXME: This conservative implementation can be relaxed. E.g. most
+  // atomic operations are guaranteed to terminate on most platforms
+  // and most functions terminate.
+
+  return !I->isAtomic() &&       // atomics may never succeed on some platforms
+         !isa<CallInst>(I) &&    // could throw and might not terminate
+         !isa<InvokeInst>(I) &&  // might not terminate and could throw to
+                                 //   non-successor (see bug 24185 for details).
+         !isa<ResumeInst>(I) &&  // has no successors
+         !isa<ReturnInst>(I);    // has no successors
+}
+
+bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+                                                  const Loop *L) {
+  // The loop header is guaranteed to be executed for every iteration.
+  //
+  // FIXME: Relax this constraint to cover all basic blocks that are
+  // guaranteed to be executed at every iteration.
+  if (I->getParent() != L->getHeader()) return false;
+
+  for (const Instruction &LI : *L->getHeader()) {
+    if (&LI == I) return true;
+    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
+  }
+  llvm_unreachable("Instruction not contained in its own parent basic block.");
+}
+
+bool llvm::propagatesFullPoison(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Xor:
+  case Instruction::Trunc:
+  case Instruction::BitCast:
+  case Instruction::AddrSpaceCast:
+    // These operations all propagate poison unconditionally. Note that poison
+    // is not any particular value, so xor or subtraction of poison with
+    // itself still yields poison, not zero.
+    return true;
+
+  case Instruction::AShr:
+  case Instruction::SExt:
+    // For these operations, one bit of the input is replicated across
+    // multiple output bits. A replicated poison bit is still poison.
+    return true;
+
+  case Instruction::Shl: {
+    // Left shift *by* a poison value is poison. The number of
+    // positions to shift is unsigned, so no negative values are
+    // possible there. Left shift by zero places preserves poison. So
+    // it only remains to consider left shift of poison by a positive
+    // number of places.
+    //
+    // A left shift by a positive number of places leaves the lowest order bit
+    // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+    // make the poison operand violate that flag, yielding a fresh full-poison
+    // value.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+  }
+
+  case Instruction::Mul: {
+    // A multiplication by zero yields a non-poison zero result, so we need to
+    // rule out zero as an operand.
+    // Conservatively, multiplication by a non-zero constant is not
+    // multiplication by zero.
+    //
+    // Multiplication by a non-zero constant can leave some bits
+    // non-poisoned. For example, a multiplication by 2 leaves the lowest
+    // order bit unpoisoned. So we need to consider that.
+    //
+    // Multiplication by 1 preserves poison. If the multiplication has a
+    // no-wrap flag, then we can make the poison operand violate that flag
+    // when multiplied by any integer other than 0 and 1.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+      for (Value *V : OBO->operands()) {
+        if (auto *CI = dyn_cast<ConstantInt>(V)) {
+          // A ConstantInt cannot yield poison, so we can assume that it is
+          // the other operand that is poison.
+          return !CI->isZero();
+        }
+      }
+    }
+    return false;
+  }
+
+  case Instruction::GetElementPtr:
+    // A GEP implicitly represents a sequence of additions, subtractions,
+    // truncations, sign extensions and multiplications. The multiplications
+    // are by the non-zero sizes of some set of types, so we do not have to be
+    // concerned with multiplication by zero. If the GEP is in-bounds, then
+    // these operations are implicitly no-signed-wrap so poison is propagated
+    // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+    return cast<GEPOperator>(I)->isInBounds();
+
+  default:
+    return false;
+  }
+}
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Store:
+    return cast<StoreInst>(I)->getPointerOperand();
+
+  case Instruction::Load:
+    return cast<LoadInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicCmpXchg:
+    return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicRMW:
+    return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+    return I->getOperand(1);
+
+  default:
+    return nullptr;
+  }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+  // We currently only look for uses of poison values within the same basic
+  // block, as that makes it easier to guarantee that the uses will be
+  // executed given that PoisonI is executed.
+  //
+  // FIXME: Expand this to consider uses beyond the same basic block. To do
+  // this, look out for the distinction between post-dominance and strong
+  // post-dominance.
+  const BasicBlock *BB = PoisonI->getParent();
+
+  // Set of instructions that we have proved will yield poison if PoisonI
+  // does.
+  SmallSet<const Value *, 16> YieldsPoison;
+  YieldsPoison.insert(PoisonI);
+
+  for (const Instruction *I = PoisonI, *E = BB->end(); I != E;
+       I = I->getNextNode()) {
+    if (I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(I);
+      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+      if (!isGuaranteedToTransferExecutionToSuccessor(I)) return false;
+    }
+
+    // Mark poison that propagates from I through uses of I.
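+    // Only users in BB are added, since those are the only ones the forward
+    // scan over BB can subsequently visit.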
+    if (YieldsPoison.count(I)) {
+      for (const User *User : I->users()) {
+        const Instruction *UserI = cast<Instruction>(User);
+        if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+          YieldsPoison.insert(User);
+      }
+    }
+  }
+  return false;
+}
+
 static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
Index: test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
===================================================================
--- test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -11,7 +11,7 @@
 ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
 ; CHECK: Base offset: %A
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>]
+; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>]
 
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
 entry:
Index: test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
===================================================================
--- test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -11,7 +11,7 @@
 ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
 ; CHECK: Base offset: %A
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>]
+; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>]
 
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
 entry:
Index: test/Analysis/ScalarEvolution/flags-from-poison.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -0,0 +1,358 @@
+; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s
+
+; Positive and negative tests for inferring flags like nsw from
+; reasoning about how a poison value from overflow would trigger
+; undefined behavior.
+
+define void @foo() {
+  ret void
+}
+
+; Example where an add should get the nsw flag, so that a sext can be
+; distributed over the add.
+define void @test-add-nsw(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-nsw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+  %index32 = add nsw i32 %i, %offset
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 %offset to i64),+,1}<nsw>
+  %index64 = sext i32 %index32 to i64
+
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  call void @foo()
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Example where an add should get the nuw flag.
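+; As in test-add-nsw, the load through the inbounds gep consumes any poison
+; produced by a wrapping add, so the nuw flag can be transferred to the SCEV.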
+define void @test-add-nuw(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-nuw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nuw>
+  %index32 = add nuw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nuw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; With no load to trigger UB from poison, we cannot infer nsw.
+define void @test-add-no-load(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-no-load
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nuw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; The current code only looks at the loop header, so it should not infer
+; nsw in this case, as that would require looking outside the loop header.
+define void @test-add-not-header(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-not-header
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+  br label %loop2
+loop2:
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Same as test-add-not-header, but in this case only the load
+; instruction is outside the loop header.
+define void @test-add-not-header2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-not-header2
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  br label %loop2
+loop2:
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; The call instruction makes it not guaranteed that the add will be
+; executed, since it could run forever or throw an exception, so we
+; cannot assume that the UB is realized.
+define void @test-add-call(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-call
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  call void @foo()
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Same issue as test-add-call, but this time the call is between the
+; producer of poison and the load that consumes it.
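+; The call might not return, so the load that would consume the poison is
+; not guaranteed to execute.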
+define void @test-add-call2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-call2
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  call void @foo()
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Without inbounds, GEP does not propagate poison in the very
+; conservative approach used here.
+define void @test-add-no-inbounds(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-no-inbounds
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Multiplication by a non-zero constant propagates poison if there is
+; a nuw or nsw flag on the multiplication.
+define void @test-add-mul-propagates(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-propagates
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+  %index32 = add nsw i32 %i, %offset
+
+  %indexmul = mul nuw i32 %index32, 2
+  %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Multiplication by a non-constant should not propagate poison in the
+; very conservative approach used here.
+define void @test-add-mul-no-propagation(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-no-propagation
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %indexmul = mul nsw i32 %index32, %offset
+  %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Multiplication by a non-zero constant does not propagate poison
+; without a no-wrap flag.
+define void @test-add-mul-no-propagation2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-no-propagation2
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}
+  %index32 = add nsw i32 %i, %offset
+
+  %indexmul = mul i32 %index32, 2
+  %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Division by poison triggers UB.
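+; No separate load is needed: poison used as the divisor of the sdiv below
+; is already enough to trigger UB.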
+define void @test-add-div(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-div
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %j =
+; CHECK: --> {%offset,+,1}<nsw>
+  %j = add nsw i32 %i, %offset
+
+  %q = sdiv i32 %numIterations, %j
+  %nexti = add nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Remainder of poison by non-poison divisor does not trigger UB.
+define void @test-add-div2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-div2
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %j =
+; CHECK: --> {%offset,+,1}
+  %j = add nsw i32 %i, %offset
+
+  %q = sdiv i32 %j, %numIterations
+  %nexti = add nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Store to a poison address triggers UB.
+define void @test-add-store(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-store
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+  %index32 = add nsw i32 %i, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  store float 1.0, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Three sequential adds where the middle add should have nsw. There is
+; a special case for sequential adds and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-add-twice(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-twice
+entry:
+  br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + %offset),+,1}
+  %seq = add nsw nuw i32 %index32, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+  %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + %offset),+,1}<nsw>
+  %index32 = add nsw i32 %j, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  store float 1.0, float* %ptr, align 4
+  br label %loop2
+exit:
+  ret void
+}
Index: test/Transforms/LoopStrengthReduce/sext-ind-var.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopStrengthReduce/sext-ind-var.ll
@@ -0,0 +1,36 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; LSR used not to be able to generate a float* induction variable in
+; these cases, because scalar evolution did not propagate nsw from the
+; instruction to the SCEV, which prevented the sext from being distributed
+; into the corresponding addrec.
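+; With nsw transferred to the SCEV, the sext folds into the recurrence and
+; the address computation becomes a single 64-bit induction variable.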
+define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testadd
+; CHECK: sext i32 %offset to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = add nuw nsw i32 %i, %offset
+  %index64 = sext i32 %index32 to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}