Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -237,17 +237,15 @@ }; /// This class represents an assumption that two SCEV expressions are equal, -/// and this can be checked at run-time. We assume that the left hand side is -/// a SCEVUnknown and the right hand side a constant. +/// and this can be checked at run-time. class SCEVEqualPredicate final : public SCEVPredicate { - /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a - /// constant. - const SCEVUnknown *LHS; - const SCEVConstant *RHS; + /// We assume that LHS == RHS. + const SCEV *LHS; + const SCEV *RHS; public: - SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS, - const SCEVConstant *RHS); + SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS, + const SCEV *RHS); /// Implementation of the SCEVPredicate interface bool implies(const SCEVPredicate *N) const override; @@ -256,10 +254,10 @@ const SCEV *getExpr() const override; /// Returns the left hand side of the equality. - const SCEVUnknown *getLHS() const { return LHS; } + const SCEV *getLHS() const { return LHS; } /// Returns the right hand side of the equality. - const SCEVConstant *getRHS() const { return RHS; } + const SCEV *getRHS() const { return RHS; } /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SCEVPredicate *P) { @@ -1246,6 +1244,22 @@ SmallVector NewOp(Operands.begin(), Operands.end()); return getAddRecExpr(NewOp, L, Flags); } + + /// Similar to createAddRecFromPHI, but with the additional flexibility of + /// suggesting runtime overflow checks in case casts are encountered. + /// If successful, the analysis records that for this loop, \p SymbolicPHI, + /// which is the UnknownSCEV currently representing the PHI, can be rewritten + /// into an AddRec, assuming some predicates; The function then returns the + /// AddRec and the predicates as a pair, and caches this pair in + /// PredicatedSCEVRewrites. + /// If the analysis is not successful, a mapping from the \p SymbolicPHI to + /// itself (with no predicates) is recorded, and a nullptr with an empty + /// predicates vector is returned as a pair. + /// The function is intended to be called from PSCEV (the caller will decide + /// whether to actually add the predicates and carry out the rewrites). + Optional>> + createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); + /// Returns an expression for a GEP /// /// \p GEP The GEP. The indices contained in the GEP itself are ignored, @@ -1654,8 +1668,7 @@ return F.getParent()->getDataLayout(); } - const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS, - const SCEVConstant *RHS); + const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS); const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, @@ -1697,6 +1710,12 @@ FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; + /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression + /// they can be rewritten into under certain predicates. + DenseMap, + std::pair>> + PredicatedSCEVRewrites; + /// The head of a linked list of all SCEVUnknown values that have been /// allocated. This is used by releaseMemory to locate them all and call /// their destructors. Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -4099,6 +4099,295 @@ return None; } +/// Helper function to createAddRecFromPHIWithCasts. We have a phi +/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via +/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the +/// way. This function checks if \p Op, an operand of this SCEVAddExpr, +/// follows one of the following patterns: +/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) +/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) +/// If the SCEV expression of \p Op confirms with one of the expected patterns +/// we return the type of the truncation operation, and indicate whether the +/// truncated type should be treated as signed/unsigned by setting +/// \p Signed to true/false, respectively. +static Type *isSimpleCastedPHI(const SCEV *Op, const SCEV *SymbolicPHI, + bool &Signed, ScalarEvolution &SE) { + const SCEVSignExtendExpr *SExt = dyn_cast(Op); + const SCEVZeroExtendExpr *ZExt = dyn_cast(Op); + if (!SExt && !ZExt) + return nullptr; + const SCEVTruncateExpr *Trunc = + SExt ? dyn_cast(SExt->getOperand()) + : dyn_cast(ZExt->getOperand()); + if (!Trunc) + return nullptr; + const SCEV *X = Trunc->getOperand(); + if (X != SymbolicPHI) + return nullptr; + unsigned SourceBits = SE.getTypeSizeInBits(X->getType()); + unsigned NewBits = SExt ? SE.getTypeSizeInBits(SExt->getType()) + : SE.getTypeSizeInBits(ZExt->getType()); + if (SourceBits != NewBits) + return nullptr; + Signed = SExt ? true : false; + return Trunc->getType(); +} + +// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the +// computation that updates the phi follows the following pattern: +// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum +// which correspond to a phi->trunc->sext/zext->add->phi update chain. +// If so, try to see if it can be rewritten as an AddRecExpr under some +// Predicates. If successful, return them as a pair. Also cache the results +// of the analysis. +// +// Example usage scenario: +// Say the Rewriter is called for the following SCEV: +// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step) +// where: +// %X = phi i64 (%Start, %BEValue) +// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), +// and call this function with %SymbolicPHI = %X. +// +// The analysis will find that the value coming around the backedge has +// the following SCEV: +// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) +// Upon concluding that this matches the desired pattern, the function +// will return the pair {NewAddRec, SmallPredsVec} where: +// NewAddRec = {%Start,+,%Step} +// SmallPredsVec = {P1, P2, P3} as follows: +// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)} Flags: +// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64) +// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64) +// The returned pair means that SymbolicPHI can be rewritten into NewAddRec +// under the predicates {P1,P2,P3}. +// This predicated rewrite will be cached in PredicatedSCEVRewrites: +// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} +// +// TODO's: +// +// 1) Extend the Induction descriptor to also support inductions that involve +// casts: When needed (namely, when we are called in the context of the +// vectorizer induction analysis), a Set of cast instructions will be +// populated by this method, and provided back to isInductionPHI. This is +// needed to allow the vectorizer to properly record them to be ignored by +// the cost model and to avoid vectorizing them (otherwise these casts, +// which are redundant under the runtime overflow checks, will be +// vectorized, which can be costly). +// +// 2) Support additional induction/PHISCEV patterns: We also want to support +// inductions where the sext-trunc / zext-trunc operations (partly) occur +// after the induction update operation (the induction increment): +// +// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) +// which correspond to a phi->add->trunc->sext/zext->phi update chain. +// +// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix) +// which correspond to a phi->trunc->add->sext/zext->phi update chain. +// +// 3) Outline common code with createAddRecFromPHI to avoid duplication. +// +Optional>> +ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { + SmallVector Predicates; + + // *** Part1: Analysis: Check if we have the expected pattern + + auto *PN = cast(SymbolicPHI->getValue()); + if (!PN->getType()->isIntegerTy()) + return None; + + const Loop *L = LI.getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent()) + return None; + + // Check to see if we already analyzed this PHI. + auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L}); + if (I != PredicatedSCEVRewrites.end()) { + std::pair> Rewrite = + I->second; + // Analysis was done before and failed to create an AddRec: + if (Rewrite.first == SymbolicPHI) + return None; + // Analysis was done before and succeeded to create an AddRec under + // a predicate: + assert(isa(Rewrite.first) && "Expected an AddRec"); + assert(!(Rewrite.second).empty() && "Expected to find Predicates"); + return Rewrite; + } + + // In case the analysis that follows fails, we (speculatively) record in the + // cache that it had failed. + // If the analysis succeeds, we override this entry with the proposed rewrite. + PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates}; + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = nullptr, *StartValueV = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = nullptr; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = nullptr; + break; + } + } + if (!BEValueV || !StartValueV) + return None; + + const SCEV *BEValue = getSCEV(BEValueV); + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, possibly with casts that we can ignore under + // an appropriate runtime guard, then we found a simple induction variable! + const auto *Add = dyn_cast(BEValue); + if (!Add) + return None; + + // If there is a single occurrence of the symbolic value, possibly + // casted, replace it with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + Type *TruncTy = nullptr; + bool Signed; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicPHI || + // In this case we will add a SCEV predicate that allows us to prove + // that Add->getOperand(i) == SymbolicPHI. + (TruncTy = + isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this))) + if (FoundIndex == e) { + FoundIndex = i; + break; + } + + if (FoundIndex == Add->getNumOperands()) + return None; + + // Create an add with everything but the specified operand. + SmallVector Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // The runtime checks will not be valid if the step amount is + // varying inside the loop. + if (!isLoopInvariant(Accum, L)) + return None; + + + // *** Part2: Create the predicates + + // Analysis was successful: we have a phi-with-cast pattern for which we + // can return an AddRec expression under some predicates. Let's see which + // predicates are required, and prove that they guarantee that the NewExpr + // that we return is equal to the original Expr, where: + // for i==0: + // NewExpr == OrigExpr == Start + // for i>0: + // NewExpr = Start + i * Accum + // OrigExpr = SExtTrunc(Start + (i-1)*Accum) + Accum + // or in other words, prove that: + // Start + (i-1)*Accum == SExtTrunc( Start + (i-1)*Accum ) + // + // We use the abbreviation SExtTrunc(%w) to denote: + // (sext ix (trunc iy %w to ix) to iy) + // + // We will be adding the following Predicates: + // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum) + // fits within the truncated type (does not overflow) for i = 0 to n-1. + // P2: An Equal predicate that guarantees that Start == SExtTrunc(Start) + // P3: An Equal predicate that guarantees that Accum == SExtTrunc(Accum) + // + // Now, let's start with the initial steps: + // At iteration 0 we want to prove: + // Start == Start :: Holds Trivially. + // + // At iteration 1 we want to prove: + // Start + Accum == SExtTrunc(Start) + Accum :: from P2 + // + // Now to the induction step: + // At iteration 2 we want to prove: + // Start + 2*Accum == SExtTrunc(SExtTrunc(Start) + Accum) + Accum + // or in other words: + // Start + Accum == SExtTrunc(SExtTrunc(Start) + Accum) + // == SExtTrunc(Start + Accum) :: from P2 + // == SExt(Trunc(Start) + Trunc(Accum)) :: Trunc(x+y)=>Trunc(x)+Trunc(y) + // == SExtTrunc(Start) + SExtTrunc(Accum):: from P1: + // :: SExt(x+y)=>SExt(x)+SExt(y) + // == Start + Accum :: from P2,P3 + // + // More formally: + // Given that, by induction hypothesis: + // Start + i*Accum == SExtTrunc(Start + (i-1)*Accum) + Accum :: (step I) + // Prove that: + // Start + (i+1)*Accum == SExtTrunc(Start + i*Accum) + Accum :: (step I+1) + // + // Start + (i+1)*Accum + // == (Start + i*Accum) + Accum + // == (SExtTrunc(Start + (i-1)*Accum) + Accum) + Accum :: from step I + // == SExtTrunc(Start + (i-1)*Accum) + SExtTrunc(Accum) + Accum :: from P3 + // == SExtTrunc((Start + (i-1)*Accum) + Accum) + Accum + // :: from P1: SExt(x)+SExt(y)=>SExt(x+y) + // == SExtTrunc(Start + i*Accum) + Accum + // + // By induction, the same applies to all iterations 1<=i(PHISCEV); + if (!AR) + return None; + + SCEVWrapPredicate::IncrementWrapFlags AddedFlags = + Signed ? SCEVWrapPredicate::IncrementNSSW + : SCEVWrapPredicate::IncrementNUSW; + const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); + Predicates.push_back(AddRecPred); + + // Create the Equal Predicates P2,P3: + auto AppendPredicate = [&](const SCEV *Expr) -> void { + assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); + const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); + const SCEV *ExtendedExpr = + Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType()) + : getZeroExtendExpr(TruncatedExpr, Expr->getType()); + if (Expr != ExtendedExpr && + !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) { + const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); + Predicates.push_back(Pred); + } + }; + + AppendPredicate(StartVal); + AppendPredicate(Accum); + + // *** Part3: Predicates are ready. Now go ahead and create the new addrec in + // which the casts had been folded away. The caller can rewrite SymbolicPHI + // into NewAR if it will also add the runtime overflow checks specified in + // Predicates. + auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); + + std::pair> PredRewrite = + std::make_pair(NewAR, Predicates); + // Remember the result of the analysis for this SCEV at this locayyytion. + PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite; + return PredRewrite; +} + /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most @@ -5835,6 +6124,16 @@ RemoveLoopFromBackedgeMap(BackedgeTakenCounts); RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); + // Drop information about predicated SCEV rewrites for this loop. + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair Entry = I->first; + if (Entry.second == L) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } + // Drop information about expressions based on loop-header PHIs. SmallVector Worklist; PushLoopPHIs(L, Worklist); @@ -9990,6 +10289,7 @@ UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), + PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; } @@ -10390,6 +10690,15 @@ HasRecMap.erase(S); MinTrailingZerosCache.erase(S); + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair Entry = I->first; + if (Entry.first == S) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } + auto RemoveSCEVFromBackedgeMap = [S, this](DenseMap &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { @@ -10549,10 +10858,11 @@ AU.addRequiredTransitive(); } -const SCEVPredicate * -ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS, - const SCEVConstant *RHS) { +const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS, + const SCEV *RHS) { FoldingSetNodeID ID; + assert(LHS->getType() == RHS->getType() && + "Type mismatch between LHS and RHS"); // Unique this node based on the arguments ID.AddInteger(SCEVPredicate::P_Equal); ID.AddPointer(LHS); @@ -10615,8 +10925,7 @@ if (IPred->getLHS() == Expr) return IPred->getRHS(); } - - return Expr; + return convertToAddRecWithPreds(Expr); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { @@ -10652,17 +10961,40 @@ } private: - bool addOverflowAssumption(const SCEVAddRecExpr *AR, - SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { - auto *A = SE.getWrapPredicate(AR, AddedFlags); + bool addOverflowAssumption(const SCEVPredicate *P) { if (!NewPreds) { // Check if we've already made this assumption. - return Pred && Pred->implies(A); + return Pred && Pred->implies(P); } - NewPreds->insert(A); + NewPreds->insert(P); return true; } + bool addOverflowAssumption(const SCEVAddRecExpr *AR, + SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { + auto *A = SE.getWrapPredicate(AR, AddedFlags); + return addOverflowAssumption(A); + } + + // If \p Expr represents a PHINode, we try to see if it can be represented + // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible + // to add this predicate as a runtime overflow check, we return the AddRec. + // If \p Expr does not meet these conditions (is not a PHI node, or we + // couldn't create an AddRec for it, or couldn't add the predicate), we just + // return \p Expr. + const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { + if (!isa(Expr->getValue())) + return Expr; + Optional>> + PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); + if (!PredicatedRewrite) + return Expr; + for (auto *P : PredicatedRewrite->second) + if (!addOverflowAssumption(P)) + return Expr; + return PredicatedRewrite->first; + } + SmallPtrSetImpl *NewPreds; SCEVUnionPredicate *Pred; const Loop *L; @@ -10699,9 +11031,10 @@ : FastID(ID), Kind(Kind) {} SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, - const SCEVUnknown *LHS, - const SCEVConstant *RHS) - : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {} + const SCEV *LHS, const SCEV *RHS) + : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) { + assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); +} bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { const auto *Op = dyn_cast(N); Index: llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -0,0 +1,231 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Check that the vectorizer identifies the %p.09 phi, +; as an induction variable, despite the potential overflow +; due to the truncation from 32bit to 8bit. +; SCEV will detect the pattern "sext(trunc(%p.09)) + %step" +; and generate the required runtime checks under which +; we can assume no overflow. We check here that we generate +; exactly one runtime overflow check, and a runtime equality +; check verifying that the step of the induction is equal to +; sext(trunc(step)). See pr30654. +; +; int a[N]; +; void doit1(int n, int step) { +; int i; +; char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; + + +; CHECK-LABEL: @doit1 +; CHECK: vector.scevcheck +; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow +; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]] +; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}} +; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: vector.body: +; CHECK: <4 x i32> + +@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 + +; Function Attrs: norecurse nounwind uwtable +define void @doit1(i32 %n, i32 %step) local_unnamed_addr { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %sext = shl i32 %p.09, 24 + %conv = ashr exact i32 %sext, 24 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; Same as above, but for checking the SCEV "zext(trunc(%p.09)) + %step": +; +; int a[N]; +; void doit2(int n, int step) { +; int i; +; unsigned char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; + +; CHECK-LABEL: @doit2 +; CHECK: vector.scevcheck +; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow +; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]] +; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}} +; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: vector.body: +; CHECK: <4 x i32> + +; Function Attrs: norecurse nounwind uwtable +define void @doit2(i32 %n, i32 %step) local_unnamed_addr { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %conv = and i32 %p.09, 255 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; Here we check that the same phi scev analysis would fail +; to create the runtime checks because the step is not invariant. +; As a result vectorization will fail. +; +; int a[N]; +; void doit3(int n, int step) { +; int i; +; char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; step += 2; +; } +; } +; + +; CHECK-LABEL: @doit3 +; CHECK-NOT: vector.scevcheck +; CHECK-NOT: vector.body: +; CHECK-LABEL: for.body: + +; Function Attrs: norecurse nounwind uwtable +define void @doit3(i32 %n, i32 %step) local_unnamed_addr { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %p.012 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %step.addr.010 = phi i32 [ %add3, %for.body ], [ %step, %for.body.preheader ] + %sext = shl i32 %p.012, 24 + %conv = ashr exact i32 %sext, 24 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step.addr.010 + %add3 = add nsw i32 %step.addr.010, 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + + +; Lastly, we also check the case where we can tell at compile time that +; the step of the induction is equal to sext(trunc(step)), in which case +; we don't have to check this equality at runtime (we only need the +; runtime overflow check). +; +; a[N]; +; void doit4(int n, char cstep) { +; int i; +; char p = 0; +; int istep = cstep; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + istep; +; } +; } + +; CHECK-LABEL: @doit4 +; CHECK: vector.scevcheck +; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: %{{.*}} = or i1 {{.*}}, %mul.overflow +; CHECK-NOT: %ident.check = icmp ne i32 {{.*}}, %{{.*}} +; CHECK-NOT: %{{.*}} = or i1 %{{.*}}, %ident.check +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: vector.body: +; CHECK: <4 x i32> + +; Function Attrs: norecurse nounwind uwtable +define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { +entry: + %conv = sext i8 %cstep to i32 + %cmp10 = icmp sgt i32 %n, 0 + br i1 %cmp10, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %p.011 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %sext = shl i32 %p.011, 24 + %conv2 = ashr exact i32 %sext, 24 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv2, i32* %arrayidx, align 4 + %add = add nsw i32 %conv2, %conv + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +}