Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1198,6 +1198,22 @@
     SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
     return getAddRecExpr(NewOp, L, Flags);
   }
+
+  /// Similar to createAddRecFromPHI, but with the additional flexibility of
+  /// suggesting runtime overflow checks in case casts are encountered.
+  /// If successful, the analysis records that for this loop, \p SymbolicPHI,
+  /// which is the UnknownSCEV currently representing the PHI, can be
+  /// rewritten into an AddRec, assuming some predicate; the function then
+  /// returns the AddRec and sets \p Pred to the required predicate.
+  /// If the analysis is not successful, a mapping from the \p SymbolicPHI to
+  /// itself (with no predicates) is recorded, and a nullptr is returned both
+  /// for \p Pred and for the returned SCEV.
+  /// The results of the analysis are cached in PredicatedSCEVRewrites.
+  /// The function is intended to be called from PSCEV (the caller will decide
+  /// whether to actually add the predicates and carry out the rewrites).
+  const SCEV *createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI,
+                                           const SCEVPredicate **Pred);
+
   /// Returns an expression for a GEP
   ///
   /// \p GEP The GEP. The indices contained in the GEP itself are ignored,
@@ -1647,6 +1663,12 @@
   FoldingSet<SCEVPredicate> UniquePreds;
   BumpPtrAllocator SCEVAllocator;

+  /// Cache tentative mappings from UnknownSCEVs in a Loop to a SCEV
+  /// expression they can be rewritten into under a certain predicate.
+  DenseMap<std::pair<const SCEV *, const Loop *>,
+           std::pair<const SCEV *, const SCEVPredicate *>>
+      PredicatedSCEVRewrites;
+
   /// The head of a linked list of all SCEVUnknown values that have been
   /// allocated. This is used by releaseMemory to locate them all and call
   /// their destructors.
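For context before the implementation below: the intended PSCEV-side usage of the new API looks roughly like the following sketch. This is an illustration only, not part of the patch; the helper name rewritePHI and the predicate-set parameter are assumptions (the in-patch consumer is SCEVPredicateRewriter::analyzeUnknownSCEVPHI in ScalarEvolution.cpp below).

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

// Hypothetical caller sketch (not patch code): rewrite a symbolic PHI into
// an AddRec only when the required runtime overflow check is also recorded.
static const SCEV *rewritePHI(ScalarEvolution &SE,
                              const SCEVUnknown *SymbolicPHI,
                              SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
  const SCEVPredicate *Pred = nullptr;
  if (const SCEV *AR = SE.createAddRecFromPHIWithCasts(SymbolicPHI, &Pred)) {
    // The rewrite is sound only under the overflow predicate; the caller
    // commits to emitting the corresponding runtime check.
    Preds.insert(Pred);
    return AR;
  }
  return SymbolicPHI; // Analysis failed; keep the symbolic value.
}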
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4026,6 +4026,205 @@
   return None;
 }

+/// Helper function to createAddRecFromPHIWithCasts. We have a phi
+/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
+/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
+/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
+/// follows one of the following patterns:
+/// Op == (Sext ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+/// Op == (Zext ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+/// If the SCEV expression of \p Op conforms with one of the expected patterns
+/// we return the type of the truncation operation, and indicate whether the
+/// truncated type should be treated as signed/unsigned by setting
+/// \p Signed to true/false, respectively.
+static Type *isSimpleCastedPHI(const SCEV *Op, const SCEV *SymbolicPHI,
+                               bool &Signed, ScalarEvolution &SE) {
+  const SCEVSignExtendExpr *Sext = dyn_cast<SCEVSignExtendExpr>(Op);
+  const SCEVZeroExtendExpr *Zext = dyn_cast<SCEVZeroExtendExpr>(Op);
+  if (!Sext && !Zext)
+    return nullptr;
+  const SCEVTruncateExpr *Trunc =
+      Sext ? dyn_cast<SCEVTruncateExpr>(Sext->getOperand())
+           : dyn_cast<SCEVTruncateExpr>(Zext->getOperand());
+  if (!Trunc)
+    return nullptr;
+  const SCEV *X = Trunc->getOperand();
+  if (X != SymbolicPHI)
+    return nullptr;
+  unsigned SourceBits = SE.getTypeSizeInBits(X->getType());
+  unsigned NewBits = Sext ? SE.getTypeSizeInBits(Sext->getType())
+                          : SE.getTypeSizeInBits(Zext->getType());
+  if (SourceBits != NewBits)
+    return nullptr;
+  Signed = Sext != nullptr;
+  return Trunc->getType();
+}
+
+// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
+// computation that updates the phi follows one of the following patterns:
+// (Sext ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+// (Zext ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+// which correspond to a phi->trunc->sext/zext->add->phi update chain.
+//
+// Example usage scenario:
+// Say the SCEVPredicateRewriter is called for the following SCEV:
+// 8 * ((sext i32 (trunc i64 %x to i32) to i64) + %step)
+// It will visitMul->visitAdd->visitSext->visitTrunc->visitUnknown(%x),
+// and call this function with %SymbolicPHI = %x.
+// The analysis will find that the value coming around the backedge has
+// the following SCEV:
+// BEValue = ((sext i32 (trunc i64 %x to i32) to i64) + %step)
+// and we'll create the following predicate:
+// P1: AR: {0,+,(trunc i64 %step to i32)} Added Flags: <nssw>
+// and return the following new AddRec to our caller:
+// Returned AddRec: {0,+,%step}
+// \p Pred will hold P1, and the suggested predicated rewrite will be
+// cached in PredicatedSCEVRewrites:
+// PredicatedSCEVRewrites[{%x,L}] = {AddRec, P1}
+//
+// TODOs:
+//
+// 1) Extend the induction descriptor to also support inductions that involve
+//    casts: When needed (namely, when we are called in the context of the
+//    vectorizer induction analysis), a set of cast instructions will be
+//    populated by this method, and provided back to isInductionPHI. This is
+//    needed to allow the vectorizer to properly record them to be ignored by
+//    the cost model and to avoid vectorizing them (otherwise these casts,
+//    which are redundant under the runtime overflow checks, will be
+//    vectorized, which can be costly).
+//
+// 2) Support additional induction/PHISCEV patterns: We also want to support
+//    inductions where the sext-trunc / zext-trunc operations (partly) occur
+//    after the induction update operation (the induction increment):
+//
+//    (Trunc iy (Sext ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
+//    (Trunc iy (Zext ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
+//    which correspond to a phi->add->trunc->sext/zext->phi update chain.
+//
+//    (Trunc iy ((Sext ix (%SymbolicPHI) to iy) + InvariantAccum) to ix)
+//    (Trunc iy ((Zext ix (%SymbolicPHI) to iy) + InvariantAccum) to ix)
+//    which correspond to a phi->trunc->add->sext/zext->phi update chain.
+//
+// 3) Outline common code with createAddRecFromPHI to avoid duplication.
+//
+const SCEV *
+ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI,
+                                              const SCEVPredicate **Pred) {
+  *Pred = nullptr;
+
+  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
+  if (!PN->getType()->isIntegerTy())
+    return nullptr;
+
+  const Loop *L = LI.getLoopFor(PN->getParent());
+  if (!L || L->getHeader() != PN->getParent())
+    return nullptr;
+
+  // Check to see if we already analyzed this PHI.
+  auto I = PredicatedSCEVRewrites.find(std::make_pair(SymbolicPHI, L));
+  if (I != PredicatedSCEVRewrites.end()) {
+    std::pair<const SCEV *, const SCEVPredicate *> Rewrite = I->second;
+    // Analysis was done before and failed to create an AddRec:
+    if (Rewrite.first == SymbolicPHI)
+      return nullptr;
+    // Analysis was done before and succeeded to create an AddRec under
+    // a predicate:
+    assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
+    *Pred = Rewrite.second;
+    return Rewrite.first;
+  }
+
+  auto Pair = std::make_pair(SymbolicPHI, L);
+  PredicatedSCEVRewrites[Pair] = std::make_pair(SymbolicPHI, nullptr);
+
+  // The loop may have multiple entrances or multiple exits; we can analyze
+  // this phi as an addrec if it has a unique entry value and a unique
+  // backedge value.
+  Value *BEValueV = nullptr, *StartValueV = nullptr;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *V = PN->getIncomingValue(i);
+    if (L->contains(PN->getIncomingBlock(i))) {
+      if (!BEValueV) {
+        BEValueV = V;
+      } else if (BEValueV != V) {
+        BEValueV = nullptr;
+        break;
+      }
+    } else if (!StartValueV) {
+      StartValueV = V;
+    } else if (StartValueV != V) {
+      StartValueV = nullptr;
+      break;
+    }
+  }
+  if (!BEValueV || !StartValueV)
+    return nullptr;
+
+  const SCEV *BEValue = getSCEV(BEValueV);
+
+  // If the value coming around the backedge is an add with the symbolic
+  // value we just inserted, possibly with casts that we can ignore under
+  // an appropriate runtime guard, then we found a simple induction variable!
+  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
+  if (!Add)
+    return nullptr;
+
+  // If there is a single occurrence of the symbolic value, possibly
+  // casted, replace it with a recurrence.
+  unsigned FoundIndex = Add->getNumOperands();
+  Type *TruncTy = nullptr;
+  bool Signed;
+  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+    // The SCEV predicate we add below allows us to prove that
+    // Add->getOperand(i) == SymbolicPHI despite the casts.
+    if ((TruncTy = isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed,
+                                     *this)))
+      if (FoundIndex == e) {
+        FoundIndex = i;
+        break;
+      }
+
+  if (FoundIndex == Add->getNumOperands())
+    return nullptr;
+
+  // Create an add with everything but the specified operand.
+  SmallVector<const SCEV *, 8> Ops;
+  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+    if (i != FoundIndex)
+      Ops.push_back(Add->getOperand(i));
+  const SCEV *Accum = getAddExpr(Ops);
+
+  // This is not a valid addrec if the step amount is varying each
+  // loop iteration, but is not itself an addrec in this loop.
+  if (!isLoopInvariant(Accum, L) &&
+      !(isa<SCEVAddRecExpr>(Accum) &&
+        cast<SCEVAddRecExpr>(Accum)->getLoop() == L))
+    return nullptr;
+
+  // Create a truncated addrec for which we will add a no overflow check.
+  const SCEV *StartVal = getSCEV(StartValueV);
+  const SCEV *PHISCEV =
+      getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
+                    getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
+  const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+  if (!AR)
+    return nullptr;
+
+  // Now go ahead and create the new addrec in which the casts had been folded
+  // away. The caller can rewrite SymbolicPHI into NewAR if it will also add
+  // the proper runtime overflow check as specified in Pred.
+  SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
+      Signed ? SCEVWrapPredicate::IncrementNSSW
+             : SCEVWrapPredicate::IncrementNUSW;
+  *Pred = getWrapPredicate(AR, AddedFlags);
+  auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
+
+  // Remember the result of the analysis for this SCEV at this location.
+  PredicatedSCEVRewrites[Pair] = std::make_pair(NewAR, *Pred);
+  return NewAR;
+}
+
 const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
   const Loop *L = LI.getLoopFor(PN->getParent());
   if (!L || L->getHeader() != PN->getParent())
@@ -5706,6 +5905,17 @@
   RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
   RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);

+  // Drop information about predicated SCEV rewrites for this loop.
+  for (auto I = PredicatedSCEVRewrites.begin(),
+            E = PredicatedSCEVRewrites.end();
+       I != E;) {
+    std::pair<const SCEV *, const Loop *> Entry = I->first;
+    if (Entry.second == L)
+      PredicatedSCEVRewrites.erase(I++);
+    else
+      ++I;
+  }
+
   // Drop information about expressions based on loop-header PHIs.
   SmallVector<Instruction *, 16> Worklist;
   PushLoopPHIs(L, Worklist);
@@ -9765,6 +9975,7 @@
       UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
       UniquePreds(std::move(Arg.UniquePreds)),
       SCEVAllocator(std::move(Arg.SCEVAllocator)),
+      PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
      FirstUnknown(Arg.FirstUnknown) {
   Arg.FirstUnknown = nullptr;
 }
@@ -10165,6 +10376,16 @@
   HasRecMap.erase(S);
   MinTrailingZerosCache.erase(S);

+  for (auto I = PredicatedSCEVRewrites.begin(),
+            E = PredicatedSCEVRewrites.end();
+       I != E;) {
+    std::pair<const SCEV *, const Loop *> Entry = I->first;
+    if (Entry.first == S)
+      PredicatedSCEVRewrites.erase(I++);
+    else
+      ++I;
+  }
+
   auto RemoveSCEVFromBackedgeMap =
       [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
         for (auto I = Map.begin(), E = Map.end(); I != E;) {
@@ -10399,8 +10620,7 @@
           if (IPred->getLHS() == Expr)
             return IPred->getRHS();
     }
-
-    return Expr;
+    return analyzeUnknownSCEVPHI(Expr);
   }

   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
@@ -10447,6 +10667,25 @@
     return true;
   }

+  // If \p Expr represents a PHINode, we try to see if it can be represented
+  // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
+  // to add this predicate as a runtime overflow check, we return the AddRec.
+  // If \p Expr does not meet these conditions (is not a PHI node, or we
+  // couldn't create an AddRec for it, or couldn't add the predicate), we just
+  // return \p Expr.
+  const SCEV *analyzeUnknownSCEVPHI(const SCEVUnknown *Expr) {
+    const SCEVPredicate *PHISCEVPred;
+    const SCEV *AR;
+    if (!isa<PHINode>(Expr->getValue()) ||
+        !(AR = SE.createAddRecFromPHIWithCasts(Expr, &PHISCEVPred)))
+      return Expr;
+    auto *WrapPred = cast<SCEVWrapPredicate>(PHISCEVPred);
+    auto *AddRec = cast<SCEVAddRecExpr>(WrapPred->getExpr());
+    if (addOverflowAssumption(AddRec, WrapPred->getFlags()))
+      return AR;
+    return Expr;
+  }
+
   SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
   SCEVUnionPredicate *Pred;
   const Loop *L;
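To see why the rewrite needs the wrap predicate at all, the following standalone sketch (plain C++, independent of LLVM; two's complement narrowing assumed) compares one step of the casted update chain from the doit1 test below against one step of the rewritten AddRec {0,+,step}. The two sequences agree exactly until the truncated recurrence {0,+,(trunc step)} leaves the i8 range, which is precisely the event the IncrementNSSW predicate rules out at runtime:

#include <cstdint>
#include <iostream>

// One update of the casted chain: p = sext.i8.to.i32(trunc.i32.to.i8(p)) + step.
static int32_t castedStep(int32_t p, int32_t step) {
  int8_t t = static_cast<int8_t>(p);     // trunc i32 -> i8 (two's complement)
  return static_cast<int32_t>(t) + step; // sext back to i32, then wide add
}

// One update of the rewritten AddRec {0,+,step} with the casts folded away.
static int32_t addRecStep(int32_t p, int32_t step) { return p + step; }

int main() {
  const int32_t Step = 30;
  int32_t Casted = 0, AddRec = 0;
  for (int i = 0; i < 8; ++i) {
    std::cout << "iter " << i << ": casted=" << Casted << " addrec=" << AddRec
              << (Casted == AddRec ? "" : "  <-- diverged") << '\n';
    Casted = castedStep(Casted, Step);
    AddRec = addRecStep(AddRec, Step);
  }
  // The sequences match for 0, 30, ..., 150, then diverge once the narrow
  // value 150 no longer fits in i8 (trunc(150) == -106).
  return 0;
}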
+; +; int a[N]; +; void doit1(int n, int step) { +; int i; +; char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; + +; CHECK-LABEL: @doit1 +; CHECK: vector.scevcheck +; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: vector.body: +; CHECK: <4 x i32> + +@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 + +; Function Attrs: norecurse nounwind uwtable +define void @doit1(i32 %n, i32 %step) local_unnamed_addr { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %sext = shl i32 %p.09, 24 + %conv = ashr exact i32 %sext, 24 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +; Same as above, but for checkinhg the SCEV "zext(trunc(%p.09)) + %step": +; +; int a[N]; +; void doit2(int n, int step) { +; int i; +; unsigned char p = 0; +; for (i = 0; i < n; i++) { +; a[i] = p; +; p = p + step; +; } +; } +; + +; CHECK-LABEL: @doit2 +; CHECK: vector.scevcheck +; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) +; CHECK: vector.body: +; CHECK: <4 x i32> + +; Function Attrs: norecurse nounwind uwtable +define void @doit2(i32 %n, i32 %step) local_unnamed_addr { +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %conv = and i32 %p.09, 255 + %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %add = add nsw i32 %conv, %step + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +}