Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -815,7 +815,21 @@
   /// Helper function called from createNodeForPHI.
   const SCEV *createAddRecFromPHI(PHINode *PN);
-
+
+  /// Helper function to createAddRecFromPHIWithCasts. We have a phi
+  /// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
+  /// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
+  /// way. This function checks if \p Op, an operand of this SCEVAddExpr,
+  /// follows one of the following patterns:
+  ///   Op == (Sext ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+  ///   Op == (Zext ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+  /// If the SCEV expression of \p Op conforms to one of the expected patterns
+  /// we return the type of the truncation operation, and indicate whether the
+  /// truncated type should be treated as signed/unsigned by setting
+  /// \p Signed to true/false, respectively.
+  Type *isSimpleCastedPHI(const SCEV *Op, const SCEV *SymbolicPHI,
+                          bool &Signed);
+
   /// Helper function called from createNodeForPHI.
   const SCEV *createNodeFromSelectLikePHI(PHINode *PN);
 
@@ -1181,6 +1195,22 @@
     SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
     return getAddRecExpr(NewOp, L, Flags);
   }
+
+  /// Similar to createAddRecFromPHI, but with the additional flexibility of
+  /// suggesting runtime overflow checks in case casts are encountered.
+  /// If successful, the analysis records that for this loop, \p SymbolicPHI,
+  /// which is the UnknownSCEV currently representing the PHI, can be rewritten
+  /// into an AddRec, assuming some predicate; the function then returns the
+  /// AddRec and sets \p Pred to the required predicate.
+  /// If the analysis is not successful, a mapping from \p SymbolicPHI to
+  /// itself (with no predicates) is recorded, and nullptr is returned both
+  /// in \p Pred and as the returned SCEV.
+  /// The results of the analysis are cached in TentativeSCEVRewrites.
+  /// The function is intended to be called from PSCEV (the caller will decide
+  /// whether to actually add the predicates and carry out the rewrites).
+  const SCEV *createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI,
+                                           const SCEVPredicate **Pred);
+
   /// Returns an expression for a GEP
   ///
   /// \p GEP The GEP. The indices contained in the GEP itself are ignored,
@@ -1630,6 +1660,12 @@
   FoldingSet<SCEVPredicate> UniquePreds;
   BumpPtrAllocator SCEVAllocator;
+
+  /// Cache tentative mappings from UnknownSCEVs in a Loop to a SCEV expression
+  /// they can be rewritten into under a certain predicate.
+  DenseMap<std::pair<const SCEV *, const Loop *>,
+           std::pair<const SCEV *, const SCEVPredicate *>>
+      TentativeSCEVRewrites;
+
   /// The head of a linked list of all SCEVUnknown values that have been
   /// allocated. This is used by releaseMemory to locate them all and call
   /// their destructors.
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4017,6 +4017,203 @@
   return None;
 }
 
+Type *ScalarEvolution::isSimpleCastedPHI(const SCEV *Op,
+                                         const SCEV *SymbolicPHI,
+                                         bool &Signed) {
+  const SCEVSignExtendExpr *Sext = dyn_cast<SCEVSignExtendExpr>(Op);
+  const SCEVZeroExtendExpr *Zext = dyn_cast<SCEVZeroExtendExpr>(Op);
+  if (!Sext && !Zext)
+    return nullptr;
+  const SCEVTruncateExpr *Trunc =
+      Sext ? dyn_cast<SCEVTruncateExpr>(Sext->getOperand())
+           : dyn_cast<SCEVTruncateExpr>(Zext->getOperand());
+  if (!Trunc)
+    return nullptr;
+  const SCEV *X = Trunc->getOperand();
+  if (X != SymbolicPHI)
+    return nullptr;
+  unsigned SourceBits = getTypeSizeInBits(X->getType());
+  unsigned NewBits = Sext ? getTypeSizeInBits(Sext->getType())
+                          : getTypeSizeInBits(Zext->getType());
+  if (SourceBits != NewBits)
+    return nullptr;
+  Signed = (Sext != nullptr);
+  return Trunc->getType();
+}
+
+// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
+// computation that updates the phi follows one of the following patterns:
+//   (Sext ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+//   (Zext ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+// which correspond to a phi->trunc->sext/zext->add->phi update chain.
+//
+// Example usage scenario:
+//    Say the PredRewriter is called for the following SCEV:
+//         8 * ((sext i32 (trunc i64 %x to i32) to i64) + %step)
+//    So it will visitMul->visitAdd->visitSext->visitTrunc->visitUnknown(%x),
+//    and call this function with %SymbolicPHI = %x.
+//
+//    The analysis will find that the value coming around the backedge has
+//    the following SCEV:
+//         BEValue = ((sext i32 (trunc i64 %x to i32) to i64) + %step)
+//    and we'll create the following predicate:
+//         P1: AR: {0,+,(trunc i64 %step to i32)} Added Flags: <nssw>
+//    and return the following new AddRec to our caller:
+//         Returned AddRec: {0,+,%step}
+//    \p Pred will hold P1, and the suggested predicated rewrite will be
+//    cached in TentativeSCEVRewrites:
+//         TentativeSCEVRewrites[{%x,L}] = {AddRec, P1}
+//
+// TODOs:
+//
+// 1) Extend the Induction descriptor to also support inductions that involve
+//    casts: When needed (namely, when we are called in the context of the
+//    vectorizer induction analysis), a Set of cast instructions will be
+//    populated by this method, and provided back to isInductionPHI. This is
+//    needed to allow the vectorizer to properly record them to be ignored by
+//    the cost model and to avoid vectorizing them (otherwise these casts,
+//    which are redundant under the runtime overflow checks, will be
+//    vectorized, which can be costly).
+//
+// 2) Support additional induction/PHISCEV patterns: We also want to support
+//    inductions where the sext-trunc / zext-trunc operations (partly) occur
+//    after the induction update operation (the induction increment):
+//
+//      (Trunc iy (Sext ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
+//      (Trunc iy (Zext ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
+//    which correspond to a phi->add->trunc->sext/zext->phi update chain.
+//
+//      (Trunc iy ((Sext ix (%SymbolicPHI) to iy) + InvariantAccum) to ix)
+//      (Trunc iy ((Zext ix (%SymbolicPHI) to iy) + InvariantAccum) to ix)
+//    which correspond to a phi->trunc->add->sext/zext->phi update chain.
+//
+// 3) Outline common code with createAddRecFromPHI to avoid duplication.
+//
+const SCEV *
+ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI,
+                                              const SCEVPredicate **Pred) {
+  *Pred = nullptr;
+
+  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
+  if (!PN->getType()->isIntegerTy())
+    return nullptr;
+
+  const Loop *L = LI.getLoopFor(PN->getParent());
+  if (!L || L->getHeader() != PN->getParent())
+    return nullptr;
+
+  // Check to see if we already analyzed this PHI.
+  auto I = TentativeSCEVRewrites.find(std::make_pair(SymbolicPHI, L));
+  if (I != TentativeSCEVRewrites.end()) {
+    std::pair<const SCEV *, const SCEVPredicate *> Rewrite = I->second;
+    // Analysis was done before and failed to create an AddRec:
+    if (Rewrite.first == SymbolicPHI)
+      return nullptr;
+    // Analysis was done before and succeeded in creating an AddRec under
+    // a predicate:
+    assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
+    *Pred = Rewrite.second;
+    return Rewrite.first;
+  }
+
+  auto Pair = std::make_pair(SymbolicPHI, L);
+  TentativeSCEVRewrites[Pair] = std::make_pair(SymbolicPHI, nullptr);
+
+  // The loop may have multiple entrances or multiple exits; we can analyze
+  // this phi as an addrec if it has a unique entry value and a unique
+  // backedge value.
+  Value *BEValueV = nullptr, *StartValueV = nullptr;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *V = PN->getIncomingValue(i);
+    if (L->contains(PN->getIncomingBlock(i))) {
+      if (!BEValueV) {
+        BEValueV = V;
+      } else if (BEValueV != V) {
+        BEValueV = nullptr;
+        break;
+      }
+    } else if (!StartValueV) {
+      StartValueV = V;
+    } else if (StartValueV != V) {
+      StartValueV = nullptr;
+      break;
+    }
+  }
+  if (!BEValueV || !StartValueV)
+    return nullptr;
+
+  const SCEV *BEValue = getSCEV(BEValueV);
+
+  // If the value coming around the backedge is an add with the symbolic
+  // value we just inserted, possibly with casts that we can ignore under
+  // an appropriate runtime guard, then we found a simple induction variable!
+  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
+  if (!Add)
+    return nullptr;
+
+  // If there is a single occurrence of the symbolic value, possibly
+  // casted, replace it with a recurrence.
+  unsigned FoundIndex = Add->getNumOperands();
+  Type *TruncTy = nullptr;
+  bool Signed;
+  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+    if (Add->getOperand(i) == SymbolicPHI ||
+        (TruncTy = isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed)))
+      if (FoundIndex == e) {
+        FoundIndex = i;
+        break;
+      }
+
+  if (FoundIndex == Add->getNumOperands())
+    return nullptr;
+
+  // Create an add with everything but the specified operand.
+  SmallVector<const SCEV *, 8> Ops;
+  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+    if (i != FoundIndex)
+      Ops.push_back(Add->getOperand(i));
+  const SCEV *Accum = getAddExpr(Ops);
+
+  // This is not a valid addrec if the step amount is varying each
+  // loop iteration, but is not itself an addrec in this loop.
+  if (!isLoopInvariant(Accum, L) &&
+      !(isa<SCEVAddRecExpr>(Accum) &&
+        cast<SCEVAddRecExpr>(Accum)->getLoop() == L))
+    return nullptr;
+
+  // Create a truncated addrec for which we will add a no-overflow check.
+  const SCEV *StartVal = getSCEV(StartValueV);
+  const SCEV *PHISCEV =
+      getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
+                    getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
+  const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+  if (!AR)
+    return nullptr;
+
+  // Now go ahead and create the new addrec in which the casts had been folded
+  // away. The caller can rewrite SymbolicPHI into NewAR if it will also add
+  // the proper runtime overflow check as specified in Pred.
+  //
+  // Note: It seems that we could do something better here, and return the
+  //       above truncated addrec, with a narrower type than the one we return
+  //       below. But then the type of the addrec would not match the type of
+  //       the phi node, which would break some assumptions about the
+  //       induction variable later on in the vectorizer, so that would
+  //       require more changes.
Also, we + // reach this point from a visitUnknown called by a visitTruncate; + // returning an already truncated addrec to visitTruncate would require + // also changing visitTruncate since it expects to get the (wider) type + // before truncation. + // + SCEVWrapPredicate::IncrementWrapFlags AddedFlags = + Signed ? SCEVWrapPredicate::IncrementNSSW + : SCEVWrapPredicate::IncrementNUSW; + *Pred = getWrapPredicate(AR, AddedFlags); + auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); + + // Remember the result of the analysis for this SCEV at this location. + TentativeSCEVRewrites[Pair] = std::make_pair(NewAR, *Pred); + return NewAR; +} + const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { const Loop *L = LI.getLoopFor(PN->getParent()); if (!L || L->getHeader() != PN->getParent()) @@ -10217,8 +10414,27 @@ if (IPred->getLHS() == Expr) return IPred->getRHS(); } + return analyzeUnknownSCEVPHI(Expr, nullptr); + } - return Expr; + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + SCEVWrapPredicate::IncrementWrapFlags AddedFlags; + // We call analyzeTruncOperand instead of calling visit(Expr->getOperand()) + // directly because we need extra context about the signess of the operand; + // without it we don't know which predicate to add (NUSW or NSSW). + const SCEV *Operand = analyzeTruncOperand(Expr->getOperand(), &AddedFlags); + const SCEVAddRecExpr *AR = dyn_cast(Operand); + if (AR && AR->getLoop() == L && AR->isAffine() && + AddedFlags != SCEVWrapPredicate::IncrementAnyWrap) { + const SCEV *Step = AR->getStepRecurrence(SE); + Type *Ty = Expr->getType(); + const SCEV *TruncAR = SE.getAddRecExpr( + SE.getTruncateExpr(AR->getStart(), Ty), SE.getTruncateExpr(Step, Ty), + L, AR->getNoWrapFlags()); + if (addOverflowAssumption(cast(TruncAR), AddedFlags)) + return TruncAR; + } + return SE.getTruncateExpr(Operand, Expr->getType()); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { @@ -10265,6 +10481,63 @@ return true; } + // If \p Expr represents a PHINode, we try to see if it can be represented + // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible + // to add this predicate as a runtime overflow check, we return the AddRec + // and also any known wrapping information for this AddRec (NSSW/NUSW) in + // \p AddedFlags. If \p Expr does not meet these conditions (is not a PHI + // node, or we couldn't create an AddRec for it, or couldn't add the + // predicate), we just return \p Expr without setting anything in + // \p AddedFlags. + const SCEV * + analyzeUnknownSCEVPHI(const SCEVUnknown *Expr, + SCEVWrapPredicate::IncrementWrapFlags *AddedFlags) { + const SCEVPredicate *PHISCEVPred; + const SCEV *AR; + if (!isa(Expr->getValue()) || + !(AR = SE.createAddRecFromPHIWithCasts(Expr, &PHISCEVPred))) + return Expr; + auto WrapPred = cast(PHISCEVPred); + auto AddRec = cast(WrapPred->getExpr()); + if (addOverflowAssumption(AddRec, WrapPred->getFlags())) { + if (AddedFlags) + *AddedFlags = cast(PHISCEVPred)->getFlags(); + return AR; + } + return Expr; + } + + // Helper function to visitTruncateExpr. It is essentially a wrapper to the + // call visit(Op); The purpose of this wrapper is to provide the relevant + // wrap flags, if possible, for the truncate visitor to use when creating + // the predicate that allows it to fold the truncation into the AddRec. 
+  const SCEV *
+  analyzeTruncOperand(const SCEV *Op,
+                      SCEVWrapPredicate::IncrementWrapFlags *AddedFlags) {
+    *AddedFlags = SCEVWrapPredicate::IncrementAnyWrap;
+
+    switch (Op->getSCEVType()) {
+    case scUnknown: {
+      const auto *UnknownSCEVOp = cast<SCEVUnknown>(Op);
+      return analyzeUnknownSCEVPHI(UnknownSCEVOp, AddedFlags);
+    }
+
+    case scZeroExtend: {
+      *AddedFlags = SCEVWrapPredicate::IncrementNUSW;
+      return visitZeroExtendExpr(cast<SCEVZeroExtendExpr>(Op));
+    }
+
+    case scSignExtend: {
+      *AddedFlags = SCEVWrapPredicate::IncrementNSSW;
+      return visitSignExtendExpr(cast<SCEVSignExtendExpr>(Op));
+    }
+
+    default:
+      // TODO: Try to do more here to infer AddedFlags.
+      return visit(Op);
+    }
+  }
+
   SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
   SCEVUnionPredicate *Pred;
   const Loop *L;
Index: llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -0,0 +1,107 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that the vectorizer identifies the %p.09 phi
+; as an induction variable, despite the potential overflow
+; due to the truncation from 32 bits to 8 bits.
+; SCEV will detect the pattern "sext(trunc(%p.09)) + %step"
+; and generate the required runtime overflow check under which
+; we can assume no overflow. See pr30654.
+;
+; int a[N];
+; void doit1(int n, int step) {
+;   int i;
+;   char p = 0;
+;   for (i = 0; i < n; i++) {
+;     a[i] = p;
+;     p = p + step;
+;   }
+; }
+;
+
+; CHECK-LABEL: @doit1
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %sext = shl i32 %p.09, 24
+  %conv = ashr exact i32 %sext, 24
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; Same as above, but for checking the SCEV "zext(trunc(%p.09)) + %step":
+;
+; int a[N];
+; void doit2(int n, int step) {
+;   int i;
+;   unsigned char p = 0;
+;   for (i = 0; i < n; i++) {
+;     a[i] = p;
+;     p = p + step;
+;   }
+; }
+;
+
+; CHECK-LABEL: @doit2
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %conv = and i32 %p.09, 255
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
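
A worked example (illustrative only, not part of the patch): for @doit1 above, the new analysis sees roughly the following; the exact SCEV printing may differ slightly.

    SymbolicPHI       = %p.09                 (i32 phi, start value 0)
    BEValue           = ((sext i8 (trunc i32 %p.09 to i8) to i32) + %step)
    isSimpleCastedPHI -> TruncTy = i8, Signed = true
    Pred (P1)         = <nssw> wrap predicate on {0,+,(trunc i32 %step to i8)}
    Returned AddRec   = {0,+,%step}

Under P1 the trunc/sext pair is redundant, so the vectorizer can treat %p.09 as an ordinary induction; expanding P1 into IR is what produces the @llvm.umul.with.overflow.i8 call in vector.scevcheck that the CHECK lines look for.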
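
The header comment states that createAddRecFromPHIWithCasts is intended to be driven from PSCEV, with the caller deciding whether to actually commit to the predicate. A minimal sketch of such a caller follows, using only the API added by this patch; the function rewritePHIIfPossible and its surrounding setup are illustrative assumptions, not existing LLVM code.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Ask SCEV whether a symbolic PHI can be rewritten into an AddRec once the
// sext/zext-of-trunc casts are covered by a runtime overflow check.
static const SCEV *
rewritePHIIfPossible(ScalarEvolution &SE, const SCEVUnknown *SymbolicPHI,
                     SmallVectorImpl<const SCEVPredicate *> &Preds) {
  const SCEVPredicate *Pred = nullptr;
  const SCEV *AR = SE.createAddRecFromPHIWithCasts(SymbolicPHI, &Pred);
  if (!AR)
    return SymbolicPHI; // Analysis failed; keep the symbolic SCEV as-is.
  // The rewrite is valid only under Pred; record it so the caller can emit
  // the corresponding runtime check (or discard the rewrite altogether).
  Preds.push_back(Pred);
  return AR; // The AddRec {Start,+,Step} with the casts folded away.
}

Whether the runtime check is worth its cost is left to the client (here, the loop vectorizer), which is why the analysis only caches a tentative rewrite in TentativeSCEVRewrites rather than committing to it.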