Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -498,10 +498,25 @@ /// ExprValueMapType - The typedef for ExprValueMap. /// - typedef DenseMap> ExprValueMapType; + typedef std::pair ValueOffsetPair; + typedef DenseMap> ExprValueMapType; /// ExprValueMap -- This map records the original values from which /// the SCEV expr is generated from. + /// + /// We want to represent the mapping as SCEV -> ValueOffsetPair instead + /// of SCEV -> Value: + /// Suppose the SCEV S1 -> Value V1 mapping is saved in ExprValueMap, + /// and suppose S1 is a SCEVAddExpr which is composed of another SCEV + /// S2 + SCEVConstant C1. When SCEV S3 is expanded and S3 = S2 + C2 + /// (C2 is different with C1), because S3 and S2 are not in the + /// ExprValueMap (only S1 is in the map), S3 cannot be represented + /// by V1 - C1 + C2. + /// That is why we represent ExprValueMap as a mapping from SCEV to + /// ValueOffsetPair. If S1 is generated from V1, and S1 equals to + /// S2 + C1, we will add both S1->{V1, 0} and S2->{V1, C1} into the + /// map. In this way, S3 can be represented by V1 - C1 + C2. This + /// is helpful when S2 is a complex SCEV expr. ExprValueMapType ExprValueMap; /// The typedef for ValueExprMap. @@ -1152,9 +1167,13 @@ /// bool containsAddRecurrence(const SCEV *S); + /// If \p S is a SCEVAddExpr and it is composed of a SCEV S' and an + /// offset I, then return {S', I}, else return {\p S, nullptr}. + std::pair splitAddExpr(const SCEV *S); + /// getSCEVValues - Return the Value set from which the SCEV expr is /// generated. - SetVector *getSCEVValues(const SCEV *S); + SetVector *getSCEVValues(const SCEV *S); /// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); Index: include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- include/llvm/Analysis/ScalarEvolutionExpander.h +++ include/llvm/Analysis/ScalarEvolutionExpander.h @@ -264,7 +264,8 @@ PointerType *PTy, Type *Ty, Value *V); /// \brief Find a previous Value in ExprValueMap for expand. - Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + ScalarEvolution::ValueOffsetPair + FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); Value *expand(const SCEV *S); Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -3386,8 +3386,35 @@ return F.FoundOne; } -/// getSCEVValues - Return the Value set from S. -SetVector *ScalarEvolution::getSCEVValues(const SCEV *S) { +/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. +std::pair +ScalarEvolution::splitAddExpr(const SCEV *S) { + if (static_cast(S->getSCEVType()) != scAddExpr) + return {S, nullptr}; + + const SCEVNAryExpr *Add = cast(S); + if (Add->getNumOperands() != 2) + return {S, nullptr}; + auto *Op1 = Add->getOperand(0); + auto *Op2 = Add->getOperand(1); + + if ((Op1->getSCEVType() == scConstant || Op2->getSCEVType() != scConstant) && + (Op1->getSCEVType() != scConstant || Op2->getSCEVType() == scConstant)) + return {S, nullptr}; + + const SCEVConstant *ConstOp = + cast(Op1->getSCEVType() == scConstant ? Op1 : Op2); + const SCEV *NonConstOp = Op1->getSCEVType() != scConstant ? Op1 : Op2; + + ConstantInt *Offset = ConstOp->getValue(); + return {NonConstOp, Offset}; +} + +/// getSCEVValues - Return the ValueOffsetPair set from S. S can be +/// represented by the value and offset from any ValueOffsetPair in +/// the set. +SetVector * +ScalarEvolution::getSCEVValues(const SCEV *S) { ExprValueMapType::iterator SI = ExprValueMap.find_as(S); if (SI == ExprValueMap.end()) return nullptr; @@ -3395,24 +3422,34 @@ if (VerifySCEVMap) { // Check there is no dangling Value in the set returned. for (const auto &VE : SI->second) - assert(ValueExprMap.count(VE)); + assert(ValueExprMap.count(VE.first)); } #endif return &SI->second; } /// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap. -/// If ValueExprMap.erase(V) is not used together with forgetMemoizedResults(S), -/// eraseValueFromMap should be used instead to ensure whenever V->S is removed -/// from ValueExprMap, V is also removed from the set of ExprValueMap[S]. +/// ValueExprMap.erase(V) cannot be used separately. eraseValueFromMap +/// should be used to remove V from ValueExprMap and ExprValueMap at +/// the same time. void ScalarEvolution::eraseValueFromMap(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; - SetVector *SV = getSCEVValues(S); - // Remove V from the set of ExprValueMap[S] + SetVector *SV = getSCEVValues(S); + // Remove {V, 0} from the set of ExprValueMap[S] if (SV) - SV->remove(V); + SV->remove({V, nullptr}); + + // Remove {V, Offset} from the set of ExprValueMap[Stripped] + const SCEV *Stripped; + ConstantInt *Offset; + std::tie(Stripped, Offset) = splitAddExpr(S); + if (Offset != nullptr) { + SetVector *SV = getSCEVValues(Stripped); + if (SV) + SV->remove({V, Offset}); + } ValueExprMap.erase(V); } } @@ -3427,11 +3464,26 @@ S = createSCEV(V); // During PHI resolution, it is possible to create two SCEVs for the same // V, so it is needed to double check whether V->S is inserted into - // ValueExprMap before insert S->V into ExprValueMap. + // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) - ExprValueMap[S].insert(V); + if (Pair.second) { + ExprValueMap[S].insert({V, nullptr}); + + // If S == Stripped + Offset, add Stripped -> {V, Offset} into + // ExprValueMap. + const SCEV *Stripped = S; + ConstantInt *Offset = nullptr; + std::tie(Stripped, Offset) = splitAddExpr(S); + // If stripped is SCEVUnknown, don't bother to save + // stripped -> {v, offset}. it doesn't simplify and sometimes even + // increase the complexity of expansion code. + // If v is GetElementPtrInst, don't save stripped -> {v, offset} + // because it may generate add/sub instead of GEP in SCEV expansion. + if (Offset != nullptr && Stripped->getSCEVType() != scUnknown && + !isa(V)) + ExprValueMap[Stripped].insert({V, Offset}); + } } return S; } @@ -3444,8 +3496,8 @@ const SCEV *S = I->second; if (checkValidity(S)) return S; + eraseValueFromMap(V); forgetMemoizedResults(S); - ValueExprMap.erase(I); } return nullptr; } @@ -3717,8 +3769,8 @@ if (!isa(I) || !isa(Old) || (I != PN && Old == SymName)) { + eraseValueFromMap(It->first); forgetMemoizedResults(Old); - ValueExprMap.erase(It); } } @@ -4052,7 +4104,7 @@ // to create an AddRecExpr for this PHI node. We can not keep this temporary // as it will prevent later (possibly simpler) SCEV expressions to be added // to the ValueExprMap. - ValueExprMap.erase(PN); + eraseValueFromMap(PN); } return nullptr; @@ -5374,8 +5426,8 @@ // case, createNodeForPHI will perform the necessary updates on its // own when it gets to that point. if (!isa(I) || !isa(Old)) { + eraseValueFromMap(It->first); forgetMemoizedResults(Old); - ValueExprMap.erase(It); } if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); @@ -5423,8 +5475,8 @@ ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { + eraseValueFromMap(It->first); forgetMemoizedResults(It->second); - ValueExprMap.erase(It); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } @@ -5458,8 +5510,8 @@ ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { + eraseValueFromMap(It->first); forgetMemoizedResults(It->second); - ValueExprMap.erase(It); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -1598,9 +1598,10 @@ return V; } -Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, - const Instruction *InsertPt) { - SetVector *Set = SE.getSCEVValues(S); +ScalarEvolution::ValueOffsetPair +SCEVExpander::FindValueInExprValueMap(const SCEV *S, + const Instruction *InsertPt) { + SetVector *Set = SE.getSCEVValues(S); // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. if (CanonicalMode || !SE.containsAddRecurrence(S)) { @@ -1609,21 +1610,21 @@ // Choose a Value from the set which dominates the insertPt. // insertPt should be inside the Value's parent loop so as not to break // the LCSSA form. - for (auto const &Ent : *Set) { + for (auto const &VOPair : *Set) { + Value *V = VOPair.first; + ConstantInt *Offset = VOPair.second; Instruction *EntInst = nullptr; - if (Ent && isa(Ent) && - (EntInst = cast(Ent)) && - S->getType() == Ent->getType() && + if (V && isa(V) && (EntInst = cast(V)) && + S->getType() == V->getType() && EntInst->getFunction() == InsertPt->getFunction() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) { - return Ent; - } + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) + return {V, Offset}; } } } - return nullptr; + return {nullptr, nullptr}; } // The expansion of SCEV will either reuse a previous Value in ExprValueMap, @@ -1671,10 +1672,14 @@ Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. - Value *V = FindValueInExprValueMap(S, InsertPt); + ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt); + Value *V = VO.first; if (!V) V = visit(S); + else if (VO.second) { + V = Builder.CreateSub(V, VO.second); + } // Remember the expanded value for this SCEV at this location. // @@ -1887,8 +1892,9 @@ // Use expand's logic which is used for reusing a previous Value in // ExprValueMap. - if (Value *Val = FindValueInExprValueMap(S, At)) - return Val; + ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); + if (VO.first) + return VO.first; // There is potential to make this significantly smarter, but this simple // heuristic already gets some interesting cases. Index: test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll =================================================================== --- test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll +++ test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S |FileCheck %s +; SCEV expansion uses existing value or value + offset to reduce duplicate code expansion so foo should only generate one select inst after loop vectorization. +; CHECK-LABEL: @foo( +; CHECK: select +; CHECK-NOT: select + +@ySrcL = common global i8* null, align 8 +@smL = common global i32 0, align 4 + +define void @foo(i32 %rwL, i32 %kL, i32 %xfL) { +entry: + %sub = add nsw i32 %rwL, -1 + %shr = ashr i32 %xfL, 6 + %cmp.i = icmp slt i32 %sub, %shr + %cond.i = select i1 %cmp.i, i32 %sub, i32 %shr + %cmp6 = icmp sgt i32 %cond.i, %kL + br i1 %cmp6, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp = load i8*, i8** @ySrcL, align 8 + %tmp1 = sext i32 %kL to i64 + %tmp2 = sext i32 %cond.i to i64 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %reduct.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i8, i8* %tmp, i64 %indvars.iv + %tmp3 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %tmp3 to i32 + %add = add nsw i32 %conv, %reduct.07 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %cmp = icmp slt i64 %indvars.iv.next, %tmp2 + br i1 %cmp, label %for.body, label %for.end.loopexit, !llvm.loop !0 + +for.end.loopexit: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %reduct.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ] + store i32 %reduct.0.lcssa, i32* @smL, align 4 + ret void +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.vectorize.width", i32 4} +!2 = !{!"llvm.loop.interleave.count", i32 1}