Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -495,10 +495,29 @@ /// The typedef for ExprValueMap. /// - typedef DenseMap> ExprValueMapType; + typedef std::pair ValueOffsetPair; + typedef DenseMap> ExprValueMapType; /// ExprValueMap -- This map records the original values from which /// the SCEV expr is generated from. + /// + /// We want to represent the mapping as SCEV -> ValueOffsetPair instead + /// of SCEV -> Value: + /// Suppose we know S1 expands to V1, and + /// S1 = S2 + C_a + /// S3 = S2 + C_b + /// where C_a and C_b are different SCEVConstants. Then we'd like to + /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally. + /// It is helpful when S2 is a complex SCEV expr. + /// + /// In order to do that, we represent ExprValueMap as a mapping from + /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and + /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3 + /// is expanded, it will first expand S2 to V1 - C_a because of + /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b. + /// + /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded + /// to V - Offset. ExprValueMapType ExprValueMap; /// The typedef for ValueExprMap. @@ -1181,7 +1200,7 @@ bool containsAddRecurrence(const SCEV *S); /// Return the Value set from which the SCEV expr is generated. - SetVector *getSCEVValues(const SCEV *S); + SetVector *getSCEVValues(const SCEV *S); /// Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); Index: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -309,7 +309,8 @@ PointerType *PTy, Type *Ty, Value *V); /// \brief Find a previous Value in ExprValueMap for expand. - Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + ScalarEvolution::ValueOffsetPair + FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); Value *expand(const SCEV *S); Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -3378,8 +3378,28 @@ return F.FoundOne; } -/// Return the Value set from S. -SetVector *ScalarEvolution::getSCEVValues(const SCEV *S) { +/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. +/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an +/// offset I, then return {S', I}, else return {\p S, nullptr}. +static std::pair splitAddExpr(const SCEV *S) { + const auto *Add = dyn_cast(S); + if (!Add) + return {S, nullptr}; + + if (Add->getNumOperands() != 2) + return {S, nullptr}; + + auto *ConstOp = dyn_cast(Add->getOperand(0)); + if (!ConstOp) + return {S, nullptr}; + + return {Add->getOperand(1), ConstOp->getValue()}; +} + +/// Return the ValueOffsetPair set for \p S. \p S can be represented +/// by the value and offset from any ValueOffsetPair in the set. +SetVector * +ScalarEvolution::getSCEVValues(const SCEV *S) { ExprValueMapType::iterator SI = ExprValueMap.find_as(S); if (SI == ExprValueMap.end()) return nullptr; @@ -3387,24 +3407,31 @@ if (VerifySCEVMap) { // Check there is no dangling Value in the set returned. for (const auto &VE : SI->second) - assert(ValueExprMap.count(VE)); + assert(ValueExprMap.count(VE.first)); } #endif return &SI->second; } -/// Erase Value from ValueExprMap and ExprValueMap. If ValueExprMap.erase(V) is -/// not used together with forgetMemoizedResults(S), eraseValueFromMap should be -/// used instead to ensure whenever V->S is removed from ValueExprMap, V is also -/// removed from the set of ExprValueMap[S]. +/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) +/// cannot be used separately. eraseValueFromMap should be used to remove +/// V from ValueExprMap and ExprValueMap at the same time. void ScalarEvolution::eraseValueFromMap(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; - SetVector *SV = getSCEVValues(S); - // Remove V from the set of ExprValueMap[S] - if (SV) - SV->remove(V); + // Remove {V, 0} from the set of ExprValueMap[S] + if (SetVector *SV = getSCEVValues(S)) + SV->remove({V, nullptr}); + + // Remove {V, Offset} from the set of ExprValueMap[Stripped] + const SCEV *Stripped; + ConstantInt *Offset; + std::tie(Stripped, Offset) = splitAddExpr(S); + if (Offset != nullptr) { + if (SetVector *SV = getSCEVValues(Stripped)) + SV->remove({V, Offset}); + } ValueExprMap.erase(V); } } @@ -3419,11 +3446,26 @@ S = createSCEV(V); // During PHI resolution, it is possible to create two SCEVs for the same // V, so it is needed to double check whether V->S is inserted into - // ValueExprMap before insert S->V into ExprValueMap. + // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) - ExprValueMap[S].insert(V); + if (Pair.second) { + ExprValueMap[S].insert({V, nullptr}); + + // If S == Stripped + Offset, add Stripped -> {V, Offset} into + // ExprValueMap. + const SCEV *Stripped = S; + ConstantInt *Offset = nullptr; + std::tie(Stripped, Offset) = splitAddExpr(S); + // If stripped is SCEVUnknown, don't bother to save + // Stripped -> {V, offset}. It doesn't simplify and sometimes even + // increase the complexity of the expansion code. + // If V is GetElementPtrInst, don't save Stripped -> {V, offset} + // because it may generate add/sub instead of GEP in SCEV expansion. + if (Offset != nullptr && !isa(Stripped) && + !isa(V)) + ExprValueMap[Stripped].insert({V, Offset}); + } } return S; } @@ -3436,8 +3478,8 @@ const SCEV *S = I->second; if (checkValidity(S)) return S; + eraseValueFromMap(V); forgetMemoizedResults(S); - ValueExprMap.erase(I); } return nullptr; } @@ -3675,8 +3717,8 @@ if (!isa(I) || !isa(Old) || (I != PN && Old == SymName)) { + eraseValueFromMap(It->first); forgetMemoizedResults(Old); - ValueExprMap.erase(It); } } @@ -4055,7 +4097,7 @@ // to create an AddRecExpr for this PHI node. We can not keep this temporary // as it will prevent later (possibly simpler) SCEV expressions to be added // to the ValueExprMap. - ValueExprMap.erase(PN); + eraseValueFromMap(PN); } return nullptr; @@ -5431,8 +5473,8 @@ // case, createNodeForPHI will perform the necessary updates on its // own when it gets to that point. if (!isa(I) || !isa(Old)) { + eraseValueFromMap(It->first); forgetMemoizedResults(Old); - ValueExprMap.erase(It); } if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); @@ -5477,8 +5519,8 @@ ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { + eraseValueFromMap(It->first); forgetMemoizedResults(It->second); - ValueExprMap.erase(It); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } @@ -5511,8 +5553,8 @@ ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { + eraseValueFromMap(It->first); forgetMemoizedResults(It->second); - ValueExprMap.erase(It); if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1626,9 +1626,10 @@ return V; } -Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, - const Instruction *InsertPt) { - SetVector *Set = SE.getSCEVValues(S); +ScalarEvolution::ValueOffsetPair +SCEVExpander::FindValueInExprValueMap(const SCEV *S, + const Instruction *InsertPt) { + SetVector *Set = SE.getSCEVValues(S); // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. if (CanonicalMode || !SE.containsAddRecurrence(S)) { @@ -1637,21 +1638,21 @@ // Choose a Value from the set which dominates the insertPt. // insertPt should be inside the Value's parent loop so as not to break // the LCSSA form. - for (auto const &Ent : *Set) { + for (auto const &VOPair : *Set) { + Value *V = VOPair.first; + ConstantInt *Offset = VOPair.second; Instruction *EntInst = nullptr; - if (Ent && isa(Ent) && - (EntInst = cast(Ent)) && - S->getType() == Ent->getType() && + if (V && isa(V) && (EntInst = cast(V)) && + S->getType() == V->getType() && EntInst->getFunction() == InsertPt->getFunction() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) { - return Ent; - } + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) + return {V, Offset}; } } } - return nullptr; + return {nullptr, nullptr}; } // The expansion of SCEV will either reuse a previous Value in ExprValueMap, @@ -1699,10 +1700,14 @@ Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. - Value *V = FindValueInExprValueMap(S, InsertPt); + ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt); + Value *V = VO.first; if (!V) V = visit(S); + else if (VO.second) { + V = Builder.CreateSub(V, VO.second); + } // Remember the expanded value for this SCEV at this location. // @@ -1915,7 +1920,7 @@ // Use expand's logic which is used for reusing a previous Value in // ExprValueMap. - if (Value *Val = FindValueInExprValueMap(S, At)) + if (Value *Val = FindValueInExprValueMap(S, At).first) return Val; // There is potential to make this significantly smarter, but this simple Index: llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value-offset.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S |FileCheck %s +; SCEV expansion uses existing value or value + offset to reduce duplicate code expansion so foo should only generate one select inst after loop vectorization. +; CHECK-LABEL: @foo( +; CHECK: select +; CHECK-NOT: select + +@ySrcL = common global i8* null, align 8 +@smL = common global i32 0, align 4 + +define void @foo(i32 %rwL, i32 %kL, i32 %xfL) { +entry: + %sub = add nsw i32 %rwL, -1 + %shr = ashr i32 %xfL, 6 + %cmp.i = icmp slt i32 %sub, %shr + %cond.i = select i1 %cmp.i, i32 %sub, i32 %shr + %cmp6 = icmp sgt i32 %cond.i, %kL + br i1 %cmp6, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp = load i8*, i8** @ySrcL, align 8 + %tmp1 = sext i32 %kL to i64 + %tmp2 = sext i32 %cond.i to i64 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %reduct.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i8, i8* %tmp, i64 %indvars.iv + %tmp3 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %tmp3 to i32 + %add = add nsw i32 %conv, %reduct.07 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %cmp = icmp slt i64 %indvars.iv.next, %tmp2 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %reduct.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ] + store i32 %reduct.0.lcssa, i32* @smL, align 4 + ret void +} +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S |FileCheck %s +; SCEV expansion uses existing value or value + offset to reduce duplicate code expansion so foo should only generate one select inst after loop vectorization. +; CHECK-LABEL: @foo( +; CHECK: select +; CHECK-NOT: select + +@ySrcL = common global i8* null, align 8 +@smL = common global i32 0, align 4 + +define void @foo(i32 %rwL, i32 %kL, i32 %xfL) { +entry: + %sub = add nsw i32 %rwL, -1 + %shr = ashr i32 %xfL, 6 + %cmp.i = icmp slt i32 %sub, %shr + %cond.i = select i1 %cmp.i, i32 %sub, i32 %shr + %cmp6 = icmp sgt i32 %cond.i, %kL + br i1 %cmp6, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp = load i8*, i8** @ySrcL, align 8 + %tmp1 = sext i32 %kL to i64 + %tmp2 = sext i32 %cond.i to i64 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %reduct.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i8, i8* %tmp, i64 %indvars.iv + %tmp3 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %tmp3 to i32 + %add = add nsw i32 %conv, %reduct.07 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %cmp = icmp slt i64 %indvars.iv.next, %tmp2 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %reduct.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ] + store i32 %reduct.0.lcssa, i32* @smL, align 4 + ret void +}