Index: include/polly/ScopDetection.h
===================================================================
--- include/polly/ScopDetection.h
+++ include/polly/ScopDetection.h
@@ -147,7 +147,7 @@
   BoxedLoopsMapTy BoxedLoopsMap;

   /// @brief Map to remember loads that are required to be invariant.
-  DenseMap<const Region *, InvariantLoadsSetTy> RequiredInvariantLoadsMap;
+  DenseMap<const Region *, InvariantLoadsClassesTy> RequiredInvariantLoadsMap;

   /// @brief Context variables for SCoP detection.
   struct DetectionContext {
@@ -185,14 +185,14 @@
     BoxedLoopsSetTy &BoxedLoopsSet;

     /// @brief Loads that need to be invariant during execution.
-    InvariantLoadsSetTy &RequiredILS;
+    InvariantLoadsClassesTy &RequiredILC;

     DetectionContext(Region &R, AliasAnalysis &AA,
                      NonAffineSubRegionSetTy &NASRS, BoxedLoopsSetTy &BLS,
-                     InvariantLoadsSetTy &RequiredILS, bool Verify)
+                     InvariantLoadsClassesTy &RequiredILC, bool Verify)
         : CurRegion(R), AST(AA), Verifying(Verify), Log(&R), hasLoads(false),
           hasStores(false), hasAffineLoops(false), NonAffineSubRegionSet(NASRS),
-          BoxedLoopsSet(BLS), RequiredILS(RequiredILS) {}
+          BoxedLoopsSet(BLS), RequiredILC(RequiredILC) {}
   };

   // Remember the valid regions
@@ -251,16 +251,16 @@
   /// @return True if the call instruction is valid, false otherwise.
   static bool isValidCallInst(CallInst &CI);

-  /// @brief Check if the given loads could be invariant and can be hoisted.
+  /// @brief Check if the required invariant loads can be hoisted.
   ///
   /// If true is returned, the loads are added to the required invariant loads
   /// contained in the @p Context.
   ///
-  /// @param RequiredILS The loads to check.
+  /// @param RequiredILC The loads to check.
   /// @param Context     The current detection context.
   ///
   /// @return True if all loads can be assumed invariant.
-  bool onlyValidRequiredInvariantLoads(InvariantLoadsSetTy &RequiredILS,
+  bool onlyValidRequiredInvariantLoads(InvariantLoadsClassesTy &RequiredILC,
                                        DetectionContext &Context) const;

   /// @brief Check if a value is invariant in the region Reg.
@@ -400,7 +400,8 @@
   const BoxedLoopsSetTy *getBoxedLoops(const Region *R) const;

   /// @brief Return the set of required invariant loads for @p R.
-  const InvariantLoadsSetTy *getRequiredInvariantLoads(const Region *R) const;
+  const InvariantLoadsClassesTy *
+  getRequiredInvariantLoads(const Region *R) const;

   /// @brief Return true if @p SubR is a non-affine subregion in @p ScopR.
   bool isNonAffineSubRegion(const Region *SubR, const Region *ScopR) const;
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -699,8 +699,11 @@
 /// @brief Type for invariant memory accesses and their domain context.
 using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;

+/// @brief Type for multiple equivalent invariant memory accesses.
+using InvariantAccessListTy = std::forward_list<InvariantAccessTy>;
+
 /// @brief Type for multiple invariant memory accesses and their domain context.
-using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>;
+using InvariantAccessesTy = SmallVector<InvariantAccessListTy, 8>;

 ///===----------------------------------------------------------------------===//
 /// @brief Statement of the Scop
@@ -925,12 +928,12 @@
     BB = Block;
   }

-  /// @brief Move the memory access in @p InvMAs to @p TargetList.
+  /// @brief Move the memory accesses in @p InvMAs to @p InvariantAccesses.
   ///
   /// Note that scalar accesses that are caused by any access in @p InvMAs will
   /// be eliminated too.
   void hoistMemoryAccesses(MemoryAccessList &InvMAs,
-                           InvariantAccessesTy &TargetList);
+                           InvariantAccessesTy &InvariantAccesses);

   typedef MemoryAccessVec::iterator iterator;
   typedef MemoryAccessVec::const_iterator const_iterator;
@@ -1213,6 +1216,7 @@
   ///
   /// At the moment we perform the following simplifications:
   /// - removal of empty statements (due to invariant load hoisting)
+  /// - consolidation of invariant loads of the same address
   void simplifySCoP();

   /// @brief Hoist invariant memory loads and check for required ones.
@@ -1233,6 +1237,9 @@
   /// @brief Simplify the assumed and boundary context.
   void simplifyContexts();

+  /// @brief Return the required invariant load equivalence classes.
+  const InvariantLoadsClassesTy &getRIL() const;
+
   /// @brief Create a new SCoP statement for either @p BB or @p R.
   ///
   /// Either @p BB or @p R should be non-null. A new statement for the non-null
@@ -1612,7 +1619,7 @@
   /// @param ScopRIL    The required invariant loads equivalence classes.
   void buildMemoryAccess(Instruction *Inst, Loop *L, Region *R,
                          const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
-                         const InvariantLoadsSetTy &ScopRIL);
+                         const InvariantLoadsClassesTy &ScopRIL);

   /// @brief Analyze and extract the cross-BB scalar dependences (or,
   ///        dataflow dependencies) of an instruction.
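With this change, InvariantAccessesTy in ScopInfo.h goes from a flat list of
(access, execution context) pairs to a list of equivalence classes, where the
head of each class is the representative that owns the merged execution
context. A minimal standalone C++ sketch of the nesting, with stand-in types
for MemoryAccess and isl_set (illustrative only, not the Polly API):

#include <forward_list>
#include <set>
#include <utility>
#include <vector>

struct MemoryAccess;          // opaque stand-in for polly::MemoryAccess
using IslSet = std::set<int>; // stand-in for an isl_set execution context

// One pair per hoisted invariant access; a class groups all accesses to one
// address, i.e., all accesses with the same pointer SCEV.
using InvariantAccess = std::pair<MemoryAccess *, IslSet>;
using InvariantAccessList = std::forward_list<InvariantAccess>;
// Only the head of each list carries the up-to-date merged context; it is
// the element that is printed, preloaded, and freed (see the
// hoistMemoryAccesses and preloadInvariantLoads hunks below).
using InvariantAccessClasses = std::vector<InvariantAccessList>;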
Index: include/polly/Support/SCEVAffinator.h
===================================================================
--- include/polly/Support/SCEVAffinator.h
+++ include/polly/Support/SCEVAffinator.h
@@ -14,6 +14,7 @@
 #ifndef POLLY_SCEV_AFFINATOR_H
 #define POLLY_SCEV_AFFINATOR_H

+#include "polly/Support/ScopHelper.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"

@@ -51,11 +52,13 @@
   /// @brief Translate a SCEV to an isl_pw_aff.
   ///
-  /// @param E  he expression that is translated.
-  /// @param BB The block in which @p E is executed.
+  /// @param E   The expression that is translated.
+  /// @param BB  The block in which @p E is executed.
+  /// @param ILC The invariant loads equivalence classes of the SCoP.
   ///
   /// @returns The isl representation of the SCEV @p E in @p Domain.
   __isl_give isl_pw_aff *getPwAff(const llvm::SCEV *E,
+                                  const InvariantLoadsClassesTy &ILC,
                                   llvm::BasicBlock *BB = nullptr);

   /// @brief Compute the context in which integer wrapping is happening.
Index: include/polly/Support/SCEVValidator.h
===================================================================
--- include/polly/Support/SCEVValidator.h
+++ include/polly/Support/SCEVValidator.h
@@ -48,10 +48,11 @@
 bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R);
 bool isAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
                   llvm::ScalarEvolution &SE, const llvm::Value *BaseAddress = 0,
-                  InvariantLoadsSetTy *ILS = nullptr);
+                  InvariantLoadsClassesTy *ILC = nullptr);
 std::vector<const llvm::SCEV *>
 getParamsInAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
                       llvm::ScalarEvolution &SE,
+                      const InvariantLoadsClassesTy &ILC,
                       const llvm::Value *BaseAddress = 0);

 /// @brief Extract the constant factors from the multiplication @p M.
Index: include/polly/Support/ScopHelper.h
===================================================================
--- include/polly/Support/ScopHelper.h
+++ include/polly/Support/ScopHelper.h
@@ -43,9 +43,13 @@
 typedef llvm::DenseMap<const llvm::Value *, llvm::Value *> ValueMapT;
 typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;

-/// @brief Type for a set of invariant loads.
+/// @brief Type for an __ordered__ set of invariant loads.
 using InvariantLoadsSetTy = llvm::SetVector<llvm::AssertingVH<llvm::LoadInst>>;

+/// @brief Type for equivalence classes of invariant loads.
+using InvariantLoadsClassesTy =
+    llvm::DenseMap<const llvm::SCEV *, InvariantLoadsSetTy>;
+
 /// Temporary Hack for extended regiontree.
 ///
 /// @brief Cast the region to loop.
@@ -144,5 +148,16 @@
 /// @return True if @p LInst can be hoisted in @p R.
 bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI,
                      llvm::ScalarEvolution &SE);
+
+/// @brief Get the representing SCEV for invariant loads if applicable.
+///
+/// @param S   The SCEV to normalize.
+/// @param SE  The ScalarEvolution analysis.
+/// @param ILC The invariant loads equivalence classes.
+///
+/// @return The representing SCEV for invariant loads or @p S if none.
+const llvm::SCEV *
+normalizeInvariantLoadSCEV(const llvm::SCEV *S, llvm::ScalarEvolution &SE,
+                           const InvariantLoadsClassesTy &ILC);
 }

 #endif
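The new InvariantLoadsClassesTy keys each equivalence class by the SCEV of the
load's pointer operand, and normalizeInvariantLoadSCEV maps any member load
back to the first element of its class. A standalone sketch of that
normalization, using std::string keys in place of pointer SCEVs and an
insertion-ordered vector in place of the SetVector (stand-ins only, not the
LLVM API):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// A "load" is just a name here; its "pointer SCEV" is a string key.
using LoadClasses = std::map<std::string, std::vector<std::string>>;

// Mirrors normalizeInvariantLoadSCEV: if the load's address has a known
// equivalence class, the first member represents the whole class;
// otherwise the load represents itself.
std::string normalize(const std::string &Load, const std::string &PointerKey,
                      const LoadClasses &ILC) {
  auto It = ILC.find(PointerKey);
  if (It == ILC.end() || It->second.empty())
    return Load;
  return It->second.front();
}

int main() {
  LoadClasses ILC;
  // Three loads of bounds[0], as in the new test cases, form one class.
  ILC["&bounds[0]"] = {"%tmp", "%tmp8", "%tmp10"};
  // Every member normalizes to %tmp, so the SCoP gets a single parameter.
  std::cout << normalize("%tmp10", "&bounds[0]", ILC) << "\n"; // prints %tmp
  return 0;
}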
Index: lib/Analysis/ScopDetection.cpp
===================================================================
--- lib/Analysis/ScopDetection.cpp
+++ lib/Analysis/ScopDetection.cpp
@@ -254,10 +254,10 @@
   if (Verify) {
     BoxedLoopsSetTy DummyBoxedLoopsSet;
     NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
-    InvariantLoadsSetTy DummyILS;
+    InvariantLoadsClassesTy DummyILC;
     DetectionContext Context(const_cast<Region &>(R), *AA,
                              DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
-                             DummyILS, false /*verifying*/);
+                             DummyILC, false /*verifying*/);
     return isValidRegion(Context);
   }

@@ -300,14 +300,18 @@
 }

 bool ScopDetection::onlyValidRequiredInvariantLoads(
-    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
-  Region &CurRegion = Context.CurRegion;
+    InvariantLoadsClassesTy &RequiredILC, DetectionContext &Context) const {

-  for (LoadInst *LInst : RequiredILS)
-    if (!isHoistableLoad(LInst, CurRegion, *LI, *SE))
-      return false;
+  for (const auto &RILEquivClass : RequiredILC)
+    for (LoadInst *LInst : RILEquivClass.second)
+      if (!isHoistableLoad(LInst, Context.CurRegion, *LI, *SE))
+        return false;

-  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
+  for (auto &EquivClass : RequiredILC) {
+    auto &ContextEquivClass = Context.RequiredILC[EquivClass.first];
+    ContextEquivClass.insert(EquivClass.second.begin(),
+                             EquivClass.second.end());
+  }

   return true;
 }
@@ -315,11 +319,11 @@

 bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context,
                              Value *BaseAddress) const {
-  InvariantLoadsSetTy AccessILS;
-  if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS))
+  InvariantLoadsClassesTy AccessILC;
+  if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILC))
     return false;

-  if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
+  if (!onlyValidRequiredInvariantLoads(AccessILC, Context))
     return false;

   return true;
@@ -697,7 +701,8 @@
     if (Inst && CurRegion.contains(Inst)) {
       LoadInst *LInst = dyn_cast<LoadInst>(Inst);
       if (LInst && isHoistableLoad(LInst, CurRegion, *LI, *SE)) {
-        Context.RequiredILS.insert(LInst);
+        const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+        Context.RequiredILC[PointerSCEV].insert(LInst);
         continue;
       }

@@ -1127,7 +1132,7 @@
   return &BLMIt->second;
 }

-const InvariantLoadsSetTy *
+const InvariantLoadsClassesTy *
 ScopDetection::getRequiredInvariantLoads(const Region *R) const {
   auto RILMIt = RequiredInvariantLoadsMap.find(R);
   if (RILMIt == RequiredInvariantLoadsMap.end())
@@ -1140,10 +1145,10 @@

   BoxedLoopsSetTy DummyBoxedLoopsSet;
   NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
-  InvariantLoadsSetTy DummyILS;
+  InvariantLoadsClassesTy DummyILC;
   DetectionContext Context(const_cast<Region &>(R), *AA,
                            DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
-                           DummyILS, true /*verifying*/);
+                           DummyILC, true /*verifying*/);
   isValidRegion(Context);
 }
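onlyValidRequiredInvariantLoads now merges the classes found for one
expression into the per-region classes keyed by the same pointer SCEV, so one
address never produces two classes. The merge in isolation, with standard
containers standing in for DenseMap and SetVector (a sketch under those
assumptions):

#include <map>
#include <set>
#include <string>

// Stand-in for InvariantLoadsClassesTy; the real code keeps the per-class
// members in an insertion-ordered SetVector rather than a std::set.
using LoadClasses = std::map<std::string, std::set<std::string>>;

// Mirrors the second loop of onlyValidRequiredInvariantLoads: classes with
// the same key are unioned, and new keys open new classes.
void mergeClasses(LoadClasses &RequiredILC, const LoadClasses &AccessILC) {
  for (const auto &EquivClass : AccessILC)
    RequiredILC[EquivClass.first].insert(EquivClass.second.begin(),
                                         EquivClass.second.end());
}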
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -1097,14 +1097,15 @@
     auto Expr = Subscripts[i + IndexOffset];
     auto Size = Sizes[i];

-    InvariantLoadsSetTy AccessILS;
-    if (!isAffineExpr(&Parent.getRegion(), Expr, SE, nullptr, &AccessILS))
+    InvariantLoadsClassesTy AccessRIL;
+    if (!isAffineExpr(&Parent.getRegion(), Expr, SE, nullptr, &AccessRIL))
       continue;

     bool NonAffine = false;
-    for (LoadInst *LInst : AccessILS)
-      if (!ScopRIL.count(LInst))
-        NonAffine = true;
+    for (const auto &EquivClass : AccessRIL)
+      for (LoadInst *LInst : EquivClass.second)
+        if (!ScopRIL.lookup(EquivClass.first).count(LInst))
+          NonAffine = true;

     if (NonAffine)
       continue;
@@ -1364,7 +1365,7 @@
 void ScopStmt::dump() const { print(dbgs()); }

 void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
-                                   InvariantAccessesTy &TargetList) {
+                                   InvariantAccessesTy &InvariantAccesses) {

   // Remove all memory accesses in @p InvMAs from this statement together
   // with all scalar accesses that were caused by them. The tricky iteration
@@ -1415,8 +1416,36 @@
     }
   }

-  for (MemoryAccess *MA : InvMAs)
-    TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx)));
+  for (MemoryAccess *MA : InvMAs) {
+
+    // Check for another invariant access that accesses the same location as
+    // MA and, if found, consolidate them. Otherwise, create a new equivalence
+    // class at the end of InvariantAccesses.
+    // TODO: This is quadratic in the number of invariant accesses.
+    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
+    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
+    bool Consolidated = false;
+
+    for (auto &IAClass : InvariantAccesses) {
+      InvariantAccessTy &IA = IAClass.front();
+      LoadInst *IALInst = cast<LoadInst>(IA.first->getAccessInstruction());
+      const SCEV *IAPointerSCEV = SE.getSCEV(IALInst->getPointerOperand());
+      if (PointerSCEV != IAPointerSCEV)
+        continue;
+
+      isl_set *IAClassDomainCtx =
+          isl_set_union(IA.second, isl_set_copy(DomainCtx));
+      IAClass.push_front(std::make_pair(MA, IAClassDomainCtx));
+      Consolidated = true;
+      break;
+    }
+
+    if (Consolidated)
+      continue;
+
+    InvariantAccesses.emplace_back(
+        InvariantAccessListTy({std::make_pair(MA, isl_set_copy(DomainCtx))}));
+  }

   isl_set_free(DomainCtx);
 }
@@ -1444,6 +1473,9 @@
 }

 __isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const {
+  // Normalize invariant loads first to get the correct parameter SCEV.
+  Parameter = normalizeInvariantLoadSCEV(Parameter, *getSE(), getRIL());
+
   ParamIdType::const_iterator IdIter = ParameterIds.find(Parameter);

   if (IdIter == ParameterIds.end())
@@ -2365,8 +2397,8 @@
     }
   }

-  for (const auto &IA : InvariantAccesses)
-    isl_set_free(IA.second);
+  for (const auto &IAClass : InvariantAccesses)
+    isl_set_free(IAClass.front().second);
 }

 void Scop::updateAccessDimensionality() {
@@ -2452,18 +2484,19 @@
   IsOptimized = true;

   // Check required invariant loads that were tagged during SCoP detection.
-  for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) {
-    assert(LI && getRegion().contains(LI));
-    ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
-    if (Stmt->lookupAccessesFor(LI) != nullptr) {
-      DEBUG(errs() << "\n\nWARNING: Load (" << *LI
-                   << ") is required to be invariant but was not marked as "
-                      "such. SCoP for "
-                   << getRegion() << " will be dropped\n\n");
-      addAssumption(isl_set_empty(getParamSpace()));
-      return;
+  for (const auto &EquivClass : getRIL())
+    for (LoadInst *LI : EquivClass.second) {
+      assert(LI && getRegion().contains(LI));
+      ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
+      if (Stmt->lookupAccessesFor(LI) != nullptr) {
+        DEBUG(errs() << "\n\nWARNING: Load (" << *LI
+                     << ") is required to be invariant but was not marked as "
+                        "such. SCoP for "
+                     << getRegion() << " will be dropped\n\n");
+        addAssumption(isl_set_empty(getParamSpace()));
+        return;
+      }
     }
-  }

   // We want invariant accesses to be sorted in a "natural order" because there
   // might be dependences between invariant loads. These can be caused by
@@ -2475,8 +2508,12 @@
   // we already ordered the accesses such that indirect loads can be resolved,
   // thus we use a stable sort here.

-  auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0,
-                                         const InvariantAccessTy &IA1) {
+  auto compareInvariantAccesses = [this](
+      const InvariantAccessListTy &IAClass0,
+      const InvariantAccessListTy &IAClass1) {
+    const InvariantAccessTy &IA0 = IAClass0.front();
+    const InvariantAccessTy &IA1 = IAClass1.front();
+
     Instruction *AI0 = IA0.first->getAccessInstruction();
     Instruction *AI1 = IA1.first->getAccessInstruction();

@@ -2703,9 +2740,9 @@
   OS.indent(4) << "Region: " << getNameStr() << "\n";
   OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
   OS.indent(4) << "Invariant Accesses: {\n";
-  for (const auto &IA : InvariantAccesses) {
-    IA.first->print(OS);
-    OS.indent(12) << "Execution Context: " << IA.second << "\n";
+  for (const auto &IAClass : InvariantAccesses) {
+    IAClass.front().first->print(OS);
+    OS.indent(12) << "Execution Context: " << IAClass.front().second << "\n";
   }
   OS.indent(4) << "}\n";
   printContext(OS.indent(4));
@@ -2718,8 +2755,12 @@

 isl_ctx *Scop::getIslCtx() const { return IslCtx; }

+const InvariantLoadsClassesTy &Scop::getRIL() const {
+  return *SD.getRequiredInvariantLoads(&getRegion());
+}
+
 __isl_give isl_pw_aff *Scop::getPwAff(const SCEV *E, BasicBlock *BB) {
-  return Affinator.getPwAff(E, BB);
+  return Affinator.getPwAff(E, getRIL(), BB);
 }

 __isl_give isl_union_set *Scop::getDomains() const {
@@ -3148,7 +3189,7 @@
 void ScopInfo::buildMemoryAccess(
     Instruction *Inst, Loop *L, Region *R,
     const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
-    const InvariantLoadsSetTy &ScopRIL) {
+    const InvariantLoadsClassesTy &ScopRIL) {
   unsigned Size;
   Type *SizeType;
   Value *Val;
@@ -3196,13 +3237,14 @@

       bool AllAffineSubcripts = true;
       for (auto Subscript : Subscripts) {
-        InvariantLoadsSetTy AccessILS;
+        InvariantLoadsClassesTy AccessRIL;
         AllAffineSubcripts =
-            isAffineExpr(R, Subscript, *SE, nullptr, &AccessILS);
+            isAffineExpr(R, Subscript, *SE, nullptr, &AccessRIL);

-        for (LoadInst *LInst : AccessILS)
-          if (!ScopRIL.count(LInst))
-            AllAffineSubcripts = false;
+        for (const auto &EquivClass : AccessRIL)
+          for (LoadInst *LInst : EquivClass.second)
+            if (!ScopRIL.lookup(EquivClass.first).count(LInst))
+              AllAffineSubcripts = false;

         if (!AllAffineSubcripts)
           break;
@@ -3240,14 +3282,15 @@
       isVariantInNonAffineLoop = true;
   }

-  InvariantLoadsSetTy AccessILS;
+  InvariantLoadsClassesTy AccessRIL;
   bool IsAffine =
       !isVariantInNonAffineLoop &&
-      isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue(), &AccessILS);
+      isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue(), &AccessRIL);

-  for (LoadInst *LInst : AccessILS)
-    if (!ScopRIL.count(LInst))
-      IsAffine = false;
+  for (const auto &EquivClass : AccessRIL)
+    for (LoadInst *LInst : EquivClass.second)
+      if (!ScopRIL.lookup(EquivClass.first).count(LInst))
+        IsAffine = false;

   // FIXME: Size of the number of bytes of an array element, not the number of
   // elements as probably intended here.
@@ -3311,8 +3354,11 @@

       // Do not build scalar dependences for required invariant loads as we will
       // hoist them later on anyway or drop the SCoP if we cannot.
-      if (ScopRIL.count(dyn_cast<LoadInst>(Inst)))
-        continue;
+      if (LoadInst *LInst = dyn_cast<LoadInst>(Inst)) {
+        const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+        if (ScopRIL.lookup(PointerSCEV).count(LInst))
+          continue;
+      }

       if (buildScalarDependences(Inst, &R, NonAffineSubRegion)) {
         if (!isa<StoreInst>(Inst))
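On the code-generation side, each equivalence class is materialized exactly
once: the class head is preloaded and the resulting value is fanned out to
every member's load instruction. A minimal sketch of that fan-out with plain
standard containers (names are hypothetical, and preloading is modeled as
handing out a fresh integer per class):

#include <map>
#include <string>
#include <vector>

// One preload per equivalence class; all member loads map to that value.
std::map<std::string, int>
preloadClasses(const std::vector<std::vector<std::string>> &Classes) {
  std::map<std::string, int> ValueMap;
  int NextPreloadVal = 0;
  for (const auto &Class : Classes) {
    int PreloadVal = NextPreloadVal++; // stands in for preloadInvariantLoad
    for (const std::string &LoadName : Class)
      ValueMap[LoadName] = PreloadVal; // every member reuses the same value
  }
  return ValueMap;
}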
Index: lib/CodeGen/IslNodeBuilder.cpp
===================================================================
--- lib/CodeGen/IslNodeBuilder.cpp
+++ lib/CodeGen/IslNodeBuilder.cpp
@@ -905,8 +905,8 @@

 void IslNodeBuilder::preloadInvariantLoads() {

-  const auto &InvAccList = S.getInvariantAccesses();
-  if (InvAccList.empty())
+  const auto &InvariantAccesses = S.getInvariantAccesses();
+  if (InvariantAccesses.empty())
     return;

   const Region &R = S.getRegion();
@@ -920,14 +920,16 @@
   isl_ast_build *Build =
       isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));

-  for (const auto &IA : InvAccList) {
-    MemoryAccess *MA = IA.first;
+  for (const auto &IAClass : InvariantAccesses) {
+
+    MemoryAccess *MA = IAClass.front().first;
     assert(!MA->isImplicit());

-    isl_set *Domain = isl_set_copy(IA.second);
+    isl_set *Domain = isl_set_copy(IAClass.front().second);
     Instruction *AccInst = MA->getAccessInstruction();
     Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build);
-    ValueMap[AccInst] = PreloadVal;
+    for (const InvariantAccessTy &IA : IAClass)
+      ValueMap[IA.first->getAccessInstruction()] = PreloadVal;

     if (SE.isSCEVable(AccInst->getType())) {
       isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
Index: lib/Support/SCEVAffinator.cpp
===================================================================
--- lib/Support/SCEVAffinator.cpp
+++ lib/Support/SCEVAffinator.cpp
@@ -33,8 +33,9 @@
     isl_pw_aff_free(CachedPair.second);
 }

-__isl_give isl_pw_aff *SCEVAffinator::getPwAff(const SCEV *Expr,
-                                               BasicBlock *BB) {
+__isl_give isl_pw_aff *
+SCEVAffinator::getPwAff(const SCEV *Expr, const InvariantLoadsClassesTy &ILC,
+                        BasicBlock *BB) {
   this->BB = BB;

   if (BB) {
@@ -44,7 +45,7 @@
   } else
     NumIterators = 0;

-  S->addParams(getParamsInAffineExpr(&R, Expr, SE));
+  S->addParams(getParamsInAffineExpr(&R, Expr, SE, ILC));

   return visit(Expr);
 }
Index: lib/Support/SCEVValidator.cpp
===================================================================
--- lib/Support/SCEVValidator.cpp
+++ lib/Support/SCEVValidator.cpp
@@ -126,12 +126,12 @@
   const Region *R;
   ScalarEvolution &SE;
   const Value *BaseAddress;
-  InvariantLoadsSetTy *ILS;
+  InvariantLoadsClassesTy *ILC;

 public:
   SCEVValidator(const Region *R, ScalarEvolution &SE, const Value *BaseAddress,
-                InvariantLoadsSetTy *ILS)
-      : R(R), SE(SE), BaseAddress(BaseAddress), ILS(ILS) {}
+                InvariantLoadsClassesTy *ILC)
+      : R(R), SE(SE), BaseAddress(BaseAddress), ILC(ILC) {}

   class ValidatorResult visitConstant(const SCEVConstant *Constant) {
     return ValidatorResult(SCEVType::INT);
@@ -339,9 +339,17 @@
   }

   ValidatorResult visitLoadInstruction(Instruction *I, const SCEV *S) {
-    if (R->contains(I) && ILS) {
-      ILS->insert(cast<LoadInst>(I));
-      return ValidatorResult(SCEVType::PARAM, S);
+    if (R->contains(I) && ILC) {
+      // Insert I in the equivalence class of the pointer of I and use
+      // the first element of that equivalence class to determine the
+      // parameter that is used for I.
+
+      LoadInst *LInst = cast<LoadInst>(I);
+      const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
+      auto &EquivClass = (*ILC)[PointerSCEV];
+      EquivClass.insert(LInst);
+      const SCEV *EquivClassSCEV = SE.getSCEV(EquivClass[0]);
+      return ValidatorResult(SCEVType::PARAM, EquivClassSCEV);
     }

     return visitGenericInst(I, S);
@@ -564,11 +572,11 @@
 }

 bool isAffineExpr(const Region *R, const SCEV *Expr, ScalarEvolution &SE,
                   const Value *BaseAddress, InvariantLoadsClassesTy *ILC) {
   if (isa<SCEVCouldNotCompute>(Expr))
     return false;

-  SCEVValidator Validator(R, SE, BaseAddress, ILS);
+  SCEVValidator Validator(R, SE, BaseAddress, ILC);
   DEBUG({
     dbgs() << "\n";
     dbgs() << "Expr: " << *Expr << "\n";
@@ -587,15 +595,18 @@
   return Result.isValid();
 }

-std::vector<const SCEV *> getParamsInAffineExpr(const Region *R,
-                                                const SCEV *Expr,
-                                                ScalarEvolution &SE,
-                                                const Value *BaseAddress) {
+std::vector<const SCEV *>
+getParamsInAffineExpr(const Region *R, const SCEV *Expr, ScalarEvolution &SE,
+                      const InvariantLoadsClassesTy &ILC,
+                      const Value *BaseAddress) {
   if (isa<SCEVCouldNotCompute>(Expr))
     return std::vector<const SCEV *>();

-  InvariantLoadsSetTy ILS;
-  SCEVValidator Validator(R, SE, BaseAddress, &ILS);
+  // TODO: The const cast is necessary since the Validator is also used to
+  //       create the equivalence classes; here, however, we only want the
+  //       parameters to be normalized with regard to ILC.
+  SCEVValidator Validator(R, SE, BaseAddress,
+                          const_cast<InvariantLoadsClassesTy *>(&ILC));
   ValidatorResult Result = Validator.visit(Expr);

   assert(Result.isValid() && "Requested parameters for an invalid SCEV!");
Index: lib/Support/ScopHelper.cpp
===================================================================
--- lib/Support/ScopHelper.cpp
+++ lib/Support/ScopHelper.cpp
@@ -380,3 +380,21 @@

   return true;
 }
+
+const SCEV *
+polly::normalizeInvariantLoadSCEV(const SCEV *S, ScalarEvolution &SE,
+                                  const InvariantLoadsClassesTy &ILC) {
+  const SCEVUnknown *SU = dyn_cast_or_null<SCEVUnknown>(S);
+  if (!SU)
+    return S;
+
+  LoadInst *LInst = dyn_cast<LoadInst>(SU->getValue());
+  if (!LInst)
+    return S;
+
+  const auto &EquivClass = ILC.lookup(SE.getSCEV(LInst->getPointerOperand()));
+  if (EquivClass.empty())
+    return S;
+
+  return SE.getSCEV(*EquivClass.begin());
+}
Index: test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
===================================================================
--- test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
+++ test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
@@ -2,9 +2,9 @@
 ; RUN:     -polly-codegen -S < %s | FileCheck %s

 ; CHECK: polly.start
-
+;    int /* pure */ g()
 ;    void f(int *A) {
-;      if (*A > 42)
+;      if (g())
 ;        *A = *A + 1;
 ;      else
 ;        *A = *A - 1;
@@ -17,22 +17,26 @@
   br label %entry.split

 entry.split:
-  %tmp = load i32, i32* %A, align 4
-  %cmp = icmp sgt i32 %tmp, 42
+  %call = call i32 @g()
+  %cmp = icmp eq i32 %call, 0
   br i1 %cmp, label %if.then, label %if.else

 if.then:                                          ; preds = %entry
   %tmp1 = load i32, i32* %A, align 4
   %add = add nsw i32 %tmp1, 1
+  store i32 %add, i32* %A, align 4
   br label %if.end

 if.else:                                          ; preds = %entry
   %tmp2 = load i32, i32* %A, align 4
   %sub = add nsw i32 %tmp2, -1
+  store i32 %sub, i32* %A, align 4
   br label %if.end

 if.end:                                           ; preds = %if.else, %if.then
-  %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ]
-  store i32 %storemerge, i32* %A, align 4
   ret void
 }
+
+declare i32 @g() #0
+
+attributes #0 = { nounwind readnone }
Index: test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
===================================================================
--- test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
+++ test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
@@ -17,8 +17,8 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK:     ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK:         MemRef_init_ptr[0]
-; CHECK:     ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:         MemRef_init_ptr[0]
+; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NOT:     MemRef_init_ptr[0]
 ; CHECK: }
 define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
 entry:
Index: test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
@@ -0,0 +1,106 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; Verify that we only have one parameter and one invariant load for all
+; three loads that occur in the region but actually access the same
+; location. Also check that the execution context is the most generic
+; one, i.e., here the universal set.
+;
+; CHECK:      Invariant Accesses: {
+; CHECK-NEXT:   ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:     MemRef_bounds[0]
+; CHECK-NEXT:   Execution Context: [tmp] -> { : }
+; CHECK-NEXT: }
+;
+; CHECK: p0: %tmp
+; CHECK-NOT: p1
+; CHECK: Statements {
+; CHECK:   Stmt_for_body_6
+; CHECK:     Domain :=
+; CHECK:       [tmp] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp and i2 >= 0 and i2 <= -1 + tmp };
+; CHECK:     Schedule :=
+; CHECK:       [tmp] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK:     ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:       [tmp] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK:     MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:       [tmp] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: }
+;
+;    int bounds[1];
+;    double data[1024][1024][1024];
+;
+;    void foo() {
+;      int i, j, k;
+;      for (k = 0; k < bounds[0]; k++)
+;        for (j = 0; j < bounds[0]; j++)
+;          for (i = 0; i < bounds[0]; i++)
+;            data[k][j][i] += i + j + k;
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@bounds = common global [1 x i32] zeroinitializer, align 4
+@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
+
+define void @foo() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc.16, %entry
+  %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
+  %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp7 = sext i32 %tmp to i64
+  %cmp = icmp slt i64 %indvars.iv5, %tmp7
+  br i1 %cmp, label %for.body, label %for.end.18
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond.1
+
+for.cond.1:                                       ; preds = %for.inc.13, %for.body
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
+  %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp9 = sext i32 %tmp8 to i64
+  %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
+  br i1 %cmp2, label %for.body.3, label %for.end.15
+
+for.body.3:                                       ; preds = %for.cond.1
+  br label %for.cond.4
+
+for.cond.4:                                       ; preds = %for.inc, %for.body.3
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
+  %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp11 = sext i32 %tmp10 to i64
+  %cmp5 = icmp slt i64 %indvars.iv, %tmp11
+  br i1 %cmp5, label %for.body.6, label %for.end
+
+for.body.6:                                       ; preds = %for.cond.4
+  %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3
+  %tmp13 = add nsw i64 %tmp12, %indvars.iv5
+  %tmp14 = trunc i64 %tmp13 to i32
+  %conv = sitofp i32 %tmp14 to double
+  %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv
+  %tmp15 = load double, double* %arrayidx11, align 8
+  %add12 = fadd double %tmp15, %conv
+  store double %add12, double* %arrayidx11, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond.4
+
+for.end:                                          ; preds = %for.cond.4
+  br label %for.inc.13
+
+for.inc.13:                                       ; preds = %for.end
+  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+  br label %for.cond.1
+
+for.end.15:                                       ; preds = %for.cond.1
+  br label %for.inc.16
+
+for.inc.16:                                       ; preds = %for.end.15
+  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+  br label %for.cond
+
+for.end.18:                                       ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
@@ -0,0 +1,109 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; Verify that we only have one parameter and one invariant load for all
+; three loads that occur in the region but actually access the same
+; location. Also check that the execution context is the most generic
+; one, i.e., here the universal set.
+;
+; CHECK:      Invariant Accesses: {
+; CHECK-NEXT:   ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:     MemRef_bounds[0]
+; CHECK-NEXT:   Execution Context: [tmp, p] -> { : }
+; CHECK-NEXT: }
+;
+; CHECK: p0: %tmp
+; CHECK: p1: %p
+; CHECK-NOT: p2:
+; CHECK: Statements {
+; CHECK:   Stmt_for_body_6
+; CHECK:     Domain :=
+; CHECK:       [tmp, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp and i2 >= 0 and i2 <= -1 + tmp };
+; CHECK:     Schedule :=
+; CHECK:       [tmp, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK:     ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:       [tmp, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK:     MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:       [tmp, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: }
+;
+;    int bounds[1];
+;    double data[1024][1024][1024];
+;
+;    void foo(int p) {
+;      int i, j, k;
+;      for (k = 0; k < bounds[0]; k++)
+;        if (p == 0)
+;          for (j = 0; j < bounds[0]; j++)
+;            for (i = 0; i < bounds[0]; i++)
+;              data[k][j][i] += i + j + k;
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@bounds = common global [1 x i32] zeroinitializer, align 4
+@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
+
+define void @foo(i32 %p) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc.16, %entry
+  %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
+  %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp7 = sext i32 %tmp to i64
+  %cmp = icmp slt i64 %indvars.iv5, %tmp7
+  br i1 %cmp, label %for.body, label %for.end.18
+
+for.body:                                         ; preds = %for.cond
+  %cmpp = icmp eq i32 %p, 0
+  br i1 %cmpp, label %for.cond.1, label %for.inc.16
+
+for.cond.1:                                       ; preds = %for.inc.13, %for.body
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
+  %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp9 = sext i32 %tmp8 to i64
+  %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
+  br i1 %cmp2, label %for.body.3, label %for.end.15
+
+for.body.3:                                       ; preds = %for.cond.1
+  br label %for.cond.4
+
+for.cond.4:                                       ; preds = %for.inc, %for.body.3
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
+  %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp11 = sext i32 %tmp10 to i64
+  %cmp5 = icmp slt i64 %indvars.iv, %tmp11
+  br i1 %cmp5, label %for.body.6, label %for.end
+
+for.body.6:                                       ; preds = %for.cond.4
+  %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3
+  %tmp13 = add nsw i64 %tmp12, %indvars.iv5
+  %tmp14 = trunc i64 %tmp13 to i32
+  %conv = sitofp i32 %tmp14 to double
+  %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv
+  %tmp15 = load double, double* %arrayidx11, align 8
+  %add12 = fadd double %tmp15, %conv
+  store double %add12, double* %arrayidx11, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond.4
+
+for.end:                                          ; preds = %for.cond.4
+  br label %for.inc.13
+
+for.inc.13:                                       ; preds = %for.end
+  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+  br label %for.cond.1
+
+for.end.15:                                       ; preds = %for.cond.1
+  br label %for.inc.16
+
+for.inc.16:                                       ; preds = %for.end.15
+  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+  br label %for.cond
+
+for.end.18:                                       ; preds = %for.cond
+  ret void
+}