Index: polly/include/polly/ScopBuilder.h =================================================================== --- polly/include/polly/ScopBuilder.h +++ polly/include/polly/ScopBuilder.h @@ -394,6 +394,23 @@ /// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB) void hoistInvariantLoads(); + /// Return true if and only if @p LI is a required invariant load. + bool isRequiredInvariantLoad(LoadInst *LI) const { + return scop->getRequiredInvariantLoads().count(LI); + } + + /// Check if the base ptr of @p MA is in the SCoP but not hoistable. + bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes); + + /// Return the context under which the access cannot be hoisted. + /// + /// @param Access The access to check. + /// @param Writes The set of all memory writes in the scop. + /// + /// @return Return the context under which the access cannot be hoisted or a + /// nullptr if it cannot be hoisted at all. + isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes); + /// Collect loads which might form a reduction chain with @p StoreMA. /// /// Check if the stored value for @p StoreMA is a binary operator with one or Index: polly/include/polly/ScopInfo.h =================================================================== --- polly/include/polly/ScopInfo.h +++ polly/include/polly/ScopInfo.h @@ -49,6 +49,11 @@ extern bool UseInstructionNames; +// The maximal number of basic sets we allow during domain construction to +// be created. More complex scops will result in very high compile time and +// are also unlikely to result in good code +extern int const MaxDisjunctsInDomain; + /// Enumeration of assumptions Polly can take. enum AssumptionKind { ALIASING, @@ -2028,9 +2033,6 @@ /// Return the access for the base ptr of @p MA if any. MemoryAccess *lookupBasePtrAccess(MemoryAccess *MA); - /// Check if the base ptr of @p MA is in the SCoP but not hoistable. - bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes); - /// Check if @p MA can always be hoisted without execution context. bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty, bool MAInvalidCtxIsEmpty, @@ -2635,15 +2637,6 @@ return MinMaxAliasGroups; } - /// Return the context under which the access cannot be hoisted. - /// - /// @param Access The access to check. - /// @param Writes The set of all memory writes in the scop. - /// - /// @return Return the context under which the access cannot be hoisted or a - /// nullptr if it cannot be hoisted at all. - isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes); - /// Get an isl string representing the context. std::string getContextStr() const; @@ -2716,11 +2709,6 @@ /// Add @p LI to the set of required invariant loads. void addRequiredInvariantLoad(LoadInst *LI) { DC.RequiredILS.insert(LI); } - /// Return true if and only if @p LI is a required invariant load. - bool isRequiredInvariantLoad(LoadInst *LI) const { - return getRequiredInvariantLoads().count(LI); - } - /// Return the set of boxed (thus overapproximated) loops. const BoxedLoopsSetTy &getBoxedLoops() const { return DC.BoxedLoopsSet; } Index: polly/lib/Analysis/ScopBuilder.cpp =================================================================== --- polly/lib/Analysis/ScopBuilder.cpp +++ polly/lib/Analysis/ScopBuilder.cpp @@ -18,6 +18,7 @@ #include "polly/ScopDetection.h" #include "polly/ScopInfo.h" #include "polly/Support/GICHelper.h" +#include "polly/Support/ISLTools.h" #include "polly/Support/SCEVValidator.h" #include "polly/Support/ScopHelper.h" #include "polly/Support/VirtualInstruction.h" @@ -25,6 +26,7 @@ #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/RegionInfo.h" @@ -62,6 +64,12 @@ bool polly::ModelReadOnlyScalars; +// The maximal number of dimensions we allow during invariant load construction. +// More complex access ranges will result in very high compile time and are also +// unlikely to result in good code. This value is very high and should only +// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). +static int const MaxDimensionsInAccessRange = 9; + static cl::opt XModelReadOnlyScalars( "polly-analyze-read-only-scalars", cl::desc("Model read-only scalar values in the scop description"), @@ -1329,7 +1337,7 @@ InvariantAccessesTy InvariantAccesses; for (MemoryAccess *Access : Stmt) - if (isl::set NHCtx = scop->getNonHoistableCtx(Access, Writes)) + if (isl::set NHCtx = getNonHoistableCtx(Access, Writes)) InvariantAccesses.push_back({Access, NHCtx}); // Transfer the memory access from the statement to the SCoP. @@ -1339,6 +1347,115 @@ } } +/// Check if an access range is too complex. +/// +/// An access range is too complex, if it contains either many disjuncts or +/// very complex expressions. As a simple heuristic, we assume if a set to +/// be too complex if the sum of existentially quantified dimensions and +/// set dimensions is larger than a threshold. This reliably detects both +/// sets with many disjuncts as well as sets with many divisions as they +/// arise in h264. +/// +/// @param AccessRange The range to check for complexity. +/// +/// @returns True if the access range is too complex. +static bool isAccessRangeTooComplex(isl::set AccessRange) { + int NumTotalDims = 0; + + for (isl::basic_set BSet : AccessRange.get_basic_set_list()) { + NumTotalDims += BSet.dim(isl::dim::div); + NumTotalDims += BSet.dim(isl::dim::set); + } + + if (NumTotalDims > MaxDimensionsInAccessRange) + return true; + + return false; +} + +bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA, + isl::union_map Writes) { + if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) { + return getNonHoistableCtx(BasePtrMA, Writes).is_null(); + } + + Value *BaseAddr = MA->getOriginalBaseAddr(); + if (auto *BasePtrInst = dyn_cast(BaseAddr)) + if (!isa(BasePtrInst)) + return scop->contains(BasePtrInst); + + return false; +} + +isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, + isl::union_map Writes) { + // TODO: Loads that are not loop carried, hence are in a statement with + // zero iterators, are by construction invariant, though we + // currently "hoist" them anyway. This is necessary because we allow + // them to be treated as parameters (e.g., in conditions) and our code + // generation would otherwise use the old value. + + auto &Stmt = *Access->getStatement(); + BasicBlock *BB = Stmt.getEntryBlock(); + + if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() || + Access->isMemoryIntrinsic()) + return nullptr; + + // Skip accesses that have an invariant base pointer which is defined but + // not loaded inside the SCoP. This can happened e.g., if a readnone call + // returns a pointer that is used as a base address. However, as we want + // to hoist indirect pointers, we allow the base pointer to be defined in + // the region if it is also a memory access. Each ScopArrayInfo object + // that has a base pointer origin has a base pointer that is loaded and + // that it is invariant, thus it will be hoisted too. However, if there is + // no base pointer origin we check that the base pointer is defined + // outside the region. + auto *LI = cast(Access->getAccessInstruction()); + if (hasNonHoistableBasePtrInScop(Access, Writes)) + return nullptr; + + isl::map AccessRelation = Access->getAccessRelation(); + assert(!AccessRelation.is_empty()); + + if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators())) + return nullptr; + + AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain()); + isl::set SafeToLoad; + + auto &DL = scop->getFunction().getParent()->getDataLayout(); + if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(), + DL)) { + SafeToLoad = isl::set::universe(AccessRelation.get_space().range()); + } else if (BB != LI->getParent()) { + // Skip accesses in non-affine subregions as they might not be executed + // under the same condition as the entry of the non-affine subregion. + return nullptr; + } else { + SafeToLoad = AccessRelation.range(); + } + + if (isAccessRangeTooComplex(AccessRelation.range())) + return nullptr; + + isl::union_map Written = Writes.intersect_range(SafeToLoad); + isl::set WrittenCtx = Written.params(); + bool IsWritten = !WrittenCtx.is_empty(); + + if (!IsWritten) + return WrittenCtx; + + WrittenCtx = WrittenCtx.remove_divs(); + bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain; + if (TooComplex || !isRequiredInvariantLoad(LI)) + return nullptr; + + scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), + AS_RESTRICTION, LI->getParent()); + return WrittenCtx; +} + void ScopBuilder::collectCandidateReductionLoads( MemoryAccess *StoreMA, SmallVectorImpl &Loads) { ScopStmt *Stmt = StoreMA->getStatement(); Index: polly/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/lib/Analysis/ScopInfo.cpp +++ polly/lib/Analysis/ScopInfo.cpp @@ -110,22 +110,13 @@ STATISTIC(NumSingletonWritesInLoops, "Number of singleton writes nested in affine loops after ScopInfo"); -// The maximal number of basic sets we allow during domain construction to -// be created. More complex scops will result in very high compile time and -// are also unlikely to result in good code -static int const MaxDisjunctsInDomain = 20; +int const polly::MaxDisjunctsInDomain = 20; // The number of disjunct in the context after which we stop to add more // disjuncts. This parameter is there to avoid exponential growth in the // number of disjunct when adding non-convex sets to the context. static int const MaxDisjunctsInContext = 4; -// The maximal number of dimensions we allow during invariant load construction. -// More complex access ranges will result in very high compile time and are also -// unlikely to result in good code. This value is very high and should only -// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). -static int const MaxDimensionsInAccessRange = 9; - static cl::opt OptComputeOut("polly-analysis-computeout", cl::desc("Bound the scop analysis by a maximal amount of " @@ -2993,20 +2984,6 @@ return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst); } -bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA, - isl::union_map Writes) { - if (auto *BasePtrMA = lookupBasePtrAccess(MA)) { - return getNonHoistableCtx(BasePtrMA, Writes).is_null(); - } - - Value *BaseAddr = MA->getOriginalBaseAddr(); - if (auto *BasePtrInst = dyn_cast(BaseAddr)) - if (!isa(BasePtrInst)) - return contains(BasePtrInst); - - return false; -} - bool Scop::buildAliasChecks(AliasAnalysis &AA) { if (!PollyUseRuntimeAliasChecks) return true; @@ -3684,100 +3661,6 @@ } } -/// Check if an access range is too complex. -/// -/// An access range is too complex, if it contains either many disjuncts or -/// very complex expressions. As a simple heuristic, we assume if a set to -/// be too complex if the sum of existentially quantified dimensions and -/// set dimensions is larger than a threshold. This reliably detects both -/// sets with many disjuncts as well as sets with many divisions as they -/// arise in h264. -/// -/// @param AccessRange The range to check for complexity. -/// -/// @returns True if the access range is too complex. -static bool isAccessRangeTooComplex(isl::set AccessRange) { - unsigned NumTotalDims = 0; - - for (isl::basic_set BSet : AccessRange.get_basic_set_list()) { - NumTotalDims += BSet.dim(isl::dim::div); - NumTotalDims += BSet.dim(isl::dim::set); - } - - if (NumTotalDims > MaxDimensionsInAccessRange) - return true; - - return false; -} - -isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) { - // TODO: Loads that are not loop carried, hence are in a statement with - // zero iterators, are by construction invariant, though we - // currently "hoist" them anyway. This is necessary because we allow - // them to be treated as parameters (e.g., in conditions) and our code - // generation would otherwise use the old value. - - auto &Stmt = *Access->getStatement(); - BasicBlock *BB = Stmt.getEntryBlock(); - - if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() || - Access->isMemoryIntrinsic()) - return nullptr; - - // Skip accesses that have an invariant base pointer which is defined but - // not loaded inside the SCoP. This can happened e.g., if a readnone call - // returns a pointer that is used as a base address. However, as we want - // to hoist indirect pointers, we allow the base pointer to be defined in - // the region if it is also a memory access. Each ScopArrayInfo object - // that has a base pointer origin has a base pointer that is loaded and - // that it is invariant, thus it will be hoisted too. However, if there is - // no base pointer origin we check that the base pointer is defined - // outside the region. - auto *LI = cast(Access->getAccessInstruction()); - if (hasNonHoistableBasePtrInScop(Access, Writes)) - return nullptr; - - isl::map AccessRelation = Access->getAccessRelation(); - assert(!AccessRelation.is_empty()); - - if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators())) - return nullptr; - - AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain()); - isl::set SafeToLoad; - - auto &DL = getFunction().getParent()->getDataLayout(); - if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(), - DL)) { - SafeToLoad = isl::set::universe(AccessRelation.get_space().range()); - } else if (BB != LI->getParent()) { - // Skip accesses in non-affine subregions as they might not be executed - // under the same condition as the entry of the non-affine subregion. - return nullptr; - } else { - SafeToLoad = AccessRelation.range(); - } - - if (isAccessRangeTooComplex(AccessRelation.range())) - return nullptr; - - isl::union_map Written = Writes.intersect_range(SafeToLoad); - isl::set WrittenCtx = Written.params(); - bool IsWritten = !WrittenCtx.is_empty(); - - if (!IsWritten) - return WrittenCtx; - - WrittenCtx = WrittenCtx.remove_divs(); - bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain; - if (TooComplex || !isRequiredInvariantLoad(LI)) - return nullptr; - - addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), AS_RESTRICTION, - LI->getParent()); - return WrittenCtx; -} - ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, ArrayRef Sizes, MemoryKind Kind,