Index: include/polly/DeLICM.h =================================================================== --- include/polly/DeLICM.h +++ include/polly/DeLICM.h @@ -18,14 +18,201 @@ #ifndef POLLY_DELICM_H #define POLLY_DELICM_H +#include "polly/Support/GICHelper.h" + namespace llvm { class PassRegistry; class Pass; } // anonymous namespace namespace polly { +class Scop; + /// Create a new DeLICM pass instance. llvm::Pass *createDeLICMPass(); + +/// Convert a zone (range between timepoints) to timepoints. +/// +/// A zone represents the time between (integer) timepoints, but not the +/// timepoints themselves. This function can be used to determine whether a +/// timepoint lies within a zone. +/// +/// For instance, the range (1,3), representing the time between 1 and 3, is +/// represented by the zone +/// +/// { [i] : 1 < i <= 3 } +/// +/// The set of timepoints that lie completely within this range is +/// +/// { [i] : 1 < i < 3 } +/// +/// A typical use-case is the range in which a value written by a store is +/// available until it is overwritten by another value. If the write is at +/// timepoint 1 and its value is overwritten by another value at timepoint 3, +/// the value is available between those timepoints: timepoint 2 in this +/// example. +/// +/// +/// When InclStart is true, the range is interpreted left-inclusive, i.e. adds +/// the timepoint 1 to the result: +/// +/// { [i] : 1 <= i < 3 } +/// +/// In the use-case mentioned above that means that the value written at +/// timepoint 1 is already available in timepoint 1 (write takes place before +/// any read of it even if executed at the same timepoint) +/// +/// When InclEnd is true, the range is interpreted right-inclusive, i.e. adds +/// the timepoint 3 to the result: +/// +/// { [i] : 1 < i <= 3 } +/// +/// In the use-case mentioned above that means that although the value is +/// overwritten in timepoint 3, the old value is still available at timepoint 3 +/// (write takes place after any read even if executed at the same timepoint) +/// +/// @param Zone { Zone[] } +/// @param InclStart Include timepoints adjacent to the beginning of a zone. +/// @param InclEnd Include timepoints adjacent to the ending of a zone. +/// +/// @return { Scatter[] } +IslPtr convertZoneToTimepoints(IslPtr Zone, + bool InclStart, bool InclEnd); + +/// Compute the reaching definition statement or the next overwrite for each +/// definition of an array element. +/// +/// The reaching definition of an array element at a specific timepoint is the +/// statement instance that has written the current element's content. +/// Alternatively, this function determines for each timepoint and element which +/// write is going to overwrite an element at a future timepoint. This can be +/// seen as "reaching definition in reverse" where definitions are found in the +/// past. +/// +/// For example: +/// +/// Schedule := { Write[] -> [0]; Overwrite[] -> [10] } +/// Defs := { Write[] -> A[5]; Overwrite[] -> A[5] } +/// +/// If index 5 of array A is written at timepoint 0 and 10, the resulting +/// reaching definitions are: +/// +/// { [A[5] -> [i]] -> Write[] : 0 < i < 10; +/// [A[5] -> [i]] -> Overwrite[] : 10 < i } +/// +/// Between timepoint 0 (Write[]) and timepoint 10 (Overwrite[]), the +/// content of A[5] is written by statement instance Write[] and after +/// timepoint 10 by Overwrite[]. Values not defined in the map have no known +/// definition. This includes the statement instance timepoints themselves, +/// because reads at those timepoints could either read the old or the new +/// value, defined only by the statement itself. But this can be changed by @p +/// InclPrevDef and @p InclNextDef. InclPrevDef=false and InclNextDef=true +/// returns a zone. Unless @p InclPrevDef and @p InclNextDef are both true, +/// there is only one unique definition per element and timepoint. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all array writes. Instances not in +/// @p Writes are ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param Reverse If true, look for definitions in the future. That is, +/// find the write that is overwrites the current value. +/// @param InclPrevDef Include the definition's timepoint to the set of +/// well-defined elements (any load at that timepoint happen +/// at the writes). In the example, enabling this option adds +/// {[A[5] -> [0]] -> Write[]; [A[5] -> [10]] -> Overwrite[]} +/// to the result. +/// @param InclNextDef Whether to assume that at the timepoint where an element +/// is overwritten, it still contains the old value (any load +/// at that timepoint would happen before the overwrite). In +/// this example, enabling this adds +/// { [A[] -> [10]] -> Write[] } to the result. +/// +/// @return { [Element[] -> Scatter[]] -> DomainWrite[] } +/// The reaching definitions or future overwrite as described above, or +/// nullptr if either @p Schedule or @p Writes is nullptr, or the isl +/// max operations count has exceeded. +IslPtr computeReachingWrite(IslPtr Schedule, + IslPtr Writes, + bool Reverse, bool InclPrevDef, + bool InclNextDef); + +/// Compute the timepoints where the contents of an array element are not used. +/// +/// An element is unused at a timepoint when the element is overwritten in +/// the future, but it is not read in between. Another way to express this: the +/// time from when the element is written, to the most recent read before it, or +/// infinitely into the past if there is no read before. Such unused elements +/// can be overwritten by any value without changing the scop's semantics. An +/// example: +/// +/// Schedule := { Read[] -> [0]; Write[] -> [10]; Def[] -> [20] } +/// Writes := { Write[] -> A[5]; Def[] -> A[6] } +/// Reads := { Read[] -> A[5] } +/// +/// The result is: +/// +/// { A[5] -> [i] : 0 < i < 10; +/// A[6] -> [i] : i < 20 } +/// +/// That is, A[5] is unused between timepoint 0 (the read) and timepoint 10 (the +/// write). A[6] is unused before timepoint 20, but might be used after the +/// scop's execution (A[5] and any other A[i] as well). Use InclLastRead=false +/// and InclWrite=true to interpret the result as zone. +/// +/// @param Schedule { Domain[] -> Scatter[] } +/// The schedule of (at least) all statement instances +/// occurring in @p Writes or @p Reads. All other +/// instances are ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param Reads { DomainRead[] -> Element[] } +/// Elements read from by the statement instances. +/// @param ReadEltInSameInst Whether a load reads the value from a write +/// that is scheduled at the same timepoint (Writes +/// happen before reads). Otherwise, loads use the +/// value of an element that it had before the +/// timepoint (Reads before writes). For example: +/// { Read[] -> [0]; Write[] -> [0] } +/// With ReadEltInSameInst=false it is assumed that the +/// read happens before the write, such that the +/// element is never unused, or just at timepoint 0, +/// depending on InclLastRead/InclWrite. +/// With ReadEltInSameInst=false it assumes that the +/// value just written is used. Anything before +/// timepoint 0 is considered unused. +/// @param InclLastRead Whether a timepoint where an element is last read +/// counts as unused (the read happens at the beginning +/// of its timepoint, and nothing (else) can use it +/// during the timepoint). In the example, this option +/// adds { A[5] -> [0] } to the result. +/// @param InclWrite Whether the timepoint where an element is written +/// itself counts as unused (the write happens at the +/// end of its timepoint; no (other) operations uses +/// the element during the timepoint). In this example, +/// this adds +/// { A[5] -> [10]; A[6] -> [20] } to the result. +/// +/// @return { Element[] -> Scatter[] } +/// The unused timepoints as defined above, or nullptr if either @p +/// Schedule, @p Writes are @p Reads is nullptr, or the ISL max +/// operations count is exceeded. +IslPtr computeArrayUnused(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool InclLastRead, bool InclWrite); + +/// Determine whether two lifetimes are conflicting. +/// +/// Used by unittesting. +bool isConflicting(IslPtr ExistingOccupied, + IslPtr ExistingUnused, + IslPtr ExistingWrites, + IslPtr ProposedOccupied, + IslPtr ProposedUnused, + IslPtr ProposedWrites, + llvm::raw_ostream *OS = nullptr, unsigned Indent = 0); } // namespace polly namespace llvm { Index: lib/Transform/DeLICM.cpp =================================================================== --- lib/Transform/DeLICM.cpp +++ lib/Transform/DeLICM.cpp @@ -13,11 +13,99 @@ // Namely, remove register/scalar dependencies by mapping them back to array // elements. // +// The algorithms here work on the scatter space - the image space of the +// schedule returned by Scop::getSchedule(). We call an element in that space a +// "timepoint". Timepoints are lexicographically ordered such that we can +// defined ranges in the scatter space. We use two flavors of such ranges: +// Timepoint sets and zones. A timepoint set is simply a subset of the scatter +// space and is directly stored as isl_set. +// +// Zones are used to describe the space between timepoints as open sets, i.e. +// they do not contain the extrema. Using isl rational sets to express these +// would be overkill. We also cannot store them as the integer timepoints they +// contain; the (nonempty) zone between 1 and 2 would be empty and +// indistinguishable from e.g. the zone between 3 and 4. Also, we cannot store +// the integer set including the extrema; the set ]1,2[ + ]3,4[ could be +// coalesced to ]1,3[, although we defined the range [2,3] to be not in the set. +// Instead, we store the "half-open" integer extrema, including the lower bound, +// but excluding the upper bound. Examples: +// +// * The set { [i] : 1 <= i <= 3 } represents the zone ]0,3[ (which contains the +// integer points 1 and 2, but not 0 or 3) +// +// * { [1] } represents the zone ]0,1[ +// +// * { [i] : i = 1 or i = 3 } represents the zone ]0,1[ + ]2,3[ +// +// Therefore, an integer i in the set represents the zone ]i-1,i[, i.e. strictly +// speaking the integer points never belong to the zone. However, depending an +// the interpretation, one might want to include them: +// +// * The timepoints adjacent to two unit zones +// +// * The timepoints before a unit zone begins +// +// * The timepoints that directly follow a unit zone +// +// It sometimes helps to think about a value of i stored in an isl_set to +// represent the timepoint i-0.5 between two integer-valued timepoints. +// +// +// The code makes use of maps and sets in many different spaces. To not loose +// track in which space a set or map is expected to be in, variables holding an +// isl reference are usually annotated in the comments. They roughly follow isl +// syntax for spaces, but only the tuples, not the dimensions. The tuples have a +// meaning as follows: +// +// * Space[] - An unspecified tuple. Used for function parameters such that the +// function caller can use it for anything they like. +// +// * Domain[] - A statement instance as returned by ScopStmt::getDomain() +// isl_id_get_name: Stmt_ +// isl_id_get_user: Pointer to ScopStmt +// +// * Element[] - An array element as in the range part of +// MemoryAccess::getAccessRelation() +// isl_id_get_name: MemRef_ +// isl_id_get_user: Pointer to ScopArrayInfo +// +// * Scatter[] - Scatter space or space of timepoints +// Has no tuple id +// +// * Zone[] - Range between timepoints as described above +// Has no tuple id +// +// An annotation "{ Domain[] -> Scatter[] }" therefore means: A map from a +// statement instance to a timepoint, aka a schedule. There is only one scatter +// space, but most of the time multiple statements are processed in one set. +// This is why most of the time isl_union_map has to be used. +// +// The basic algorithm works as follows: +// At first we verify that the SCoP is compatible with this technique. For +// instance, two writes cannot write to the same location at the same statement +// instance because we cannot determine within the polyhedral model which one +// comes first. Once this was verified, we compute zones at which an array +// element is unused. This computation can fail if it takes too long. Then the +// main algorithm is executed. Because every store potentially trails an unused +// zone, we start at stores. We search for a scalar (MemoryKind::Value or +// MemoryKind::PHI) that we can map to the array element overwritten by the +// store, preferably one that is used by the store or at least the ScopStmt. +// When it does not conflict with the lifetime of the values in the array +// element, the map is applied and the unused zone updated as it is now used. We +// continue to try to map scalars to the array element until there are no more +// candidates to map. The algorithm is greedy in the sense that the first scalar +// not conflicting will be mapped. Other scalars processed later that could have +// fit the same unused zone will be rejected. As such the result depends on the +// processing order. +// //===----------------------------------------------------------------------===// #include "polly/DeLICM.h" +#include "polly/Options.h" #include "polly/ScopInfo.h" #include "polly/ScopPass.h" +#include "polly/Support/ISLTools.h" +#include "llvm/ADT/Statistic.h" #define DEBUG_TYPE "polly-delicm" using namespace polly; @@ -25,11 +113,1444 @@ namespace { +cl::opt + DelicmMaxOps("polly-delicm-max-ops", + cl::desc("Maximum number of isl operations to invest for " + "lifetime analysis; 0=no limit"), + cl::init(1000000), cl::cat(PollyCategory)); + +STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); +STATISTIC(DeLICMOutOfQuota, + "Analyses aborted because max_operations was reached"); +STATISTIC(DeLICMIncompatible, "Number of SCoPs incompatible for analysis"); +STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); +STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); +STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); +STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); + +/// Class for keeping track of scalar def-use chains in the polyhedral +/// representation. +/// +/// MemoryKind::Value: +/// There is one definition per llvm::Value or zero (read-only values defined +/// before the SCoP) and an arbitrary number of reads. +/// +/// MemoryKind::PHI, MemoryKind::ExitPHI: +/// There is at least one write (the incoming blocks/stmts) and one +/// (MemoryKind::PHI) or zero (MemoryKind::ExitPHI) reads per llvm::PHINode. +class ScalarDefUseChains { +private: + /// The definitions (i.e. write MemoryAccess) of a MemoryKind::Value scalar. + DenseMap ValueDefAccs; + + /// List of all uses (i.e. read MemoryAccesses) for a MemoryKind::Value + /// scalar. + DenseMap> ValueUseAccs; + + /// The receiving part (i.e. read MemoryAccess) of a MemoryKind::PHI scalar. + DenseMap PHIReadAccs; + + /// List of all incoming values (write MemoryAccess) of a MemoryKind::PHI or + /// MemoryKind::ExitPHI scalar. + DenseMap> + PHIIncomingAccs; + +public: + /// Find the MemoryAccesses that access the ScopArrayInfo-represented memory. + /// + /// @param S The SCoP to analyze. + void compute(Scop *S) { + // Purge any previous result. + reset(); + + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (MA->isOriginalValueKind() && MA->isWrite()) { + auto *SAI = MA->getScopArrayInfo(); + assert(!ValueDefAccs.count(SAI) && + "There can be at most one " + "definition per MemoryKind::Value scalar"); + ValueDefAccs[SAI] = MA; + } + + if (MA->isOriginalValueKind() && MA->isRead()) + ValueUseAccs[MA->getScopArrayInfo()].push_back(MA); + + if (MA->isOriginalAnyPHIKind() && MA->isRead()) { + auto *SAI = MA->getScopArrayInfo(); + assert(!PHIReadAccs.count(SAI) && + "There must be exactly one read " + "per PHI (that's where the PHINode is)"); + PHIReadAccs[SAI] = MA; + } + + if (MA->isOriginalAnyPHIKind() && MA->isWrite()) + PHIIncomingAccs[MA->getScopArrayInfo()].push_back(MA); + } + } + } + + /// Free all memory used by the analysis. + void reset() { + ValueDefAccs.clear(); + ValueUseAccs.clear(); + PHIReadAccs.clear(); + PHIIncomingAccs.clear(); + } + + MemoryAccess *getValueDef(const ScopArrayInfo *SAI) const { + return ValueDefAccs.lookup(SAI); + } + + ArrayRef getValueUses(const ScopArrayInfo *SAI) const { + auto It = ValueUseAccs.find(SAI); + if (It == ValueUseAccs.end()) + return {}; + return It->second; + } + + MemoryAccess *getPHIRead(const ScopArrayInfo *SAI) const { + return PHIReadAccs.lookup(SAI); + } + + ArrayRef getPHIIncomings(const ScopArrayInfo *SAI) const { + auto It = PHIIncomingAccs.find(SAI); + if (It == PHIIncomingAccs.end()) + return {}; + return It->second; + } +}; + +IslPtr computeReachingDefinition(IslPtr Schedule, + IslPtr Writes, + bool InclDef, bool InclRedef) { + return computeReachingWrite(Schedule, Writes, false, InclDef, InclRedef); +} + +IslPtr computeReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, + bool InclOverwrite) { + return computeReachingWrite(Schedule, Writes, true, InclPrevWrite, + InclOverwrite); +} + +/// Compute the next overwrite for a scalar. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all writes. Instances not in @p +/// Writes are ignored. +/// @param Writes { DomainWrite[] } +/// The element instances that write to the scalar. +/// @param InclPrevWrite Whether to extend the timepoints to include +/// the timepoint where the previous write happens. +/// @param InclOverwrite Whether the reaching overwrite includes the timepoint +/// of the overwrite itself. +/// +/// @return { Scatter[] -> DomainDef[] } +IslPtr +computeScalarReachingOverwrite(IslPtr Schedule, + IslPtr Writes, bool InclPrevWrite, + bool InclOverwrite) { + + // { DomainWrite[] } + auto WritesMap = give(isl_union_map_from_domain(Writes.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto Result = computeReachingOverwrite( + std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); + + return give(isl_union_map_domain_factor_range(Result.take())); +} + +/// Overload of computeScalarReachingOverwrite, with only one writing statement. +/// Consequently, the result consists of only one map space. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclPrevWrite Include the previous write to result. +/// @param InclOverwrite Include the overwrite to the result. +/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr computeScalarReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, + bool InclOverwrite) { + auto ScatterSpace = getScatterSpace(Schedule); + auto DomSpace = give(isl_set_get_space(Writes.keep())); + + auto ReachOverwrite = computeScalarReachingOverwrite( + Schedule, give(isl_union_set_from_set(Writes.take())), InclPrevWrite, + InclOverwrite); + + auto ResultSpace = give(isl_space_map_from_domain_and_range( + ScatterSpace.take(), DomSpace.take())); + return singleton(std::move(ReachOverwrite), ResultSpace); +} + +/// Compute the reaching definition of a scalar. +/// +/// Compared to computeReachingDefinition, there is just one element which is +/// accessed and therefore only a set if instances that accesses that element is +/// required. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclDef Include the timepoint of the definition to the result. +/// @param InclRedef Include the timepoint of the overwrite into the result. +/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr +computeScalarReachingDefinition(IslPtr Schedule, + IslPtr Writes, bool InclDef, + bool InclRedef) { + + // { DomainWrite[] -> Element[] } + auto Defs = give(isl_union_map_from_domain(Writes.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReachDefs = + computeReachingDefinition(Schedule, Defs, InclDef, InclRedef); + + // { Scatter[] -> DomainWrite[] } + return give(isl_union_set_unwrap( + isl_union_map_range(isl_union_map_curry(ReachDefs.take())))); +} + +/// Compute the reaching definition of a scalar. +/// +/// This overload accepts only a single writing statement as an isl_map, +/// consequently the result also is only a single isl_map. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclDef Include the timepoint of the definition to the result. +/// @param InclRedef Include the timepoint of the overwrite into the result. +/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr computeScalarReachingDefinition( // { Domain[] -> Zone[] } + IslPtr Schedule, IslPtr Writes, bool InclDef, + bool InclRedef) { + auto DomainSpace = give(isl_set_get_space(Writes.keep())); + auto ScatterSpace = getScatterSpace(Schedule); + + // { Scatter[] -> DomainWrite[] } + auto UMap = computeScalarReachingDefinition( + Schedule, give(isl_union_set_from_set(Writes.take())), InclDef, + InclRedef); + + auto ResultSpace = give(isl_space_map_from_domain_and_range( + ScatterSpace.take(), DomainSpace.take())); + return singleton(UMap, ResultSpace); +} + +/// If InputVal is not defined in the stmt itself, return the MemoryAccess that +/// reads the scalar. Return nullptr otherwise (if the value is defined in the +/// scop, or is synthesizable). +MemoryAccess *getInputAccessOf(Value *InputVal, ScopStmt *Stmt) { + for (auto *MA : *Stmt) { + if (!MA->isRead()) + continue; + if (!MA->isLatestScalarKind()) + continue; + + assert(MA->getAccessValue() == MA->getBaseAddr()); + if (MA->getAccessValue() == InputVal) + return MA; + } + return nullptr; +} + +/// Represent the knowledge of the contents of any array elements in any zone or +/// the knowledge we would add when mapping a scalar to an array element. +/// +/// Every array element at every zone unit has one of two states: +/// +/// - Undef: Not occupied by any value so a transformation can change it to +/// other values. +/// +/// - Occupied: The element contains a value that is still needed. +/// +/// The union of Undef and Unknown zones forms the universe, the set of all +/// elements at every timepoint. The universe can easily be derived from the +/// array elements that are accessed someway. Arrays that are never accessed +/// also never play a role in any computation and can hence be ignored. With a +/// given universe, only one of the sets needs to stored implicitly. Computing +/// the complement is also an expensive operation, hence this class has been +/// designed that only one of sets is needed while the other is assumed to be +/// implicit. It can still be given, but is mostly ignored. +/// +/// There are two use cases for the Knowledge class: +/// +/// 1) To represent the knowledge of the current state of ScopInfo. The undef +/// state means that an element is currently unused: there is no read of it +/// before the next overwrite. Also called 'Existing'. +/// +/// 2) To represent the requirements for mapping a scalar to array elements. The +/// undef state means that there is no change/requirement. Also called +/// 'Proposed'. +/// +/// In addition to these states at unit zones, Knowledge needs to know when +/// values are written. This is because written values may have no lifetime (one +/// reason is that the value is never read). Such writes would therefore never +/// conflict, but overwrite values that might still be required. Another source +/// of problems are multiple writes to the same element at the same timepoint, +/// because their order is undefined. +class Knowledge { +private: + /// { [Element[] -> Zone[]] } + /// Set of array elements and when they are alive. + /// Can contain a nullptr; in this case the set is implicitly defined as the + /// complement of #Unused. + IslPtr Occupied; + + /// { [Element[] -> Zone[]] } + /// Set of array elements when they are not alive, i.e. their memory can be + /// used for other purposed. Can contain a nullptr; in this case the set is + /// implicitly defined as the complement of #Occupied. + IslPtr Unused; + + /// { [Element[] -> Scatter[]] } + /// The write actions currently in the scop or that would be added when + /// mapping a scalar. + IslPtr Written; + + /// Check whether this Knowledge object is well-formed. + void checkConsistency() const { + // Default-initialized object + if (!Occupied && !Unused && !Written) + return; + + assert(Occupied || Unused); + assert(!Occupied || !Unused || + (isl_union_set_is_disjoint(Occupied.keep(), Unused.keep()) == + isl_bool_true)); + assert(Written); + } + +public: + /// Initialize a nullptr-Knowledge. This is only provided for convenience; do + /// not use such an object. + Knowledge() {} + + /// Create a new object with the given members. + Knowledge(IslPtr Occupied, IslPtr Unused, + IslPtr Written) + : Occupied(std::move(Occupied)), Unused(std::move(Unused)), + Written(std::move(Written)) { + checkConsistency(); + } + + /// Alternative constructor taking isl_sets instead isl_union_sets. + Knowledge(IslPtr Occupied, IslPtr Unused, + IslPtr Written) + : Knowledge(give(isl_union_set_from_set(Occupied.take())), + give(isl_union_set_from_set(Unused.take())), + give(isl_union_set_from_set(Written.take()))) {} + + /// Return whether this object was not default-constructed. + bool isUsable() const { return (Occupied || Unused) && Written; } + + /// Print the content of this object to @p OS. + void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { + if (isUsable()) { + if (Occupied) + OS.indent(Indent) << "Occupied: " << Occupied << "\n"; + else + OS.indent(Indent) << "Occupied: \n"; + if (Unused) + OS.indent(Indent) << "Unused: " << Unused << "\n"; + else + OS.indent(Indent) << "Unused: \n"; + OS.indent(Indent) << "Written : " << Written << '\n'; + } else { + OS.indent(Indent) << "Invalid knowledge\n"; + } + } + + /// Combine two knowledges, this and @p That. + void learnFrom(Knowledge That) { + assert(!isConflicting(*this, That)); + assert(Unused && !Occupied); + assert(!That.Unused && That.Occupied); + + if (Unused && That.Occupied) + Unused = + give(isl_union_set_subtract(Unused.take(), That.Occupied.copy())); + + Written = give(isl_union_set_union(Written.take(), That.Written.take())); + + checkConsistency(); + } + + /// Determine whether two Knowledges conflict with each other. + /// + /// In theory @p Existing and @p Proposed are symmetric, but the + /// implementation is constrained by the implicit interpretation. That is, @p + /// Existing must have #Unused defined (use case 1) and @p Proposed must have + /// #Occupied defined (use case 1). + /// + /// A conflict is defined as non-preserved semantics when they are merged. For + /// instance, when for the same array and zone they assume different + /// llvm::Values. + /// + /// @param Existing One of the knowledges with #Unused defined. + /// @param Proposed One of the knowledges with #Occupied defined. + /// @param OS Dump the conflict reason to this output stream; use + /// nullptr to not output anything. + /// @param Indent Indention for the conflict reason. + /// + /// @return True, iff the two knowledges are conflicting. + static bool isConflicting(const Knowledge &Existing, + const Knowledge &Proposed, + llvm::raw_ostream *OS = nullptr, + unsigned Indent = 0) { + assert(Existing.Unused); + assert(Proposed.Occupied); + + // Are the new lifetimes required for Proposed unused in Existing? + if (isl_union_set_is_subset(Proposed.Occupied.keep(), + Existing.Unused.keep()) != isl_bool_true) { + if (OS) { + auto ConflictingLifetimes = give(isl_union_set_subtract( + Proposed.Occupied.copy(), Existing.Unused.copy())); + OS->indent(Indent) << "Proposed lifetimes are not unused in existing\n"; + OS->indent(Indent) << "Conflicting lifetimes: " << ConflictingLifetimes + << "\n"; + } + return true; + } + + // Do the writes in Existing only overwrite unused values in Proposed? + auto ProposedFixedDefs = + convertZoneToTimepoints(Proposed.Occupied, true, false); + if (isl_union_set_is_disjoint(Existing.Written.keep(), + ProposedFixedDefs.keep()) != isl_bool_true) { + if (OS) { + auto ConflictingWrites = give(isl_union_set_intersect( + Existing.Written.copy(), ProposedFixedDefs.copy())); + OS->indent(Indent) << "Proposed writes into range used by existing\n"; + OS->indent(Indent) << "Conflicting writes: " << ConflictingWrites + << "\n"; + } + return true; + } + + // Do the new writes in Proposed only overwrite unused values in Existing? + auto ExistingAvailableDefs = + convertZoneToTimepoints(Existing.Unused, true, false); + if (isl_union_set_is_subset(Proposed.Written.keep(), + ExistingAvailableDefs.keep()) != + isl_bool_true) { + if (OS) { + auto ConflictingWrites = give(isl_union_set_subtract( + Proposed.Written.copy(), ExistingAvailableDefs.copy())); + OS->indent(Indent) + << "Proposed a lifetime where there is an Existing write into it\n"; + OS->indent(Indent) << "Conflicting writes: " << ConflictingWrites + << "\n"; + } + return true; + } + + // Does Proposed write at the same time as Existing already does (order of + // writes is undefined)? + if (isl_union_set_is_disjoint(Existing.Written.keep(), + Proposed.Written.keep()) != isl_bool_true) { + if (OS) { + auto ConflictingWrites = give(isl_union_set_intersect( + Existing.Written.copy(), Proposed.Written.copy())); + OS->indent(Indent) << "Proposed writes at the same time as an already " + "Existing write\n"; + OS->indent(Indent) << "Conflicting writes: " << ConflictingWrites + << "\n"; + } + return true; + } + + return false; + } +}; + +/// Base class for algorithms based on zones, like DeLICM. +class ZoneAlgorithm { +protected: + /// Hold a reference to the isl_ctx to avoid it being freed before we released + /// all of the isl objects. + /// + /// This must be declared before any other member that holds an isl object. + /// This guarantees that the shared_ptr and its isl_ctx is destructed last, + /// after all other members free'd the isl objects they were holding. + std::shared_ptr IslCtx; + + /// Cached reaching definitions for each ScopStmt. + /// + /// Use getScalarReachingDefinition() to get its contents. + DenseMap> ScalarReachDefZone; + + /// The analyzed Scop. + Scop *S; + + /// Parameter space that does not need realignment. + IslPtr ParamSpace; + + /// Space the schedule maps to. + IslPtr ScatterSpace; + + /// Cached version of the schedule and domains. + IslPtr Schedule; + + /// Set of all referenced elements. + /// { Element[] -> Element[] } + IslPtr AllElements; + + /// Combined access relations of all MemoryKind::Array READ accesses. + /// { DomainRead[] -> Element[] } + IslPtr AllReads; + + /// Combined access relations of all MemoryKind::Array, MAY_WRITE accesses. + /// { DomainMayWrite[] -> Element[] } + IslPtr AllMayWrites; + + /// Combined access relations of all MemoryKind::Array, MUST_WRITE accesses. + /// { DomainMustWrite[] -> Element[] } + IslPtr AllMustWrites; + + /// Prepare the object before computing the zones of @p S. + ZoneAlgorithm(Scop *S) + : IslCtx(S->getSharedIslCtx()), S(S), Schedule(give(S->getSchedule())) { + + auto Domains = give(S->getDomains()); + + Schedule = + give(isl_union_map_intersect_domain(Schedule.take(), Domains.take())); + ParamSpace = give(isl_union_map_get_space(Schedule.keep())); + ScatterSpace = getScatterSpace(Schedule); + } + +private: + /// Check whether @p Stmt can be accurately analyzed by zones. + /// + /// What violates our assumptions: + /// - A load after a write of the same location; we assume that all reads + /// occur before the writes. + /// - Two writes to the same location; we cannot model the order in which + /// these occur. + /// + /// Scalar reads implicitly always occur before other accesses therefore never + /// violate the first condition. There is also at most one write to a scalar, + /// satisfying the second condition. + bool isCompatibleStmt(ScopStmt *Stmt) { + auto Stores = makeEmptyUnionMap(); + auto Loads = makeEmptyUnionMap(); + + // This assumes that the MemoryKind::Array MemoryAccesses are iterated in + // order. + for (auto *MA : *Stmt) { + if (!MA->isLatestArrayKind()) + continue; + + auto AccRel = + give(isl_union_map_from_map(getAccessRelationFor(MA).take())); + + if (MA->isRead()) { + // Reject store after load to same location. + if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + return false; + + Loads = give(isl_union_map_union(Loads.take(), AccRel.take())); + + continue; + } + + if (!isa(MA->getAccessInstruction())) { + DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n"); + return false; + } + + // In region statements the order is less clear, eg. the load and store + // might be in a boxed loop. + if (Stmt->isRegionStmt() && + !isl_union_map_is_disjoint(Loads.keep(), AccRel.keep())) + return false; + + // Do not allow more than one store to the same location. + if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + return false; + + Stores = give(isl_union_map_union(Stores.take(), AccRel.take())); + } + + return true; + } + + void addArrayReadAccess(MemoryAccess *MA) { + assert(MA->isLatestArrayKind()); + assert(MA->isRead()); + + // { DomainRead[] -> Element[] } + auto AccRel = getAccessRelationFor(MA); + AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy())); + } + + void addArrayWriteAccess(MemoryAccess *MA) { + assert(MA->isLatestArrayKind()); + assert(MA->isWrite()); + + // { Domain[] -> Element[] } + auto AccRel = getAccessRelationFor(MA); + + if (MA->isMustWrite()) + AllMustWrites = + give(isl_union_map_add_map(AllMustWrites.take(), AccRel.copy())); + + if (MA->isMayWrite()) + AllMayWrites = + give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy())); + } + +protected: + IslPtr makeEmptyUnionSet() { + return give(isl_union_set_empty(ParamSpace.copy())); + } + + IslPtr makeEmptyUnionMap() { + return give(isl_union_map_empty(ParamSpace.copy())); + } + + /// Check whether @p S can be accurately analyzed by zones. + bool isCompatibleScop() { + for (auto &Stmt : *S) { + if (!isCompatibleStmt(&Stmt)) + return false; + } + return true; + } + + /// Get the schedule for @p Stmt. + /// + /// The domain of the result is as narrow as possible. + IslPtr getScatterFor(ScopStmt *Stmt) const { + auto ResultSpace = give(isl_space_map_from_domain_and_range( + Stmt->getDomainSpace(), ScatterSpace.copy())); + return give(isl_union_map_extract_map(Schedule.keep(), ResultSpace.take())); + } + + /// Get the schedule of @p MA's parent statement. + IslPtr getScatterFor(MemoryAccess *MA) const { + return getScatterFor(MA->getStatement()); + } + + /// Get the schedule for the statement instances of @p Domain. + IslPtr getScatterFor(IslPtr Domain) const { + return give(isl_union_map_intersect_domain(Schedule.copy(), Domain.take())); + } + + /// Get the schedule for the statement instances of @p Domain. + IslPtr getScatterFor(IslPtr Domain) const { + auto ResultSpace = give(isl_space_map_from_domain_and_range( + isl_set_get_space(Domain.keep()), ScatterSpace.copy())); + auto UDomain = give(isl_union_set_from_set(Domain.copy())); + auto UResult = getScatterFor(std::move(UDomain)); + auto Result = singleton(std::move(UResult), std::move(ResultSpace)); + assert(isl_set_is_equal(give(isl_map_domain(Result.copy())).keep(), + Domain.keep()) == isl_bool_true); + return Result; + } + + /// Get the domain of @p Stmt. + IslPtr getDomainFor(ScopStmt *Stmt) const { + return give(Stmt->getDomain()); + } + + /// Get the domain @p MA's parent statement. + IslPtr getDomainFor(MemoryAccess *MA) const { + return getDomainFor(MA->getStatement()); + } + + /// Get the access relation of @p MA. + /// + /// The domain of the result is as narrow as possible. + IslPtr getAccessRelationFor(MemoryAccess *MA) const { + auto Domain = getDomainFor(MA); + auto AccRel = give(MA->getLatestAccessRelation()); + return give(isl_map_intersect_domain(AccRel.take(), Domain.take())); + } + + /// Get the reaching definition of a scalar defined in @p Stmt. + /// + /// Note that this does not depend on the llvm::Instruction, only on the + /// statement it is defined in. Therefore the same computation can be reused. + /// + /// @param Stmt The statement in which a scalar is defined. + /// + /// @return { Scatter[] -> DomainDef[] } + IslPtr getScalarReachingDefinition(ScopStmt *Stmt) { + auto &Result = ScalarReachDefZone[Stmt]; + if (Result) + return Result; + + auto Domain = getDomainFor(Stmt); + Result = computeScalarReachingDefinition(Schedule, Domain, false, true); + simplify(Result); + + assert(Result); + return Result; + } + + /// Compute the different zones. + void computeCommon() { + AllReads = makeEmptyUnionMap(); + AllMayWrites = makeEmptyUnionMap(); + AllMustWrites = makeEmptyUnionMap(); + + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (!MA->isLatestArrayKind()) + continue; + + if (MA->isRead()) + addArrayReadAccess(MA); + + if (MA->isWrite()) + addArrayWriteAccess(MA); + } + } + + // { DomainWrite[] -> Element[] } + auto AllWrites = + give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); + + // { Element[] } + AllElements = makeEmptyUnionSet(); + foreachElt(AllWrites, [this](IslPtr Write) { + auto Space = give(isl_map_get_space(Write.keep())); + auto EltSpace = give(isl_space_range(Space.take())); + auto EltUniv = give(isl_set_universe(EltSpace.take())); + AllElements = + give(isl_union_set_add_set(AllElements.take(), EltUniv.take())); + }); + } + + /// Print the current state of all MemoryAccesses to @p. + void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const { + OS.indent(Indent) << "After accesses {\n"; + for (auto &Stmt : *S) { + OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; + for (auto *MA : Stmt) + MA->print(OS); + } + OS.indent(Indent) << "}\n"; + } + +public: + /// Return the SCoP this object is analyzing. + Scop *getScop() const { return S; } +}; + +/// Implementation of the DeLICM/DePRE transformation. +class DeLICMImpl : public ZoneAlgorithm { +private: + /// Knowledge before any transformation took place. + Knowledge OriginalZone; + + /// Current knowledge of the SCoP including all already applied + /// transformations. + Knowledge Zone; + + ScalarDefUseChains DefUse; + + /// Determine whether two knowledges are conflicting with each other. + /// + /// @see Knowledge::isConflicting + bool isConflicting(const Knowledge &Proposed) { + raw_ostream *OS = nullptr; + DEBUG(OS = &llvm::dbgs()); + return Knowledge::isConflicting(Zone, Proposed, OS, 4); + } + + /// Determine whether @p SAI is a scalar that can be mapped to an array + /// element. + bool isMappable(const ScopArrayInfo *SAI) { + assert(SAI); + + if (SAI->isValueKind()) { + auto *MA = DefUse.getValueDef(SAI); + if (!MA) { + DEBUG(dbgs() + << " Reject because value is read-only within the scop\n"); + return false; + } + + // Mapping if value is used after scop is not supported. The code + // generator would need to reload the scalar after the scop, but it + // does not have the information to where it is mapped to. Only the + // MemoryAccesses have that information, not the ScopArrayInfo. + auto Inst = MA->getAccessInstruction(); + for (auto User : Inst->users()) { + if (!isa(User)) + return false; + auto UserInst = cast(User); + + if (!S->contains(UserInst)) { + DEBUG(dbgs() << " Reject because value is escaping\n"); + return false; + } + } + + return true; + } + + if (SAI->isPHIKind()) { + auto *MA = DefUse.getPHIRead(SAI); + assert(MA); + + // Mapping of an incoming block from before the SCoP is not supported by + // the code generator. + auto PHI = cast(MA->getAccessInstruction()); + for (auto Incoming : PHI->blocks()) { + if (!S->contains(Incoming)) { + DEBUG(dbgs() << " Reject because at least one incoming block is " + "not in the scop region\n"); + return false; + } + } + + return true; + } + + DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); + return false; + } + + /// Compute the uses of a MemoryKind::Value and its lifetime (from its + /// definition to the last use). + /// + /// @param SAI The ScopArrayInfo representing the value's storage. + /// + /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } + /// First element is the set of uses for each definition. + /// The second is the lifetime of each definition. + std::tuple, IslPtr> + computeValueUses(const ScopArrayInfo *SAI) { + assert(SAI->isValueKind()); + + // { DomainRead[] } + auto Reads = makeEmptyUnionSet(); + + // Find all uses. + for (auto *MA : DefUse.getValueUses(SAI)) + Reads = + give(isl_union_set_add_set(Reads.take(), getDomainFor(MA).take())); + + // { DomainRead[] -> Scatter[] } + auto ReadSchedule = getScatterFor(Reads); + + auto *DefMA = DefUse.getValueDef(SAI); + assert(DefMA); + + // { DomainDef[] } + auto Writes = getDomainFor(DefMA); + + // { DomainDef[] -> Scatter[] } + auto WriteScatter = getScatterFor(Writes); + + // { Scatter[] -> DomainDef[] } + auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); + + // { [DomainDef[] -> Scatter[]] -> DomainUse[] } + auto Uses = give( + isl_union_map_apply_range(isl_union_map_from_map(isl_map_range_map( + isl_map_reverse(ReachDef.take()))), + isl_union_map_reverse(ReadSchedule.take()))); + + // { DomainDef[] -> Scatter[] } + auto UseScatter = + singleton(give(isl_union_set_unwrap(isl_union_map_domain(Uses.copy()))), + give(isl_space_map_from_domain_and_range( + isl_set_get_space(Writes.keep()), ScatterSpace.copy()))); + + // { DomainDef[] -> Zone[] } + auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); + + // { DomainDef[] -> DomainRead[] } + auto DefUses = give(isl_union_map_domain_factor_domain(Uses.take())); + + return std::make_pair(DefUses, Lifetime); + } + + /// For each 'execution' of a PHINode, get the incoming block that was + /// executed before. + /// + /// For each PHI instance we can directly determine which was the incoming + /// block, and hence derive which value the PHI has. + /// + /// @param SAI The ScopArrayInfo representing the PHI's storage. + /// + /// @return { DomainPHIRead[] -> DomainPHIWrite[] } + IslPtr computePerPHI(const ScopArrayInfo *SAI) { + assert(SAI->isPHIKind()); + + // { DomainPHIWrite[] -> Scatter[] } + auto PHIWriteScatter = makeEmptyUnionMap(); + + // Collect all incoming block timepoint. + for (auto *MA : DefUse.getPHIIncomings(SAI)) { + auto Scatter = getScatterFor(MA); + PHIWriteScatter = + give(isl_union_map_add_map(PHIWriteScatter.take(), Scatter.take())); + } + + // { DomainPHIRead[] -> Scatter[] } + auto PHIReadScatter = getScatterFor(DefUse.getPHIRead(SAI)); + + // { DomainPHIRead[] -> Scatter[] } + auto BeforeRead = beforeScatter(PHIReadScatter, true); + + // { Scatter[] } + auto WriteTimes = singleton( + give(isl_union_map_range(PHIWriteScatter.copy())), ScatterSpace); + + // { DomainPHIRead[] -> Scatter[] } + auto PHIWriteTimes = + give(isl_map_intersect_range(BeforeRead.take(), WriteTimes.take())); + auto LastPerPHIWrites = give(isl_map_lexmax(PHIWriteTimes.take())); + + // { DomainPHIRead[] -> DomainPHIWrite[] } + auto Result = give(isl_union_map_apply_range( + isl_union_map_from_map(LastPerPHIWrites.take()), + isl_union_map_reverse(PHIWriteScatter.take()))); + assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true); + assert(isl_union_map_is_injective(Result.keep()) == isl_bool_true); + return Result; + } + + /// Try to map a MemoryKind::Value to a given array element. + /// + /// @param SAI Representation of the scalar's memory to map. + /// @param TargetElt { Scatter[] -> Element[] } + /// Suggestion where to map a scalar to when at a timepoint. + /// + /// @return true if the scalar was successfully mapped. + bool tryMapValue(const ScopArrayInfo *SAI, IslPtr TargetElt) { + assert(SAI->isValueKind()); + + auto *DefMA = DefUse.getValueDef(SAI); + assert(DefMA->isValueKind()); + assert(DefMA->isMustWrite()); + + // Stop if the scalar has already been mapped. + if (!DefMA->getLatestScopArrayInfo()->isValueKind()) + return false; + + // { DomainDef[] -> Scatter[] } + auto DefSched = getScatterFor(DefMA); + + // Where each write is mapped to, according to the suggestion. + // { DomainDef[] -> Element[] } + auto DefTarget = give(isl_map_apply_domain( + TargetElt.copy(), isl_map_reverse(DefSched.copy()))); + simplify(DefTarget); + DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); + + auto OrigDomain = getDomainFor(DefMA); + auto MappedDomain = give(isl_map_domain(DefTarget.copy())); + if (!isl_set_is_subset(OrigDomain.keep(), MappedDomain.keep())) { + DEBUG(dbgs() + << " Reject because mapping does not encompass all instances\n"); + return false; + } + + // { DomainDef[] -> Zone[] } + IslPtr Lifetime; + + // { DomainDef[] -> DomainUse[] } + IslPtr DefUses; + + std::tie(DefUses, Lifetime) = computeValueUses(SAI); + DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); + + /// { [Element[] -> Zone[]] } + auto EltZone = give( + isl_map_wrap(isl_map_apply_domain(Lifetime.copy(), DefTarget.copy()))); + simplify(EltZone); + + // { [Element[] -> Scatter[]] } + auto DefEltSched = give(isl_map_wrap(isl_map_reverse( + isl_map_apply_domain(DefTarget.copy(), DefSched.copy())))); + simplify(DefEltSched); + + Knowledge Proposed(EltZone, nullptr, DefEltSched); + if (isConflicting(Proposed)) + return false; + + // { DomainUse[] -> Element[] } + auto UseTarget = give( + isl_union_map_apply_range(isl_union_map_reverse(DefUses.take()), + isl_union_map_from_map(DefTarget.copy()))); + + mapValue(SAI, std::move(DefTarget), std::move(UseTarget), + std::move(Lifetime), std::move(Proposed)); + return true; + } + + /// After a scalar has been mapped, update the global knowledge. + void applyLifetime(Knowledge Proposed) { + Zone.learnFrom(std::move(Proposed)); + } + + /// Map a MemoryKind::Value scalar to an array element. + /// + /// Callers must have ensured that the mapping is valid and not conflicting. + /// + /// @param SAI The ScopArrayInfo representing the scalar's memory to + /// map. + /// @param DefTarget { DomainDef[] -> Element[] } + /// The array element to map the scalar to. + /// @param UseTarget { DomainUse[] -> Element[] } + /// The array elements the uses are mapped to. + /// @param Lifetime { DomainDef[] -> Zone[] } + /// The lifetime of each llvm::Value definition for + /// reporting. + /// @param Proposed Mapping constraints for reporting. + void mapValue(const ScopArrayInfo *SAI, IslPtr DefTarget, + IslPtr UseTarget, IslPtr Lifetime, + Knowledge Proposed) { + // Redirect the read accesses. + for (auto *MA : DefUse.getValueUses(SAI)) { + // { DomainUse[] } + auto Domain = getDomainFor(MA); + + // { DomainUse[] -> Element[] } + auto NewAccRel = give(isl_union_map_intersect_domain( + UseTarget.copy(), isl_union_set_from_set(Domain.take()))); + simplify(NewAccRel); + + assert(isl_union_map_n_map(NewAccRel.keep()) == 1); + MA->setNewAccessRelation(isl_map_from_union_map(NewAccRel.take())); + } + + auto *WA = DefUse.getValueDef(SAI); + WA->setNewAccessRelation(DefTarget.copy()); + applyLifetime(Proposed); + + MappedValueScalars++; + } + + /// Try to map a MemoryKind::PHI scalar to a given array element. + /// + /// @param SAI Representation of the scalar's memory to map. + /// @param TargetElt { Scatter[] -> Element[] } + /// Suggestion where to map the scalar to when at a + /// timepoint. + /// + /// @return true if the PHI scalar has been mapped. + bool tryMapPHI(const ScopArrayInfo *SAI, IslPtr TargetElt) { + auto *PHIRead = DefUse.getPHIRead(SAI); + assert(PHIRead->isPHIKind()); + assert(PHIRead->isRead()); + + // Skip if already been mapped. + if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) + return false; + + // { DomainRead[] -> Scatter[] } + auto PHISched = getScatterFor(PHIRead); + + // { DomainRead[] -> Element[] } + auto PHITarget = + give(isl_map_apply_range(PHISched.copy(), TargetElt.copy())); + simplify(PHITarget); + DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); + + auto OrigDomain = getDomainFor(PHIRead); + auto MappedDomain = give(isl_map_domain(PHITarget.copy())); + if (!isl_set_is_subset(OrigDomain.keep(), MappedDomain.keep())) { + DEBUG(dbgs() + << " Reject because mapping does not encompass all instances\n"); + return false; + } + + // { DomainRead[] -> DomainWrite[] } + auto PerPHIWrites = computePerPHI(SAI); + + // { DomainWrite[] -> Element[] } + auto WritesTarget = give(isl_union_map_reverse(isl_union_map_apply_domain( + PerPHIWrites.copy(), isl_union_map_from_map(PHITarget.copy())))); + simplify(WritesTarget); + + // { DomainWrite[] } + auto ExpandedWritesDom = give(isl_union_map_domain(WritesTarget.copy())); + auto UniverseWritesDom = give(isl_union_set_empty(ParamSpace.copy())); + + for (auto *MA : DefUse.getPHIIncomings(SAI)) + UniverseWritesDom = give(isl_union_set_add_set(UniverseWritesDom.take(), + getDomainFor(MA).take())); + + if (!isl_union_set_is_subset(UniverseWritesDom.keep(), + ExpandedWritesDom.keep())) { + DEBUG(dbgs() << " Reject because did not find PHI write mapping for " + "all instances\n"); + DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget << '\n'); + DEBUG(dbgs() << " Missing instances: " + << give(isl_union_set_subtract(UniverseWritesDom.copy(), + ExpandedWritesDom.copy())) + << '\n'); + return false; + } + + // { DomainRead[] -> Scatter[] } + auto PerPHIWriteScatter = give(isl_map_from_union_map( + isl_union_map_apply_range(PerPHIWrites.copy(), Schedule.copy()))); + + // { DomainRead[] -> Zone[] } + auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); + simplify(Lifetime); + DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); + + // { DomainWrite[] -> Zone[] } + auto WriteLifetime = give(isl_union_map_apply_domain( + isl_union_map_from_map(Lifetime.copy()), PerPHIWrites.copy())); + + // { DomainWrite[] -> [Element[] -> Scatter[]] } + auto WrittenTranslator = + give(isl_union_map_range_product(WritesTarget.copy(), Schedule.copy())); + + // { [Element[] -> Scatter[]] } + auto Written = give(isl_union_map_range(WrittenTranslator.copy())); + simplify(Written); + + // { DomainWrite[] -> [Element[] -> Zone[]] } + auto LifetimeTranslator = give( + isl_union_map_range_product(WritesTarget.copy(), WriteLifetime.take())); + + // { [Element[] -> Zone[] } + auto Occupied = give(isl_union_map_range(LifetimeTranslator.copy())); + simplify(Occupied); + + Knowledge Proposed(Occupied, nullptr, Written); + if (isConflicting(Proposed)) + return false; + + mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget), + std::move(Lifetime), std::move(Proposed)); + return true; + } + + /// Map a MemoryKind::PHI scalar to an array element. + /// + /// Callers must have ensured that the mapping is valid and not conflicting + /// with the common knowledge. + /// + /// @param SAI The ScopArrayInfo representing the scalar's memory to + /// map. + /// @param ReadTarget { DomainRead[] -> Element[] } + /// The array element to map the scalar to. + /// @param WriteTarget { DomainWrite[] -> Element[] } + /// New access target for each PHI incoming write. + /// @param Lifetime { DomainRead[] -> Zone[] } + /// The lifetime of each PHI for reporting. + /// @param Proposed Mapping constraints for reporting. + void mapPHI(const ScopArrayInfo *SAI, IslPtr ReadTarget, + IslPtr WriteTarget, IslPtr Lifetime, + Knowledge Proposed) { + // Redirect the PHI incoming writes. + for (auto *MA : DefUse.getPHIIncomings(SAI)) { + // { DomainWrite[] } + auto Domain = getDomainFor(MA); + + // { DomainWrite[] -> Element[] } + auto NewAccRel = give(isl_union_map_intersect_domain( + WriteTarget.copy(), isl_union_set_from_set(Domain.take()))); + simplify(NewAccRel); + + assert(isl_union_map_n_map(NewAccRel.keep()) == 1); + MA->setNewAccessRelation(isl_map_from_union_map(NewAccRel.take())); + } + + // Redirect the PHI read. + auto *PHIRead = DefUse.getPHIRead(SAI); + PHIRead->setNewAccessRelation(ReadTarget.copy()); + applyLifetime(Proposed); + + MappedPHIScalars++; + } + + /// Search and map scalars to memory overwritten by @p TargetStoreMA. + /// + /// Start trying to map scalars that are used in the same statement as the + /// store. For every successful mapping, try to also map scalars of the + /// statements where those are written. Repeat, until no more mapping + /// opportunity is found. + /// + /// There is currently no preference in which order scalars are tried. + /// Ideally, we would direct it towards a load instruction of the same array + /// element. + bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { + assert(TargetStoreMA->isLatestArrayKind()); + assert(TargetStoreMA->isMustWrite()); + + auto TargetStmt = TargetStoreMA->getStatement(); + + // { DomTarget[] } + auto TargetDom = getDomainFor(TargetStmt); + + // { DomTarget[] -> Element[] } + auto TargetAccRel = getAccessRelationFor(TargetStoreMA); + + // { Zone[] -> DomTarget[] } + // For each point in time, find the next target store instance. + auto Target = + computeScalarReachingOverwrite(Schedule, TargetDom, false, true); + + // { Zone[] -> Element[] } + // Use the target store's write location as a suggestion to map scalars to. + auto EltTarget = + give(isl_map_apply_range(Target.take(), TargetAccRel.take())); + simplify(EltTarget); + DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); + + // Stack of elements no yet processed. + SmallVector Worklist; + + // Set of scalars already tested. + SmallPtrSet Closed; + + // Lambda to add all scalar reads to the work list. + auto ProcessAllIncoming = [&](ScopStmt *Stmt) { + for (auto *MA : *Stmt) { + if (!MA->isLatestScalarKind()) + continue; + if (!MA->isRead()) + continue; + + Worklist.push_back(MA); + } + }; + + // Add initial scalar. Either the value written by the store, or all inputs + // of its statement. + auto WrittenVal = TargetStoreMA->getAccessValue(); + if (auto InputAcc = getInputAccessOf(WrittenVal, TargetStmt)) + Worklist.push_back(InputAcc); + else + ProcessAllIncoming(TargetStmt); + + auto AnyMapped = false; + auto &DL = + S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); + auto StoreSize = + DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); + + while (!Worklist.empty()) { + auto *MA = Worklist.pop_back_val(); + + auto *SAI = MA->getScopArrayInfo(); + if (Closed.count(SAI)) + continue; + Closed.insert(SAI); + DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI + << ")\n"); + + // Skip non-mappable scalars. + if (!isMappable(SAI)) + continue; + + auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); + if (MASize > StoreSize) { + DEBUG(dbgs() << " Reject because storage size is insufficient\n"); + continue; + } + + // Try to map MemoryKind::Value scalars. + if (SAI->isValueKind()) { + if (!tryMapValue(SAI, EltTarget)) + continue; + + auto *DefAcc = DefUse.getValueDef(SAI); + ProcessAllIncoming(DefAcc->getStatement()); + + AnyMapped = true; + continue; + } + + // Try to map MemoryKind::PHI scalars. + if (SAI->isPHIKind()) { + if (!tryMapPHI(SAI, EltTarget)) + continue; + // Add inputs of all incoming statements to the worklist. + for (auto *PHIWrite : DefUse.getPHIIncomings(SAI)) + ProcessAllIncoming(PHIWrite->getStatement()); + + AnyMapped = true; + continue; + } + } + + if (AnyMapped) + TargetsMapped++; + return AnyMapped; + } + + /// Compute when an array element is unused. + /// + /// @return { [Element[] -> Zone[]] } + IslPtr computeLifetime() const { + // { Element[] -> Zone[] } + auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, + false, false, true); + + auto Result = give(isl_union_map_wrap(ArrayUnused.copy())); + + simplify(Result); + return Result; + } + + /// Determine when an array element is written to. + /// + /// @return { [Element[] -> Scatter[]] } + IslPtr computeWritten() const { + // { WriteDomain[] -> Element[] } + auto AllWrites = + give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); + + // { Scatter[] -> Element[] } + auto WriteTimepoints = + give(isl_union_map_apply_domain(AllWrites.copy(), Schedule.copy())); + + auto Result = + give(isl_union_map_wrap(isl_union_map_reverse(WriteTimepoints.copy()))); + + simplify(Result); + return Result; + } + + /// Determine whether an access touches at most one element. + /// + /// The accessed element could be a scalar or accessing an array with constant + /// subscript, such that all instances access only that element. + /// + /// @param MA The access to test. + /// + /// @return True, if zero or one elements are accessed; False if at least two + /// different elements are accessed. + bool isScalarAccess(MemoryAccess *MA) { + auto Map = getAccessRelationFor(MA); + auto Set = give(isl_map_range(Map.take())); + return isl_set_is_singleton(Set.keep()) == isl_bool_true; + } + +public: + DeLICMImpl(Scop *S) : ZoneAlgorithm(S) {} + + /// Calculate the lifetime (definition to last use) of every array element. + /// + /// @return True if the computed lifetimes (#Zone) is usable. + bool computeZone() { + // Check that nothing strange occurs. + if (!isCompatibleScop()) { + DeLICMIncompatible++; + return false; + } + + DefUse.compute(S); + IslPtr EltUnused, EltWritten; + + { + IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); + + computeCommon(); + + EltUnused = computeLifetime(); + EltWritten = computeWritten(); + } + + if (isl_ctx_last_error(IslCtx.get()) == isl_error_quota) { + DeLICMOutOfQuota++; + DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); + } + + DeLICMAnalyzed++; + OriginalZone = Knowledge(nullptr, EltUnused, EltWritten); + DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); + + Zone = OriginalZone; + + return DelicmMaxOps == 0 || Zone.isUsable(); + } + + /// Try to map as many scalars to unused array elements as possible. + /// + /// Multiple scalars might be mappable to intersecting unused array element + /// zones, but we can only chose one. This is a greedy algorithm, therefore + /// the first processed claims it. + void greedyCollapse() { + bool Modified = false; + + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (!MA->isLatestArrayKind()) + continue; + if (!MA->isWrite()) + continue; + + if (MA->isMayWrite()) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it is a MAY_WRITE\n"); + continue; + } + + if (Stmt.getNumIterators() == 0) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it is not in a loop\n"); + continue; + } + + if (isScalarAccess(MA)) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it writes only a single element\n"); + continue; + } + + DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); + if (collapseScalarsToStore(MA)) + Modified = true; + } + } + + if (Modified) + DeLICMScopsModified++; + } + + /// Dump the internal information about a performed DeLICM to @p OS. + void print(llvm::raw_ostream &OS, int indent = 0) { + printAccesses(OS, indent); + } +}; + class DeLICM : public ScopPass { private: DeLICM(const DeLICM &) = delete; const DeLICM &operator=(const DeLICM &) = delete; + /// The pass implementation, also holding per-scop data. + std::unique_ptr Impl; + + void collapseToUnused(Scop &S) { + Impl = make_unique(&S); + + if (!Impl->computeZone()) { + DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); + return; + } + + DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); + Impl->greedyCollapse(); + + DEBUG(dbgs() << "\nFinal Scop:\n"); + DEBUG(S.print(dbgs())); + } + public: static char ID; explicit DeLICM() : ScopPass(ID) {} @@ -43,19 +1564,21 @@ // Free resources for previous scop's computation, if not yet done. releaseMemory(); - // TODO: Run DeLICM algorithm + collapseToUnused(S); return false; } virtual void printScop(raw_ostream &OS, Scop &S) const override { + if (!Impl) + return; + assert(Impl->getScop() == &S); + OS << "DeLICM result:\n"; - // TODO: Print analysis results and performed transformation details + Impl->print(OS); } - virtual void releaseMemory() override { - // TODO: Release resources (eg. shared_ptr to isl_ctx) - } + virtual void releaseMemory() override { Impl.reset(); } }; char DeLICM::ID; @@ -68,3 +1591,141 @@ INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, false) + +IslPtr polly::convertZoneToTimepoints(IslPtr Zone, + bool InclStart, + bool InclEnd) { + if (!InclStart && InclEnd) + return Zone; + + auto ShiftedZone = shiftDim(Zone, -1, -1); + if (InclStart && !InclEnd) + return ShiftedZone; + else if (!InclStart && !InclEnd) + return give(isl_union_set_intersect(Zone.take(), ShiftedZone.take())); + + assert(InclStart && InclEnd); + return give(isl_union_set_union(Zone.take(), ShiftedZone.take())); +} + +IslPtr +polly::computeReachingWrite(IslPtr Schedule, + IslPtr Writes, bool Reverse, + bool InclPrevDef, bool InclNextDef) { + + // { Scatter[] } + auto ScatterSpace = getScatterSpace(Schedule); + + // { ScatterRead[] -> ScatterWrite[] } + IslPtr Relation; + if (Reverse) + Relation = give(InclPrevDef ? isl_map_lex_lt(ScatterSpace.take()) + : isl_map_lex_le(ScatterSpace.take())); + else + Relation = give(InclNextDef ? isl_map_lex_gt(ScatterSpace.take()) + : isl_map_lex_ge(ScatterSpace.take())); + + // { ScatterWrite[] -> [ScatterRead[] -> ScatterWrite[]] } + auto RelationMap = give(isl_map_reverse(isl_map_range_map(Relation.take()))); + + // { Element[] -> ScatterWrite[] } + auto WriteAction = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.take())); + + // { ScatterWrite[] -> Element[] } + auto WriteActionRev = give(isl_union_map_reverse(WriteAction.copy())); + + // { Element[] -> [ScatterUse[] -> ScatterWrite[]] } + auto DefSchedRelation = give(isl_union_map_apply_domain( + isl_union_map_from_map(RelationMap.take()), WriteActionRev.take())); + + // For each element, at every point in time, map to the times of previous + // definitions. { [Element[] -> ScatterRead[]] -> ScatterWrite[] } + auto ReachableWrites = give(isl_union_map_uncurry(DefSchedRelation.take())); + if (Reverse) + ReachableWrites = give(isl_union_map_lexmin(ReachableWrites.copy())); + else + ReachableWrites = give(isl_union_map_lexmax(ReachableWrites.copy())); + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(WriteAction.take())); + + if (InclPrevDef && InclNextDef) { + // Add the Def itself to the solution. + ReachableWrites = + give(isl_union_map_union(ReachableWrites.take(), SelfUse.take())); + ReachableWrites = give(isl_union_map_coalesce(ReachableWrites.take())); + } else if (!InclPrevDef && !InclNextDef) { + // Remove Def itself from the solution. + ReachableWrites = + give(isl_union_map_subtract(ReachableWrites.take(), SelfUse.take())); + } + + // { [Element[] -> ScatterRead[]] -> Domain[] } + auto ReachableWriteDomain = give(isl_union_map_apply_range( + ReachableWrites.take(), isl_union_map_reverse(Schedule.take()))); + + return ReachableWriteDomain; +} + +IslPtr polly::computeArrayUnused(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool IncludeLastRead, + bool IncludeWrite) { + // { Element[] -> Scatter[] } + auto ReadActions = + give(isl_union_map_apply_domain(Schedule.copy(), Reads.take())); + auto WriteActions = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.copy())); + + // { [Element[] -> Scatter[] } + auto AfterReads = afterScatter(ReadActions, ReadEltInSameInst); + auto WritesBeforeAnyReads = + give(isl_union_map_subtract(WriteActions.take(), AfterReads.take())); + auto BeforeWritesBeforeAnyReads = + beforeScatter(WritesBeforeAnyReads, !IncludeWrite); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto EltDomWrites = give(isl_union_map_apply_range( + isl_union_map_range_map(isl_union_map_reverse(Writes.copy())), + Schedule.copy())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReachingOverwrite = computeReachingOverwrite( + Schedule, Writes, ReadEltInSameInst, !ReadEltInSameInst); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReadsOverwritten = give(isl_union_map_intersect_domain( + ReachingOverwrite.take(), isl_union_map_wrap(ReadActions.take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto ReadsOverwrittenRotated = give(isl_union_map_reverse( + isl_union_map_curry(reverseDomain(ReadsOverwritten).take()))); + auto LastOverwrittenRead = + give(isl_union_map_lexmax(ReadsOverwrittenRotated.take())); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto BetweenLastReadOverwrite = betweenScatter( + LastOverwrittenRead, EltDomWrites, IncludeLastRead, IncludeWrite); + + return give(isl_union_map_union( + BeforeWritesBeforeAnyReads.take(), + isl_union_map_domain_factor_domain(BetweenLastReadOverwrite.take()))); +} + +bool polly::isConflicting(IslPtr ExistingOccupied, + IslPtr ExistingUnused, + IslPtr ExistingWrites, + IslPtr ProposedOccupied, + IslPtr ProposedUnused, + IslPtr ProposedWrites, + llvm::raw_ostream *OS, unsigned Indent) { + Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), + std::move(ExistingWrites)); + Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), + std::move(ProposedWrites)); + + return Knowledge::isConflicting(Existing, Proposed, OS, Indent); +} Index: test/DeLICM/reduction_preheader.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_preheader.ll @@ -0,0 +1,130 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; Unrolled flattened schedule: +; [0] Stmt_reduction_preheader[0] +; [1] Stmt_reduction_for[0, 0] +; [2] Stmt_body[0, 0] +; [3] Stmt_reduction_inc[0, 0] +; [4] Stmt_reduction_for[0, 1] +; [5] Stmt_body[0, 1] +; [6] Stmt_reduction_inc[0, 1] +; [7] Stmt_reduction_for[0, 2] +; [8] Stmt_body[0, 2] +; [9] Stmt_reduction_inc[0, 2] +; [10] Stmt_reduction_for[0, 3] +; [11] Stmt_body[0, 3] +; [12] Stmt_reduction_inc[0, 3] +; [13] Stmt_reduction_for[0, 4] +; [14] Stmt_reduction_exit[0] +; [15] Stmt_reduction_preheader[0] +; [16] Stmt_reduction_for[1, 0] +; [17] Stmt_body[1, 0] +; [18] Stmt_reduction_inc[1, 0] +; [19] Stmt_reduction_for[1, 1] +; [20] Stmt_body[1, 1] +; [21] Stmt_reduction_inc[1, 1] +; [22] Stmt_reduction_for[1, 2] +; [23] Stmt_body[1, 2] +; [24] Stmt_reduction_inc[1, 2] +; [25] Stmt_reduction_for[1, 3] +; [26] Stmt_body[1, 3] +; [27] Stmt_reduction_inc[1, 3] +; [28] Stmt_reduction_for[1, 4] +; [29] Stmt_reduction_exit[1] + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i1 >= 0 and -5i0 <= i1 <= 8 - 5i0 and i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : i1 >= 0 and -5i0 <= i1 <= 7 - 5i0 and i1 <= 3; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } + Index: unittests/CMakeLists.txt =================================================================== --- unittests/CMakeLists.txt +++ unittests/CMakeLists.txt @@ -16,8 +16,9 @@ set_property(TARGET ${test_name} PROPERTY FOLDER "Polly") endif() - target_link_libraries(${test_name} Polly LLVMCore LLVMSupport LLVMDemangle) + target_link_libraries(${test_name} Polly LLVMCore LLVMSupport LLVMDemangle LLVMipo) endfunction() add_subdirectory(Isl) add_subdirectory(Flatten) +add_subdirectory(DeLICM) Index: unittests/DeLICM/CMakeLists.txt =================================================================== --- /dev/null +++ unittests/DeLICM/CMakeLists.txt @@ -0,0 +1,3 @@ +add_polly_unittest(DeLICMTests + DeLICMTest.cpp + ) Index: unittests/DeLICM/DeLICMTest.cpp =================================================================== --- /dev/null +++ unittests/DeLICM/DeLICMTest.cpp @@ -0,0 +1,419 @@ +//===- DeLICMTest.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "polly/DeLICM.h" +#include "gtest/gtest.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace polly; + +/// Loop-like control structure to run body for true and false if passed indef. +/// +/// Takes an int variable that can be either 0 (false), 1 (true), or -1 (indef). +/// It then shadows that variable with a new variable of type bool. +/// +/// - If the int variable has the value of 0, the body is executed with the bool +/// variable set to false. +/// - If the int variable has the value of 1, the body is executed with the bool +/// variable set to true. +/// - If the int variable has the value of -1, the body is executed twice: once +/// with the bool variable set to false and a second time with true. +/// +/// For use in tests where the value of a boolean variable should not change the +/// result, such that both settings are tested at once. +#define BOOL_FOR(VAR) \ + for (int VAR##_iter = (VAR == indef) ? 0 : VAR; \ + VAR##_iter <= ((VAR == indef) ? 1 : VAR); VAR##_iter += 1) \ + for (bool VAR = VAR##_iter, VAR##_repeat = true; VAR##_repeat; \ + VAR##_repeat = false) + +#define indef (-1) + +#define USET(Str) give(isl_union_set_read_from_str(Ctx.get(), Str)) +#define UMAP(Str) give(isl_union_map_read_from_str(Ctx.get(), Str)) + +static bool operator==(const IslPtr &LHS, + const IslPtr &RHS) { + auto IsEqual = isl_union_set_is_equal(LHS.keep(), RHS.keep()); + EXPECT_NE(isl_bool_error, IsEqual); + return IsEqual; +} + +static bool operator==(const IslPtr &LHS, + const IslPtr &RHS) { + auto IsEqual = isl_union_map_is_equal(LHS.keep(), RHS.keep()); + EXPECT_NE(isl_bool_error, IsEqual); + return IsEqual; +} + +namespace { + +TEST(DeLICM, ConvertZoneToTimepoints) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + // Corner case: empty set + EXPECT_EQ(USET("{}"), convertZoneToTimepoints(USET("{}"), false, false)); + EXPECT_EQ(USET("{}"), convertZoneToTimepoints(USET("{}"), true, false)); + EXPECT_EQ(USET("{}"), convertZoneToTimepoints(USET("{}"), false, true)); + EXPECT_EQ(USET("{}"), convertZoneToTimepoints(USET("{}"), true, true)); + + // Basic usage + EXPECT_EQ(USET("{}"), convertZoneToTimepoints(USET("{ [1] }"), false, false)); + EXPECT_EQ(USET("{ [0] }"), + convertZoneToTimepoints(USET("{ [1] }"), true, false)); + EXPECT_EQ(USET("{ [1] }"), + convertZoneToTimepoints(USET("{ [1] }"), false, true)); + EXPECT_EQ(USET("{ [0]; [1] }"), + convertZoneToTimepoints(USET("{ [1] }"), true, true)); + + // Non-adjacent ranges + EXPECT_EQ(USET("{}"), + convertZoneToTimepoints(USET("{ [1]; [11] }"), false, false)); + EXPECT_EQ(USET("{ [0]; [10] }"), + convertZoneToTimepoints(USET("{ [1]; [11] }"), true, false)); + EXPECT_EQ(USET("{ [1]; [11] }"), + convertZoneToTimepoints(USET("{ [1]; [11] }"), false, true)); + EXPECT_EQ(USET("{ [0]; [1]; [10]; [11] }"), + convertZoneToTimepoints(USET("{ [1]; [11] }"), true, true)); + + // Adjacent unit ranges + EXPECT_EQ( + USET("{ [i] : 0 < i < 10 }"), + convertZoneToTimepoints(USET("{ [i] : 0 < i <= 10 }"), false, false)); + EXPECT_EQ( + USET("{ [i] : 0 <= i < 10 }"), + convertZoneToTimepoints(USET("{ [i] : 0 < i <= 10 }"), true, false)); + EXPECT_EQ( + USET("{ [i] : 0 < i <= 10 }"), + convertZoneToTimepoints(USET("{ [i] : 0 < i <= 10 }"), false, true)); + EXPECT_EQ(USET("{ [i] : 0 <= i <= 10 }"), + convertZoneToTimepoints(USET("{ [i] : 0 < i <= 10 }"), true, true)); + + // More than one dimension + EXPECT_EQ(USET("{}"), + convertZoneToTimepoints(USET("{ [0,1] }"), false, false)); + EXPECT_EQ(USET("{ [0,0] }"), + convertZoneToTimepoints(USET("{ [0,1] }"), true, false)); + EXPECT_EQ(USET("{ [0,1] }"), + convertZoneToTimepoints(USET("{ [0,1] }"), false, true)); + EXPECT_EQ(USET("{ [0,0]; [0,1] }"), + convertZoneToTimepoints(USET("{ [0,1] }"), true, true)); +} + +TEST(DeLICM, ComputeWriteInfluance) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + // Basic usage + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : 0 < i }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), false, false, + false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : 0 < i }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), false, false, + true)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : 0 <= i }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), false, true, + false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : 0 <= i }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), false, true, + false)); + + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : i < 0 }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), true, false, + false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : i <= 0 }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), true, false, + true)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : i < 0 }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), true, true, + false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[] : i <= 0 }"), + computeReachingWrite(UMAP("{ Dom[] -> [0] }"), + UMAP("{ Dom[] -> Elt[] }"), true, true, true)); + + // Two writes + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom1[] : 0 < i < 10; [Elt[] -> [i]] -> " + "Dom2[] : 10 < i }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + false, false, false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom1[] : 0 <= i < 10; [Elt[] -> [i]] -> " + "Dom2[] : 10 <= i }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + false, true, false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom1[] : 0 < i <= 10; [Elt[] -> [i]] -> " + "Dom2[] : 10 < i }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + false, false, true)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom1[] : 0 <= i <= 10; [Elt[] -> [i]] -> " + "Dom2[] : 10 <= i }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + false, true, true)); + + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom2[] : 0 < i < 10; [Elt[] -> [i]] -> " + "Dom1[] : i < 0 }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + true, false, false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom2[] : 0 <= i < 10; [Elt[] -> [i]] -> " + "Dom1[] : i < 0 }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + true, true, false)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom2[] : 0 < i <= 10; [Elt[] -> [i]] -> " + "Dom1[] : i <= 0 }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + true, false, true)); + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom2[] : 0 <= i <= 10; [Elt[] -> [i]] -> " + "Dom1[] : i <= 0 }"), + computeReachingWrite(UMAP("{ Dom1[] -> [0]; Dom2[] -> [10] }"), + UMAP("{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }"), + true, true, true)); + + // Domain in same space + EXPECT_EQ(UMAP("{ [Elt[] -> [i]] -> Dom[1] : 0 < i <= 10; [Elt[] -> [i]] -> " + "Dom[2] : 10 < i }"), + computeReachingWrite(UMAP("{ Dom[i] -> [10i - 10] }"), + UMAP("{ Dom[1] -> Elt[]; Dom[2] -> Elt[] }"), + false, false, true)); + + // Parametric + EXPECT_EQ(UMAP("[p] -> { [Elt[] -> [i]] -> Dom[] : p < i }"), + computeReachingWrite(UMAP("[p] -> { Dom[] -> [p] }"), + UMAP("{ Dom[] -> Elt[] }"), false, false, + false)); + + // More realistic example (from redunction_embeddel.ll) + EXPECT_EQ( + UMAP("{ [Elt[] -> [i]] -> Dom[0] : 0 < i <= 3; [Elt[] -> [i]] -> Dom[1] " + ": 3 < i <= 6; [Elt[] -> [i]] -> Dom[2] : 6 < i <= 9; [Elt[] -> " + "[i]] -> Dom[3] : 9 < i <= 12; [Elt[] -> [i]] -> Dom[4] : 12 < i }"), + computeReachingWrite(UMAP("{ Dom[i] -> [3i] : 0 <= i <= 4 }"), + UMAP("{ Dom[i] -> Elt[] : 0 <= i <= 4 }"), false, + false, true)); +} + +TEST(DeLICM, ArrayUnused) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + int ReadEltInSameInst = indef; + BOOL_FOR(ReadEltInSameInst) { + // Basic usage: one read, one write + EXPECT_EQ(UMAP("{ Elt[] -> [i] : 0 < i < 10 }"), + computeArrayUnused(UMAP("{ Read[] -> [0]; Write[] -> [10] }"), + UMAP("{ Write[] -> Elt[] }"), + UMAP("{ Read[] -> Elt[] }"), ReadEltInSameInst, + false, false)); + EXPECT_EQ(UMAP("{ Elt[] -> [i] : 0 < i <= 10 }"), + computeArrayUnused(UMAP("{ Read[] -> [0]; Write[] -> [10] }"), + UMAP("{ Write[] -> Elt[] }"), + UMAP("{ Read[] -> Elt[] }"), ReadEltInSameInst, + false, true)); + EXPECT_EQ(UMAP("{ Elt[] -> [i] : 0 <= i < 10 }"), + computeArrayUnused(UMAP("{ Read[] -> [0]; Write[] -> [10] }"), + UMAP("{ Write[] -> Elt[] }"), + UMAP("{ Read[] -> Elt[] }"), ReadEltInSameInst, + true, false)); + EXPECT_EQ(UMAP("{ Elt[] -> [i] : 0 <= i <= 10 }"), + computeArrayUnused(UMAP("{ Read[] -> [0]; Write[] -> [10] }"), + UMAP("{ Write[] -> Elt[] }"), + UMAP("{ Read[] -> Elt[] }"), ReadEltInSameInst, + true, true)); + + // Two reads + EXPECT_EQ(UMAP("{ Elt[] -> [i] : 0 < i <= 10 }"), + computeArrayUnused( + UMAP("{ Read[0] -> [-10]; Read[1] -> [0]; Write[] -> [10] }"), + UMAP("{ Write[] -> Elt[] }"), UMAP("{ Read[i] -> Elt[] }"), + ReadEltInSameInst, false, true)); + + // Corner case: no writes + EXPECT_EQ(UMAP("{}"), + computeArrayUnused(UMAP("{ Read[] -> [0] }"), UMAP("{}"), + UMAP("{ Read[] -> Elt[] }"), ReadEltInSameInst, + false, false)); + + // Corner case: no reads + EXPECT_EQ(UMAP("{ Elt[] -> [i] : i <= 0 }"), + computeArrayUnused(UMAP("{ Write[] -> [0] }"), + UMAP("{ Write[] -> Elt[] }"), UMAP("{}"), + ReadEltInSameInst, false, true)); + } + + // Read and write in same statement + EXPECT_EQ(UMAP("{ Elt[] -> [i] : i < 0 }"), + computeArrayUnused(UMAP("{ RW[] -> [0] }"), + UMAP("{ RW[] -> Elt[] }"), + UMAP("{ RW[] -> Elt[] }"), true, false, false)); + EXPECT_EQ(UMAP("{ Elt[] -> [i] : i <= 0 }"), + computeArrayUnused(UMAP("{ RW[] -> [0] }"), + UMAP("{ RW[] -> Elt[] }"), + UMAP("{ RW[] -> Elt[] }"), true, false, true)); + EXPECT_EQ(UMAP("{ Elt[] -> [0] }"), + computeArrayUnused(UMAP("{ RW[] -> [0] }"), + UMAP("{ RW[] -> Elt[] }"), + UMAP("{ RW[] -> Elt[] }"), false, true, true)); +} + +/// Get the universes of all spaces in @p USet. +IslPtr unionSpace(NonowningIslPtr USet) { + auto Result = give(isl_union_set_empty(isl_union_set_get_space(USet.keep()))); + foreachElt(USet, [=, &Result](IslPtr Set) { + auto Space = give(isl_set_get_space(Set.keep())); + auto Universe = give(isl_set_universe(Space.take())); + Result = give(isl_union_set_add_set(Result.take(), Universe.take())); + }); + return Result; +} + +void completeLifetime(IslPtr Universe, + IslPtr &Unknown, + IslPtr &Undef) { + if (!Unknown) { + assert(Undef); + Unknown = give(isl_union_set_subtract(Universe.copy(), Undef.copy())); + } + + if (!Undef) { + assert(Unknown); + Undef = give(isl_union_set_subtract(Universe.copy(), Unknown.copy())); + } +} + +typedef struct { + const char *OccupiedStr; + const char *UndefStr; + const char *WrittenStr; +} Knowledge; + +bool checkIsConflictingNonsymmetric(Knowledge Existing, Knowledge Proposed) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + // Parse knowledge. + auto ExistingOccupied = + Existing.OccupiedStr + ? give(isl_union_set_read_from_str(Ctx.get(), Existing.OccupiedStr)) + : nullptr; + auto ExistingUndef = + Existing.UndefStr + ? give(isl_union_set_read_from_str(Ctx.get(), Existing.UndefStr)) + : nullptr; + auto ExistingWritten = + give(isl_union_set_read_from_str(Ctx.get(), Existing.WrittenStr)); + + auto ProposedOccupied = + Proposed.OccupiedStr + ? give(isl_union_set_read_from_str(Ctx.get(), Proposed.OccupiedStr)) + : nullptr; + auto ProposedUndef = + Proposed.UndefStr + ? give(isl_union_set_read_from_str(Ctx.get(), Proposed.UndefStr)) + : nullptr; + auto ProposedWritten = + give(isl_union_set_read_from_str(Ctx.get(), Proposed.WrittenStr)); + + // Determine universe (set of all possible domains). + auto Universe = + give(isl_union_set_empty(isl_space_params_alloc(Ctx.get(), 0))); + if (ExistingOccupied) + Universe = + give(isl_union_set_union(Universe.take(), ExistingOccupied.copy())); + if (ExistingUndef) + Universe = give(isl_union_set_union(Universe.take(), ExistingUndef.copy())); + if (ExistingWritten) + Universe = + give(isl_union_set_union(Universe.take(), ExistingWritten.copy())); + if (ProposedOccupied) + Universe = + give(isl_union_set_union(Universe.take(), ProposedOccupied.copy())); + if (ProposedUndef) + Universe = give(isl_union_set_union(Universe.take(), ProposedUndef.copy())); + if (ProposedWritten) + Universe = + give(isl_union_set_union(Universe.take(), ProposedWritten.copy())); + Universe = unionSpace(Universe); + + // Using the universe, fill missing data. + completeLifetime(Universe, ExistingOccupied, ExistingUndef); + completeLifetime(Universe, ProposedOccupied, ProposedUndef); + + return isConflicting(ExistingOccupied, ExistingUndef, ExistingWritten, + ProposedOccupied, ProposedUndef, ProposedWritten); +} + +bool checkIsConflicting(Knowledge Existing, Knowledge Proposed) { + auto Forward = checkIsConflictingNonsymmetric(Existing, Proposed); + auto Backward = checkIsConflictingNonsymmetric(Proposed, Existing); + + // isConflicting should be symmetric. + EXPECT_EQ(Forward, Backward); + + return Forward || Backward; +} + +TEST(DeLICM, isConflicting) { + + // Check occupied vs. occupied. + EXPECT_TRUE( + checkIsConflicting({"{ Dom[i] }", nullptr, "{}"}, {nullptr, "{}", "{}"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] }", nullptr, "{}"}, + {"{ Dom[i] }", nullptr, "{}"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[0] }", nullptr, "{}"}, + {nullptr, "{ Dom[0] }", "{}"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[i] : i != 0 }", nullptr, "{}"}, + {"{ Dom[0] }", nullptr, "{}"})); + + // Check occupied vs. written. + EXPECT_TRUE( + checkIsConflicting({nullptr, "{}", "{}"}, {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_FALSE( + checkIsConflicting({"{}", nullptr, "{}"}, {"{}", nullptr, "{ Dom[0] }"})); + + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_FALSE(checkIsConflicting({"{ DomA[i] }", nullptr, "{}"}, + {"{}", nullptr, "{ DomB[0] }"})); + + // Dom[1] represents the time between 0 and 1. Now Proposed writes at timestep + // 0 such that will have a different value between 0 and 1. Hence it is + // conflicting with Existing. + EXPECT_TRUE(checkIsConflicting({"{ Dom[1] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[i] : i != 1 }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] }"})); + + // Check written vs. written. + EXPECT_TRUE(checkIsConflicting({"{}", nullptr, "{ Dom[0] }"}, + {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[-1] }"}, + {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[1] }"}, + {"{}", nullptr, "{ Dom[0] }"})); +} +} // anonymous namespace