Index: include/polly/DeLICM.h =================================================================== --- include/polly/DeLICM.h +++ include/polly/DeLICM.h @@ -18,14 +18,246 @@ #ifndef POLLY_DELICM_H #define POLLY_DELICM_H +#include "polly/Support/GICHelper.h" + namespace llvm { class PassRegistry; class Pass; } // anonymous namespace namespace polly { +class Scop; + /// Create a new DeLICM pass instance. llvm::Pass *createDeLICMPass(); + +/// Compute the reaching definition statement for each definition of an array +/// element. +/// +/// The reaching definition of an array element at a specific timepoint is the +/// statement instance that had written the current element's content. This +/// function computes all reaching definitions for all array elements and +/// timepoints. For example: +/// +/// Schedule := { Write[] -> [0]; Overwrite[] -> [10] } +/// Defs := { Write[] -> A[5]; Overwrite[] -> A[5] } +/// +/// That is, index 5 of array A will be written to at timepoint 0 and 10. The +/// result will be: +/// +/// { [A[5] -> [i]] -> Write[] : 0 < i < 10; +/// [A[5] -> [i]] -> Overwrite[] : 10 < i } +/// +/// That is, between timepoint 0 (Write[]) and timepoint 10 (Overwrite[]), the +/// content of A[5] was written by statement instance Write[] and after +/// timepoint 10 by Overwrite[]. Values not defined in the map have no known +/// definition. This includes the statement instance timepoints themselves, +/// because reads at those timepoints could either read the old or the new +/// value, defined only by the statement itself. But this can be changed by @p +/// InclDef and @p InclRedef. InclDef=false and InclRedef=true will return a +/// zone. Unless @p InclDef and @p InclRedef are both true, there is only one +/// unique definition per element and timepoint. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all array writes. Instances not in +/// @p Writes will be ignored. 
+/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param InclDef Whether to include the definition's timepoint as where the +/// element is well-defined (any load at that timepoint would +/// happen at the writes). In the example, enabling adds +/// { [A[5] -> [0]] -> Write[]; [A[5] -> [10]] -> Overwrite[] } +/// to the result. +/// @param InclRedef Whether to assume that at the timepoint where an element +/// is overwritten, it still contains the old value (any load +/// at that timepoint would happen before the overwrite). In +/// this example, enabling this adds +/// { [A[] -> [10]] -> Write[] } to the result. +/// +/// @return { [Element[] -> Scatter[]] -> DomainWrite[] } +/// The reaching definitions as described above, or nullptr if either @p +/// Schedule or @p Writes is nullptr, or the ISL max operations count +/// has been exceeded. +IslPtr computeReachingDefinition(IslPtr Schedule, + IslPtr Writes, + bool InclDef, bool InclRedef); + +/// Compute the timepoints from a write to its (last) use. +/// +/// Example: +/// Schedule := { Def[] -> [0]; Read[] -> [10]; } +/// Writes := { Def[] -> A[5] } +/// Reads := { Read[] -> A[5] } +/// +/// Result: +/// { [A[5] -> Def[]] -> [i] : 0 < i < 10 } +/// +/// Note: Lifetimes are expressed in terms of the preceding write. Hence, reads +/// before the first write cannot be expressed by this function. +/// +/// @param Schedule { Domain[] -> Scatter[] } +/// The schedule of (at least) all statement instances +/// occurring in @p Writes or @p Reads. All other +/// instances will be ignored. +/// @param Writes { DomainDef[] -> Element[] } +/// Elements written to by the statement instances. +/// @param Reads { DomainRead[] -> Element[] } +/// Elements read from by the statement instances. +/// @param ReadEltInSameInst Whether a load will read the value from a write +/// that is scheduled at the same timepoint (Writes +/// happen before reads). 
+/// @param InclWrite Whether to also include the timepoint where a value +/// is written to the lifetime. If enabled for the +/// example, it changes to +/// { [A[5] -> Def[]] -> [i] : 0 <= i < 10 }. +/// @param InclLastRead Whether to also include the timepoint where with +/// the last use to the lifetime. If enabled for the +/// example, it changes to +/// { [A[5] -> Def[]] -> [i] : 0 < i <= 10 }. +/// @param ExitReads Whether to extend the lifetimes that are not +/// overwritten into infinity. This corresponds to the +/// assumption that the values must be available after +/// the scop. If enabled, the example changes to +/// { [A[5] -> Def[]] -> [i] : 0 < i } +/// +/// @return { [Element[] -> DomainWrite[]] -> Zone[] } +IslPtr computeArrayLifetime(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool InclWrite, bool InclLastRead, + bool ExitReads); + +/// Compute the next overwrite for each array element. +/// +/// This is computeReachingDefinition() "in reverse"; Instead of looking to the +/// most recent write to an element, look for the next (over)write. For example: +/// +/// Schedule := { Write[] -> [0]; Overwrite[] -> [10] } +/// Writes := { Write[] -> A[5]; Overwrite[] -> A[5] } +/// +/// will result in: +/// +/// { [A[5] -> [i]] -> Write[] : i < 0; +/// [A[5] -> [i]] -> Overwrite[] : 0 < i < 10 } +/// +/// That is, A[5] will be overwritten next by Write[] when before timepoint 0, +/// or by Overwrite[] when between timepoints 0 and 10. Use InclPrevWrite=false +/// and InclOverwrite=true to interpret the result as a Zone. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all array writes. Instances not +/// in @p Writes will be ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. 
+/// @param InclPrevWrite Whether to extend the overwrite timepoints to include +/// the timepoint where the previous write happens (the +/// previous write would happen at the beginning of its +/// timepoint). In this example, this adds +/// { [A[5] -> [0]] -> Overwrite[] } to the result. +/// @param InclOverwrite Whether the reaching overwrite includes the timepoint +/// of the overwrite itself (so the overwrite would happen +/// at the end of its timepoint). In the example, this +/// adds +/// { [A[5] -> [0]] -> Write[]; +/// [A[5] -> [10]] -> Overwrite[] } +/// to the result. +/// +/// @return { [Element[] -> Scatter[]] -> DomainWrite[] } +/// The reaching overwrite as defined above, or nullptr if either @p +/// Schedule or @p Writes is nullptr, or the ISL max operations count +/// has been exceeded. +IslPtr computeReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, + bool InclOverwrite); + +/// Compute the timepoints where the contents of an array element are not used. +/// +/// An element is unused at a timepoint when the element will be overwritten in +/// the future, but there is no read in between. Another way to express this: the +/// time from when the element is written, to the most recent read before it, or +/// infinitely into the past if there is no read before. Such unused elements can +/// be overwritten by any value without changing the scop's semantics. An +/// example: +/// +/// Schedule := { Read[] -> [0]; Write[] -> [10]; Def[] -> [20] } +/// Writes := { Write[] -> A[5]; Def[] -> A[6] } +/// Reads := { Read[] -> A[5] } +/// +/// The result will be: +/// +/// { A[5] -> [i] : 0 < i < 10; +/// A[6] -> [i] : i < 20 } +/// +/// That is, A[5] is unused between timepoint 0 (the read) and timepoint 10 (the +/// write). A[6] is unused before timepoint 20, but might be used after the +/// scop's execution (A[5] and any other A[i] as well). Use InclLastRead=false +/// and InclWrite=true to interpret the result as zone. 
+/// +/// @param Schedule { Domain[] -> Scatter[] } +/// The schedule of (at least) all statement instances +/// occurring in @p Writes or @p Reads. All other +/// instances will be ignored. +/// @param Writes { DomainWrite[] -> Element[] } +/// Elements written to by the statement instances. +/// @param Reads { DomainRead[] -> Element[] } +/// Elements read from by the statement instances. +/// @param ReadEltInSameInst Whether a load will read the value from a write +/// that is scheduled at the same timepoint (Writes +/// happen before reads). Otherwise, loads will use the +/// value of an element it had before the timepoint +/// (Reads before writes). For example: +/// { Read[] -> [0]; Write[] -> [0] } +/// With ReadEltInSameInst=false it is assumed that the +/// read happens before the write, such that the +/// element is never unused, or just at timepoint 0, +/// depending on InclLastRead/InclWrite. +/// With ReadEltInSameInst=true it assumes that the +/// value just written will be used. Anything before +/// timepoint 0 will be considered unused. +/// @param InclLastRead Whether the timepoint where an element is last read +/// counts as unused (the read happens at the beginning +/// of its timepoint, and nothing (else) can use it +/// during the timepoint). In the example, this option +/// adds { A[5] -> [0] } to the result. +/// @param InclWrite Whether the timepoint where an element is written +/// itself counts as unused (the write happens at the +/// end of its timepoint; no (other) operation uses +/// the element during the timepoint). In this example, +/// this adds +/// { A[5] -> [10]; A[6] -> [20] } to the result. +/// +/// @return { Element[] -> Scatter[] } +/// The unused timepoints as defined above, or nullptr if either @p +/// Schedule, @p Writes or @p Reads is nullptr, or the ISL max +/// operations count is exceeded. 
+IslPtr computeArrayUnused(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool InclLastRead, bool InclWrite); + +/// Determine whether two lifetimes are conflicting. +/// +/// Used by unittesting. +/// +/// @param ExistingLifetime { [Element[] -> Zone[]] -> ValInst[] } +/// @param ExistingImplicitLifetimeIsUnknown +/// @param ExistingWrites { [Element[] -> Scatter[]] -> ValInst[] } +/// @param ProposedLifetime { [Element[] -> Zone[]] -> ValInst[] } +/// @param ProposedImplicitLifetimeIsUnknown +/// @param ProposedWrites { [Element[] -> Scatter[]] -> ValInst[] } +/// +/// @return False, iff the lifetimes and writes can be merged. +/// +/// @see polly::Knowledge +bool isConflicting(IslPtr ExistingLifetime, + bool ExistingImplicitLifetimeIsUnknown, + IslPtr ExistingWrites, + IslPtr ProposedLifetime, + bool ProposedImplicitLifetimeIsUnknown, + IslPtr ProposedWrites); } // namespace polly namespace llvm { Index: include/polly/Support/GICHelper.h =================================================================== --- include/polly/Support/GICHelper.h +++ include/polly/Support/GICHelper.h @@ -18,6 +18,7 @@ #include "llvm/Support/raw_ostream.h" #include "isl/aff.h" #include "isl/ctx.h" +#include "isl/id.h" #include "isl/map.h" #include "isl/options.h" #include "isl/set.h" Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -120,7 +120,7 @@ static cl::opt UnprofitableScalarAccs( "polly-unprofitable-scalar-accs", cl::desc("Count statements with scalar accesses as not optimizable"), - cl::Hidden, cl::init(true), cl::cat(PollyCategory)); + cl::Hidden, cl::init(false), cl::cat(PollyCategory)); //===----------------------------------------------------------------------===// Index: lib/External/isl/imath/gmp_compat.c =================================================================== --- lib/External/isl/imath/gmp_compat.c +++ 
lib/External/isl/imath/gmp_compat.c @@ -688,7 +688,7 @@ int i, j; int num_used_bytes; size_t num_words, num_missing_bytes; - ssize_t word_offset; + ptrdiff_t word_offset; unsigned char* dst; mp_digit* src; int src_bits; @@ -767,7 +767,7 @@ mp_int tmp = &tmpz; size_t total_size; size_t num_digits; - ssize_t word_offset; + ptrdiff_t word_offset; const unsigned char *src; mp_digit *dst; int dst_bits; Index: lib/Support/GICHelper.cpp =================================================================== --- lib/Support/GICHelper.cpp +++ lib/Support/GICHelper.cpp @@ -177,6 +177,7 @@ replace(str, "\"", "_"); replace(str, " ", "__"); replace(str, "=>", "TO"); + replace(str, "+", "_"); } std::string polly::getIslCompatibleName(const std::string &Prefix, Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -163,7 +163,7 @@ static cl::opt EnableDeLICM("polly-enable-delicm", cl::desc("Eliminate scalar loop carried dependences"), - cl::Hidden, cl::init(false), cl::cat(PollyCategory)); + cl::Hidden, cl::init(true), cl::cat(PollyCategory)); namespace polly { void initializePollyPasses(PassRegistry &Registry) { Index: lib/Transform/DeLICM.cpp =================================================================== --- lib/Transform/DeLICM.cpp +++ lib/Transform/DeLICM.cpp @@ -13,23 +13,2712 @@ // Namely, remove register/scalar dependencies by mapping them back to array // elements. // +// The algorithms here work on the scatter space - the image space of the +// schedule returned by Scop::getSchedule(). We call an element in that space a +// "timepoint". Timepoints are lexicographically ordered such that we can +// defined ranges in the scatter space. We use two flavors of such ranges: +// Timepoint sets and zones. A timepoint set is simply a subset of the scatter +// space and is directly stored as isl_set. 
+// +// Zones are used to describe the space between timepoints as open sets, ie.. +// they do not contain the extrema. Using ISL rational sets to express these +// would be overkill. We also cannot store them as the integer timepoints they +// contain; the (nonempty) zone between 1 and 2 would be empty and not +// differentiable from eg. the zone between 3 and 4. Also, we cannot store the +// integer set including the extrema; the set ]1,2[ + ]3,4[ could be coalesced +// to ]1,3[, although we defined the range [2,3] to be not in the set. Instead, +// we store the "half-open" integer extrema, including the lower bound, but +// excluding the upper bound. Examples: +// +// * The set { [i] : 1 <= i <= 3 } represents the zone ]0,3[ (which contains the +// integer points 1 and 2, but not 0 or 3) +// +// * { [1] } represents the zone ]0,1[ +// +// * { [i] : i = 1 or i = 3 } represents the zone ]0,1[ + ]2,3[ +// +// Therefore, an integer i in the set represents the zone ]i-1,i[, ie. strictly +// speaking the integer points never belong to the zone. However, depending an +// the interpretation, one might want to include them: +// +// * The timepoints adjacent to two unit zones: zoneToInsideInstances() +// +// * The timepoints before a unit zone begins: +// shiftDim(, isl_dim_in, -1, 1) +// +// * The timepoints that directly follow a unit zone: Reinterpret the zone as +// a set of timepoints. +// +// It sometimes helps think about a value of i stored in an isl_set to represent +// the timepoint i-0.5 between two integer-valued timepoints. +// @see zoneToInsideInstances() +// +// +// The code makes use of maps and sets in many different spaces. To not loose +// track in which space a set or map is expected to be in, variables holding an +// ISL reference are usually annotated in the comments. They roughly follow ISL +// syntax for spaces, but only the tuples, not the dimensions. The tuples have a +// meaning as follows: +// +// * Space[] - An unspecified tuple. 
Used for function parameters such that the +// function caller can use it for anything they like. +// +// * Domain[] - A statement instance as returned by ScopStmt::getDomain() +// isl_id_get_name: Stmt_ +// isl_id_get_user: Pointer to ScopStmt +// +// * Element[] - An array element as in the range part of +// MemoryAccess::getAccessRelation() +// isl_id_get_name: MemRef_ +// isl_id_get_user: Pointer to ScopArrayInfo +// +// * Scatter[] - Scatter space or space of timepoints +// Has no tuple id +// +// * Zone[] - Range between timepoints as described above +// Has no tuple id +// +// * ValInst[] - An llvm::Value as defined at a specific timepoint. +// A ValInst[] itself can be structured as one of: +// * [] - An unknown value +// Always zero dimensions +// Has no tuple id +// * Undef[] - Value is not used +// Always zero dimensions +// isl_id_get_name: Undef +// isl_id_get_user: A pointer to an llvm::UndefValue +// * Value[] - An llvm::Value that is read-only in the Scop, ie. its +// runtime content does not depend on the timepoint. +// Always zero dimensions +// isl_id_get_name: Val_ +// isl_id_get_user: A pointer to an llvm::Value +// * [Domain[] -> Value[]] - An llvm::Value that may change during the +// Scop's execution +// The tuple itself has no id, but it wraps a map space holding a +// statement instance which defines the llvm::Value as the map's domain +// and llvm::Value itself as range. +// @see makeValInst() +// +// An annotation "{ Domain[] -> Scatter[] }" therefore means: A map from a +// statement instance to a timepoint, aka. a schedule. There is only one scatter +// space, but most of the time multiple statements are processed in one set. +// This is why most of the time isl_union_map has to be used. 
+// //===----------------------------------------------------------------------===// -#include "polly/DeLICM.h" -#include "polly/ScopInfo.h" -#include "polly/ScopPass.h" -#define DEBUG_TYPE "polly-delicm" +#include "polly/DeLICM.h" +#include "polly/Options.h" +#include "polly/ScopInfo.h" +#include "polly/ScopPass.h" +#include "llvm/ADT/Statistic.h" +#define DEBUG_TYPE "polly-delicm" + +using namespace polly; +using namespace llvm; + +namespace { + +cl::opt + DelicmMaxOps("polly-delicm-max-ops", + cl::desc("Maximum number of ISL operations to invest for " + "lifetime analysis; 0=no limit"), + cl::init(1000000), cl::cat(PollyCategory)); + +STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); +STATISTIC(DeLICMOutOfQuota, + "Analyses aborted because max_operations was reached"); +STATISTIC(DeLICMIncompatible, "Number of SCoPs incompatible for analysis"); +STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); +STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); +STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); +STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); + +/// Class for keeping track of scalar def-use chains in the polyhedral +/// representation. +/// +/// MK_Value: There is one definition in SCoP and an arbitrary number of reads. +/// +/// MK_PHI, MK_ExitPHI: There is at least one write (the incoming blocks/stmts) +/// and one (MK_PHI) or zero (MK_ExitPHI) reads. +class ScalarDefUseChains { +private: + /// The definitions/write MemoryAccess of an MK_Value scalar. + /// + /// Note that read-only values have no value-defining write access. + DenseMap ValueDefAccs; + + /// List of all uses/read MemoryAccesses for an MK_Value scalar. + DenseMap> ValueUseAccs; + + /// The PHI/read MemoryAccess of an MK_PHI/MK_ExitPHI scalar. + DenseMap PHIReadAccs; + + /// List of all incoming values/writes of an MK_PHI/MK_ExitPHI scalar. 
+ DenseMap> + PHIIncomingAccs; + +public: + /// Find the MemoryAccesses that access the ScopArrayInfo-represented memory. + /// + /// @param S The SCoP to analyze. + void compute(Scop *S) { + // Purge any previous result. + reset(); + + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (MA->isOriginalValueKind() && MA->isWrite()) { + auto *SAI = MA->getScopArrayInfo(); + assert(!ValueDefAccs.count(SAI) && + "There can be at most one definition per MK_Value scalar"); + ValueDefAccs[SAI] = MA; + } + + if (MA->isOriginalValueKind() && MA->isRead()) + ValueUseAccs[MA->getScopArrayInfo()].push_back(MA); + + if (MA->isOriginalAnyPHIKind() && MA->isRead()) { + auto *SAI = MA->getScopArrayInfo(); + assert(!PHIReadAccs.count(SAI) && "There must be exactly one read " + "per PHI (that's where the PHINode " + "is)"); + PHIReadAccs[SAI] = MA; + } + + if (MA->isOriginalAnyPHIKind() && MA->isWrite()) + PHIIncomingAccs[MA->getScopArrayInfo()].push_back(MA); + } + } + } + + /// Free all memory used by the analysis. + void reset() { + ValueDefAccs.clear(); + ValueUseAccs.clear(); + PHIReadAccs.clear(); + PHIIncomingAccs.clear(); + } + + MemoryAccess *getValueDef(const ScopArrayInfo *SAI) const { + return ValueDefAccs.lookup(SAI); + } + + ArrayRef getValueUses(const ScopArrayInfo *SAI) const { + auto It = ValueUseAccs.find(SAI); + if (It == ValueUseAccs.end()) + return {}; + return It->second; + } + + MemoryAccess *getPHIRead(const ScopArrayInfo *SAI) const { + return PHIReadAccs.lookup(SAI); + } + + ArrayRef getPHIIncomings(const ScopArrayInfo *SAI) const { + auto It = PHIIncomingAccs.find(SAI); + if (It == PHIIncomingAccs.end()) + return {}; + return It->second; + } +}; + +/// Return the range elements that are lexicographically smaller. +/// +/// @param Map { Space[] -> Scatter[] } +/// @param Strict True for strictly lexicographically smaller elements. 
(exclude +/// same timepoints from the result) +/// +/// @return { Space[] -> Scatter[] } +/// A map to all timepoints that happen before the timepoints the input +/// mapped to. +IslPtr beforeScatter(IslPtr Map, bool Strict) { + auto RangeSpace = give(isl_space_range(isl_map_get_space(Map.keep()))); + auto ScatterRel = give(Strict ? isl_map_lex_gt(RangeSpace.take()) + : isl_map_lex_ge(RangeSpace.take())); + return give(isl_map_apply_range(Map.take(), ScatterRel.take())); +} + +/// Piecewise beforeScatter(IslPtr,bool). +IslPtr beforeScatter(IslPtr UMap, bool Strict) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto After = beforeScatter(Map, Strict); + Result = give(isl_union_map_add_map(Result.take(), After.take())); + }); + return Result; +} + +/// Return the range elements that are lexicographically larger. +/// +/// @param Map { Space[] -> Scatter[] } +/// @param Strict True for strictly lexicographically larger elements. (exclude +/// same timepoints from the result) +/// +/// @return { Space[] -> Scatter[] } +/// A map to all timepoints that happen after the timepoints the input +/// map originally mapped to. +IslPtr afterScatter(IslPtr Map, bool Strict) { + auto RangeSpace = give(isl_space_range(isl_map_get_space(Map.keep()))); + auto ScatterRel = give(Strict ? isl_map_lex_lt(RangeSpace.take()) + : isl_map_lex_le(RangeSpace.take())); + return give(isl_map_apply_range(Map.take(), ScatterRel.take())); +} + +/// Piecewise afterScatter(IslPtr,bool). +IslPtr afterScatter(NonowningIslPtr UMap, + bool Strict) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto After = afterScatter(Map, Strict); + Result = give(isl_union_map_add_map(Result.take(), After.take())); + }); + return Result; +} + +/// Construct a range of timepoints between two timepoints. 
+/// +/// Example: +/// From := { A[] -> [0]; B[] -> [0] } +/// To := { B[] -> [10]; C[] -> [20] } +/// +/// Result: +/// { B[] -> [i] : 0 < i < 10 } +/// +/// Note that A[] and C[] are not in the result because they do not have a start +/// or end timepoint. If a start (or end) timepoint is not unique, the first +/// (respectively last) is chosen. +/// +/// @param From { Space[] -> Scatter[] } +/// Map to start timepoints. +/// @param To { Space[] -> Scatter[] } +/// Map to end timepoints. +/// @param InclFrom Whether to include the start timepoints to the result. In +/// the example, this would add { B[] -> [0] } +/// @param InclTo Whether to include the end timepoints to the result. In this +/// example, this would add { B[] -> [10] } +/// +/// @return { Space[] -> Scatter[] } +/// A map for each domain element of timepoints between two extreme +/// points, or nullptr if @p From or @p To is nullptr, or the ISL max +/// operations is exceeded. +IslPtr betweenScatter(IslPtr From, IslPtr To, + bool InclFrom, bool InclTo) { + auto AfterFrom = afterScatter(From, !InclFrom); + auto BeforeTo = beforeScatter(To, !InclTo); + + return give(isl_map_intersect(AfterFrom.take(), BeforeTo.take())); +} + +/// Piecewise betweenScatter(IslPtr,IslPtr,bool,bool). +IslPtr betweenScatter(IslPtr From, + IslPtr To, bool IncludeFrom, + bool IncludeTo) { + auto AfterFrom = afterScatter(From, !IncludeFrom); + auto BeforeTo = beforeScatter(To, !IncludeTo); + + return give(isl_union_map_intersect(AfterFrom.take(), BeforeTo.take())); +} + +/// If by construction a union map is known to contain only a single map, return +/// it. +/// +/// This function combines isl_map_from_union_map() and +/// isl_union_map_extract_map(). isl_map_from_union_map() fails if the map is +/// empty because it doesn't not know which space it would be in. +/// isl_union_map_extract_map() on the other hand does not check whether there +/// is (at most) one isl_map in the union, ie. 
how it has been constructed is +/// probably wrong. +IslPtr singleton(IslPtr UMap, + IslPtr ExpectedSpace) { + if (!UMap) + return nullptr; + + if (isl_union_map_n_map(UMap.keep()) == 0) + return give(isl_map_empty(ExpectedSpace.take())); + + auto Result = give(isl_map_from_union_map(UMap.take())); + assert( + !Result || + isl_space_has_equal_tuples(give(isl_map_get_space(Result.keep())).keep(), + ExpectedSpace.keep()) == isl_bool_true); + return Result; +} + +/// If by construction an isl_union_set is known to contain only a single +/// isl_set, return it. +/// +/// This function combines isl_set_from_union_set() and +/// isl_union_set_extract_set(). isl_map_from_union_set() fails if the set is +/// empty because it doesn't not know which space it would be in. +/// isl_union_set_extract_set() on the other hand does not check whether there +/// is (at most) one isl_set in the union, ie. how it has been constructed is +/// probably wrong. +IslPtr singleton(IslPtr USet, + IslPtr ExpectedSpace) { + if (!USet) + return nullptr; + + if (isl_union_set_n_set(USet.keep()) == 0) + return give(isl_set_empty(ExpectedSpace.copy())); + + auto Result = give(isl_set_from_union_set(USet.take())); + assert( + !Result || + isl_space_has_equal_tuples(give(isl_set_get_space(Result.keep())).keep(), + ExpectedSpace.keep()) == isl_bool_true); + return Result; +} + +/// Determine how many dimensions the scatter space of @p Schedule has. +/// +/// The schedule must not be empty and have equal number of dimensions of any +/// subspace it contains. +/// +/// The implementation currently returns the maximum number of dimensions it +/// encounters, if different, and 0 if none is encountered. However, most other +/// code will most likely fail if one of these happen. 
+unsigned getNumScatterDims(NonowningIslPtr Schedule) { + unsigned Dims = 0; + foreachElt(Schedule, [=, &Dims](IslPtr Map) { + Dims = std::max(Dims, isl_map_dim(Map.keep(), isl_dim_out)); + }); + return Dims; +} + +/// Return the scatter space of a @p Schedule. +/// +/// This is basically the range space of the schedule map, but harder to +/// determine because it is an isl_union_map. +IslPtr getScatterSpace(NonowningIslPtr Schedule) { + if (!Schedule) + return nullptr; + auto Dims = getNumScatterDims(Schedule); + auto ScatterSpace = + give(isl_space_set_from_params(isl_union_map_get_space(Schedule.keep()))); + return give(isl_space_add_dims(ScatterSpace.take(), isl_dim_set, Dims)); +} + +/// Construct an identity map for the given domain values. +/// +/// There is no type resembling isl_union_space, hence we have to pass an +/// isl_union_set as the map's domain and range space. +/// +/// @param USet { Space[] } +/// The returned map's domain and range. +/// @param RestrictDomain If true, the returned map only maps elements contained +/// in @p USet and no other. If false, it returns an +/// overapproximation with the identity maps of any space +/// in @p USet, not just the elements in it. +/// +/// @return { Space[] -> Space[] } +/// A map that maps each value of @p USet to itself. +IslPtr makeIdentityMap(NonowningIslPtr USet, + bool RestrictDomain) { + auto Result = give(isl_union_map_empty(isl_union_set_get_space(USet.keep()))); + foreachElt(USet, [=, &Result](IslPtr Set) { + auto IdentityMap = give(isl_map_identity( + isl_space_map_from_set(isl_set_get_space(Set.keep())))); + if (RestrictDomain) + IdentityMap = + give(isl_map_intersect_domain(IdentityMap.take(), Set.take())); + Result = give(isl_union_map_add_map(Result.take(), IdentityMap.take())); + }); + return Result; +} + +/// Constructs a map that swaps two nested tuples. 
+/// +/// @param FromSpace1 { Space1[] } +/// @param FromSpace2 { Space2[] } +/// +/// @result { [Space1[] -> Space2[]] -> [Space2[] -> Space1[]] } +IslPtr makeTupleSwapBasicMap(IslPtr FromSpace1, + IslPtr FromSpace2) { + assert(isl_space_is_set(FromSpace1.keep()) != isl_bool_false); + assert(isl_space_is_set(FromSpace2.keep()) != isl_bool_false); + + auto Dims1 = isl_space_dim(FromSpace1.keep(), isl_dim_set); + auto Dims2 = isl_space_dim(FromSpace2.keep(), isl_dim_set); + auto FromSpace = give(isl_space_wrap(isl_space_map_from_domain_and_range( + FromSpace1.copy(), FromSpace2.copy()))); + auto ToSpace = give(isl_space_wrap(isl_space_map_from_domain_and_range( + FromSpace2.take(), FromSpace1.take()))); + auto MapSpace = give( + isl_space_map_from_domain_and_range(FromSpace.take(), ToSpace.take())); + + auto Result = give(isl_basic_map_universe(MapSpace.take())); + for (auto i = Dims1 - Dims1; i < Dims1; i += 1) { + Result = give(isl_basic_map_equate(Result.take(), isl_dim_in, i, + isl_dim_out, Dims2 + i)); + } + for (auto i = Dims2 - Dims2; i < Dims2; i += 1) { + Result = give(isl_basic_map_equate(Result.take(), isl_dim_in, Dims1 + i, + isl_dim_out, i)); + } + + return Result; +} + +/// Like makeTupleSwapBasicMap(IslPtr,IslPtr), but returns +/// an isl_map. +IslPtr makeTupleSwapMap(IslPtr FromSpace1, + IslPtr FromSpace2) { + auto BMapResult = + makeTupleSwapBasicMap(std::move(FromSpace1), std::move(FromSpace2)); + return give(isl_map_from_basic_map(BMapResult.take())); +} + +/// Reverse the nested map tuple in @p Map's domain. 
+/// +/// @param Map { [Space1[] -> Space2[]] -> Space3[] } +/// +/// @return { [Space2[] -> Space1[]] -> Space3[] } +IslPtr reverseDomain(IslPtr Map) { + auto DomSpace = + give(isl_space_unwrap(isl_space_domain(isl_map_get_space(Map.keep())))); + auto Space1 = give(isl_space_domain(DomSpace.copy())); + auto Space2 = give(isl_space_range(DomSpace.take())); + auto Swap = makeTupleSwapMap(std::move(Space1), std::move(Space2)); + return give(isl_map_apply_domain(Map.take(), Swap.take())); +} + +/// Piecewise reverseDomain(IslPtr). +IslPtr reverseDomain(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + auto Reversed = reverseDomain(std::move(Map)); + Result = give(isl_union_map_add_map(Result.take(), Reversed.take())); + }); + return Result; +} + +/// Compute the next overwrite for a scalar. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// Schedule of (at least) all writes. Instances not in @p +/// Writes will be ignored. +/// @param Writes { DomainWrite[] } +/// The element instances that write to the scalar. +/// @param InclPrevWrite Whether to extend an overwrite timepoints to include +/// the timepoint where the previous write happens. +/// @param InclOverwrite Whether the reaching overwrite includes the timepoint +/// of the overwrite itself. +/// +/// @return { Scatter[] -> DomainDef[] } +IslPtr +computeScalarReachingOverwrite(IslPtr Schedule, + IslPtr Writes, bool InclPrevWrite, + bool InclOverwrite) { + + // { DomainWrite[] } + auto WritesMap = give(isl_union_map_from_domain(Writes.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto Result = computeReachingOverwrite( + std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); + + return give(isl_union_map_domain_factor_range(Result.take())); +} + +/// Overload of computeScalarReachingOverwrite, with only one writing statement. 
+/// Consequently, the result consists of only one map space.
+///
+/// @param Schedule      { DomainWrite[] -> Scatter[] }
+/// @param Writes        { DomainWrite[] }
+/// @param InclPrevWrite Include the previous write to result.
+/// @param InclOverwrite Include the overwrite to the result.
+///
+/// @return { Scatter[] -> DomainWrite[] }
+IslPtr<isl_map> computeScalarReachingOverwrite(IslPtr<isl_union_map> Schedule,
+                                               IslPtr<isl_set> Writes,
+                                               bool InclPrevWrite,
+                                               bool InclOverwrite) {
+  auto ScatterSpace = getScatterSpace(Schedule);
+  auto DomSpace = give(isl_set_get_space(Writes.keep()));
+
+  auto ReachOverwrite = computeScalarReachingOverwrite(
+      Schedule, give(isl_union_set_from_set(Writes.take())), InclPrevWrite,
+      InclOverwrite);
+
+  auto ResultSpace = give(isl_space_map_from_domain_and_range(
+      ScatterSpace.take(), DomSpace.take()));
+  return singleton(std::move(ReachOverwrite), ResultSpace);
+}
+
+/// Compute the reaching definition of a scalar.
+///
+/// Compared to computeReachingDefinition, there is just one element which is
+/// accessed and therefore doesn't need to be specified.
+///
+/// @param Schedule  { DomainWrite[] -> Scatter[] }
+/// @param Writes    { DomainWrite[] }
+/// @param InclDef   Include the timepoint of the definition to the result.
+/// @param InclRedef Include the timepoint of the overwrite into the result.
+///
+/// @return { Scatter[] -> DomainWrite[] }
+IslPtr<isl_union_map>
+computeScalarReachingDefinition(IslPtr<isl_union_map> Schedule,
+                                IslPtr<isl_union_set> Writes, bool InclDef,
+                                bool InclRedef) {
+
+  // { DomainWrite[] -> Element[] }
+  auto Defs = give(isl_union_map_from_domain(Writes.take()));
+
+  // { [Element[] -> Scatter[]] -> DomainWrite[] }
+  auto ReachDefs =
+      computeReachingDefinition(Schedule, Defs, InclDef, InclRedef);
+
+  // { Scatter[] -> DomainWrite[] }
+  return give(isl_union_set_unwrap(
+      isl_union_map_range(isl_union_map_curry(ReachDefs.take()))));
+}
+
+/// Compute the reaching definition of a scalar.
+/// +/// This overload accepts only a single writing statement as an isl_map, +/// consequently the result also is only a single isl_map. +/// +/// @param Schedule { DomainWrite[] -> Scatter[] } +/// @param Writes { DomainWrite[] } +/// @param InclDef Include the timepoint of the definition to the result. +/// @param InclRedef Include the timepoint of the overwrite into the result. +/// +/// @return { Scatter[] -> DomainWrite[] } +IslPtr computeScalarReachingDefinition( // { Domain[] -> Zone[] } + IslPtr Schedule, IslPtr Writes, bool InclDef, + bool InclRedef) { + auto DomainSpace = give(isl_set_get_space(Writes.keep())); + auto ScatterSpace = getScatterSpace(Schedule); + + // { Scatter[] -> DomainWrite[] } + auto UMap = computeScalarReachingDefinition( + Schedule, give(isl_union_set_from_set(Writes.take())), InclDef, + InclRedef); + + auto ResultSpace = give(isl_space_map_from_domain_and_range( + ScatterSpace.take(), DomainSpace.take())); + return singleton(UMap, ResultSpace); +} + +/// Create a map that shifts one dimension by an offset. +/// +/// Example: +/// makeShiftDimAff({ [i0, i1] -> [o0, o1] }, 1, -2) +/// = { [i0, i1] -> [i0, i1 - 1] } +/// +/// @param Space The map space of the result. Must have equal number of in- and +/// out-dimensions. +/// @param Pos Position to shift. +/// @param Amount Value added to the shifted dimension. +/// +/// @return An isl_multi_aff for the map with this shifted dimension. +IslPtr makeShiftDimAff(IslPtr Space, int Pos, + int Amount) { + auto Identity = give(isl_multi_aff_identity(Space.take())); + if (Amount == 0) + return Identity; + auto ShiftAff = give(isl_multi_aff_get_aff(Identity.keep(), Pos)); + ShiftAff = give(isl_aff_set_constant_si(ShiftAff.take(), Amount)); + return give(isl_multi_aff_set_aff(Identity.take(), Pos, ShiftAff.take())); +} + +/// Add a constant to one dimension of a map. +/// +/// @param Map The map to shift a dimension in. +/// @param Type A tuple of @p Map which contains the dimension to shift. 
+/// @param Pos The dimension to shift. If negative, the dimensions are +/// counted from the end instead from the beginning. Eg. -1 is the last +/// dimension in the tuple. +/// @param Amount The offset to add to the specified dimension. +/// +/// @return The modified map. +IslPtr shiftDim(IslPtr Map, isl_dim_type Type, int Pos, + int Amount) { + assert((Type == isl_dim_in || Type == isl_dim_out) && + "Cannot shift parameter dims"); + int NumDims = isl_map_dim(Map.keep(), Type); + if (Pos < 0) + Pos = NumDims + Pos; + assert(Pos < NumDims && "Dimension index must be in range"); + auto Space = give(isl_map_get_space(Map.keep())); + Space = give((Type == isl_dim_in) ? isl_space_domain(Space.take()) + : isl_space_range(Space.take())); + Space = give(isl_space_map_from_domain_and_range(Space.copy(), Space.copy())); + auto Translator = makeShiftDimAff(std::move(Space), Pos, Amount); + auto TranslatorMap = give(isl_map_from_multi_aff(Translator.take())); + return give((Type == isl_dim_in) + ? isl_map_apply_domain(Map.take(), TranslatorMap.take()) + : isl_map_apply_range(Map.take(), TranslatorMap.take())); +} + +/// Add a constant to one dimension of a each map in a union map. +/// +/// @param UMap The maps to shift a dimension in. +/// @param Type The tuple which contains the dimension to shift. +/// @param Pos The dimension to shift. If negative, the dimensions are +/// counted from the ends of each map of union instead from their beginning. Eg. +/// -1 is the last dimension of any map. +/// @param Amount The offset to add to the specified dimension. +/// +/// @return The union of all modified maps. 
+IslPtr<isl_union_map> shiftDim(IslPtr<isl_union_map> UMap, isl_dim_type Type,
+                               int Pos, int Amount) {
+  auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep())));
+  foreachElt(UMap, [=, &Result](IslPtr<isl_map> Map) {
+    auto Shifted = shiftDim(Map, Type, Pos, Amount);
+    Result = give(isl_union_map_add_map(Result.take(), Shifted.take()));
+  });
+  return Result;
+}
+
+/// Simplify a map inplace.
+void simplify(IslPtr<isl_map> &Map) {
+  Map = give(isl_map_compute_divs(Map.take()));
+  Map = give(isl_map_detect_equalities(Map.take()));
+  Map = give(isl_map_coalesce(Map.take()));
+}
+
+/// Simplify a union map inplace.
+void simplify(IslPtr<isl_union_map> &Map) {
+  Map = give(isl_union_map_compute_divs(Map.take()));
+  Map = give(isl_union_map_detect_equalities(Map.take()));
+  Map = give(isl_union_map_coalesce(Map.take()));
+}
+
+/// If InputVal is not defined in the stmt itself, return the MemoryAccess that
+/// reads the scalar. Return nullptr otherwise (if the value is defined in the
+/// scop, or is synthesizable).
+MemoryAccess *getInputAccessOf(Value *InputVal, ScopStmt *Stmt) {
+  for (auto *MA : *Stmt) {
+    if (!MA->isRead())
+      continue;
+    if (!MA->isLatestScalarKind())
+      continue;
+
+    assert(MA->getAccessValue() == MA->getBaseAddr());
+    if (MA->getAccessValue() == InputVal)
+      return MA;
+  }
+  return nullptr;
+}
+
+/// Try to find a 'natural' extension of a mapping to elements outside its
+/// domain.
+///
+/// @param Relevant The map with the mapping that may not be modified.
+/// @param Universe The domain to which @p Relevant needs to be extended.
+///
+/// @return A map that associates the domain elements of @p Relevant with the
+///         same elements and in addition the elements of @p Universe to some
+///         undefined elements. The function prefers to return simple maps.
+IslPtr expandMapping(IslPtr Relevant, + IslPtr Universe) { + Relevant = give(isl_union_map_coalesce(Relevant.take())); + auto RelevantDomain = give(isl_union_map_domain(Relevant.copy())); + auto Simplified = + give(isl_union_map_gist_domain(Relevant.take(), RelevantDomain.take())); + Simplified = give(isl_union_map_coalesce(Simplified.take())); + return give( + isl_union_map_intersect_domain(Simplified.take(), Universe.take())); +} + +/// Determine whether an access touches at most one element. +/// +/// The accessed element could be a scalar or accessing an array with constant +/// subscript, st. all instances access only that element. +/// +/// @param Map { Domain[] -> Element[] } +/// The access's access relation. +/// +/// @return True, if zero or one elements are accessed; False if at least two +/// different elements are accessed. +bool isScalarAccess(IslPtr Map) { + auto Set = give(isl_map_range(Map.take())); + return isl_set_is_singleton(Set.keep()); +} + +/// Return whether @p Map maps to llvm::Undef. +/// +/// @param Map { [] -> ValInst[] } +bool isMapToUndef(NonowningIslPtr Map) { + if (!isl_map_has_tuple_id(Map.keep(), isl_dim_out)) + return false; + + auto Id = give(isl_map_get_tuple_id(Map.keep(), isl_dim_out)); + auto Val = static_cast(isl_id_get_user(Id.keep())); + return Val && isa(Val); +} + +/// Return whether @p Map maps to an unknown value. +/// +/// @param { [] -> ValInst[] } +bool isMapToUnknown(NonowningIslPtr Map) { + auto Space = give(isl_space_range(isl_map_get_space(Map.keep()))); + return !isl_map_has_tuple_id(Map.keep(), isl_dim_set) && + !isl_space_is_wrapping(Space.keep()); +} + +/// Remove unknown values from the mapping, leaving only mappings to +/// llvm::Value's and llvm::Undef. 
+/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +IslPtr +removeUnknownValInst(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (!isMapToUnknown(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + }); + return Result; +} + +/// Return the domain of everything that maps to an unknown value. +/// +/// @param UMap { Domain[] -> ValInst[] } +/// +/// @return { Domain[] } +IslPtr +getUnknownValInstDomain(NonowningIslPtr UMap) { + auto Result = give(isl_union_set_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (isMapToUnknown(Map)) + Result = give( + isl_union_set_add_set(Result.take(), isl_map_domain(Map.take()))); + }); + return Result; +} + +/// Return the domain of everything that maps to Undef. +/// +/// @param UMap { Domain[] -> ValInst[] } +/// +/// @return { Domain[] } +IslPtr +getUndefValInstDomain(NonowningIslPtr UMap) { + auto Result = give(isl_union_set_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (isMapToUndef(Map)) + Result = give( + isl_union_set_add_set(Result.take(), isl_map_domain(Map.take()))); + }); + return Result; +} + +/// Remove everything that maps to llvm::Undef. +/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +IslPtr removeUndefValInst(NonowningIslPtr UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](IslPtr Map) { + if (!isMapToUndef(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + }); + return Result; +} + +/// Return only the mappings that map to known values. 
+///
+/// @param UMap { [] -> ValInst[] }
+///
+/// @return { [] -> ValInst[] }
+IslPtr<isl_union_map> filterKnownValInst(NonowningIslPtr<isl_union_map> UMap) {
+  auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep())));
+  foreachElt(UMap, [=, &Result](IslPtr<isl_map> Map) {
+    if (!isMapToUnknown(Map) && !isMapToUndef(Map))
+      Result = give(isl_union_map_add_map(Result.take(), Map.take()));
+  });
+  return Result;
+}
+
+/// Represent the knowledge of the contents of any array elements in any zone
+/// or the knowledge we would add when mapping a scalar to an array element.
+///
+/// Every array element at every zone unit has one of three states:
+/// - Undef: Not occupied by any value so a transformation can change it to
+///   other values.
+/// - Known: Contains an llvm::Value instance, the instance is stored as the
+///   domain of the statement instance defining it.
+/// - Unknown: The element contains a value, but it is not a determinable
+///   llvm::Value instance.
+///
+/// There are two uses for the Knowledge class:
+/// 1) To represent the knowledge of the current state of ScopInfo. The Undef
+///    state means that an element is currently unused: there is no read of it
+///    before the next overwrite. Also called 'existing'.
+/// 2) To represent the requirements for mapping a scalar to array elements.
+///    The undef state means that there is no change/requirement. Also called
+///    'proposed'.
+///
+/// In addition to these states at unit zones, Knowledge needs to know when
+/// values are written. This is because written values may have no lifetime
+/// (one reason is that the value is never read). Such writes would therefore
+/// never conflict, but overwrite values that might still be required. Another
+/// source of problems is multiple writes to the same element at the same
+/// timepoint, because their order is undefined. Writes can either write known
+/// or unknown states. An 'undef' write would be a non-existing write.
+class Knowledge { +private: + /// { [Element[] -> Zone[]] -> ValInst[] } + /// The state of every array element at every unit zone. + IslPtr Lifetime; + + /// An array element at any time has one of the three states. For efficiency, + /// one of them can be represented implicitly by assuming that state when it + /// maps to nothing. Which one is more efficient depends on the use case. + /// + /// If ImplicitLifetimeIsUnknown == false, unmapped zones are assumed to be + /// Unknown. This is more efficient for use case 1) because anything that + /// cannot be determined to be Known or Undef is Unknown. + /// + /// If ImplicitLifetimeIsUnknown == true, unmapped zones are assumed to be + /// Undef. This is more efficient for use case 2) because scalar mapping only + /// constraints zones that are in scalar's lifetime. + bool ImplicitLifetimeIsUnknown = false; + + /// { [Element[] -> Scatter[]] -> ValInst[] } + /// The write actions currently in the scop or that would be added when + /// mapping a scalar. + IslPtr Written; + + /// Check whether this Knowledge object is well-formed. + void checkConsistency() const { + assert(!Lifetime == !Written); + + // Default-initialized object + if (!Lifetime && !Written) + return; + + assert(Lifetime); + assert(Written); + assert(isl_union_map_is_single_valued(Lifetime.keep()) != isl_bool_false); + } + + /// Ensure an unique representation depending on ImplicitLifetimeIsUnknown. + void canonicalize() { + // If at least one is nullptr, the max_operations quota for computing one + // might have exceeded. In this case, this Knowledge object is not usable. + if (!Lifetime || !Written) { + Lifetime = nullptr; + Written = nullptr; + return; + } + + if (isImplicitLifetimeUndef()) + Lifetime = removeUndefValInst(this->Lifetime); + if (isImplicitLifetimeUnknown()) + Lifetime = removeUnknownValInst(this->Lifetime); + } + + /// Accessor for ImplicitLifetimeIsUnknown. 
+  bool isImplicitLifetimeUnknown() const { return ImplicitLifetimeIsUnknown; }
+
+  /// Accessor for ImplicitLifetimeIsUnknown.
+  bool isImplicitLifetimeUndef() const { return !ImplicitLifetimeIsUnknown; }
+
+public:
+  /// Initialize a nullptr-Knowledge. This is only provided for convenience;
+  /// do not use such an object.
+  Knowledge() {}
+
+  /// Create a new object with the given members.
+  Knowledge(IslPtr<isl_union_map> Lifetime, bool ImplicitLifetimeIsUnknown,
+            IslPtr<isl_union_map> Written)
+      : Lifetime(std::move(Lifetime)),
+        ImplicitLifetimeIsUnknown(ImplicitLifetimeIsUnknown),
+        Written(std::move(Written)) {
+    canonicalize();
+    checkConsistency();
+  }
+
+  /// Alternative constructor taking isl_maps instead of isl_union_maps.
+  Knowledge(IslPtr<isl_map> Lifetime, bool ImplicitLifetimeIsUnknown,
+            IslPtr<isl_map> Written)
+      : Knowledge(give(isl_union_map_from_map(Lifetime.take())),
+                  ImplicitLifetimeIsUnknown,
+                  give(isl_union_map_from_map(Written.take()))) {}
+
+  /// Return whether this object is usable, i.e. it was not
+  /// default-constructed and has not been invalidated by canonicalize().
+  bool isUsable() const { return Lifetime && Written; }
+
+  /// Print the content of this object to @p OS.
+  void print(llvm::raw_ostream &OS, unsigned Indent = 0) const {
+    if (isUsable()) {
+      if (isImplicitLifetimeUnknown())
+        OS.indent(Indent) << "Lifetime: " << Lifetime << " + Unknown\n";
+      else
+        OS.indent(Indent) << "Lifetime: " << Lifetime << " + Undef\n";
+      OS.indent(Indent) << "Written : " << Written << '\n';
+    } else {
+      OS.indent(Indent) << "Invalid knowledge\n";
+    }
+  }
+
+  /// Dump the object content to stderr. Meant to be called in a debugger.
+  void dump() const;
+
+  /// Combine two knowledges, this and @p That.
+  ///
+  /// The two knowledges must not conflict with each other. Only combining
+  /// 'implicit unknown' (use case 1) with 'implicit undef' (use case 2)
+  /// knowledges is implemented.
+ void merge_inplace(Knowledge That) { + assert(!isConflicting(*this, That)); + assert(this->isImplicitLifetimeUnknown()); + assert(That.isImplicitLifetimeUndef()); + + auto ThatKnownDomain = filterKnownValInst(That.Lifetime); + auto ThatDomain = give(isl_union_map_domain(That.Lifetime.take())); + + Lifetime = + give(isl_union_map_subtract_domain(Lifetime.take(), ThatDomain.take())); + Lifetime = + give(isl_union_map_union(Lifetime.take(), ThatKnownDomain.take())); + + Written = give(isl_union_map_union(Written.take(), That.Written.take())); + + checkConsistency(); + } + + /// Determine whether two Knowledges conflict each other. + /// + /// In theory @p This and @p That are symmetric, but the implementation is + /// constrained by the implicit interpretation. + /// + /// A conflict is defined as non-preserved semantics when they are merged. For + /// instance, when for the same array and zone they assume different + /// llvm::Values. + /// + /// @param Existing One of the knowledges; current implementation requires it + /// to be 'implicit unknown' (use case 1). + /// @param Proposed One of the knowledges; current implementation requires it + /// to be 'implicit undef' (use case 2). + /// @param OS Dump the conflict reason to this output stream; use + /// nullptr to + /// not output anything. + /// @param Indent Indention for the conflict reason. + /// + /// @return True, iff the two knowledges are conflicting. 
+ static bool isConflicting(const Knowledge &Existing, + const Knowledge &Proposed, + llvm::raw_ostream *OS = nullptr, + unsigned Indent = 0) { + assert(Existing.isImplicitLifetimeUnknown()); + assert(Proposed.isImplicitLifetimeUndef()); + + // The following domain intersections conflict: + // 1) Unknown vs Unknown + // 2a) Known vs Unknown, 2b) Unknown vs Known + // 3) Known vs Known that do not map to the same llvm::Value instance + // 4a) Written vs Unknown, 4b) Unknown vs Written + // 5a) Written Unknown vs Known, 5b) Known vs Written Unknown + // 6a) Written Known vs Known, 6b) Known vs Written Known that do not write + // the same llvm::Value instance + // 7) Written Known/Unknown vs Written Known/Unknown, where the first writes + // different values to the same location and at the same timepoint as the + // latter. + + // Check 1) and 2a) + auto ExistingUndef = getUndefValInstDomain(Existing.Lifetime); + auto ProposedUnknownDomain = getUnknownValInstDomain(Proposed.Lifetime); + if (!isl_union_set_is_subset(ProposedUnknownDomain.keep(), + ExistingUndef.keep())) { + if (OS) + OS->indent(Indent) << "Conflict with proposed unknown\n"; + return true; + } + + // Check 2b) + auto ProposedKnown = filterKnownValInst(Proposed.Lifetime); + auto ProposedKnownDomain = give(isl_union_map_domain(ProposedKnown.copy())); + auto ExistingKnownOrUndefDomain = + give(isl_union_map_domain(Existing.Lifetime.copy())); + if (!isl_union_set_is_subset(ProposedKnownDomain.keep(), + ExistingKnownOrUndefDomain.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of existing unknown\n"; + return true; + } + + // Check 3) + auto ExistingKnown = filterKnownValInst(Existing.Lifetime); + auto ExistingKnownDomain = give(isl_union_map_domain(ExistingKnown.copy())); + auto CommonOverlapKnown = give(isl_union_set_intersect( + ExistingKnownDomain.copy(), ProposedKnownDomain.copy())); + auto ExistingOverlapKnown = give(isl_union_map_intersect_domain( + ExistingKnown.copy(), 
CommonOverlapKnown.copy())); + auto ProposedOverlapKnown = give(isl_union_map_intersect_domain( + ProposedKnown.copy(), CommonOverlapKnown.copy())); + if (!isl_union_map_is_equal(ExistingOverlapKnown.keep(), + ProposedOverlapKnown.keep())) { + if (OS) { + OS->indent(Indent) + << "Conflict of lifetime-to-map known with existing known\n"; + auto ExistingConflict = give(isl_union_map_subtract( + ExistingOverlapKnown.copy(), ProposedOverlapKnown.copy())); + auto ProposedConflict = give(isl_union_map_subtract( + ProposedOverlapKnown.copy(), ExistingOverlapKnown.copy())); + OS->indent(Indent + 2) << "Existing wants: " << ExistingConflict + << '\n'; + OS->indent(Indent + 2) << "Proposed wants: " << ProposedConflict + << '\n'; + } + return true; + } + + // Check 4a) + auto ExistingWritten = shiftDim(Existing.Written, isl_dim_in, -1, 1); + auto ExistingWrittenZone = + give(isl_union_map_domain(ExistingWritten.copy())); + if (!isl_union_set_is_disjoint(ExistingWrittenZone.keep(), + ProposedUnknownDomain.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of current write to proposed unknown\n"; + return true; + } + + // Check 4b) + auto ProposedWritten = shiftDim(Proposed.Written, isl_dim_in, -1, 1); + auto ProposedWrittenZone = + give(isl_union_map_domain(ProposedWritten.copy())); + if (!isl_union_set_is_subset(ProposedWrittenZone.keep(), + ExistingKnownOrUndefDomain.keep())) { + if (OS) + dbgs().indent(Indent) + << "Conflict of proposed write to current unknown\n"; + return true; + } + + // Check 5a) + auto ExistingWrittenUnknownZone = getUnknownValInstDomain(ExistingWritten); + if (!isl_union_set_is_disjoint(ExistingWrittenUnknownZone.keep(), + ProposedKnownDomain.keep())) { + if (OS) + dbgs().indent(Indent) + << "Conflict of current unknown write to proposed\n"; + return true; + } + + // Check 5b) + auto ProposedWrittenUnknownZone = getUnknownValInstDomain(ProposedWritten); + if (!isl_union_set_is_disjoint(ProposedWrittenUnknownZone.keep(), + 
ExistingKnownDomain.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of proposed unknown write to current\n"; + return true; + } + + // Check 6a) + auto ExistingWrittenOverlap = give(isl_union_map_intersect_domain( + ExistingWritten.copy(), ProposedKnownDomain.copy())); + if (!isl_union_map_is_subset(ExistingWrittenOverlap.keep(), + ProposedKnown.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of current write to proposed known\n"; + return true; + } + + // Check 6b) + auto ProposedWrittenOverlap = give(isl_union_map_intersect_domain( + ProposedWritten.copy(), ExistingKnownDomain.copy())); + if (!isl_union_map_is_subset(ProposedWrittenOverlap.keep(), + ExistingKnown.keep())) { + if (OS) + OS->indent(Indent) << "Conflict of proposed write to current known\n"; + return true; + } + + // Check 7) + auto ExistingWrittenDomain = + give(isl_union_map_domain(Existing.Written.copy())); + auto ProposedWrittenDomain = + give(isl_union_map_domain(Proposed.Written.copy())); + auto WriteOverlap = give(isl_union_set_intersect( + ExistingWrittenDomain.copy(), ProposedWrittenDomain.copy())); + auto ExistingOverlapOverwrite = give(isl_union_map_intersect_domain( + Existing.Written.copy(), WriteOverlap.copy())); + auto ProposedOverlapOverwrite = give(isl_union_map_intersect_domain( + Proposed.Written.copy(), WriteOverlap.copy())); + + if (!isl_union_map_is_equal(ExistingOverlapOverwrite.keep(), + ProposedOverlapOverwrite.keep())) { + if (OS) { + OS->indent(Indent) + << "Conflict because of existing/proposed undefined write order\n"; + // Note that if eg. Existing writes two values, one of which is also + // written by Proposed, that value is removed from + // ExistingConflictingOverwrite, st. it seems the value in Proposed does + // not conflict with anything. The isl_union_map_is_equal above still + // fails (and has to!) 
+ auto ExistingConflictingOverwrite = isl_union_map_subtract( + ExistingOverlapOverwrite.copy(), ProposedOverlapOverwrite.copy()); + auto ProposedConflictingOverwrite = isl_union_map_subtract( + ProposedOverlapOverwrite.copy(), ExistingOverlapOverwrite.copy()); + OS->indent(Indent + 2) + << "Existing wants to write: " << ExistingConflictingOverwrite + << '\n'; + OS->indent(Indent + 2) + << "Proposed wants to write: " << ProposedConflictingOverwrite + << '\n'; + } + return true; + } + + auto ExistingWrittenUnknown = getUnknownValInstDomain(Existing.Written); + if (!isl_union_set_is_disjoint(ExistingWrittenUnknown.keep(), + ProposedWrittenDomain.keep())) { + if (OS) + OS->indent(Indent) << "Existing writes unknown with undefined order to " + "proposed write\n"; + return true; + } + + auto ProposedWrittenUnknown = getUnknownValInstDomain(Proposed.Written); + if (!isl_union_set_is_disjoint(ProposedWrittenUnknown.keep(), + ExistingWrittenDomain.keep())) { + if (OS) + OS->indent(Indent) << "Existing writes with undefined order to " + "proposed write unknown\n"; + return true; + } + + return false; + } +}; + +void Knowledge::dump() const { print(llvm::errs()); } + +/// Base class for algorithms based on zones, like DeLICM. +class ZoneAlgorithm { +protected: + /// Hold a reference to the isl_ctx to avoid it being freed before we released + /// all of the ISL objects. + /// + /// This must be the declared before any other member that holds an isl + /// object. This guarantees that the shared_ptr and its isl_ctx is destructed + /// last, after all other members free'd the isl objects they were holding. + std::shared_ptr IslCtx; + + /// Cached reaching definitions for each ScopStmt. + /// + /// Use getScalarReachingDefinition() to get its contents. + DenseMap> ScalarReachDefZone; + + /// The analyzed Scop. + Scop *S; + + /// Parameter space that does not need realignment. + IslPtr ParamSpace; + + /// Space the schedule maps to. 
+ IslPtr ScatterSpace; + + /// Cached version of the schedule and domains. + IslPtr Schedule; + + /// As many isl_union_maps are initialized empty, this can be used to increase + /// only a reference a counter, instead of creating an entirely new + /// isl_union_map. + IslPtr EmptyUnionMap; + + /// As many isl_union_sets are initialized empty, this can be used to increase + /// only a reference a counter, instead of creating an entirely new + /// isl_union_set. + IslPtr EmptyUnionSet; + + /// Set of all referenced elements. + /// { Element[] -> Element[] } + IslPtr AllElements; + + /// Combined access relations of all MK_Array READ accesses. + /// { DomainRead[] -> Element[] } + IslPtr AllReads; + + /// Combined access relations of all MK_Array, MAY_WRITE accesses. + /// { DomainMayWrite[] -> Element[] } + IslPtr AllMayWrites; + + /// Combined access relations of all MK_Array, MUST_WRITE accesses. + /// { DomainMustWrite[] -> Element[] } + IslPtr AllMustWrites; + + /// The value instances written to array elements of all write accesses. + /// { [Element[] -> DomainWrite[]] -> ValInst[] } + IslPtr AllWriteValInst; + + /// All reaching definitions for MK_Array writes. + /// { [Element[] -> Zone[]] -> DomainWrite[] } + IslPtr WriteReachDefZone; + + /// Prepare the object before computing the zones of @p S. + ZoneAlgorithm(Scop *S) + : IslCtx(S->getSharedIslCtx()), S(S), Schedule(give(S->getSchedule())) { + + auto Domains = give(S->getDomains()); + + Schedule = + give(isl_union_map_intersect_domain(Schedule.take(), Domains.take())); + ParamSpace = give(isl_union_map_get_space(Schedule.keep())); + ScatterSpace = getScatterSpace(Schedule); + + EmptyUnionMap = give(isl_union_map_empty(ParamSpace.copy())); + EmptyUnionSet = give(isl_union_set_empty(ParamSpace.copy())); + } + +private: + /// Of all the llvm::Values that represent the same content, try to find an + /// unique one. + /// + /// PHI nodes with just one incoming block are introduced by LCSSA. 
All other
+  /// exact copy instructions (eg. bitwise 'or' with zero) should be removed
+  /// by InstCombine.
+  ///
+  /// Without this normalization, the two values would be considered
+  /// different, leading to less optimization opportunities.
+  Value *deLCSSA(Value *Val) {
+    if (!Val)
+      return Val;
+
+    if (auto *PHI = dyn_cast<PHINode>(Val)) {
+      Value *NormVal = nullptr;
+      for (auto &Use : PHI->incoming_values()) {
+        auto InVal = Use.get();
+        assert(InVal);
+
+        if (isa<UndefValue>(InVal))
+          continue;
+
+        // Bail out if the PHI has more than one distinct non-undef input;
+        // such a PHI is not a plain copy.
+        if (NormVal && NormVal != InVal)
+          return Val;
+
+        // NOTE(review): remember the incoming value; the previous revision
+        // stored the PHI itself here, which made the function a no-op.
+        NormVal = InVal;
+      }
+      if (NormVal)
+        return NormVal;
+    }
+
+    return Val;
+  }
+
+  /// Determine whether an instruction is defined in a different statement
+  /// instance than in which it is used.
+  ///
+  /// We here assume that a BB/region cannot use a definition in the same
+  /// BB/region, which would be theoretically possible in loops within region
+  /// statements and with disconnected loops:
+  ///   BB:
+  ///      ... = add i32 %def, 5
+  ///      %def = ...
+  ///      br label %BB
+  ///
+  /// @param Val      The instruction defining a value.
+  /// @param UserStmt The statement using @p Val. The use must not be a PHI;
+  ///                 they must be handled separately.
+  ///
+  /// @return True iff a use of @p Val in @p UserStmt introduces a
+  ///         flow-dependency.
+  bool isInterStmtUse(Value *Val, ScopStmt *UserStmt) const {
+    assert(UserStmt);
+    auto *Inst = dyn_cast<Instruction>(Val);
+
+    // Non-instruction like literals do not add inter-stmt dependencies.
+    if (!Inst)
+      return false;
+
+    auto *DefStmt = S->getStmtFor(Inst);
+
+    // Read-only uses do not add inter-stmt dependencies.
+    if (!DefStmt)
+      return false;
+
+    // This assumes that there must be a PHI in the same statement if we are
+    // going to use a value from a previous execution of the same statement.
+    return DefStmt != UserStmt;
+  }
+
+  /// Check whether @p Stmt can be accurately analyzed by zones.
+ /// + /// What violates our assumptions: + /// - A load after a write of the same location; we assume that all reads + /// occur before the writes. + /// - Two writes to the same location; we cannot model the order in which + /// these occur. + /// + /// Scalar reads implicitly always occur before other access therefore never + /// violate the first condition. There is also at most one write to a scalar, + /// satisfying the second condition. + bool isCompatibleStmt(ScopStmt *Stmt) { + auto Stores = EmptyUnionMap; + auto Loads = EmptyUnionMap; + + // This assumes that the MK_Array MemoryAccesses are iterated in order. + for (auto *MA : *Stmt) { + if (!MA->isLatestArrayKind()) + continue; + + auto AccRel = + give(isl_union_map_from_map(getAccessRelationFor(MA).take())); + + if (MA->isRead()) { + // Reject store after load to same location. + if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + return false; + + Loads = give(isl_union_map_union(Loads.take(), AccRel.take())); + } + + if (MA->isWrite()) { + if (!isa(MA->getAccessInstruction())) { + DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n"); + return false; + } + + // In region statements the order is less clear, eg. the load and store + // might be in a boxed loop. + if (Stmt->isRegionStmt() && + !isl_union_map_is_disjoint(Loads.keep(), AccRel.keep())) + return false; + + // Do not allow more than one store to the same location. 
+ if (!isl_union_map_is_disjoint(Stores.keep(), AccRel.keep())) + return false; + + Stores = give(isl_union_map_union(Stores.take(), AccRel.take())); + } + } + + return true; + } + + void addArrayReadAccess(MemoryAccess *MA) { + assert(MA->isLatestArrayKind()); + assert(MA->isRead()); + + // { DomainRead[] -> Element[] } + auto AccRel = getAccessRelationFor(MA); + AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy())); + } + + void addArrayWriteAccess(MemoryAccess *MA) { + assert(MA->isLatestArrayKind()); + assert(MA->isWrite()); + auto *Stmt = MA->getStatement(); + + // { Domain[] -> Element[] } + auto AccRel = getAccessRelationFor(MA); + + if (MA->isMustWrite()) { + AllMustWrites = + give(isl_union_map_add_map(AllMustWrites.take(), AccRel.copy())); + } + if (MA->isMayWrite()) { + AllMayWrites = + give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy())); + } + + // { Domain[] -> ValInst[] } + auto WriteValInstance = + makeValInst(MA->getAccessValue(), Stmt, MA->isMustWrite()); + + // { Domain[] -> [Element[] -> Domain[]] } + auto IncludeElement = + give(isl_map_curry(isl_map_domain_map(AccRel.copy()))); + + // { [Element[] -> DomainWrite[]] -> ValInst[] } + auto EltWriteValInst = give( + isl_map_apply_domain(WriteValInstance.take(), IncludeElement.take())); + + AllWriteValInst = give( + isl_union_map_add_map(AllWriteValInst.take(), EltWriteValInst.take())); + } + +protected: + /// Check whether @p S can be accurately analyzed by zones. + bool isCompatibleScop() { + for (auto &Stmt : *S) { + if (!isCompatibleStmt(&Stmt)) + return false; + } + return true; + } + + /// Get the schedule for @p Stmt. + /// + /// The domain of the result will as narrow as possible. 
+ IslPtr getScatterFor(ScopStmt *Stmt) const { + auto ResultSpace = give(isl_space_map_from_domain_and_range( + Stmt->getDomainSpace(), ScatterSpace.copy())); + return give(isl_union_map_extract_map(Schedule.keep(), ResultSpace.take())); + } + + /// Get the schedule of @p MA's parent statement. + IslPtr getScatterFor(MemoryAccess *MA) const { + return getScatterFor(MA->getStatement()); + } + + /// Get the schedule for the statement instances of @p Domain. + IslPtr getScatterFor(IslPtr Domain) const { + return give(isl_union_map_intersect_domain(Schedule.copy(), Domain.take())); + } + + /// Get the schedule for the statement instances of @p Domain. + IslPtr getScatterFor(IslPtr Domain) const { + auto ResultSpace = give(isl_space_map_from_domain_and_range( + isl_set_get_space(Domain.keep()), ScatterSpace.copy())); + auto UDomain = give(isl_union_set_from_set(Domain.copy())); + auto UResult = getScatterFor(std::move(UDomain)); + auto Result = singleton(std::move(UResult), std::move(ResultSpace)); + assert(isl_set_is_equal(give(isl_map_domain(Result.copy())).keep(), + Domain.keep()) == isl_bool_true); + return Result; + } + + /// Get the domain of @p Stmt. + IslPtr getDomainFor(ScopStmt *Stmt) const { + return give(Stmt->getDomain()); + } + + /// Get the domain @p MA's parent statement. + IslPtr getDomainFor(MemoryAccess *MA) const { + return getDomainFor(MA->getStatement()); + } + + /// get the access relation of @p MA. + /// + /// The domain of the result will as narrow as possible. + IslPtr getAccessRelationFor(MemoryAccess *MA) const { + auto Domain = getDomainFor(MA); + auto AccRel = give(MA->getLatestAccessRelation()); + return give(isl_map_intersect_domain(AccRel.take(), Domain.take())); + } + + /// Get the reaching definition of a scalar defined in @p Stmt. + /// + /// Note that this does not depend on the llvm::Instruction, only on the + /// statement it is defined in. Therefore the same computation can be reused. 
+ /// + /// @param Stmt The statement in which a scalar is defined. + /// + /// @return { Scatter[] -> DomainDef[] } + IslPtr getScalarReachingDefinition(ScopStmt *Stmt) { + auto &Result = ScalarReachDefZone[Stmt]; + if (Result) + return Result; + + auto Domain = getDomainFor(Stmt); + Result = computeScalarReachingDefinition(Schedule, Domain, false, true); + simplify(Result); + + assert(Result); + return Result; + } + + /// Get the reaching definition of a scalar defined in @p DefDomain. + /// + /// @param DomainDef { DomainDef[] } + /// The write statements to get the reaching definition for. + /// + /// @return { Scatter[] -> DomainDef[] } + IslPtr getScalarReachingDefinition(IslPtr DomainDef) { + auto DomId = give(isl_set_get_tuple_id(DomainDef.keep())); + auto *Stmt = static_cast(isl_id_get_user(DomId.keep())); + + auto StmtResult = getScalarReachingDefinition(Stmt); + + return give(isl_map_intersect_range(StmtResult.take(), DomainDef.take())); + } + + /// Create an isl_id that means 'don't know the value'. + IslPtr makeUnknownId() const { return nullptr; } + + /// Create an isl_space for unknown values. + IslPtr makeUnknownSpace() const { + return give(isl_space_set_from_params(ParamSpace.copy())); + } + + /// Create a set with an unknown value in it. + IslPtr makeUnknownSet() const { + auto Space = makeUnknownSpace(); + return give(isl_set_universe(Space.take())); + } + + /// Create a union set with an unknown value in it. + IslPtr makeUnknownUSet() const { + return give(isl_union_set_from_set(makeUnknownSet().take())); + } + + /// Create a domain-to-unknown value mapping. + /// + /// @param Domain { Domain[] } + /// + /// @return { Domain[] -> ValInst[] } + IslPtr makeUnknownForDomain(IslPtr Domain) const { + return give(isl_map_from_domain(Domain.take())); + } + + /// Create a statement-to-unknown value mapping. + /// + /// @param Stmt The statement whose instances are mapped to unknown. 
+ /// + /// @return { Domain[] -> ValInst[] } + IslPtr makeUnknownForDomain(ScopStmt *Stmt) const { + return give(isl_map_from_domain(getDomainFor(Stmt).take())); + } + + /// Create a domain-to-unknown value mapping. + /// + /// @param Domain { Domain[] } + /// + /// @return { Domain[] -> ValInst[] } + IslPtr + makeUnknownForDomain(IslPtr Domain) const { + return give(isl_union_map_from_domain(Domain.take())); + } + + /// Create an isl_id that represents 'unused storage'. + IslPtr makeUndefId() const { + auto &LLVMContext = S->getFunction().getContext(); + auto Ty = IntegerType::get(LLVMContext, 1); + auto Val = UndefValue::get(Ty); + return give(isl_id_alloc(IslCtx.get(), "Undef", Val)); + } + + /// Create an isl_space for an undefined value. + IslPtr makeUndefSpace() const { + auto Result = give(isl_space_set_from_params(ParamSpace.copy())); + return give(isl_space_set_tuple_id(Result.take(), isl_dim_set, + makeUndefId().take())); + } + + /// Create a set with an undefined value in it. + IslPtr makeUndefSet() const { + auto Space = makeUndefSpace(); + return give(isl_set_universe(Space.take())); + } + + /// Create a union set with an undefined value in it. + IslPtr makeUndefUSet() const { + return give(isl_union_set_from_set(makeUndefSet().take())); + } + + /// Create an isl_id that represents @p V. + IslPtr makeValueId(Value *V) const { + if (!V) + return makeUnknownId(); + if (isa(V)) + return makeUndefId(); + + auto Name = getIslCompatibleName("Val_", V, std::string()); + return give(isl_id_alloc(IslCtx.get(), Name.c_str(), V)); + } + + /// Create the space for an llvm::Value that is available everywhere. + IslPtr makeValueSpace(Value *V) const { + auto Result = give(isl_space_set_from_params(ParamSpace.copy())); + return give(isl_space_set_tuple_id(Result.take(), isl_dim_set, + makeValueId(V).take())); + } + + /// Create a set with the llvm::Value @p V which is available everywhere. 
+ IslPtr makeValueSet(Value *V) const { + auto Space = makeValueSpace(V); + return give(isl_set_universe(Space.take())); + } + + // { UserDomain[] -> ValInst[] } + IslPtr makeValInst(Value *Val, ScopStmt *UserStmt, + bool IsCertain = true) { + return makeValInst(Val, nullptr, UserStmt, getDomainFor(UserStmt), + IsCertain); + } + + /// Create a mapping from a statement instance to the instance of an + /// llvm::Value that can be used in there. + /// + /// Although LLVM IR used single static assignment, llvm::Values can have + /// different contents in loops, when they get redefined in the last + /// iteration. This function tries to get the statement instance of the + /// previous definition, relative to a user. + /// + /// Example: + /// for (int i = 0; i < N; i += 1) { + /// DEF: + /// int v = A[i]; + /// USE: + /// use(v); + /// } + /// + /// The value instance used by statement instance USE[i] is DEF[i]. Hence, + /// makeValInst would return + /// { USE[i] -> DEF[i] : 0 <= i < N } + /// + /// + /// @param V The value to look get the instance of. + /// @param DomainDef { DomainDef[] } + /// The domain of the statement that defines @p V. If + /// nullptr, will be derived automatically. If defined, the + /// domain gets precedence over trying to use the + /// llvm::Value instance from the same statement. + /// @param UseStmt The statement that uses @p V. Can be nullptr. + /// @param DomainUse { DomainUse[] } + /// The domain of @p UseStmt. + /// @param IsCertain Pass true if the definition of @p V is a MUST_WRITE or + /// false if the write is conditional. + /// + /// @return { DomainUse[] -> ValInst[] } + IslPtr makeValInst(Value *V, IslPtr DomainDef, + ScopStmt *UseStmt, IslPtr DomainUse, + bool IsCertain = true) { + assert(DomainUse && "Must pass a user domain"); + + // If the definition/write is conditional, the previous write may "shine + // through" on conditions we cannot determine. Again, return the unknown + // value. 
+ if (!IsCertain) + return give(isl_map_from_domain(DomainUse.take())); + + if (V && !isa(V)) { + // Non-instructions are available anywhere. + auto ValSet = makeValueSet(V); + return give( + isl_map_from_domain_and_range(DomainUse.take(), ValSet.take())); + } + + // Normalize + // FIXME: It doesn't really work well if the LCSSA %phi is intra-stmt, but + // the incoming value is extra-phi. + // TODO: In a SCoP, we should be able to determine the predecessor for + // _every_ PHI. + auto *NormV = deLCSSA(V); + + // If the definition is in the using Stmt itself, use DomainUse[] for the + // Value's instance. + // Note that the non-isIntraStmtUse assumes extra-Stmt use, ie. a use would + // use the definition from a previous instance. + if (!DomainDef && UseStmt && V && !isInterStmtUse(V, UseStmt)) { + // Even if V is within UseStmt, NormV might be somewhere else; return + // unknown to avoid problems. + if (V != NormV) + return makeUnknownForDomain(DomainUse); + + // { llvm::Value } + auto ValSet = makeValueSet(NormV); + + // { UserDomain[] -> llvm::Value } + auto ValInstSet = + give(isl_map_from_domain_and_range(DomainUse.take(), ValSet.take())); + + // { UserDomain[] -> [UserDomain[] - >llvm::Value] } + auto Result = + give(isl_map_reverse(isl_map_domain_map(ValInstSet.take()))); + simplify(Result); + return Result; + } + + // Try to derive DomainDef if not explicitly specified. + if (!DomainDef) { + auto *Inst = cast(NormV); + auto *ValStmt = S->getStmtFor(Inst); + + // It is possible that the llvm::Value is in a removed Stmt, in which case + // we cannot derive its domain. 
+ if (ValStmt) + DomainDef = getDomainFor(ValStmt); + } + + if (DomainDef) { + // { Scatter[] -> DefDomain[] } + auto ReachDef = getScalarReachingDefinition(DomainDef); + + // { DomainUse[] -> Scatter[] } + auto UserSched = getScatterFor(DomainUse); + + // { DomainUse[] -> DomainDef[] } + auto UsedInstance = + give(isl_map_apply_range(UserSched.take(), ReachDef.take())); + + // { llvm::Value } + auto ValSet = makeValueSet(NormV); + + // { DomainUse[] -> llvm::Value } + auto ValInstSet = + give(isl_map_from_domain_and_range(DomainUse.take(), ValSet.take())); + + // { DomainUse[] -> [DomainDef[] -> llvm::Value] } + auto Result = + give(isl_map_range_product(UsedInstance.take(), ValInstSet.take())); + simplify(Result); + return Result; + } + + // If neither the value not the user if given, we cannot determine the + // reaching definition; return value is unknown. + return makeUnknownForDomain(DomainUse); + } + + /// Compute the different zones. + void computeCommon() { + AllReads = EmptyUnionMap; + AllMayWrites = EmptyUnionMap; + AllMustWrites = EmptyUnionMap; + AllWriteValInst = EmptyUnionMap; + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (!MA->isLatestArrayKind()) + continue; + + if (MA->isRead()) + addArrayReadAccess(MA); + + if (MA->isWrite()) + addArrayWriteAccess(MA); + } + } + + // { DomainWrite[] -> Element[] } + auto AllWrites = + give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); -using namespace polly; -using namespace llvm; + // { Element[] } + AllElements = EmptyUnionSet; + foreachElt(AllWrites, [this](IslPtr Write) { + auto Space = give(isl_map_get_space(Write.keep())); + auto EltSpace = give(isl_space_range(Space.take())); + auto EltUniv = give(isl_set_universe(EltSpace.take())); + AllElements = + give(isl_union_set_add_set(AllElements.take(), EltUniv.take())); + }); -namespace { + // { Element[] -> Element[] } + auto AllElementsId = makeIdentityMap(AllElements, false); + + // { Element[] -> Zone[] } + auto UniverseZone = 
give(isl_union_map_from_domain_and_range( + AllElements.copy(), + isl_union_set_from_set(isl_set_universe(ScatterSpace.copy())))); + + // { [Element[] -> Zone[]] -> DomainWrite[] } + WriteReachDefZone = + computeReachingDefinition(Schedule, AllWrites, false, true); + simplify(WriteReachDefZone); + } + + /// Print the current state of all MemoryAccesses to @p. + void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const { + OS.indent(Indent) << "After accesses {\n"; + for (auto &Stmt : *S) { + OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; + for (auto *MA : Stmt) + MA->print(OS); + } + OS.indent(Indent) << "}\n"; + } + +public: + /// Return the SCoP this object is analyzing. + Scop *getScop() const { return S; } +}; + +/// Class to remember a scalar-to-array element transformation for printing by +/// opt -analyze. +class MapReport { +private: + /// The scalar that was mapped. + const ScopArrayInfo *SAI; + + /// The definition of an MK_Value or read of an MK_PHI having been mapped. + MemoryAccess *PrimaryAcc; + + /// The uses of an MK_Value or incoming writes of an MK_Value having been + /// mapped. + SmallVector SecondaryAccs; + + /// Target mapping for the MK_Value write or MK_PHI read. + /// { Domain[] -> Element[] } + IslPtr Target; + + /// Lifetime expressed from the MK_Value write or MK_PHI read. + /// { Domain[] -> Zone[] } + IslPtr Lifetime; + + /// The constants that were checked before the transformation/applied to the + /// common knowledge after the transformation. + Knowledge Proposed; + +public: + MapReport(const ScopArrayInfo *SAI, MemoryAccess *PrimaryAcc, + ArrayRef SecondaryAccs, IslPtr Target, + IslPtr Lifetime, Knowledge Zone) + : SAI(SAI), PrimaryAcc(PrimaryAcc), + SecondaryAccs(SecondaryAccs.begin(), SecondaryAccs.end()), + Target(std::move(Target)), Lifetime(std::move(Lifetime)), + Proposed(std::move(Zone)) { + assert(SAI); + DEBUG(print(llvm::dbgs(), 0)); + } + + /// Print this transformation report to @p OS. 
+ void print(llvm::raw_ostream &OS, int Indent = 0) const { + OS.indent(Indent) << "Mapping of " << SAI->getName() << " {\n"; + if (PrimaryAcc) { + OS.indent(Indent + 4) << "Primary:\n"; + PrimaryAcc->print(OS); + } + for (auto *MA : SecondaryAccs) { + OS.indent(Indent + 4) << "Secondary:\n"; + MA->print(OS); + } + OS.indent(Indent + 4) << "Target: " << Target << "\n"; + OS.indent(Indent + 4) << "Lifetime: " << Lifetime << "\n"; + OS.indent(Indent + 4) << "Zone:\n"; + Proposed.print(OS, Indent + 8); + OS.indent(Indent) << "}\n"; + } +}; + +/// Implementation of the DeLICM/DePRE transformation. +class DeLICMImpl : public ZoneAlgorithm { +private: + /// Knowledge before any transformation took place. + Knowledge OriginalZone; + + /// Current knowledge of the SCoP including all already applied + /// transformations. + Knowledge Zone; + + /// Log of every applied mapping transformations. + SmallVector MapReports; + + ScalarDefUseChains DefUse; + + /// Determine whether two knowledges are conflicting each other. + /// + /// @see Knowledge::isConflicting + bool isConflicting(const Knowledge &Proposed) { + raw_ostream *OS = nullptr; + DEBUG(OS = &llvm::dbgs()); + return Knowledge::isConflicting(Zone, Proposed, OS, 4); + } + + /// Determine whether @p SAI is a scalar that can be mapped to an array + /// element. + bool isMappable(const ScopArrayInfo *SAI) { + assert(SAI); + + if (SAI->isValueKind()) { + auto *MA = DefUse.getValueDef(SAI); + if (!MA) { + DEBUG(dbgs() + << " Reject because value is read-only within the scop\n"); + return false; + } + + // Mapping if value is used after scop is not supported. The code + // generator would need to reload the scalar after the scop, but it + // does not have the information to where it is mapped to. Only the + // MemoryAccesses have that information, not the ScopArrayInfo. 
+ auto Inst = MA->getAccessInstruction(); + for (auto User : Inst->users()) { + if (!isa(User)) + return false; + auto UserInst = cast(User); + + if (!S->contains(UserInst)) { + DEBUG(dbgs() << " Reject because value is escaping\n"); + return false; + } + } + + return true; + } + + if (SAI->isPHIKind()) { + auto *MA = DefUse.getPHIRead(SAI); + assert(MA); + + // Mapping of an incoming block from before the SCoP is not supported by + // the code generator. + auto PHI = cast(MA->getAccessInstruction()); + for (auto Incoming : PHI->blocks()) { + if (!S->contains(Incoming)) { + DEBUG(dbgs() << " Reject because at least one incoming block is " + "not in the scop region\n"); + return false; + } + } + + return true; + } + + DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); + return false; + } + + /// Compute the uses of an MK_Value and its lifetime (from its definition to + /// the last use). + /// + /// @param SAI The ScopArrayInfo representing the value's storage. + /// + /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } + /// First element is the set of uses for each definition. + /// The second is the lifetime of each definition. + std::tuple, IslPtr> + computeValueUses(const ScopArrayInfo *SAI) { + assert(SAI->isValueKind()); + + // { DomainRead[] } + auto Reads = EmptyUnionSet; + + // Find all uses. 
+ for (auto *MA : DefUse.getValueUses(SAI)) + Reads = + give(isl_union_set_add_set(Reads.take(), getDomainFor(MA).take())); + + // { DomainRead[] -> Scatter[] } + auto ReadSchedule = getScatterFor(Reads); + + auto *DefMA = DefUse.getValueDef(SAI); + assert(DefMA); + + // { DomainDef[] } + auto Writes = getDomainFor(DefMA); + + // { DomainDef[] -> Scatter[] } + auto WriteScatter = getScatterFor(Writes); + + // { Scatter[] -> DomainDef[] } + auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); + + // { [DomainDef[] -> Scatter[]] -> DomainUse[] } + auto Uses = give( + isl_union_map_apply_range(isl_union_map_from_map(isl_map_range_map( + isl_map_reverse(ReachDef.take()))), + isl_union_map_reverse(ReadSchedule.take()))); + + // { DomainDef[] -> Scatter[] } + auto UseScatter = + singleton(give(isl_union_set_unwrap(isl_union_map_domain(Uses.copy()))), + give(isl_space_map_from_domain_and_range( + isl_set_get_space(Writes.keep()), ScatterSpace.copy()))); + + // { DomainDef[] -> Zone[] } + auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); + + // { DomainDef[] -> DomainRead[] } + auto DefUses = give(isl_union_map_domain_factor_domain(Uses.take())); + + return std::make_pair(DefUses, Lifetime); + } + + /// For each 'execution' of a PHINode, get the incoming block that was + /// executed before. + /// + /// For each PHI instance we can directly determine which was the incoming + /// block, and hence derive which value the PHI will have. + /// + /// @param SAI The ScopArrayInfo representing the PHI's storage. + /// + /// @return { DomainPHIRead[] -> DomainPHIWrite[] } + IslPtr computePerPHI(const ScopArrayInfo *SAI) { + assert(SAI->isPHIKind()); + + // { DomainPHIWrite[] -> Scatter[] } + auto PHIWriteScatter = EmptyUnionMap; + + // Collect all incoming block timepoint. 
+ for (auto *MA : DefUse.getPHIIncomings(SAI)) { + auto Scatter = getScatterFor(MA); + PHIWriteScatter = + give(isl_union_map_add_map(PHIWriteScatter.take(), Scatter.take())); + } + + // { DomainPHIRead[] -> Scatter[] } + auto PHIReadScatter = getScatterFor(DefUse.getPHIRead(SAI)); + + // { DomainPHIRead[] -> Scatter[] } + auto BeforeRead = beforeScatter(PHIReadScatter, true); + + // { Scatter[] } + auto WriteTimes = singleton( + give(isl_union_map_range(PHIWriteScatter.copy())), ScatterSpace); + + // { DomainPHIRead[] -> Scatter[] } + auto PHIWriteTimes = + give(isl_map_intersect_range(BeforeRead.take(), WriteTimes.take())); + auto LastPerPHIWrites = give(isl_map_lexmax(PHIWriteTimes.take())); + + // { DomainPHIRead[] -> DomainPHIWrite[] } + auto Result = give(isl_union_map_apply_range( + isl_union_map_from_map(LastPerPHIWrites.take()), + isl_union_map_reverse(PHIWriteScatter.take()))); + assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true); + assert(isl_union_map_is_injective(Result.keep()) == isl_bool_true); + return Result; + } + + /// Try to map a MK_Value to a given array element. + /// + /// @param SAI Representation of the scalar's memory to map. + /// @param TargetElt { Scatter[] -> Element[] } + /// Suggestion where to map a scalar to when at a timepoint. + /// + /// @return true if the scalar was successfully mapped. + bool tryMapValue(const ScopArrayInfo *SAI, IslPtr TargetElt) { + assert(SAI->isValueKind()); + + auto *DefMA = DefUse.getValueDef(SAI); + assert(DefMA->isValueKind()); + assert(DefMA->isMustWrite()); + auto *V = DefMA->getAccessValue(); + + // Stop if the scalar has already been mapped. + if (!DefMA->getLatestScopArrayInfo()->isValueKind()) + return false; + + // { DomainDef[] -> Scatter[] } + auto DefSched = getScatterFor(DefMA); + + // Where each write is mapped to, according to the suggestion. 
+ // { DomainDef[] -> Element[] } + auto DefTarget = give(isl_map_apply_domain( + TargetElt.copy(), isl_map_reverse(DefSched.copy()))); + simplify(DefTarget); + DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); + + auto OrigDomain = getDomainFor(DefMA); + auto MappedDomain = give(isl_map_domain(DefTarget.copy())); + if (!isl_set_is_subset(OrigDomain.keep(), MappedDomain.keep())) { + DEBUG(dbgs() + << " Reject because mapping does not encompass all instances\n"); + return false; + } + + // { DomainDef[] -> Zone[] } + IslPtr Lifetime; + + // { DomainDef[] -> DomainUse[] } + IslPtr DefUses; + + std::tie(DefUses, Lifetime) = computeValueUses(SAI); + simplify(Lifetime); + DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); + + // { DomainDef[] -> [Element[] -> Zone[]] } + auto EltLifetimeTranslator = + give(isl_map_range_product(DefTarget.copy(), Lifetime.copy())); + + // { DomainDef[] -> [Element[] -> Scatter[]] } + auto WrittenTranslator = + give(isl_map_range_product(DefTarget.copy(), DefSched.take())); + + // { DomainDef[] -> ValInst[] } + auto ValInst = makeValInst(V, DefMA->getStatement(), true); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetime = give( + isl_map_apply_domain(ValInst.copy(), EltLifetimeTranslator.take())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWriteAction = + give(isl_map_apply_domain(ValInst.copy(), WrittenTranslator.take())); + + Knowledge Proposed(std::move(EltLifetime), false, + std::move(EltWriteAction)); + if (isConflicting(Proposed)) + return false; + + // { DomainUse[] -> Element[] } + auto UseTarget = give( + isl_union_map_apply_range(isl_union_map_reverse(DefUses.take()), + isl_union_map_from_map(DefTarget.copy()))); + + mapValue(SAI, std::move(DefTarget), std::move(UseTarget), + std::move(Lifetime), std::move(Proposed)); + return true; + } + + /// After a scalar has been mapped, update the global knowledge. 
+ void applyLifetime(Knowledge Proposed) { + Zone.merge_inplace(std::move(Proposed)); + } + + /// Map a MK_Value scalar to an array element. + /// + /// Callers must have ensured that the mapping is valid and not conflicting. + /// + /// @param SAI The ScopArrayInfo representing the scalar's memory to + /// map. + /// @param DefTarget { DomainDef[] -> Element[] } + /// The array element to map the scalar to. + /// @param UseTarget { DomainUse[] -> Element[] } + /// The array elements the uses are mapped to. + /// @param Lifetime { DomainDef[] -> Zone[] } + /// The lifetime of each llvm::Value definition for + /// reporting. + /// @param Proposed Mapping constraints for reporting. + void mapValue(const ScopArrayInfo *SAI, IslPtr DefTarget, + IslPtr UseTarget, IslPtr Lifetime, + Knowledge Proposed) { + // Redirect the use accesses. + SmallVector SecondaryAccs; + for (auto *MA : DefUse.getValueUses(SAI)) { + // { DomainUse[] } + auto Domain = getDomainFor(MA); + + // { DomainUse[] -> Element[] } + auto NewAccRel = give(isl_union_map_intersect_domain( + UseTarget.copy(), isl_union_set_from_set(Domain.take()))); + simplify(NewAccRel); + + assert(isl_union_map_n_map(NewAccRel.keep()) == 1); + MA->setNewAccessRelation(isl_map_from_union_map(NewAccRel.take())); + SecondaryAccs.push_back(MA); + } + + auto *WA = DefUse.getValueDef(SAI); + WA->setNewAccessRelation(DefTarget.copy()); + applyLifetime(Proposed); + + MappedValueScalars++; + MapReports.emplace_back(SAI, WA, SecondaryAccs, DefTarget, Lifetime, + std::move(Proposed)); + } + + /// Get the all the statement instances of any statement for which there is at + /// least one instance in @p RelevantDomain. 
+ IslPtr + wholeStmtDomain(NonowningIslPtr RelevantDomain) { + auto Universe = EmptyUnionSet; + foreachElt(RelevantDomain, [&Universe](IslPtr Dom) { + auto Space = give(isl_set_get_space(Dom.keep())); + auto DomUniv = give(isl_set_universe(Space.take())); + Universe = give(isl_union_set_add_set(Universe.take(), DomUniv.take())); + }); + return give(isl_union_set_intersect(Universe.take(), + isl_union_map_domain(Schedule.copy()))); + } + + /// Try to find a 'natural' extension of a mapped to elements outside its + /// domain. + /// + /// @see ::expandMapping + IslPtr expandMapping(IslPtr Relevant) { + auto RelevantDomain = give(isl_union_map_domain(Relevant.copy())); + auto Universe = wholeStmtDomain(RelevantDomain); + return ::expandMapping(Relevant, Universe); + } + + /// Express the incoming values of a PHI for each incoming statement in an + /// isl_union_map. + /// + /// @param SAI The PHI scalar represented by a ScopArrayInfo. + /// + /// @return { PHIWriteDomain[] -> ValInst[] } + IslPtr determinePHIWrittenValues(const ScopArrayInfo *SAI) { + auto Result = EmptyUnionMap; + + // Collect the incoming values. + for (auto *MA : DefUse.getPHIIncomings(SAI)) { + // { DomainWrite[] -> ValInst[] } + IslPtr ValInst; + auto *WriteStmt = MA->getStatement(); + + auto Incoming = MA->getIncoming(); + assert(!Incoming.empty()); + if (Incoming.size() == 1) { + ValInst = makeValInst(Incoming[0].second, WriteStmt, true); + } else { + // If the PHI is in a subregion's exit node it can have multiple + // incoming values (+ maybe another incoming edge from an unrelated + // block). Since we cannot directly represent it as a single + // llvm::Value from multiple exiting block, it is represented using + // the PHI itself. + // We currently model it as unknown value, but modeling as the PHIInst + // itself could be OK, too. 
+ ValInst = makeUnknownForDomain(WriteStmt); + } + + Result = give(isl_union_map_add_map(Result.take(), ValInst.take())); + } + + assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true && + "Cannot have multiple incoming values for same incoming statement"); + return Result; + } + + /// Try to map a MK_PHI scalar to a given array element. + /// + /// @param SAI Representation of the scalar's memory to map. + /// @param TargetElt { Scatter[] -> Element[] } + /// Suggestion where to map the scalar to when at a + /// timepoint. + /// + /// @return true if the PHI scalar has been mapped. + bool tryMapPHI(const ScopArrayInfo *SAI, IslPtr TargetElt) { + auto *PHIRead = DefUse.getPHIRead(SAI); + assert(PHIRead->isPHIKind()); + assert(PHIRead->isRead()); + + // Skip if already been mapped. + if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) + return false; + + // { DomainRead[] -> Scatter[] } + auto PHISched = getScatterFor(PHIRead); + + // { DomainRead[] -> Element[] } + auto PHITarget = + give(isl_map_apply_range(PHISched.copy(), TargetElt.copy())); + simplify(PHITarget); + DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); + + auto OrigDomain = getDomainFor(PHIRead); + auto MappedDomain = give(isl_map_domain(PHITarget.copy())); + if (!isl_set_is_subset(OrigDomain.keep(), MappedDomain.keep())) { + DEBUG(dbgs() + << " Reject because mapping does not encompass all instances\n"); + return false; + } + + // { DomainRead[] -> DomainWrite[] } + auto PerPHIWrites = computePerPHI(SAI); + + // { DomainWrite[] -> Element[] } + auto RelevantWritesTarget = + give(isl_union_map_reverse(isl_union_map_apply_domain( + PerPHIWrites.copy(), isl_union_map_from_map(PHITarget.copy())))); + + simplify(RelevantWritesTarget); + auto ExpandedTargetWrites = expandMapping(RelevantWritesTarget); + + // { DomainWrite[] } + auto ExpandedWritesDom = + give(isl_union_map_domain(ExpandedTargetWrites.copy())); + auto UniverseWritesDom = give(isl_union_set_empty(ParamSpace.copy())); + + 
for (auto *MA : DefUse.getPHIIncomings(SAI)) + UniverseWritesDom = give(isl_union_set_add_set(UniverseWritesDom.take(), + getDomainFor(MA).take())); + + if (!isl_union_set_is_subset(UniverseWritesDom.keep(), + ExpandedWritesDom.keep())) { + DEBUG(dbgs() << " Reject because did not find PHI write mapping for " + "all instances\n"); + DEBUG(dbgs() << " Relevant mapping: " << RelevantWritesTarget + << '\n'); + DEBUG(dbgs() << " Extrapolated mapping: " << ExpandedTargetWrites + << '\n'); + DEBUG(dbgs() << " Missing instances: " + << give(isl_union_set_subtract(UniverseWritesDom.copy(), + ExpandedWritesDom.copy())) + << '\n'); + return false; + } + + // { DomainRead[] -> Scatter[] } + auto PerPHIWriteScatter = give(isl_map_from_union_map( + isl_union_map_apply_range(PerPHIWrites.copy(), Schedule.copy()))); + + // { DomainRead[] -> Zone[] } + auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); + simplify(Lifetime); + DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); + + // { DomainWrite[] -> Zone[] } + auto WriteLifetime = give(isl_union_map_apply_domain( + isl_union_map_from_map(Lifetime.copy()), PerPHIWrites.copy())); + + // { DomainWrite[] -> ValInst[] } + auto WrittenValue = determinePHIWrittenValues(SAI); + + // { DomainWrite[] -> [Element[] -> Scatter[]] } + auto WrittenTranslator = give(isl_union_map_range_product( + ExpandedTargetWrites.copy(), Schedule.copy())); + + // { DomainWrite[] -> [Element[], Zone[]] } + auto LifetimeTranslator = give(isl_union_map_range_product( + ExpandedTargetWrites.copy(), WriteLifetime.take())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetimeInst = give(isl_union_map_apply_domain( + WrittenValue.copy(), LifetimeTranslator.take())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWritten = give(isl_union_map_apply_domain( + WrittenValue.copy(), WrittenTranslator.take())); + + Knowledge Proposed(EltLifetimeInst, false, EltWritten); + if (isConflicting(Proposed)) + return false; + + 
mapPHI(SAI, std::move(PHITarget), std::move(ExpandedTargetWrites), + std::move(Lifetime), std::move(Proposed)); + return true; + } + + /// Map an MK_PHI scalar to an array element. + /// + /// Callers must have ensured that the mapping is valid and not conflicting + /// with the common knowledge. + /// + /// @param SAI The ScopArrayInfo representing the scalar's memory to + /// map. + /// @param ReadTarget { DomainRead[] -> Element[] } + /// The array element to map the scalar to. + /// @param WriteTarget { DomainWrite[] -> Element[] } + /// New access target for each PHI incoming write. + /// @param Lifetime { DomainRead[] -> Zone[] } + /// The lifetime of each PHI for reporting. + /// @param Proposed Mapping constraints for reporting. + void mapPHI(const ScopArrayInfo *SAI, IslPtr ReadTarget, + IslPtr WriteTarget, IslPtr Lifetime, + Knowledge Proposed) { + // Redirect the PHI incoming writes. + SmallVector SecondaryAccs; + for (auto *MA : DefUse.getPHIIncomings(SAI)) { + // { DomainWrite[] } + auto Domain = getDomainFor(MA); + + // { DomainWrite[] -> Element[] } + auto NewAccRel = give(isl_union_map_intersect_domain( + WriteTarget.copy(), isl_union_set_from_set(Domain.take()))); + simplify(NewAccRel); + + assert(isl_union_map_n_map(NewAccRel.keep()) == 1); + MA->setNewAccessRelation(isl_map_from_union_map(NewAccRel.take())); + SecondaryAccs.push_back(MA); + } + + // Redirect the PHI read. + auto *PHIRead = DefUse.getPHIRead(SAI); + PHIRead->setNewAccessRelation(ReadTarget.copy()); + applyLifetime(Proposed); + + MappedPHIScalars++; + MapReports.emplace_back(SAI, PHIRead, SecondaryAccs, std::move(ReadTarget), + std::move(Lifetime), std::move(Proposed)); + } + + /// Search and map scalars to memory overwritten by @p TargetStoreMA. + /// + /// Start trying to map scalars that are used in the same statement as the + /// store. For every successful mapping, try to also map scalars of the + /// statements where those are written. 
Repeat, until no more mapping + /// opportunity is found. + /// + /// There is currently no preference in which order scalars are tried. + /// Ideally, we would direct it towards a load instruction of the same array + /// element. + bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { + assert(TargetStoreMA->isLatestArrayKind()); + assert(TargetStoreMA->isMustWrite()); + + auto TargetStmt = TargetStoreMA->getStatement(); + + // { DomTarget[] } + auto TargetDom = getDomainFor(TargetStmt); + + // { DomTarget[] -> Element[] } + auto TargetAccRel = getAccessRelationFor(TargetStoreMA); + + // { Zone[] -> DomTarget[] } + // For each point in time, find the next target store instance. + auto Target = + computeScalarReachingOverwrite(Schedule, TargetDom, false, true); + + // { Zone[] -> Element[] } + // Use the target store's write location as a suggestion to map scalars to. + auto EltTarget = + give(isl_map_apply_range(Target.take(), TargetAccRel.take())); + simplify(EltTarget); + DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); + + // Stack of elements no yet processed. + SmallVector Worklist; + + // Set of scalars already tested. + SmallPtrSet Closed; + + // Lambda to add all scalar reads to the work list. + auto ProcessAllIncoming = [&](ScopStmt *Stmt) { + for (auto *MA : *Stmt) { + if (!MA->isLatestScalarKind()) + continue; + if (!MA->isRead()) + continue; + + Worklist.push_back(MA); + } + }; + + // Add initial scalar. Either the value written by the store, or all inputs + // of its statement. 
+ auto WrittenVal = TargetStoreMA->getAccessValue(); + if (auto InputAcc = getInputAccessOf(WrittenVal, TargetStmt)) + Worklist.push_back(InputAcc); + else + ProcessAllIncoming(TargetStmt); + + auto AnyMapped = false; + auto &DL = + S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); + auto StoreSize = + DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); + + while (!Worklist.empty()) { + auto *MA = Worklist.pop_back_val(); + + auto *SAI = MA->getScopArrayInfo(); + if (Closed.count(SAI)) + continue; + Closed.insert(SAI); + DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI + << ")\n"); + + // Skip non-mappable scalars. + if (!isMappable(SAI)) + continue; + + auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); + if (MASize > StoreSize) { + DEBUG(dbgs() << " Reject because storage size is insufficient\n"); + continue; + } + + // Try to map MK_Value scalars. + if (SAI->isValueKind()) { + if (!tryMapValue(SAI, EltTarget)) + continue; + + auto *DefAcc = DefUse.getValueDef(SAI); + ProcessAllIncoming(DefAcc->getStatement()); + + AnyMapped = true; + continue; + } + + // Try to map MK_PHI scalars. + if (SAI->isPHIKind()) { + if (!tryMapPHI(SAI, EltTarget)) + continue; + // Add inputs of all incoming statements to the worklist. + for (auto *PHIWrite : DefUse.getPHIIncomings(SAI)) + ProcessAllIncoming(PHIWrite->getStatement()); + + AnyMapped = true; + continue; + } + } + + if (AnyMapped) + TargetsMapped++; + return AnyMapped; + } + + /// Print the knowledge before any transformation has been applied to @p OS. + void printBefore(llvm::raw_ostream &OS, int Indent = 0) { + OS.indent(Indent) << "Original knowledge {\n"; + OriginalZone.print(OS, Indent + 4); + OS.indent(Indent) << "}\n"; + } + + /// Print the report about all executions transformations to @p OS. 
+ void printMappedScalars(llvm::raw_ostream &OS, int Indent = 0) { + OS.indent(Indent) << "Mapped scalars {\n"; + for (auto &Report : MapReports) + Report.print(OS, Indent + 4); + OS.indent(Indent) << "}\n"; + } + + /// Print the knowledge from after transformations have been applied to @p OS. + void printAfter(llvm::raw_ostream &OS, int Indent = 0) { + OS.indent(Indent) << "After knowledge {\n"; + Zone.print(OS, Indent + 4); + OS.indent(Indent) << "}\n"; + } + + /// Compute when an array element is alive (Its value will be read in the + /// future) and its value at that time. + /// + /// @return { [Element[] -> Zone[]] -> ValInst[] } + IslPtr computeLifetime() const { + // { [Element[] -> Zone[]] } + auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, + false, false, true); + + // { [Element[] -> Zone[]] } + auto UnusedZone = give(isl_union_map_wrap(ArrayUnused.take())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto UnusedUndef = give(isl_union_map_from_domain_and_range( + UnusedZone.copy(), makeUndefUSet().take())); + + // { Element[] -> Zone[] } + auto UniverseZone = give(isl_union_map_from_domain_and_range( + AllElements.copy(), + isl_union_set_from_set(isl_set_universe(ScatterSpace.copy())))); + + // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] } + auto EltReachdDef = give(isl_union_map_range_product( + isl_union_map_domain_map(UniverseZone.take()), + WriteReachDefZone.copy())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetime = give(isl_union_map_apply_domain( + AllWriteValInst.copy(), isl_union_map_reverse(EltReachdDef.take()))); + + // Remove the zones that are guaranteed to be overwritten - they do not + // belong to a lifetime. 
+ EltLifetime = give( + isl_union_map_subtract_domain(EltLifetime.take(), UnusedZone.take())); + EltLifetime = + give(isl_union_map_union(EltLifetime.take(), UnusedUndef.take())); + EltLifetime = removeUnknownValInst(EltLifetime); + + // TODO: If EltLifetime at a point maps to two (non-undef) values, replace + // by unknown. + simplify(EltLifetime); + return EltLifetime; + } + + /// Determine when an array element is written to, and which value instance is + /// written. + /// + /// @return { [Element[] -> Scatter[]] -> ValInst[] } + IslPtr computeWritten() const { + // { Element[] -> Element[] } + auto AllElementsId = makeIdentityMap(AllElements, false); + + // { [Element[] -> DomainWrite[]] -> [Element[] -> Scatter[]] } + auto EltWriteTranslator = + give(isl_union_map_product(AllElementsId.take(), Schedule.copy())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWritten = give(isl_union_map_apply_domain( + AllWriteValInst.copy(), EltWriteTranslator.take())); + + simplify(EltWritten); + return EltWritten; + } + +public: + DeLICMImpl(Scop *S) : ZoneAlgorithm(S) {} + + /// Calculate the lifetime (definition to last use) of every array element. + /// + /// @return True if the computed lifetimes (#Zone) is usable. + bool computeZone() { + // Check that nothing strange occurs. 
+ if (!isCompatibleScop()) { + DeLICMIncompatible++; + return false; + } + + DefUse.compute(S); + IslPtr EltLifetime, EltWritten; + + { + IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); + + computeCommon(); + + EltLifetime = computeLifetime(); + EltWritten = computeWritten(); + } + + if (isl_ctx_last_error(IslCtx.get()) == isl_error_quota) { + DeLICMOutOfQuota++; + DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); + } + + DeLICMAnalyzed++; + OriginalZone = + Knowledge(std::move(EltLifetime), true, std::move(EltWritten)); + DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); + + Zone = OriginalZone; + + return DelicmMaxOps == 0 || Zone.isUsable(); + } + + /// Try to map as many scalars to unused array elements as possible. + /// + /// Multiple scalars might be mappable to intersecting unused array element + /// zones, but we can only chose one. This is a greedy algorithm, therefore + /// the first processed will claim it. + void greedyCollapse() { + bool Modified = false; + + for (auto &Stmt : *S) { + for (auto *MA : Stmt) { + if (!MA->isLatestArrayKind()) + continue; + if (!MA->isWrite()) + continue; + + if (MA->isMayWrite()) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it is a MAY_WRITE\n"); + continue; + } + + if (Stmt.getNumIterators() == 0) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it is not in a loop\n"); + continue; + } + + if (isScalarAccess(getAccessRelationFor(MA))) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it writes only a single element\n"); + continue; + } + + DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); + if (collapseScalarsToStore(MA)) + Modified = true; + } + } + + if (Modified) + DeLICMScopsModified++; + } + + /// Dump the internal information about a performed DeLICM to @p OS. 
+ void print(llvm::raw_ostream &OS, int indent = 0) { + printBefore(OS, indent); + printMappedScalars(OS, indent); + printAfter(OS, indent); + printAccesses(OS, indent); + } +}; class DeLICM : public ScopPass { private: DeLICM(const DeLICM &) = delete; const DeLICM &operator=(const DeLICM &) = delete; + /// The pass implementation, also holding per-scop data. + std::unique_ptr Impl; + + void collapseToUnused(Scop &S) { + Impl = make_unique(&S); + + if (!Impl->computeZone()) { + DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); + return; + } + + DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); + Impl->greedyCollapse(); + + DEBUG(dbgs() << "\nFinal Scop:\n"); + DEBUG(S.print(dbgs())); + } + public: static char ID; explicit DeLICM() : ScopPass(ID) {} @@ -43,19 +2732,21 @@ // Free resources for previous scop's computation, if not yet done. releaseMemory(); - // TODO: Run DeLICM algorithm + collapseToUnused(S); return false; } virtual void printScop(raw_ostream &OS, Scop &S) const override { + if (!Impl) + return; + assert(Impl->getScop() == &S); + OS << "DeLICM result:\n"; - // TODO: Print analysis results and performed transformation details + Impl->print(OS); } - virtual void releaseMemory() override { - // TODO: Release resources (eg. shared_ptr to isl_ctx) - } + virtual void releaseMemory() override { Impl.reset(); } }; char DeLICM::ID; @@ -68,3 +2759,233 @@ INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, false) + +IslPtr +polly::computeReachingDefinition(IslPtr Schedule, + IslPtr Writes, bool InclDef, + bool InclRedef) { + // { Scatter[] } + auto ScatterSpace = getScatterSpace(Schedule); + + // { Element[] -> ScatterWrite[] } + auto DefSched = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.take())); + + // { ScatterRead[] -> ScatterWrite[] } + auto Before = give(InclRedef ? 
isl_map_lex_gt(ScatterSpace.take()) + : isl_map_lex_ge(ScatterSpace.take())); + + // { ScatterWrite[] -> [ScatterRead[] -> ScatterWrite[]] } + auto BeforeMap = give(isl_map_reverse(isl_map_range_map(Before.take()))); + + // { Element[] -> [ScatterUse[] -> ScatterWrite[]] } + auto DefSchedBefore = + give(isl_union_map_apply_domain(isl_union_map_from_map(BeforeMap.take()), + isl_union_map_reverse(DefSched.copy()))); + + // For each element, at every point in time, map to the times of previous + // definitions. + // { [Element[] -> ScatterRead[]] -> ScatterWrite[] } + auto ReachableDefs = give(isl_union_map_uncurry(DefSchedBefore.take())); + auto LastReachableDef = give(isl_union_map_lexmax(ReachableDefs.copy())); + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(DefSched.take())); + + if (InclDef && InclRedef) { + // Add the Def itself to the solution. + LastReachableDef = + give(isl_union_map_union(LastReachableDef.take(), SelfUse.take())); + LastReachableDef = give(isl_union_map_coalesce(LastReachableDef.take())); + } else if (!InclDef && !InclRedef) { + // Remove Def itself from the solution. + LastReachableDef = + give(isl_union_map_subtract(LastReachableDef.take(), SelfUse.take())); + } + + // { [Element[] -> ScatterRead[]] -> Domain[] } + return give(isl_union_map_apply_range( + LastReachableDef.take(), isl_union_map_reverse(Schedule.take()))); +} + +IslPtr +polly::computeArrayLifetime(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, bool ReadEltInSameInst, + bool InclWrite, bool InclLastRead, bool ExitReads) { + IslPtr ExitRays; + if (ExitReads) { + // Add all writes that are never overwritten as rays. 
+ + // { Element[] } + auto WriteElements = give(isl_union_map_range(Writes.copy())); + + // { DomainWrite[] -> Scatter[] } + auto WriteSched = give(isl_union_map_intersect_domain( + Schedule.copy(), isl_union_map_domain(Writes.copy()))); + + // { Element[] -> Scatter[] } + auto WriteActions = give(isl_union_map_apply_range( + isl_union_map_reverse(Writes.copy()), Schedule.copy())); + auto LastWrites = give(isl_union_map_lexmax(WriteActions.take())); + + // { [Element[] -> Scatter[]] -> Zone[] } + auto AfterLastWrite = afterScatter( + give(isl_union_map_range_map(LastWrites.take())), !InclWrite); + + // { [Element[] -> DomainWrite[]] -> Zone[] } + ExitRays = give(isl_union_map_apply_domain( + AfterLastWrite.take(), + isl_union_map_product(makeIdentityMap(WriteElements, false).take(), + isl_union_map_reverse(WriteSched.take())))); + } + + // { [Element[] -> DomainWrite[] -> Scatter[] } + auto Defs = give(isl_union_map_apply_range( + isl_union_map_range_map(isl_union_map_reverse(Writes.copy())), + Schedule.copy())); + + // { [Element[] -> Zone[]] -> DomainWrite[] } + auto ReachDef = computeReachingDefinition(Schedule, Writes, ReadEltInSameInst, + !ReadEltInSameInst); + + // { Element[] -> Scatter[] } + auto ReadActions = + give(isl_union_map_apply_domain(Schedule.take(), Reads.take())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto WhatIsItReading = give(isl_union_map_intersect_domain( + ReachDef.take(), isl_union_map_wrap(ReadActions.take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto Uses = give(isl_union_map_reverse( + isl_union_map_curry(reverseDomain(WhatIsItReading).take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto Result = betweenScatter(Defs, Uses, InclWrite, InclLastRead); + + if (ExitRays) + Result = give(isl_union_map_union(Result.take(), ExitRays.take())); + + return Result; +} + +IslPtr +polly::computeReachingOverwrite(IslPtr Schedule, + IslPtr Writes, + bool InclPrevWrite, bool IncludeOverwrite) { + 
assert(isl_union_map_is_bijective(Schedule.keep()) != isl_bool_false); + + // { Scatter[] } + auto ScatterSpace = getScatterSpace(Schedule); + + // { Element[] -> ScatterWrite[] } + auto WriteAction = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.take())); + + // { ScatterWrite[] -> Element[] } + auto WriteActionRev = give(isl_union_map_reverse(WriteAction.copy())); + + // { ScatterRead[] -> ScatterWrite[] } + auto After = give(InclPrevWrite ? isl_map_lex_lt(ScatterSpace.take()) + : isl_map_lex_le(ScatterSpace.take())); + + // { ScatterWrite[] -> [ScatterRead[] -> ScatterWrite[]] } + auto BeforeMap = give(isl_map_reverse(isl_map_range_map(After.take()))); + + // { Element[] -> [ScatterRead[] -> ScatterWrite[]] } + auto DefSchedBefore = give(isl_union_map_apply_domain( + isl_union_map_from_map(BeforeMap.take()), WriteActionRev.take())); + + // For each element, at every point in time, map to the times of previous + // definitions. + // { [Element[] -> ScatterRead[]] -> ScatterWrite[] } + auto ReachableDefs = give(isl_union_map_uncurry(DefSchedBefore.take())); + auto LastReachableDef = give(isl_union_map_lexmin(ReachableDefs.take())); + + if (InclPrevWrite && IncludeOverwrite) { + // Add the def itself to the solution + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(WriteAction.take())); + LastReachableDef = + give(isl_union_map_union(LastReachableDef.take(), SelfUse.take())); + LastReachableDef = give(isl_union_map_coalesce(LastReachableDef.take())); + } else if (!InclPrevWrite && !IncludeOverwrite) { + // Remove def itself from the solution + + // { [Element[] -> ScatterWrite[]] -> ScatterWrite[] } + auto SelfUse = give(isl_union_map_range_map(WriteAction.take())); + LastReachableDef = + give(isl_union_map_subtract(LastReachableDef.take(), SelfUse.take())); + } + + // { [Element[] -> ScatterRead[]] -> Domain[] } + auto LastReachableDefDomain = give(isl_union_map_apply_range( + 
LastReachableDef.take(), isl_union_map_reverse(Schedule.take()))); + + return LastReachableDefDomain; +} + +IslPtr polly::computeArrayUnused(IslPtr Schedule, + IslPtr Writes, + IslPtr Reads, + bool ReadEltInSameInst, + bool IncludeLastRead, + bool IncludeWrite) { + // { Element[] -> Scatter[] } + auto ReadActions = + give(isl_union_map_apply_domain(Schedule.copy(), Reads.take())); + auto WriteActions = + give(isl_union_map_apply_domain(Schedule.copy(), Writes.copy())); + + // { [Element[] -> Scatter[] } + auto AfterReads = afterScatter(ReadActions, ReadEltInSameInst); + auto WritesBeforeAnyReads = + give(isl_union_map_subtract(WriteActions.take(), AfterReads.take())); + auto BeforeWritesBeforeAnyReads = + beforeScatter(WritesBeforeAnyReads, !IncludeWrite); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto EltDomWrites = give(isl_union_map_apply_range( + isl_union_map_range_map(isl_union_map_reverse(Writes.copy())), + Schedule.copy())); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReachingOverwrite = computeReachingOverwrite( + Schedule, Writes, ReadEltInSameInst, !ReadEltInSameInst); + + // { [Element[] -> Scatter[]] -> DomainWrite[] } + auto ReadsOverwritten = give(isl_union_map_intersect_domain( + ReachingOverwrite.take(), isl_union_map_wrap(ReadActions.take()))); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto ReadsOverwrittenRotated = give(isl_union_map_reverse( + isl_union_map_curry(reverseDomain(ReadsOverwritten).take()))); + auto LastOverwrittenRead = + give(isl_union_map_lexmax(ReadsOverwrittenRotated.take())); + + // { [Element[] -> DomainWrite[]] -> Scatter[] } + auto BetweenLastReadOverwrite = betweenScatter( + LastOverwrittenRead, EltDomWrites, IncludeLastRead, IncludeWrite); + + return give(isl_union_map_union( + BeforeWritesBeforeAnyReads.take(), + isl_union_map_domain_factor_domain(BetweenLastReadOverwrite.take()))); +} + +bool polly::isConflicting(IslPtr ExistingLifetime, + bool ExistingImplicitLifetimeIsUnknown, 
+ IslPtr ExistingWritten, + IslPtr ProposedLifetime, + bool ProposedImplicitLifetimeIsUnknown, + IslPtr ProposedWritten) { + Knowledge Existing(std::move(ExistingLifetime), + ExistingImplicitLifetimeIsUnknown, + std::move(ExistingWritten)); + Knowledge Proposed(std::move(ProposedLifetime), + ProposedImplicitLifetimeIsUnknown, + std::move(ProposedWritten)); + + return Knowledge::isConflicting(Existing, Proposed); +} Index: test/DeLICM/gemm_complete.ll =================================================================== --- /dev/null +++ test/DeLICM/gemm_complete.ll @@ -0,0 +1,192 @@ +; RUN: opt %loadPolly -basicaa -loop-rotate -licm -polly-delicm -analyze < %s | FileCheck %s +; +; dgemm kernel +; C := alpha*A*B + beta*C +; C[ni][nj] +; A[ni][nk] +; B[nk][nj] + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" + +define void @gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, double* noalias nonnull %C, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %ni.for + +ni.for: + %i = phi i32 [0, %entry], [%i.inc, %ni.inc] + %i.cmp = icmp slt i32 %i, %ni + br i1 %i.cmp, label %nj.for, label %ni.exit + + nj.for: + %j = phi i32 [0, %ni.for], [%j.inc, %nj.inc] + %j.cmp = icmp slt i32 %j, %nj + br i1 %j.cmp, label %nj_beta, label %nj.exit + + nj_beta: + %c_stride = mul nsw i32 %i, %nj + %c_idx_i = getelementptr inbounds double, double* %C, i32 %c_stride + %c_idx_ij = getelementptr inbounds double, double* %c_idx_i, i32 %j + + ; C[i][j] *= beta + %c = load double, double* %c_idx_ij + %c_beta = fmul double %c, %beta + store double %c_beta, double* %c_idx_ij + + br label %nk.for + + nk.for: + %k = phi i32 [0, %nj_beta], [%k.inc, %nk.inc] + %k.cmp = icmp slt i32 %k, %nk + br i1 %k.cmp, label %nk_alpha, label %nk.exit + + nk_alpha: + %a_stride = mul nsw i32 %i, %nk + %a_idx_i = getelementptr inbounds double, double* %A, i32 %a_stride + %a_idx_ik = getelementptr inbounds double, double* %a_idx_i, i32 %k + + %b_stride = mul nsw 
i32 %k, %nj + %b_idx_k = getelementptr inbounds double, double* %B, i32 %b_stride + %b_idx_kj = getelementptr inbounds double, double* %b_idx_k, i32 %j + + ; C[i][j] += alpha * A[i][k] * B[k][j] + %a = load double, double* %a_idx_ik + %b = load double, double* %b_idx_kj + %beta_c = load double, double* %c_idx_ij + + %alpha_a = fmul double %a, %alpha + %alpha_a_b = fmul double %alpha_a, %b + %beta_c_alpha_a_b = fadd double %beta_c, %alpha_a_b + + store double %beta_c_alpha_a_b, double* %c_idx_ij + + br label %nk.inc + + nk.inc: + %k.inc = add nuw nsw i32 %k, 1 + br label %nk.for + + nk.exit: + br label %nj.inc + + nj.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %nj.for + + nj.exit: + br label %ni.inc + +ni.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %ni.for + +ni.exit: + br label %return + +return: + ret void +} + + +; CHECK: Original knowledge { +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 >= 4; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and 
i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i4 >= 4; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> Undef[] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> Undef[] : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, 
i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 > 0; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i4 > 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i6 > 0 } + Unknown +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni 
and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj } +; CHECK-NEXT: } +; CHECK: Mapped scalars { +; CHECK-NEXT: Mapping of MemRef_beta_c_alpha_a_b_lcssa__phi { +; CHECK-NEXT: Primary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_beta_c_alpha_a_b_lcssa__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b_lcssa__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Target: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj } +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 3, o3, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o3 < 0; Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 2, o3, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o3 >= nk; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 
3, o3, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o3 < 0; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 2, o3, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o3 >= nk; Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 3, 0, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o4 <= 0; Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> [i0, i1, 2, -1 + nk, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o4 >= 2; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 3, 0, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o4 <= 0; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> [0, i1, 2, -1 + nk, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o4 >= 2 } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 >= nk; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 2, -1 + nk, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 >= 2; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 >= nk; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 2, -1 + nk, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} 
: nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 >= 2 } + Undef +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } +; CHECK-NEXT: } +; CHECK-NEXT: Mapping of MemRef_beta_c_alpha_a_b { +; CHECK-NEXT: Primary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_beta_c_alpha_a_b[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Target: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk } +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 2, i2, 1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> [0, i1, 2, i2, 1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> 
[0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } + Undef +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } +; CHECK-NEXT: } +; CHECK-NEXT: Mapping of MemRef_beta_c_alpha_a_b3__phi { +; CHECK-NEXT: Primary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_alpha_lr_ph[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Target: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 
0 <= i1 < nj and 0 <= i2 < nk } +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, 0] -> [i0, i1, 2, o3, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o3 < 0; Stmt_nk_alpha[i0, i1, 0] -> [i0, i1, 1, o3, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o3 > 0; Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 2, i2, o4] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk and o4 <= 0; Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 2, -1 + i2, o4] : 0 <= i0 < ni and 0 <= i1 < nj and 0 < i2 < nk and o4 >= 2; Stmt_nk_alpha[0, i1, 0] -> [0, i1, 2, o3, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o3 < 0; Stmt_nk_alpha[0, i1, 0] -> [0, i1, 1, o3, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o3 > 0; Stmt_nk_alpha[0, i1, i2] -> [0, i1, 2, i2, o4] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk and o4 <= 0; Stmt_nk_alpha[i0, i1, 0] -> [i0, i1, 1, 0, o4] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and o4 > 0; Stmt_nk_alpha[0, i1, i2] -> [0, i1, 2, -1 + i2, o4] : ni <= 0 and 0 <= i1 < nj and 0 < i2 < nk and o4 >= 2; Stmt_nk_alpha[0, i1, 0] -> [0, i1, 1, 0, o4] : nk > 0 and ni <= 0 and 0 <= i1 < nj and o4 > 0 } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 <= -2 + nk and i6 >= 2; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 < i5 < nk and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 <= -2 + nk and i6 >= 2; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 < i5 < nk and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 
<= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[i0, i1] -> [i0, i1, 2, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 2, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0 } + Undef +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 1, 0, 0{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 1, 0, 0{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK: After knowledge { +; CHECK-NEXT: Lifetime: [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 
i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[i0, i1] -> [i0, i1, 2, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 2, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 
< nj and i4 >= 4; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i4 >= 4; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i2 > i0; [MemRef_C[i0, i1] -> [i0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i3 > i1; [MemRef_C[0, i1] -> [i2, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i2 > 0; [MemRef_C[i0, i1] -> [i0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and 0 <= i0 < ni and 0 <= i1 < nj and i4 > 0; [MemRef_C[0, i1] -> [0, i3, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i3 > i1; [MemRef_C[i0, i1] -> [i0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; 
[MemRef_C[i0, i1] -> [i0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, i4, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk <= 0 and ni <= 0 and 0 <= i1 < nj and i4 > 0; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and i6 > 0; [MemRef_C[0, i1] -> [0, i1, 1, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 0, i5, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i5 > 0; [MemRef_C[0, i1] -> [0, i1, 1, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 0, 0, i6{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and i6 > 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 <= -2 + nk and i6 >= 2; [MemRef_C[i0, i1] -> [i0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 < 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i5 >= nk; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 < i5 < nk and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, 
i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 <= -2 + nk and i6 >= 2; [MemRef_C[i0, i1] -> [i0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 <= 0; [MemRef_C[i0, i1] -> [i0, i1, 2, -1 + nk, i6{{\]\]}} -> [Stmt_nk_alpha[i0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj and i6 >= 2; [MemRef_C[0, i1] -> [0, i1, 3, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 < 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i5 >= nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 < i5 < nk and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 3, 0, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 <= 0; [MemRef_C[0, i1] -> [0, i1, 2, -1 + nk, i6{{\]\]}} -> [Stmt_nk_alpha[0, i1, -1 + nk] -> Val_beta_c_alpha_a_b[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj and i6 >= 2 } + Unknown +; CHECK-NEXT: Written : [nj, nk, ni] -> { [MemRef_C[i0, i1] -> [i0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 3, 0, 0{{\]\]}} -> [Stmt_nk_for_nk_exit_crit_edge[0, i1] -> Val_beta_c_alpha_a_b_lcssa[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni 
and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[i0, i1] -> [i0, i1, 2, i5, 0{{\]\]}} -> [Stmt_nk_alpha[i0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[0, i1] -> [0, i1, 2, i5, 1{{\]\]}} -> [Stmt_nk_alpha[0, i1, i5] -> Val_beta_c_alpha_a_b[{{\]\]}} : ni <= 0 and 0 <= i1 < nj and 0 <= i5 < nk; [MemRef_C[i0, i1] -> [i0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[i0, i1] -> Val_c_beta[{{\]\]}} : 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 0, 0, 0{{\]\]}} -> [Stmt_nj_beta[0, i1] -> Val_c_beta[{{\]\]}} : ni <= 0 and 0 <= i1 < nj; [MemRef_C[i0, i1] -> [i0, i1, 1, 0, 0{{\]\]}} -> [Stmt_nk_alpha_lr_ph[i0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; [MemRef_C[0, i1] -> [0, i1, 1, 0, 0{{\]\]}} -> [Stmt_nk_alpha_lr_ph[0, i1] -> Val_c_idx_ij_promoted[{{\]\]}} : nk > 0 and ni <= 0 and 0 <= i1 < nj } +; CHECK-NEXT: } +; CHECK: After accesses { +; CHECK-NEXT: Stmt_nj_beta +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nj_beta[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nj_beta[i0, i1] -> MemRef_beta[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nj_beta[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: Stmt_nk_alpha_lr_ph +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha_lr_ph[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 
<= i0 < ni and 0 <= i1 < nj; Stmt_nk_alpha_lr_ph[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj }; +; CHECK-NEXT: Stmt_nk_alpha +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_beta_c_alpha_a_b[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_alpha[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_A[i0, i2] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_B[i2, i1] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_alpha[i0, i1, i2] -> MemRef_alpha[] }; +; CHECK-NEXT: Stmt_nk_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b3__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] 
: 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_beta_c_alpha_a_b_lcssa__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_inc[i0, i1, i2] -> MemRef_C[i0, i1] : 0 <= i0 < ni and 0 <= i1 < nj and 0 <= i2 < nk; Stmt_nk_inc[0, i1, i2] -> MemRef_C[0, i1] : ni <= 0 and 0 <= i1 < nj and 0 <= i2 < nk }; +; CHECK-NEXT: Stmt_nk_for_nk_exit_crit_edge +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_beta_c_alpha_a_b_lcssa__phi[] }; +; CHECK-NEXT: new: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] : nk > 0 and 0 <= i0 < ni and 0 <= i1 < nj; Stmt_nk_for_nk_exit_crit_edge[0, i1] -> MemRef_C[0, i1] : nk > 0 and ni <= 0 and 0 <= i1 < nj }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [nj, nk, ni] -> { Stmt_nk_for_nk_exit_crit_edge[i0, i1] -> MemRef_C[i0, i1] }; +; CHECK-NEXT: } Index: test/DeLICM/incomplete_phi.ll =================================================================== --- /dev/null +++ test/DeLICM/incomplete_phi.ll @@ -0,0 +1,211 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -polly-scops -analyze < %s | FileCheck %s +; +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" + +define void @func(double* noalias nonnull %C) { +entry: + br label %outer.for + + + outer.for: + %i = phi i32 [0, %entry], [%i.inc, %outer.inc] + %i.cmp = icmp slt i32 %i, 3 + br i1 %i.cmp, label %outer.body, label %outer.exit + + outer.body: + br label %inner.for + + inner.for: + %phi = phi double [0.0, %outer.body], [%sum, %inner.inc] + %j = phi i32 [0, %outer.body], [%j.inc, %inner.inc] + %j.cmp = icmp slt i32 %j, 3 + br i1 %j.cmp, label %inner.body, label %inner.exit + + 
 inner.body: + %sum = fadd double %phi, 1.0 + br label %inner.inc + + inner.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %inner.for + + inner.exit: + %C_i = getelementptr inbounds double, double* %C, i32 %i + store double %phi, double* %C_i + br label %outer.inc + + outer.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %outer.for + + outer.exit: + br label %return + + +return: + ret void +} + + +; +; 0) Stmt_outer_body[0] +; +; +; 1) Stmt_inner_for[0, 0] +; 2) Stmt_inner_body[0, 0] +; 3) Stmt_inner_inc[0, 0] +; +; 4) Stmt_inner_for[0, 1] +; 5) Stmt_inner_body[0, 1] +; 6) Stmt_inner_inc[0, 1] +; +; 7) Stmt_inner_for[0, 2] +; 8) Stmt_inner_body[0, 2] +; 9) Stmt_inner_inc[0, 2] +; +;10) Stmt_inner_for[0, 3] +; +; +;11) Stmt_inner_exit[0] +;12) Stmt_outer_body[1] +; +; +;13) Stmt_inner_for[1, 0] +;14) Stmt_inner_body[1, 0] +;15) Stmt_inner_inc[1, 0] +; +;16) Stmt_inner_for[1, 1] +;17) Stmt_inner_body[1, 1] +;18) Stmt_inner_inc[1, 1] +; +;19) Stmt_inner_for[1, 2] +;20) Stmt_inner_body[1, 2] +;21) Stmt_inner_inc[1, 2] +; +;22) Stmt_inner_for[1, 3] +; +; +;23) Stmt_inner_exit[1] +;24) Stmt_outer_body[2] +; +;25) Stmt_inner_for[2, 0] +;26) Stmt_inner_body[2, 0] +;27) Stmt_inner_inc[2, 0] +; +;28) Stmt_inner_for[2, 1] +;29) Stmt_inner_body[2, 1] +;30) Stmt_inner_inc[2, 1] +; +;31) Stmt_inner_for[2, 2] +;32) Stmt_inner_body[2, 2] +;33) Stmt_inner_inc[2, 2] +; +;34) Stmt_inner_for[2, 3] +; +; +;35) Stmt_inner_exit[2] + + +; CHECK: Schedule after flattening { +; CHECK-NEXT: { Stmt_outer_body[i0] -> [12i0] } +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> [3 + 12i0 + 3i1] } +; CHECK-NEXT: { Stmt_inner_exit[i0] -> [11 + 12i0] } +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> [2 + 12i0 + 3i1] } +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> [1 + 12i0 + 3i1] } +; CHECK-NEXT: } +; CHECK: Original knowledge { +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 2 and i1 <= 11 + 12i0; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= 
i0 <= 2 and i1 >= 12 + 12i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [11 + 12i0{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 } +; CHECK-NEXT: } +; CHECK: Mapped scalars { +; CHECK-NEXT: Mapping of MemRef_phi { +; CHECK-NEXT: Primary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_body[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 10 - 4i0 and i1 <= 2; Stmt_inner_body[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_exit[i0] -> MemRef_C[i0] : 0 <= i0 <= 2 }; +; CHECK-NEXT: Target: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 } +; CHECK-NEXT: Lifetime: { Stmt_inner_for[i0, i1] -> [2 + 12i0 + 3i1] : 0 <= i0 <= 2 and 0 <= i1 <= 3 } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 12i0 + i1 and i0 <= 2 and i1 >= 2 + 12i0 and 14 <= i1 <= 11 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 + i1 and 2 <= i1 <= 11 } + Undef +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 12i0 + i1 and i0 <= 2 and i1 > 12i0 and 13 <= i1 <= 10 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 + i1 and 0 < i1 <= 10 } 
+; CHECK-NEXT: } +; CHECK-NEXT: Mapping of MemRef_phi__phi { +; CHECK-NEXT: Primary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i0 <= 2 and i1 >= 0 and 4 - 4i0 <= i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_outer_body[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_outer_body[i0] -> MemRef_C[i0] : 0 <= i0 <= 2 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_inner_inc[i0, i1] -> MemRef_C[i0] : 0 <= i0 <= 2 and 0 <= i1 <= 2 }; +; CHECK-NEXT: Target: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i0 <= 2 and i1 >= 0 and 4 - 4i0 <= i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 } +; CHECK-NEXT: Lifetime: { Stmt_inner_for[i0, i1] -> [1 + 12i0 + 3i1] : i1 > 0 and -4i0 < i1 <= 7 - 4i0 and i1 <= 3; Stmt_inner_for[2, i1] -> [25 + 3i1] : 0 < i1 <= 3; Stmt_inner_for[i0, 0] -> [1 + 12i0] : 0 <= i0 <= 2 } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [1 + 12i0{{\]\]}} -> Val__000000e_00[] : 0 < i0 <= 2; [MemRef_C[0] -> [1{{\]\]}} -> Val__000000e_00[]; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_body[i0, o1] -> Val_sum[{{\]\]}} : 3o1 = -4 - 12i0 + i1 and i0 >= 0 and 4 + 12i0 <= i1 <= 22 and i1 <= 10 + 12i0 and 12*floor((-4 - 12i0 + i1)/12) >= -10 - 12i0 + i1 and -10 - 12i0 + i1 <= 12*floor((-1 - 12i0 + i1)/12) <= -4 - 12i0 + i1; [MemRef_C[2] -> [i1{{\]\]}} -> [Stmt_inner_body[2, o1] -> Val_sum[{{\]\]}} : 3o1 = -28 + i1 and 28 <= i1 <= 31 and 12*floor((-28 + i1)/12) >= -34 + i1; [MemRef_C[2] -> [34{{\]\]}} -> [Stmt_inner_body[2, 2] -> Val_sum[{{\]\]}} } + Undef +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> 
[Stmt_inner_body[i0, o1] -> Val_sum[{{\]\]}} : 3o1 = -3 - 12i0 + i1 and i0 >= 0 and 3 + 12i0 <= i1 <= 30 and i1 <= 9 + 12i0; [MemRef_C[2] -> [33{{\]\]}} -> [Stmt_inner_body[2, 2] -> Val_sum[{{\]\]}}; [MemRef_C[i0] -> [12i0{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 2 } +; CHECK-NEXT: } +; CHECK-NEXT: Mapping of MemRef_sum { +; CHECK-NEXT: Primary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> MemRef_sum[] }; +; CHECK-NEXT: new: { Stmt_inner_body[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 3 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 2; Stmt_inner_body[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> MemRef_sum[] }; +; CHECK-NEXT: new: { Stmt_inner_inc[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 3 - 4i0 <= i1 <= 9 - 4i0 and i1 <= 2; Stmt_inner_inc[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2; Stmt_inner_inc[2, 2] -> MemRef_C[2] }; +; CHECK-NEXT: Target: { Stmt_inner_body[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 3 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 2; Stmt_inner_body[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2 } +; CHECK-NEXT: Lifetime: { Stmt_inner_body[i0, i1] -> [3 + 12i0 + 3i1] : i0 >= 0 and 0 <= i1 <= 10 - 4i0 and i1 <= 2 } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_body[i0, o1] -> Val_sum[{{\]\]}} : 3o1 = -3 - 12i0 + i1 and i0 <= 2 and i1 >= 3 + 12i0 and 12 <= i1 <= 9 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_body[0, o1] -> Val_sum[{{\]\]}} : 3o1 = -3 + i1 and 3 <= i1 <= 9 } + Undef +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_body[i0, o1] -> Val_sum[{{\]\]}} : 3o1 = -2 - 12i0 + i1 and i0 <= 2 and i1 >= 2 + 12i0 and 11 <= i1 <= 8 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_body[0, o1] -> Val_sum[{{\]\]}} : 3o1 = -2 + i1 and 2 <= i1 <= 8 } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK: After knowledge { +; CHECK-NEXT: Lifetime: 
{ [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_body[i0, o1] -> Val_sum[{{\]\]}} : 3 + 12i0 <= i1 <= 10 + 12i0 and ((3o1 = -4 - 12i0 + i1 and i0 >= 0 and 4 + 12i0 <= i1 <= 22 and 12*floor((-4 - 12i0 + i1)/12) >= -10 - 12i0 + i1 and -10 - 12i0 + i1 <= 12*floor((-1 - 12i0 + i1)/12) <= -4 - 12i0 + i1) or (3o1 = -3 - 12i0 + i1 and i0 <= 2 and 12 <= i1 <= 9 + 12i0)); [MemRef_C[2] -> [34{{\]\]}} -> [Stmt_inner_body[2, 2] -> Val_sum[{{\]\]}}; [MemRef_C[2] -> [i1{{\]\]}} -> [Stmt_inner_body[2, o1] -> Val_sum[{{\]\]}} : 3o1 = -28 + i1 and 28 <= i1 <= 31 and 12*floor((-28 + i1)/12) >= -34 + i1; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_body[0, o1] -> Val_sum[{{\]\]}} : 3o1 = -3 + i1 and 3 <= i1 <= 9; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 12i0 + i1 and i0 <= 2 and i1 >= 2 + 12i0 and 14 <= i1 <= 11 + 12i0; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 and i1 >= 12 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 + i1 and 2 <= i1 <= 11; [MemRef_C[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 2 and i1 <= 12i0 and ((3*floor((-2 + i1)/3) <= -3 + i1) or 3*floor((-2 + i1)/3) = -2 + i1); [MemRef_C[i0] -> [1 + 12i0{{\]\]}} -> Val__000000e_00[] : 0 < i0 <= 2; [MemRef_C[0] -> [1{{\]\]}} -> Val__000000e_00[] } + Unknown +; CHECK-NEXT: Written : { [MemRef_C[i0] -> [12i0{{\]\]}} -> Val__000000e_00[] : 0 <= i0 <= 2; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_body[i0, o1] -> Val_sum[{{\]\]}} : 2 + 12i0 <= i1 <= 9 + 12i0 and ((3o1 = -3 - 12i0 + i1 and i0 >= 0 and 3 + 12i0 <= i1 <= 30) or (3o1 = -2 - 12i0 + i1 and i0 <= 2 and 11 <= i1 <= 8 + 12i0)); [MemRef_C[2] -> [33{{\]\]}} -> [Stmt_inner_body[2, 2] -> Val_sum[{{\]\]}}; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_body[0, o1] -> Val_sum[{{\]\]}} : 3o1 = -2 + i1 and 2 <= i1 <= 8; [MemRef_C[i0] -> [i1{{\]\]}} -> [Stmt_inner_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 12i0 + i1 and i0 <= 2 and i1 > 12i0 and 13 
<= i1 <= 10 + 12i0; [MemRef_C[0] -> [i1{{\]\]}} -> [Stmt_inner_for[0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 + i1 and 0 < i1 <= 10; [MemRef_C[i0] -> [11 + 12i0{{\]\]}} -> [Stmt_inner_for[i0, 3] -> Val_phi[{{\]\]}} : 0 <= i0 <= 2 } +; CHECK-NEXT: } +; CHECK: After accesses { +; CHECK-NEXT: Stmt_outer_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_outer_body[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_outer_body[i0] -> MemRef_C[i0] : 0 <= i0 <= 2 }; +; CHECK-NEXT: Stmt_inner_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i0 <= 2 and i1 >= 0 and 4 - 4i0 <= i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_for[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 3; Stmt_inner_for[0, i1] -> MemRef_C[0] : 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_inner_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> MemRef_sum[] }; +; CHECK-NEXT: new: { Stmt_inner_body[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 3 - 4i0 <= i1 <= 11 - 4i0 and i1 <= 2; Stmt_inner_body[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_body[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 4 - 4i0 <= i1 <= 10 - 4i0 and i1 <= 2; Stmt_inner_body[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2 }; +; CHECK-NEXT: Stmt_inner_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> MemRef_sum[] }; +; CHECK-NEXT: new: { Stmt_inner_inc[i0, i1] -> MemRef_C[i0] : i1 >= 0 and 3 - 4i0 <= i1 <= 9 - 
4i0 and i1 <= 2; Stmt_inner_inc[0, i1] -> MemRef_C[0] : 0 <= i1 <= 2; Stmt_inner_inc[2, 2] -> MemRef_C[2] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_inner_inc[i0, i1] -> MemRef_C[i0] : 0 <= i0 <= 2 and 0 <= i1 <= 2 }; +; CHECK-NEXT: Stmt_inner_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_inner_exit[i0] -> MemRef_C[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_inner_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_inner_exit[i0] -> MemRef_C[i0] : 0 <= i0 <= 2 }; +; CHECK-NEXT: } Index: test/DeLICM/pass_existance.ll =================================================================== --- test/DeLICM/pass_existance.ll +++ test/DeLICM/pass_existance.ll @@ -33,4 +33,18 @@ ; Verify that the DeLICM has a custom printScop() function. -; CHECK: DeLICM result: +; CHECK: Original knowledge { +; CHECK-NEXT: Lifetime: [n] -> { [MemRef_A[0] -> [i1{{\]\]}} -> Val__000000e_00[] : n > 0 and i1 >= n; [MemRef_A[0] -> [i1{{\]\]}} -> Undef[] : n > 0 and i1 < n } + Unknown +; CHECK-NEXT: Written : [n] -> { [MemRef_A[0] -> [i1{{\]\]}} -> Val__000000e_00[] : 0 <= i1 < n } +; CHECK-NEXT: } +; CHECK: Mapped scalars { +; CHECK-NEXT: } +; CHECK: After knowledge { +; CHECK-NEXT: Lifetime: [n] -> { [MemRef_A[0] -> [i1{{\]\]}} -> Val__000000e_00[] : n > 0 and i1 >= n; [MemRef_A[0] -> [i1{{\]\]}} -> Undef[] : n > 0 and i1 < n } + Unknown +; CHECK-NEXT: Written : [n] -> { [MemRef_A[0] -> [i1{{\]\]}} -> Val__000000e_00[] : 0 <= i1 < n } +; CHECK-NEXT: } +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_embedded.ll =================================================================== --- /dev/null +++ 
test/DeLICM/reduction_embedded.ll @@ -0,0 +1,151 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.for + +outer.for: + %j = phi i32 [0, %entry], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.for, label %outer.exit + + + + reduction.for: + %i = phi i32 [0, %outer.for], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %outer.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + +; Unrolled flattened schedule: +; [0] Stmt_outer_for[0] +; [1] Stmt_reduction_for[0, 0] +; [2] Stmt_body[0, 0] +; [3] Stmt_reduction_inc[0, 0] +; [4] Stmt_reduction_for[0, 1] +; [5] Stmt_body[0, 1] +; [6] Stmt_reduction_inc[0, 1] +; [7] Stmt_reduction_for[0, 2] +; [8] Stmt_body[0, 2] +; [9] Stmt_reduction_inc[0, 2] +; [10] Stmt_reduction_for[0, 3] +; [11] Stmt_body[0, 3] +; [12] Stmt_reduction_inc[0, 3] +; [13] Stmt_reduction_for[0, 4] +; [14] Stmt_reduction_exit[0] +; [15] Stmt_outer_for[1] +; [16] Stmt_reduction_for[1, 0] +; [17] Stmt_body[1, 0] +; [18] Stmt_reduction_inc[1, 0] +; [19] Stmt_reduction_for[1, 1] +; [20] Stmt_body[1, 1] +; [21] Stmt_reduction_inc[1, 1] +; [22] Stmt_reduction_for[1, 2] +; [23] Stmt_body[1, 2] +; [24] Stmt_reduction_inc[1, 2] +; [25] Stmt_reduction_for[1, 
3] +; [26] Stmt_body[1, 3] +; [27] Stmt_reduction_inc[1, 3] +; [28] Stmt_reduction_for[1, 4] +; [29] Stmt_reduction_exit[1] +; [30] Stmt_outer_for[2] + +; CHECK: Schedule after flattening { +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> [1 + 15i0 + 3i1] } +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> [14 + 15i0] } +; CHECK-NEXT: { Stmt_body[i0, i1] -> [2 + 15i0 + 3i1] } +; CHECK-NEXT: { Stmt_outer_for[i0] -> [15i0] } +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> [3 + 15i0 + 3i1] } +; CHECK-NEXT: } +; CHECK: Original knowledge { +; CHECK-NEXT: Lifetime: { [MemRef_A[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 1 and i1 <= 14 + 15i0; [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 and i1 >= 15 + 15i0 } + Unknown +; CHECK-NEXT: Written : { [MemRef_A[i0] -> [14 + 15i0{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 } +; CHECK-NEXT: } +; CHECK: Mapped scalars { +; CHECK-NEXT: Mapping of MemRef_phi { +; CHECK-NEXT: Primary: +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i1 >= 0 and -5i0 <= i1 <= 8 - 5i0 and i1 <= 3 }; +; CHECK-NEXT: Secondary: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Target: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 } +; CHECK-NEXT: Lifetime: { Stmt_reduction_for[i0, i1] -> [2 + 15i0 + 3i1] : 0 <= i0 <= 1 and 0 <= i1 <= 3; Stmt_reduction_for[1, 4] -> [29]; Stmt_reduction_for[0, 4] -> 
[14] } +; CHECK-NEXT: Zone: +; CHECK-NEXT: Lifetime: { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 15i0 + i1 and 0 <= i0 <= 1 and 2 + 15i0 <= i1 <= 11 + 15i0; [MemRef_A[1] -> [29{{\]\]}} -> [Stmt_reduction_for[1, 4] -> Val_phi[{{\]\]}}; [MemRef_A[0] -> [14{{\]\]}} -> [Stmt_reduction_for[0, 4] -> Val_phi[{{\]\]}} } + Undef +; CHECK-NEXT: Written : { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 15i0 + i1 and 0 <= i0 <= 1 and 15i0 < i1 <= 13 + 15i0 } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK: After knowledge { +; CHECK-NEXT: Lifetime: { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -2 - 15i0 + i1 and 0 <= i0 <= 1 and 2 + 15i0 <= i1 <= 11 + 15i0; [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 and i1 >= 15 + 15i0; [MemRef_A[1] -> [29{{\]\]}} -> [Stmt_reduction_for[1, 4] -> Val_phi[{{\]\]}}; [MemRef_A[0] -> [14{{\]\]}} -> [Stmt_reduction_for[0, 4] -> Val_phi[{{\]\]}}; [MemRef_A[i0] -> [i1{{\]\]}} -> Undef[] : 0 <= i0 <= 1 and ((i1 <= 13 and 3*floor((-2 + i1)/3) <= -3 + i1) or (3*floor((-2 + i1)/3) = -2 + i1 and i1 <= 1 + 15i0)); [MemRef_A[1] -> [i1{{\]\]}} -> Undef[] : 14 <= i1 <= 28 and 3*floor((-2 + i1)/3) <= -3 + i1 } + Unknown +; CHECK-NEXT: Written : { [MemRef_A[i0] -> [i1{{\]\]}} -> [Stmt_reduction_for[i0, o1] -> Val_phi[{{\]\]}} : 3o1 = -1 - 15i0 + i1 and 0 <= i0 <= 1 and 15i0 < i1 <= 13 + 15i0; [MemRef_A[i0] -> [14 + 15i0{{\]\]}} -> [Stmt_reduction_for[i0, 4] -> Val_phi[{{\]\]}} : 0 <= i0 <= 1 } +; CHECK-NEXT: } +; CHECK: After accesses { +; CHECK-NEXT: Stmt_outer_for +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_outer_for[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: 
MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i1 >= 0 and -5i0 <= i1 <= 8 - 5i0 and i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/ScopInfo/licm_load.ll =================================================================== --- test/ScopInfo/licm_load.ll +++ /dev/null @@ -1,54 +0,0 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare \ -; RUN: -polly-invariant-load-hoisting=true -polly-scops -analyze < %s \ -; RUN: | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare \ -; RUN: -polly-invariant-load-hoisting=true -polly-scops -analyze < %s \ -; RUN: | FileCheck %s -; -; void foo(int n, float A[static const restrict n], -; float B[static const restrict n], int j) { -; for (int i = 0; i < n; i++) -; A[i] = B[j]; -; } -; -target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i32 %n, float* noalias nonnull %A, float* noalias nonnull %B, i32 %j) { -entry: - %tmp = sext i32 %n to i64 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %cmp = icmp slt i64 %indvars.iv, %tmp - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %idxprom = sext i32 %j to i64 - %arrayidx = getelementptr inbounds float, float* %B, i64 %idxprom - %tmp1 = bitcast float* %arrayidx to i32* - %tmp2 = load i32, i32* %tmp1, align 4 - %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv - %tmp3 = bitcast float* %arrayidx2 to i32* - store i32 %tmp2, i32* %tmp3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; CHECK: Invariant Accesses: { -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_{{[a-zA-Z_]*}}[{{[i0]*}}] -> MemRef_B[j] }; -; CHECK-NEXT: Execution Context: [n, j] -> { : n > 0 } -; CHECK-NEXT: } -; -; CHECK: Statements { -; CHECK: Stmt_for_body -; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_for_body[i0] -> MemRef_A[i0] }; -; CHECK: } Index: test/ScopInfo/licm_potential_store.ll =================================================================== --- test/ScopInfo/licm_potential_store.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt %loadPolly -basicaa -sroa -instcombine -simplifycfg -tailcallopt \ -; RUN: -simplifycfg -reassociate -loop-rotate -instcombine -indvars \ -; RUN: -polly-prepare -polly-scops -analyze < %s \ -; RUN: \ -; RUN: | FileCheck %s --check-prefix=NOLICM - -; RUN: opt %loadPolly -basicaa -sroa -instcombine -simplifycfg -tailcallopt \ -; RUN: -simplifycfg -reassociate -loop-rotate -instcombine -indvars -licm \ -; RUN: -polly-prepare 
-polly-scops -analyze < %s \ -; RUN: \ -; RUN: | FileCheck %s --check-prefix=LICM - -; void foo(int n, float A[static const restrict n], float x) { -; // (0) -; for (int i = 0; i < 5; i += 1) { -; for (int j = 0; j < n; j += 1) { -; x = 7; // (1) -; } -; A[0] = x; // (3) -; } -; // (4) -; } - -; LICM: Statements -; NOLICM: Statements - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i32 %n, float* noalias nonnull %A, float %x) { -entry: - %n.addr = alloca i32, align 4 - %A.addr = alloca float*, align 8 - %x.addr = alloca float, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - store i32 %n, i32* %n.addr, align 4 - store float* %A, float** %A.addr, align 8 - store float %x, float* %x.addr, align 4 - %tmp = load i32, i32* %n.addr, align 4 - %tmp1 = zext i32 %tmp to i64 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc.4, %entry - %tmp2 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %tmp2, 5 - br i1 %cmp, label %for.body, label %for.end.6 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond.1 - -for.cond.1: ; preds = %for.inc, %for.body - %tmp3 = load i32, i32* %j, align 4 - %tmp4 = load i32, i32* %n.addr, align 4 - %cmp2 = icmp slt i32 %tmp3, %tmp4 - br i1 %cmp2, label %for.body.3, label %for.end - -for.body.3: ; preds = %for.cond.1 - store float 7.000000e+00, float* %x.addr, align 4 - br label %for.inc - -for.inc: ; preds = %for.body.3 - %tmp5 = load i32, i32* %j, align 4 - %add = add nsw i32 %tmp5, 1 - store i32 %add, i32* %j, align 4 - br label %for.cond.1 - -for.end: ; preds = %for.cond.1 - %tmp6 = load float, float* %x.addr, align 4 - %tmp7 = load float*, float** %A.addr, align 8 - %arrayidx = getelementptr inbounds float, float* %tmp7, i64 0 - store float %tmp6, float* %arrayidx, align 4 - br label %for.inc.4 - -for.inc.4: ; preds = %for.end - %tmp8 = load i32, i32* %i, align 4 - %add5 = add nsw i32 %tmp8, 1 - store i32 %add5, i32* %i, align 4 
- br label %for.cond - -for.end.6: ; preds = %for.cond - ret void -} - -; CHECK: Statements { -; CHECK: Stmt_for_end -; CHECK: } Index: test/ScopInfo/licm_reduction.ll =================================================================== --- test/ScopInfo/licm_reduction.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; -; XFAIL: * -; -; void test(int n, double B[static const restrict n], int j) { -; for (int i = 0; i < n; i += 1) { -; B[j] += i; -; } -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @test(i32 %n, double* noalias nonnull %B, i32 %j) { -entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %i.0 = phi i32 [ 0, %entry ], [ %add1, %for.inc ] - %cmp = icmp slt i32 %i.0, %n - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %conv = sitofp i32 %i.0 to double - %idxprom = sext i32 %j to i64 - %arrayidx = getelementptr inbounds double, double* %B, i64 %idxprom - %tmp = load double, double* %arrayidx, align 8 - %add = fadd double %tmp, %conv - store double %add, double* %arrayidx, align 8 - br label %for.inc - -for.inc: ; preds = %for.body - %add1 = add nuw nsw i32 %i.0, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - - -; CHECK: Statements { -; CHECK: Stmt_for_body -; CHECK-DAG: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_for_body[i0] -> MemRef_B[j] }; -; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [n, j] -> { Stmt_for_body[i0] -> MemRef_B[j] }; -; CHECK: } Index: test/ScopInfo/licm_reduction_nested.ll =================================================================== --- test/ScopInfo/licm_reduction_nested.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: opt %loadPolly -basicaa 
-loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; -; XFAIL: * -; -; Even ScopDetection fails here after LICM because of PHI in exit node. -; -; void foo(unsigned long *restrict A, unsigned long *restrict B, -; unsigned long j) { -; for (unsigned long i = 0; i < 100; i++) -; for (unsigned long k = 0; k < 100; k++) -; A[j] += B[i + k]; -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i64* noalias %A, i64* noalias %B, i64 %j) { -entry: - br label %for.cond - -for.cond: ; preds = %for.inc.6, %entry - %i.0 = phi i64 [ 0, %entry ], [ %inc7, %for.inc.6 ] - %exitcond1 = icmp ne i64 %i.0, 100 - br i1 %exitcond1, label %for.body, label %for.end.8 - -for.body: ; preds = %for.cond - br label %for.cond.1 - -for.cond.1: ; preds = %for.inc, %for.body - %k.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ] - %exitcond = icmp ne i64 %k.0, 100 - br i1 %exitcond, label %for.body.3, label %for.end - -for.body.3: ; preds = %for.cond.1 - %add = add nuw nsw i64 %i.0, %k.0 - %arrayidx = getelementptr inbounds i64, i64* %B, i64 %add - %tmp = load i64, i64* %arrayidx, align 8 - %arrayidx4 = getelementptr inbounds i64, i64* %A, i64 %j - %tmp2 = load i64, i64* %arrayidx4, align 8 - %add5 = add i64 %tmp2, %tmp - store i64 %add5, i64* %arrayidx4, align 8 - br label %for.inc - -for.inc: ; preds = %for.body.3 - %inc = add nuw nsw i64 %k.0, 1 - br label %for.cond.1 - -for.end: ; preds = %for.cond.1 - br label %for.inc.6 - -for.inc.6: ; preds = %for.end - %inc7 = add nuw nsw i64 %i.0, 1 - br label %for.cond - -for.end.8: ; preds = %for.cond - ret void -} - - -; CHECK: Statements { -; CHECK: Stmt_for_body_3 -; CHECK-DAG: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body_3[i0, i1] -> MemRef_B[i0 + i1] }; -; CHECK-DAG: ReadAccess := [Reduction Type: +] [Scalar: 0] -; CHECK-NEXT: 
[j] -> { Stmt_for_body_3[i0, i1] -> MemRef_A[j] }; -; CHECK-DAG: MustWriteAccess := [Reduction Type: +] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body_3[i0, i1] -> MemRef_A[j] }; -; CHECK: } Index: test/ScopInfo/licm_store.ll =================================================================== --- test/ScopInfo/licm_store.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s -; -; XFAIL: * -; -; void foo(float *restrict A, float *restrict B, long j) { -; for (long i = 0; i < 100; i++) -; A[j] = B[i]; -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(float* noalias %A, float* noalias %B, i64 %j) { -entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] - %exitcond = icmp ne i64 %i.0, 100 - br i1 %exitcond, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %arrayidx = getelementptr inbounds float, float* %B, i64 %i.0 - %tmp = bitcast float* %arrayidx to i32* - %tmp1 = load i32, i32* %tmp, align 4 - %arrayidx1 = getelementptr inbounds float, float* %A, i64 %j - %tmp2 = bitcast float* %arrayidx1 to i32* - store i32 %tmp1, i32* %tmp2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %inc = add nuw nsw i64 %i.0, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; CHECK: Statements { -; CHECK: Stmt_for_body -; CHECK-DAG: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> MemRef_B[i0] }; -; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [j] -> { Stmt_for_body[i0] -> MemRef_A[j] }; -; CHECK: } Index: unittests/CMakeLists.txt =================================================================== --- unittests/CMakeLists.txt +++ 
unittests/CMakeLists.txt @@ -16,8 +16,9 @@ set_property(TARGET ${test_name} PROPERTY FOLDER "Polly") endif() - target_link_libraries(${test_name} Polly LLVMCore LLVMSupport) + target_link_libraries(${test_name} Polly LLVMCore LLVMSupport LLVMipo) endfunction() add_subdirectory(Isl) add_subdirectory(Flatten) +add_subdirectory(DeLICM) Index: unittests/DeLICM/CMakeLists.txt =================================================================== --- /dev/null +++ unittests/DeLICM/CMakeLists.txt @@ -0,0 +1,3 @@ +add_polly_unittest(DeLICMTests + DeLICMTest.cpp + ) Index: unittests/DeLICM/DeLICMTest.cpp =================================================================== --- /dev/null +++ unittests/DeLICM/DeLICMTest.cpp @@ -0,0 +1,584 @@ +//===- DeLICMTest.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "polly/DeLICM.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "gtest/gtest.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace polly; + +/// Loop-like control structure to run body for true and false if passed indef. +/// +/// Takes an int variable that can be either 0 (false), 1 (true), or -1 (indef). +/// It then shadows that variable with a new variable of type bool. +/// +/// - If the int variable has the value of 0, the body is executed with the +/// bool variable set to false. +/// - If the int variable has the value of 1, the body is executed with the bool +/// variable set to true. +/// - If the int variable has the value of -1, the body is executed twice: once +/// with the bool variable set to false and a second time with true. 
+/// +/// For use in tests where the value of a boolean variable should not change the +/// result, such that both settings are tested at once. +#define BOOL_FOR(VAR) \ + for (int VAR##_iter = (VAR == indef) ? 0 : VAR; \ + VAR##_iter <= ((VAR == indef) ? 1 : VAR); VAR##_iter += 1) \ + for (bool VAR = VAR##_iter, VAR##_repeat = true; VAR##_repeat; \ + VAR##_repeat = false) + +#define indef (-1) + +namespace { +bool checkComputeReachingDefinition(const char *ScheduleStr, + const char *DefsStr, int IncludeDef, + int IncludeRedef, const char *ExpectedStr) { + BOOL_FOR(IncludeDef) BOOL_FOR(IncludeRedef) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + auto Schedule = give(isl_union_map_read_from_str(Ctx.get(), ScheduleStr)); + auto Defs = give(isl_union_map_read_from_str(Ctx.get(), DefsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx.get(), ExpectedStr)); + + auto Result = + computeReachingDefinition(Schedule, Defs, IncludeDef, IncludeRedef); + + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + if (Success != isl_bool_true) + return false; + } + + return true; +} + +TEST(DeLICM, ReachingDefinitionZone) { + EXPECT_FALSE(computeReachingDefinition(nullptr, nullptr, false, false)); + + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Dom[] -> [0] }", "{ Dom[] -> Elt[] }", true, indef, + "{ [Elt[] -> [i]] -> Dom[] : 0 <= i }")); + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Dom[] -> [0] }", "{ Dom[] -> Elt[] }", false, indef, + "{ [Elt[] -> [i]] -> Dom[] : 0 < i }")); + + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", true, false, + "{ [Elt[] -> [i]] -> Dom1[] : 0 <= i < 10; [Elt[] -> [i]] -> Dom2[] : 10 " + "<= i }")); + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", false, false, + "{ [Elt[] -> [i]] -> Dom1[] : 0 < i < 10; [Elt[] -> [i]] -> Dom2[] : 10 " + "< i 
}")); + EXPECT_TRUE( + checkComputeReachingDefinition("{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", + false, true, "{ [Elt[] -> [i]] -> Dom1[] " + ": 0 < i <= 10; [Elt[] -> " + "[i]] -> Dom2[] : 10 < i }")); + EXPECT_TRUE( + checkComputeReachingDefinition("{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt[]; Dom2[] -> Elt[] }", + true, true, "{ [Elt[] -> [i]] -> Dom1[] " + ": 0 <= i <= 10; [Elt[] -> " + "[i]] -> Dom2[] : 10 <= i }")); + + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Dom1[] -> [0]; Dom2[] -> [10] }", + "{ Dom1[] -> Elt1[]; Dom2[] -> Elt2[] }", true, indef, + "{ [Elt1[] -> [i]] -> Dom1[] : 0 <= i; " + "[Elt2[] -> [i]] -> Dom2[] : 10 <= i }")); + + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Dom[i] -> [i] }", "{ Dom[i] -> Elt[]; Dom2[] -> Elt[] }", true, false, + "{ [Elt[] -> [i]] -> Dom[i] }")); + + EXPECT_TRUE( + checkComputeReachingDefinition("{ Dom[1] -> [0]; Dom[2] -> [10] }", + "{ Dom[1] -> Elt[]; Dom[2] -> Elt[] }", + false, true, "{ [Elt[] -> [i]] -> Dom[1] " + ": 0 < i <= 10; [Elt[] -> " + "[i]] -> Dom[2] : 10 < i }")); + + EXPECT_TRUE(checkComputeReachingDefinition( + "{ Stmt_reduction_for[i] -> [3i] : 0 <= i <= 4 }", + "{ Stmt_reduction_for[i] -> Elt[] : 0 <= i <= 4 }", false, true, + "{ [Elt[] -> [i]] -> Stmt_reduction_for[0] : 0 < i <= 3; [Elt[] -> [i]] " + "-> Stmt_reduction_for[1] : 3 < i <= 6; [Elt[] -> [i]] -> " + "Stmt_reduction_for[2] : 6 < i <= 9; [Elt[] -> [i]] -> " + "Stmt_reduction_for[3] : 9 < i <= 12; [Elt[] -> [i]] -> " + "Stmt_reduction_for[4] : 12 < i }")); +} + +bool checkComputeArrayLifetime(const char *ScheduleStr, const char *WritesStr, + const char *ReadsStr, int ReadEltInSameInst, + int InclWrite, int InclLastRead, int ExitReads, + const char *ExpectedStr) { + BOOL_FOR(ReadEltInSameInst) + BOOL_FOR(InclWrite) BOOL_FOR(InclLastRead) BOOL_FOR(ExitReads) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + auto Schedule = give(isl_union_map_read_from_str(Ctx.get(), 
ScheduleStr)); + auto Writes = give(isl_union_map_read_from_str(Ctx.get(), WritesStr)); + auto Reads = give(isl_union_map_read_from_str(Ctx.get(), ReadsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx.get(), ExpectedStr)); + + auto Result = + computeArrayLifetime(Schedule, Writes, Reads, ReadEltInSameInst, + InclWrite, InclLastRead, ExitReads); + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + if (Success != isl_bool_true) + return false; + } + + return true; +} + +TEST(DeLICM, ArrayPerWriteLifetimeZone) { + EXPECT_TRUE(checkComputeArrayLifetime("{ }", "{ }", "{ }", indef, indef, + indef, indef, "{ }")); + EXPECT_TRUE(checkComputeArrayLifetime("{ Read[] -> [10] }", + "{ Read[] -> A[] }", "{ }", indef, + indef, indef, false, "{ }")); + + EXPECT_TRUE(checkComputeArrayLifetime("{ Def[] -> [10] }", "{ Def[] -> A[] }", + "{ }", indef, indef, indef, false, + "{ }")); + EXPECT_TRUE(checkComputeArrayLifetime("{ Def[] -> [10] }", "{ Def[] -> A[] }", + "{ }", indef, false, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 < i }")); + EXPECT_TRUE(checkComputeArrayLifetime("{ Def[] -> [10] }", "{ Def[] -> A[] }", + "{ }", indef, true, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 <= i }")); + + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def[] -> [10]; Read[] -> [20] }", "{ Def[] -> A[] }", + "{ Read[] -> A[] }", indef, false, false, false, + "{ [A[] -> Def[]] -> [i] : 10 < i < 20 }")); + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def[] -> [10]; Read[] -> [20] }", "{ Def[] -> A[] }", + "{ Read[] -> A[] }", indef, true, false, false, + "{ [A[] -> Def[]] -> [i] : 10 <= i < 20 }")); + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def[] -> [10]; Read[] -> [20] }", "{ Def[] -> A[] }", + "{ Read[] -> A[] }", indef, false, true, false, + "{ [A[] -> Def[]] -> [i] : 10 < i <= 20 }")); + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def[] -> [10]; Read[] -> [20] }", "{ Def[] -> A[] }", + "{ Read[] -> A[] }", indef, true, true, false, + "{ [A[] -> Def[]] -> 
[i] : 10 <= i <= 20 }")); + EXPECT_TRUE(checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", + indef, false, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 < i }")); + EXPECT_TRUE(checkComputeArrayLifetime("{ Def[] -> [10]; Read[] -> [20] }", + "{ Def[] -> A[] }", "{ Read[] -> A[] }", + indef, true, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 <= i }")); + + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def[] -> [10]; Read1[] -> [20]; Read2[] -> [30] }", "{ Def[] -> A[] }", + "{ Read1[] -> A[]; Read2[] -> A[] }", indef, true, indef, true, + "{ [A[] -> Def[]] -> [i] : 10 <= i }")); + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def[] -> [10]; Read1[] -> [20]; Read2[] -> [30] }", "{ Def[] -> A[] }", + "{ Read1[] -> A[]; Read2[] -> A[] }", indef, true, true, false, + "{ [A[] -> Def[]] -> [i] : 10 <= i <= 30 }")); + + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def1[] -> [0]; Read[] -> [10]; Def2[] -> [20] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", "{ Read[] -> A[] }", indef, true, + true, false, "{ [A[] -> Def1[]] -> [i] : 0 <= i <= 10 }")); + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def1[] -> [0]; Read[] -> [10]; Def2[] -> [20] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", "{ Read[] -> A[] }", indef, true, + true, true, "{ [A[] -> Def1[]] -> [i] : 0 <= i <= 10; [A[] -> " + "Def2[]] -> [i] : 20 <= i }")); + + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def1[] -> [0]; Def2[] -> [10]; Read[] -> [10] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", "{ Read[] -> A[] }", false, true, + true, true, "{ [A[] -> Def1[]] -> [i] : 0 <= i <= 10; [A[] -> " + "Def2[]] -> [i] : 10 <= i }")); + EXPECT_TRUE(checkComputeArrayLifetime( + "{ Def1[] -> [0]; Def2[] -> [10]; Read[] -> [10] }", + "{ Def1[] -> A[]; Def2[] -> A[] }", "{ Read[] -> A[] }", true, true, true, + true, "{ [A[] -> Def2[]] -> [i] : 10 <= i }")); +} + +bool checkComputeReachingOverwrite(const char *ScheduleStr, const char *DefsStr, + int IncludePrevWrite, int IncludeOverwrite, + 
const char *ExpectedStr) { + BOOL_FOR(IncludePrevWrite) BOOL_FOR(IncludeOverwrite) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + auto Schedule = give(isl_union_map_read_from_str(Ctx.get(), ScheduleStr)); + auto Defs = give(isl_union_map_read_from_str(Ctx.get(), DefsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx.get(), ExpectedStr)); + + auto Result = computeReachingOverwrite(Schedule, Defs, IncludePrevWrite, + IncludeOverwrite); + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + if (Success != isl_bool_true) + return false; + } + + return true; +} + +TEST(DeLICM, ReachingOverwrite) { + EXPECT_TRUE(checkComputeReachingOverwrite( + "{ Write[] -> [0] }", "{ Write[] -> Elt[] }", indef, false, + "{ [Elt[] -> [i]] -> Write[] : i < 0 }")); + EXPECT_TRUE(checkComputeReachingOverwrite( + "{ Write[] -> [0] }", "{ Write[] -> Elt[] }", indef, true, + "{ [Elt[] -> [i]] -> Write[] : i <= 0 }")); + + EXPECT_TRUE(checkComputeReachingOverwrite( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", false, + false, "{ [Elt[] -> [i]] -> Write[0] : i < 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 < i < 10 }")); + EXPECT_TRUE(checkComputeReachingOverwrite( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", false, + true, "{ [Elt[] -> [i]] -> Write[0] : i <= 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 < i <= 10 }")); + EXPECT_TRUE(checkComputeReachingOverwrite( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", true, + false, "{ [Elt[] -> [i]] -> Write[0] : i < 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 <= i < 10 }")); + EXPECT_TRUE(checkComputeReachingOverwrite( + "{ Write[0] -> [0]; Write[1] -> [10] }", "{ Write[i] -> Elt[]; }", true, + true, "{ [Elt[] -> [i]] -> Write[0] : i <= 0 ; [Elt[] -> [i]] -> " + "Write[1] : 0 <= i <= 10 }")); +} + +bool checkComputeArrayUnused(const char *ScheduleStr, const char *WritesStr, + const char *ReadsStr, int ReadEltInSameInst, + int IncludeLastRead, int 
IncludeWrite, + const char *ExpectedStr) { + BOOL_FOR(ReadEltInSameInst) BOOL_FOR(IncludeLastRead) BOOL_FOR(IncludeWrite) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + + auto Schedule = give(isl_union_map_read_from_str(Ctx.get(), ScheduleStr)); + auto Writes = give(isl_union_map_read_from_str(Ctx.get(), WritesStr)); + auto Reads = give(isl_union_map_read_from_str(Ctx.get(), ReadsStr)); + auto Expected = give(isl_union_map_read_from_str(Ctx.get(), ExpectedStr)); + + auto Result = computeArrayUnused(Schedule, Writes, Reads, ReadEltInSameInst, + IncludeLastRead, IncludeWrite); + auto Success = isl_union_map_is_equal(Result.keep(), Expected.keep()); + if (Success != isl_bool_true) + return false; + } + + return isl_bool_true; +} + +TEST(DeLICM, ArrayUnused) { + EXPECT_TRUE(checkComputeArrayUnused("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", + "{ Read[] -> Elt[] }", indef, false, + false, "{ Elt[] -> [i] : 0 < i < 10 }")); + EXPECT_TRUE(checkComputeArrayUnused("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", + "{ Read[] -> Elt[] }", indef, false, true, + "{ Elt[] -> [i] : 0 < i <= 10 }")); + EXPECT_TRUE(checkComputeArrayUnused("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", + "{ Read[] -> Elt[] }", indef, true, false, + "{ Elt[] -> [i] : 0 <= i < 10 }")); + EXPECT_TRUE(checkComputeArrayUnused("{ Read[] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", + "{ Read[] -> Elt[] }", indef, true, true, + "{ Elt[] -> [i] : 0 <= i <= 10 }")); + + EXPECT_TRUE(checkComputeArrayUnused( + "{ Read[0] -> [-10]; Read[1] -> [0]; Write[] -> [10] }", + "{ Write[] -> Elt[] }", "{ Read[i] -> Elt[] }", indef, false, true, + "{ Elt[] -> [i] : 0 < i <= 10 }")); + EXPECT_TRUE(checkComputeArrayUnused("{ Read[] -> [0]; }", "{ }", + "{ Read[] -> Elt[] }", indef, indef, + indef, "{ }")); + EXPECT_TRUE(checkComputeArrayUnused( + "{ Write[] -> [0]; }", "{ Write[] -> Elt[] }", "{ }", indef, indef, true, + "{ Elt[] -> [i] : i <= 0 
}")); + + EXPECT_TRUE(checkComputeArrayUnused("{ RW[] -> [0] }", "{ RW[] -> Elt[] }", + "{ RW[] -> Elt[] }", true, indef, false, + "{ Elt[] -> [i] : i < 0 }")); + EXPECT_TRUE(checkComputeArrayUnused("{ RW[] -> [0] }", "{ RW[] -> Elt[] }", + "{ RW[] -> Elt[] }", true, indef, true, + "{ Elt[] -> [i] : i <= 0 }")); + EXPECT_TRUE(checkComputeArrayUnused("{ RW[] -> [0] }", "{ RW[] -> Elt[] }", + "{ RW[] -> Elt[] }", false, true, true, + "{ Elt[] -> [0] }")); +} + +/// Get the universes of all spaces in @p USet. +IslPtr unionSpace(NonowningIslPtr USet) { + auto Result = give(isl_union_set_empty(isl_union_set_get_space(USet.keep()))); + foreachElt(USet, [=, &Result](IslPtr Set) { + auto Space = give(isl_set_get_space(Set.keep())); + auto Universe = give(isl_set_universe(Space.take())); + Result = give(isl_union_set_add_set(Result.take(), Universe.take())); + }); + return Result; +} + +bool checkIsConflictingNonsymmetric( + const char *ExistingKnownStr, const char *ExistingUnknownStr, + const char *ExistingUndefStr, const char *ExistingWrittenStr, + const char *ProposedKnownStr, const char *ProposedUnknownStr, + const char *ProposedUndefStr, const char *ProposedWrittenStr) { + std::unique_ptr Ctx(isl_ctx_alloc(), + &isl_ctx_free); + LLVMContext C; + + auto ExistingKnown = + give(isl_union_map_read_from_str(Ctx.get(), ExistingKnownStr)); + auto ExistingUnknown = + ExistingUnknownStr + ? give(isl_union_set_read_from_str(Ctx.get(), ExistingUnknownStr)) + : nullptr; + auto ExistingUndef = + ExistingUndefStr + ? give(isl_union_set_read_from_str(Ctx.get(), ExistingUndefStr)) + : nullptr; + auto ExistingWritten = + give(isl_union_map_read_from_str(Ctx.get(), ExistingWrittenStr)); + + auto ProposedKnown = + give(isl_union_map_read_from_str(Ctx.get(), ProposedKnownStr)); + auto ProposedUnknown = + ProposedUnknownStr + ? give(isl_union_set_read_from_str(Ctx.get(), ProposedUnknownStr)) + : nullptr; + auto ProposedUndef = + ProposedUndefStr + ? 
give(isl_union_set_read_from_str(Ctx.get(), ProposedUndefStr)) + : nullptr; + auto ProposedWritten = + give(isl_union_map_read_from_str(Ctx.get(), ProposedWrittenStr)); + + auto UndefVal = UndefValue::get(IntegerType::get(C, 8)); + auto UndefId = give(isl_id_alloc(Ctx.get(), "Undef", UndefVal)); + auto UndefSpace = give(isl_space_set_tuple_id( + isl_space_set_alloc(Ctx.get(), 0, 0), isl_dim_set, UndefId.take())); + auto UndefSet = give(isl_set_universe(UndefSpace.take())); + auto UndefUSet = give(isl_union_set_from_set(UndefSet.take())); + + auto ExistingDefined = give(isl_union_map_domain(ExistingKnown.copy())); + auto ExistingLifetime = ExistingKnown; + if (ExistingUnknown) { + ExistingDefined = give( + isl_union_set_union(ExistingDefined.take(), ExistingUnknown.copy())); + ExistingLifetime = give( + isl_union_map_union(ExistingLifetime.take(), + isl_union_map_from_domain(ExistingUnknown.copy()))); + } + if (ExistingUndef) { + ExistingDefined = + give(isl_union_set_union(ExistingDefined.take(), ExistingUndef.copy())); + ExistingLifetime = give(isl_union_map_union( + ExistingLifetime.take(), isl_union_map_from_domain_and_range( + ExistingUndef.copy(), UndefUSet.copy()))); + } + + auto ProposedDefined = give(isl_union_map_domain(ProposedKnown.copy())); + auto ProposedLifetime = ProposedKnown; + if (ProposedUnknown) { + ProposedDefined = give( + isl_union_set_union(ProposedDefined.take(), ProposedUnknown.copy())); + ProposedLifetime = give( + isl_union_map_union(ProposedLifetime.take(), + isl_union_map_from_domain(ProposedUnknown.copy()))); + } + if (ProposedUndef) { + ProposedDefined = + give(isl_union_set_union(ProposedDefined.take(), ProposedUndef.copy())); + ProposedLifetime = give(isl_union_map_union( + ProposedLifetime.take(), isl_union_map_from_domain_and_range( + ProposedUndef.copy(), UndefUSet.copy()))); + } + + auto ExistingUniverse = unionSpace(ExistingDefined); + ExistingUniverse = give(isl_union_set_union( + ExistingUniverse.take(), + 
unionSpace(give(isl_union_map_domain(ExistingWritten.copy()))).take())); + + auto ProposedUniverse = unionSpace(ProposedDefined); + ProposedUniverse = give(isl_union_set_union( + ProposedUniverse.take(), + unionSpace(give(isl_union_map_domain(ProposedWritten.copy()))).take())); + + // The 'universe' contains all statement instances; because there is no + // isl_union_set_universe, we derive it from all statement domains passed to + // this function. + auto Universe = give( + isl_union_set_union(ExistingUniverse.take(), ProposedUniverse.take())); + + // If the user did not specify the Existing Undef zone, assume it to be the + // complement of all specified zones. + // The Existing's Unknown zone is already assumed to be implicit by + // isConflicting(). + if (!ExistingUndefStr) + ExistingLifetime = give(isl_union_map_union( + ExistingLifetime.take(), + isl_union_map_from_domain_and_range( + isl_union_set_subtract(Universe.copy(), ExistingDefined.copy()), + UndefUSet.copy()))); + + // If the user did not specify the Proposed Unknown zone, assume it to be the + // complement of all specified zones. + // The Proposed's Undef zone is already assumed to be implicit by + // isConflicting(). 
+ if (!ProposedUnknownStr) + ProposedLifetime = give( + isl_union_map_union(ProposedLifetime.take(), + isl_union_map_from_domain(isl_union_set_subtract( + Universe.copy(), ProposedDefined.copy())))); + + return isConflicting(ExistingLifetime, true, ExistingWritten, + ProposedLifetime, false, ProposedWritten); +} + +bool checkIsConflicting(const char *ThisKnownStr, const char *ThisUnknownStr, + const char *ThisUndefStr, const char *ThisWrittenStr, + const char *ThatKnownStr, const char *ThatUnknownStr, + const char *ThatUndefStr, const char *ThatWrittenStr) { + auto ThisExisting = checkIsConflictingNonsymmetric( + ThisKnownStr, ThisUnknownStr, ThisUndefStr, ThisWrittenStr, ThatKnownStr, + ThatUnknownStr, ThatUndefStr, ThatWrittenStr); + auto ThatExisting = checkIsConflictingNonsymmetric( + ThatKnownStr, ThatUnknownStr, ThatUndefStr, ThatWrittenStr, ThisKnownStr, + ThisUnknownStr, ThisUndefStr, ThisWrittenStr); + + // isConflicting should be symmetric. + EXPECT_EQ(ThisExisting, ThatExisting); + + return ThisExisting || ThatExisting; +} + +TEST(DeLICM, IsConflicting) { + EXPECT_FALSE( + checkIsConflicting("{}", "{}", nullptr, "{}", "{}", "{}", nullptr, "{}")); + EXPECT_FALSE( + checkIsConflicting("{}", nullptr, "{}", "{}", "{}", "{}", nullptr, "{}")); + + EXPECT_FALSE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] -> Val[] }", + "{}", "{}", nullptr, "{}")); + + EXPECT_FALSE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] -> [] }", "{}", + "{}", nullptr, "{}")); + + EXPECT_FALSE(checkIsConflicting("{ Dom[0] -> Val[] }", nullptr, "{}", "{}", + "{ Dom[0] -> Val[] }", "{}", nullptr, "{}")); + + EXPECT_FALSE(checkIsConflicting("{ Dom[i] -> Val[] : 0 < i }", nullptr, + "{ Dom[i] : i <= 0 }", "{}", "{}", "{}", + nullptr, "{ Dom[-1] -> [] }")); + + EXPECT_FALSE(checkIsConflicting("{ Dom[i] -> Val[] : i <= 10 }", nullptr, + "{ Dom[i] : 10 < i }", "{}", "{}", "{}", + nullptr, "{ Dom[10] -> [] }")); + + EXPECT_FALSE(checkIsConflicting("{}", nullptr, "{ Dom[0] }", "{}", + "{ 
Dom[0] -> Val[] }", "{}", nullptr, "{}")); + + EXPECT_FALSE(checkIsConflicting("{}", nullptr, "{ Dom[i] }", "{}", "{}", "{}", + nullptr, "{ Dom[0] -> Val[] }")); + + EXPECT_TRUE(checkIsConflicting("{}", "{ Dom[i] }", nullptr, "{ }", "{}", + nullptr, "{}", "{ Dom[0] -> Val[] }")); + + EXPECT_FALSE(checkIsConflicting("{}", nullptr, "{ Dom[i] }", + "{ Dom[0] -> Val[] }", "{ Dom[i] -> Val[] }", + "{}", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[0] -> ValA[] }", nullptr, "{}", "{}", + "{ Dom[0] -> ValB[] }", "{}", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[0] -> Val[] }", nullptr, "{}", "{}", + "{}", "{ Dom[0] }", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{}", + "{ Dom[] -> Val[] }", "{}", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[i] -> Val[] : 0 < i <= 10 }", nullptr, + "{}", "{}", "{}", "{}", nullptr, + "{ Dom[1] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[i] -> Val[] : 0 < i <= 10 }", nullptr, + "{}", "{}", "{}", "{}", nullptr, + "{ Dom[9] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[i] -> ValA[] }", nullptr, "{}", "{}", + "{ Dom[i] -> ValA[] }", "{}", nullptr, + "{ Dom[0] -> ValB[] }")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[i] -> Val[] }", nullptr, "{}", "{}", + "{ Dom[i] -> Val[] }", "{}", nullptr, + "{ Dom[0] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{ Dom[i] -> [] }", nullptr, "{}", "{}", "{}", + "{}", nullptr, "{ Dom[0] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{ Dom[i] }", + "{ Dom[0] -> [] }", "{ Dom[i] -> [] }", "{}", + nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{}", "{}", "{}", nullptr, + "{ Dom[0] -> Val[] }")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{}", "{}", "{}", nullptr, + "{ Dom[0] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] -> Val[] }", + "{}", "{ Dom[i] }", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] 
-> [] }", "{}", + "{ Dom[i] }", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] -> Val[] }", + "{ Dom[i] -> [] }", "{}", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] -> [] }", + "{ Dom[i] -> Val[] }", "{}", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", nullptr, "{}", "{ Dom[0] -> [] }", + "{ Dom[i] -> [] }", "{}", nullptr, "{}")); + + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> ValA[] }", + "{}", "{}", nullptr, "{ Dom[0] -> ValB[] }")); + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> [] }", "{}", + "{}", nullptr, "{ Dom[0] -> Val[] }")); + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> Val[] }", + "{}", "{}", nullptr, "{ Dom[0] -> [] }")); + + EXPECT_TRUE(checkIsConflicting("{}", "{}", nullptr, "{ Dom[0] -> [] }", "{}", + "{}", nullptr, "{ Dom[0] -> [] }")); + + EXPECT_FALSE(checkIsConflicting("{}", "{}", nullptr, + "{ Dom[0] -> ValA[]; Dom[0] -> ValB[] }", + "{}", "{}", nullptr, "{ }")); +} +} // anonymous namespace