Index: include/polly/DeLICM.h =================================================================== --- include/polly/DeLICM.h +++ include/polly/DeLICM.h @@ -33,9 +33,11 @@ /// /// Used by unittesting. bool isConflicting(isl::union_set ExistingOccupied, - isl::union_set ExistingUnused, isl::union_set ExistingWrites, + isl::union_set ExistingUnused, isl::union_map ExistingKnown, + isl::union_map ExistingWrites, isl::union_set ProposedOccupied, - isl::union_set ProposedUnused, isl::union_set ProposedWrites, + isl::union_set ProposedUnused, isl::union_map ProposedKnown, + isl::union_map ProposedWrites, llvm::raw_ostream *OS = nullptr, unsigned Indent = 0); } // namespace polly Index: include/polly/Support/VirtualInstruction.h =================================================================== --- /dev/null +++ include/polly/Support/VirtualInstruction.h @@ -0,0 +1,148 @@ +//===------ VirtualInstruction.cpp ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Tools for determining which instructions are within a statement and the +// nature of their operands. +// +//===----------------------------------------------------------------------===// + +#ifndef POLLY_SUPPORT_VIRTUALINSTRUCTION_H +#define POLLY_SUPPORT_VIRTUALINSTRUCTION_H + +#include "polly/ScopInfo.h" + +namespace polly { + +/// Determine the nature of a value's use within a statement. +/// +/// These are not always representable by llvm::Use. For instance, scalar write +/// MemoryAccesses do use a value, but are not associated to an instruction's +/// argument. +/// +/// Despite its name it is not tied to virtual instructions (although it works +/// fine with them), but to promote consistent handling of values used in +/// statements. +class VirtualUse { +public: + /// The different types of uses. Handling usually differentiates a lot between + /// these; one can use a switch to handle each case (and get warned by the + /// compiler if one is not handled). + enum UseType { + // An llvm::Constant. + Constant, + + // A value that can be generated using ScopExpander. + Synthesizable, + + // Parameters, induction variables and affine expressions of those. + Affine, + + // Definition before the SCoP and not synthesizable. Can be an instruction + // outside the SCoP, a function argument or a global value. Whether there is + // a scalar MemoryAccess in this statement for reading it depends on the + // -polly-analyze-read-only-scalars switch. + ReadOnly, + + // Definition within this statement. No MemoryAccess between them necessary. + IntraValue, + + // Definition in another statement. There is a scalar MemoryAccess that + // makes it available in this statement. + InterValue + }; + +private: + /// The statement where a value is used. + ScopStmt *User; + + /// The value that is used. + Value *Val; + + /// The type of value use. + UseType Ty; + + /// The value represented as llvm::SCEV expression. + const SCEV *ScevExpr; + + /// If this is an inter-statement (or read-only) use, contains the + /// MemoryAccess that makes the value available in this statement. In case of + /// intra-statement uses, can contain an MemoryKind::Array access. In all + /// other cases, it is a nullptr. + MemoryAccess *InputMA; + + VirtualUse(ScopStmt *User, Value *Val, UseType Ty, const SCEV *ScevExpr, + MemoryAccess *InputMA) + : User(User), Val(Val), Ty(Ty), ScevExpr(ScevExpr), InputMA(InputMA) {} + +public: + /// Get a VirtualUse for an llvm::Use. + /// + /// @param S The Scop object. + /// @param U The llvm::Use the get information for. + /// @param LI The LoopInfo analysis. Needed to determine whether the value us + /// synthesizable and/or affine. + /// + /// @return The VirtualUse representing the same use as @p U. + static VirtualUse create(Scop *S, Use &U, LoopInfo *LI); + + /// Get a VirtualUse for an llvm::Use. Prefer this constructor if the user + /// statement is already known. + static VirtualUse create(ScopStmt *UserStmt, Use &U, LoopInfo *LI); + + /// Get a VirtualUse for any kind use of a value within a statement. + /// + /// @param UserStmt The statement in which @p Val is used. + /// @param IsEntryPHIUser Whether the user is a PHINode in the statement's + /// entry block. If True, this use can only be an + /// intra-statement use if @p UserStmt is a region + /// statement. Block statements do not include a + /// loop-backedge, such that it will use the value of a + /// previous iteration, not the value of the current + /// iteration. + /// @param UserScope Loop scope in which the value is used. Needed to + /// determine whether the value us synthesizable. + /// @param Val The value being used. + /// + /// @return A VirtualUse object that gives information about @p Val's use in + /// @p UserStmt. + static VirtualUse create(ScopStmt *UserStmt, bool IsEntryPHIUser, + Loop *UserScope, Value *Val); + + bool isConstant() const { return Ty == Constant; } + bool isAffine() const { return Ty == Affine; } + bool isSynthesizable() const { return Ty == Synthesizable; } + bool isReadOnly() const { return Ty == ReadOnly; } + bool isIntra() const { return Ty == IntraValue; } + bool isInter() const { return Ty == InterValue; } + + /// Return user statement. + ScopStmt *getUser() const { return User; } + + /// Return the used value. + llvm::Value *getValue() const { return Val; } + + /// Return the type of use. + UseType getType() const { return Ty; } + + /// Return the ScalarEvolution representation of @p Val. + const SCEV *getScevExpr() const { return ScevExpr; } + + /// Return the MemoryAccess that makes the value available in this statement, + /// if any. + MemoryAccess *getMemAccess() const { return InputMA; } + + void print(raw_ostream &OS) const; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const; +#endif +}; + +} // namespace polly + +#endif /* POLLY_SUPPORT_VIRTUALINSTRUCTION_H */ Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -62,6 +62,7 @@ Transform/FlattenAlgo.cpp Transform/DeLICM.cpp Transform/Simplify.cpp + Support/VirtualInstruction.cpp ${POLLY_HEADER_FILES} ) Index: lib/Support/GICHelper.cpp =================================================================== --- lib/Support/GICHelper.cpp +++ lib/Support/GICHelper.cpp @@ -182,6 +182,7 @@ replace(str, "\"", "_"); replace(str, " ", "__"); replace(str, "=>", "TO"); + replace(str, "+", "_"); } std::string polly::getIslCompatibleName(const std::string &Prefix, Index: lib/Support/VirtualInstruction.cpp =================================================================== --- /dev/null +++ lib/Support/VirtualInstruction.cpp @@ -0,0 +1,146 @@ +//===------ VirtualInstruction.cpp ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Tools for determining which instructions are within a statement and the +// nature of their operands. +// +//===----------------------------------------------------------------------===// + +#include "polly/Support/VirtualInstruction.h" +#include "polly/Support/SCEVValidator.h" + +using namespace polly; +using namespace llvm; + +namespace { +/// Is @p Val defined in @p Stmt? +bool isDefinedInStmt(Value *Val, ScopStmt *Stmt) { + auto *Inst = dyn_cast(Val); + if (!Inst) + return false; + return Stmt->contains(Inst->getParent()); +} + +/// If InputVal is not defined in the stmt itself, return the MemoryAccess that +/// reads the scalar. Return nullptr otherwise (if the value is defined in the +/// scop, or is synthesizable) +MemoryAccess *getInputAccessOf(Value *InputVal, ScopStmt *UserStmt, + bool IsEntryPHIUser) { + // Stmt may have a backedge to itself. + // In this case prefer to use definition of the value itself. + // Using the input MemoryAccess is only allowed if the user instruction is a + // PHI that handles the backedge. + if (!IsEntryPHIUser && UserStmt->isBlockStmt() && + isDefinedInStmt(InputVal, UserStmt)) + return nullptr; + + for (auto *MA : *UserStmt) { + if (!MA->isRead()) + continue; + if (!MA->isOriginalScalarKind()) + continue; + + if (MA->getAccessValue() == InputVal) + return MA; + } + return nullptr; +} +} // namespace + +VirtualUse VirtualUse ::create(Scop *S, Use &U, LoopInfo *LI) { + return create(S->getStmtFor(cast(U.getUser())), U, LI); +} + +VirtualUse VirtualUse ::create(ScopStmt *UserStmt, Use &U, LoopInfo *LI) { + auto *Val = U.get(); + auto *UserInst = U.getUser(); + auto IsEntryPHIUser = + isa(UserInst) && + cast(UserInst)->getParent() == UserStmt->getEntryBlock(); + return create(UserStmt, IsEntryPHIUser, LI->getLoopFor(getUseBlock(U)), Val); +} + +VirtualUse VirtualUse::create(ScopStmt *UserStmt, bool IsEntryPHIUser, + Loop *UserScope, Value *Val) { + if (isa(Val) || isa(Val)) + return VirtualUse(UserStmt, Val, Constant, nullptr, nullptr); + + auto *S = UserStmt->getParent(); + auto *SE = S->getSE(); + + if (SE->isSCEVable(Val->getType())) { + // Prefer affine over synthesizable. + auto ILS = S->getRequiredInvariantLoads(); + auto *ScevExpr = SE->getSCEVAtScope(Val, UserScope); + if (isAffineExpr(&S->getRegion(), UserScope, ScevExpr, *SE, &ILS) && + ILS.size() == S->getRequiredInvariantLoads().size()) + return VirtualUse(UserStmt, Val, Affine, ScevExpr, nullptr); + + if (canSynthesize(Val, *UserStmt->getParent(), SE, UserScope)) + return VirtualUse(UserStmt, Val, Synthesizable, ScevExpr, nullptr); + } + + auto *InputMA = getInputAccessOf(Val, UserStmt, IsEntryPHIUser); + + if (isa(Val)) + return VirtualUse(UserStmt, Val, ReadOnly, nullptr, InputMA); + + auto Inst = cast(Val); + if (!S->contains(Inst)) + return VirtualUse(UserStmt, Val, ReadOnly, nullptr, InputMA); + + if (isa(Inst) && Inst->getParent() == UserStmt->getEntryBlock()) + return VirtualUse(UserStmt, Val, IntraValue, nullptr, nullptr); + + if (InputMA) + return VirtualUse(UserStmt, Val, InterValue, nullptr, InputMA); + + return VirtualUse(UserStmt, Val, IntraValue, nullptr, nullptr); +} + +void VirtualUse::print(raw_ostream &OS) const { + OS << "User: [" << User->getBaseName() << "] "; + switch (Ty) { + case polly::VirtualUse::Constant: + OS << "Constant Op:"; + break; + case polly::VirtualUse::Affine: + OS << "Affine Op:"; + break; + case polly::VirtualUse::Synthesizable: + OS << "Synthesizable Op:"; + break; + case polly::VirtualUse::ReadOnly: + OS << "Read-Only Op:"; + break; + case polly::VirtualUse::IntraValue: + OS << "Intra Op:"; + break; + case polly::VirtualUse::InterValue: + OS << "Inter Op:"; + break; + } + if (Val) { + OS << ' '; + Val->print(errs(), true); + } + if (ScevExpr) { + OS << ' '; + ScevExpr->print(OS); + } + if (InputMA) + OS << ' ' << InputMA; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VirtualUse::dump() const { + print(errs()); + errs() << '\n'; +} +#endif Index: lib/Transform/DeLICM.cpp =================================================================== --- lib/Transform/DeLICM.cpp +++ lib/Transform/DeLICM.cpp @@ -102,6 +102,37 @@ // * Zone[] - Range between timepoints as described above // Has no tuple id // +// * ValInst[] - An llvm::Value as defined at a specific timepoint. +// +// A ValInst[] itself can be structured as one of: +// +// * [] - An unknown value. +// Always zero dimensions +// Has no tuple id +// +// * Value[] - An llvm::Value that is read-only in the SCoP, i.e. its +// runtime content does not depend on the timepoint. +// Always zero dimensions +// isl_id_get_name: Val_ +// isl_id_get_user: A pointer to an llvm::Value +// +// * [Expression] - An affine expression. +// The value was synthesizable in a given scope, and this is the result. +// This typically is a parameter, an induction variable or an affine +// expression of these. +// +// * SCEV[...] - An synthesizable llvm::SCEV Expression. +// In contrast to a Value[] is has at least one dimension per +// SCEVAddRecExpr in the SCEV. +// +// * [Domain[] -> Value[]] - An llvm::Value that may change during the +// Scop's execution. +// The tuple itself has no id, but it wraps a map space holding a +// statement instance which defines the llvm::Value as the map's domain +// and llvm::Value itself as range. +// +// @see makeValInst() +// // An annotation "{ Domain[] -> Scatter[] }" therefore means: A map from a // statement instance to a timepoint, aka a schedule. There is only one scatter // space, but most of the time multiple statements are processed in one set. @@ -132,6 +163,7 @@ #include "polly/ScopInfo.h" #include "polly/ScopPass.h" #include "polly/Support/ISLTools.h" +#include "polly/Support/VirtualInstruction.h" #include "llvm/ADT/Statistic.h" #define DEBUG_TYPE "polly-delicm" @@ -152,6 +184,11 @@ "Do more PHI writes than necessary in order to avoid partial accesses"), cl::init(false), cl::Hidden, cl::cat(PollyCategory)); +cl::opt + DelicmComputeKnown("polly-delicm-compute-known", + cl::desc("Compute known content of array elements"), + cl::init(true), cl::Hidden, cl::cat(PollyCategory)); + STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); STATISTIC(DeLICMOutOfQuota, "Analyses aborted because max_operations was reached"); @@ -374,21 +411,46 @@ return singleton(UMap, ResultSpace); } -/// If InputVal is not defined in the stmt itself, return the MemoryAccess that -/// reads the scalar. Return nullptr otherwise (if the value is defined in the -/// scop, or is synthesizable). -MemoryAccess *getInputAccessOf(Value *InputVal, ScopStmt *Stmt) { - for (auto *MA : *Stmt) { - if (!MA->isRead()) - continue; - if (!MA->isLatestScalarKind()) - continue; - - assert(MA->getAccessValue() == MA->getBaseAddr()); - if (MA->getAccessValue() == InputVal) - return MA; - } - return nullptr; +/// Create a domain-to-unknown value mapping. +/// +/// @param Domain { Domain[] } +/// +/// @return { Domain[] -> ValInst[] } +isl::map makeUnknownForDomain(isl::set Domain) { + return give(isl_map_from_domain(Domain.take())); +} + +/// Create a domain-to-unknown value mapping. +/// +/// @param Domain { Domain[] } +/// +/// @return { Domain[] -> ValInst[] } +isl::union_map makeUnknownForDomain(isl::union_set Domain) { + return give(isl_union_map_from_domain(Domain.take())); +} + +/// Return whether @p Map maps to an unknown value. +/// +/// @param { [] -> ValInst[] } +bool isMapToUnknown(const isl::map &Map) { + auto Space = give(isl_space_range(isl_map_get_space(Map.keep()))); + return !isl_map_has_tuple_id(Map.keep(), isl_dim_set) && + !isl_space_is_wrapping(Space.keep()) && + isl_map_dim(Map.keep(), isl_dim_out) == 0; +} + +/// Return only the mappings that map to known values. +/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +isl::union_map filterKnownValInst(const isl::union_map &UMap) { + auto Result = give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + foreachElt(UMap, [=, &Result](isl::map Map) { + if (!isMapToUnknown(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + }); + return Result; } /// Try to find a 'natural' extension of a mapped to elements outside its @@ -469,30 +531,58 @@ /// implicitly defined as the complement of #Occupied. isl::union_set Unused; - /// { [Element[] -> Scatter[]] } + /// { [Element[] -> Zone[]] -> ValInst[] } + /// Maps to the known content for each array element at any interval. + /// + /// Any element/interval can map to multiple known elements. This is due to + /// multiple llvm::Value referring to the same content. Examples are + /// + /// - A value stored and loaded again. The LoadInst represents the same value + /// as the StoreInst's value operand. + /// + /// - A PHINode is equal to any one of the incoming values. In case of + /// LCSSA-form, it is always equal to its single incoming value. + /// + /// Two Knowledges are considered not conflicting if at least one of the known + /// values match. Not known values are not stored as an unnamed tuple (as + /// #Written does), but maps to nothing. + /// + /// Known values are usually just defined for #Occupied elements. Knowing + /// #Unused contents has no advantage as it can be overwritten. + isl::union_map Known; + + /// { [Element[] -> Scatter[]] -> ValInst[] } /// The write actions currently in the scop or that would be added when - /// mapping a scalar. - isl::union_set Written; + /// mapping a scalar. Maps to the value that is written. + /// + /// Written values that cannot be identified are represented by an unknown + /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself. + isl::union_map Written; /// Check whether this Knowledge object is well-formed. void checkConsistency() const { #ifndef NDEBUG // Default-initialized object - if (!Occupied && !Unused && !Written) + if (!Occupied && !Unused && !Known && !Written) return; assert(Occupied || Unused); + assert(Known); assert(Written); + assert(filterKnownValInst(Known).is_equal(Known) && + "Known must only contain known elements"); + // If not all fields are defined, we cannot derived the universe. if (!Occupied || !Unused) return; - assert(isl_union_set_is_disjoint(Occupied.keep(), Unused.keep()) == - isl_bool_true); + assert(isl_union_set_is_disjoint(Occupied.keep(), Unused.keep()) != + isl_bool_false); auto Universe = give(isl_union_set_union(Occupied.copy(), Unused.copy())); - assert(isl_union_set_is_subset(Written.keep(), Universe.keep()) == - isl_bool_true); + + assert(Written.domain().is_subset(Universe).is_true_or_error()); + assert(Known.domain().is_subset(Universe).is_true_or_error()); #endif } @@ -503,14 +593,14 @@ /// Create a new object with the given members. Knowledge(isl::union_set Occupied, isl::union_set Unused, - isl::union_set Written) + isl::union_map Known, isl::union_map Written) : Occupied(std::move(Occupied)), Unused(std::move(Unused)), - Written(std::move(Written)) { + Known(std::move(Known)), Written(std::move(Written)) { checkConsistency(); } /// Return whether this object was not default-constructed. - bool isUsable() const { return (Occupied || Unused) && Written; } + bool isUsable() const { return (Occupied || Unused) && Known && Written; } /// Print the content of this object to @p OS. void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { @@ -523,6 +613,7 @@ OS.indent(Indent) << "Unused: " << Unused << "\n"; else OS.indent(Indent) << "Unused: \n"; + OS.indent(Indent) << "Known: " << Known << "\n"; OS.indent(Indent) << "Written : " << Written << '\n'; } else { OS.indent(Indent) << "Invalid knowledge\n"; @@ -542,7 +633,8 @@ "That.Occupied.copy()));`"); Unused = give(isl_union_set_subtract(Unused.take(), That.Occupied.copy())); - Written = give(isl_union_set_union(Written.take(), That.Written.take())); + Known = give(isl_union_map_union(Known.take(), That.Known.copy())); + Written = give(isl_union_map_union(Written.take(), That.Written.take())); checkConsistency(); } @@ -584,20 +676,59 @@ } #endif - // Are the new lifetimes required for Proposed unused in Existing? - if (isl_union_set_is_subset(Proposed.Occupied.keep(), - Existing.Unused.keep()) != isl_bool_true) { + // Do the Existing and Proposed lifetimes conflicting? + // In order to not conflict, one of the following conditions must apply for + // each element/lifetime interval: + // + // 1. If occupied in one of the knowledges, it is unused in the other. + // + // - or - + // + // 2. Both contain the same value. + // + // Instead of partitioning the lifetimes into a part occupied by both and + // checking the conditions separately, we check only the second conditions + // and ensure that it will also apply when then first condition is true. + // This is done by adding another "the value occupying Proposed" to + // Proposed.Known and to Existing.Unused such that these match while looking + // for a common known value. Note that we use "unknown value" for this + // purpose such that we can re-use Existing.Unused to also match unknown + // values in Proposed.Written in a later check. + auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied); + auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal); + + auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused); + auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal); + + auto SomeVal = ExistingValues.intersect(ProposedValues); + auto SomeValDomain = SomeVal.domain(); + + if (!Proposed.Occupied.is_subset(SomeValDomain)) { if (OS) { - auto ConflictingLifetimes = give(isl_union_set_subtract( - Proposed.Occupied.copy(), Existing.Unused.copy())); - OS->indent(Indent) << "Proposed lifetimes are not unused in existing\n"; - OS->indent(Indent) << "Conflicting lifetimes: " << ConflictingLifetimes - << "\n"; + auto Conflicting = Proposed.Occupied.subtract(SomeValDomain); + auto ExistingConflictingKnown = + Existing.Known.intersect_domain(Conflicting); + auto ProposedConflictingKnown = + Proposed.Known.intersect_domain(Conflicting); + + OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n"; + OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n"; + if (!ExistingConflictingKnown.is_empty()) + OS->indent(Indent) + << "Existing Known: " << ExistingConflictingKnown << "\n"; + if (!ProposedConflictingKnown.is_empty()) + OS->indent(Indent) + << "Proposed Known: " << ProposedConflictingKnown << "\n"; } return true; } - // Do the writes in Existing only overwrite unused values in Proposed? + // Do the writes in Existing conflict with occupied values in Proposed? + // In order to not conflict, it must either write to unused lifetime or + // write the same value. To check, we remove the writes that write into + // Proposed.Unused (they never conflict) and then see whether the written + // value is already in Proposed.Known. + // // We convert here the set of lifetimes to actual timepoints. A lifetime is // in conflict with a set of write timepoints, if either a live timepoint is // clearly within the lifetime or if a write happens at the beginning of the @@ -608,46 +739,93 @@ // Knowledge to check this property also for accesses to MemoryKind::Array. auto ProposedFixedDefs = convertZoneToTimepoints(Proposed.Occupied, true, false); - if (isl_union_set_is_disjoint(Existing.Written.keep(), - ProposedFixedDefs.keep()) != isl_bool_true) { + auto ProposedFixedKnown = + convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false); + + auto ExistingConflictingWrites = + Existing.Written.intersect_domain(ProposedFixedDefs); + auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain(); + + auto SomeWrittenVal = + ProposedFixedKnown.intersect(ExistingConflictingWrites); + auto SomeWrittenValDomain = SomeWrittenVal.domain(); + + if (!ExistingConflictingWritesDomain.is_subset(SomeWrittenValDomain)) { if (OS) { - auto ConflictingWrites = give(isl_union_set_intersect( - Existing.Written.copy(), ProposedFixedDefs.copy())); - OS->indent(Indent) << "Proposed writes into range used by existing\n"; - OS->indent(Indent) << "Conflicting writes: " << ConflictingWrites - << "\n"; + auto ExistingConflictingWritten = + ExistingConflictingWrites.subtract_domain(SomeWrittenValDomain); + auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain( + ExistingConflictingWritten.domain()); + + OS->indent(Indent) + << "Proposed a lifetime where there is an Existing write into it\n"; + OS->indent(Indent) << "Existing conflicting writes: " + << ExistingConflictingWritten << "\n"; + if (!ProposedConflictingKnown.is_empty()) + OS->indent(Indent) + << "Proposed conflicting known: " << ProposedConflictingKnown + << "\n"; } return true; } - // Do the new writes in Proposed only overwrite unused values in Existing? - auto ExistingAvailableDefs = - convertZoneToTimepoints(Existing.Unused, true, false); - if (isl_union_set_is_subset(Proposed.Written.keep(), - ExistingAvailableDefs.keep()) != - isl_bool_true) { + // Do the writes in Proposed conflict with occupied values in Existing? + // We use the same trick with unknown values to save a subtract operation. + auto ExistingValuesFixedDefs = + convertZoneToTimepoints(ExistingValues, isl::dim::in, true, false); + + auto ProposedWrittenDomain = Proposed.Written.domain(); + auto ProposedWrittenAnyVal = + Proposed.Written.unite(makeUnknownForDomain(ProposedWrittenDomain)); + auto ProposedWrittenSomeVal = + ProposedWrittenAnyVal.intersect(ExistingValuesFixedDefs); + auto ProposedWrittenSomeValDomain = ProposedWrittenSomeVal.domain(); + + if (!ProposedWrittenDomain.is_subset(ProposedWrittenSomeValDomain)) { if (OS) { - auto ConflictingWrites = give(isl_union_set_subtract( - Proposed.Written.copy(), ExistingAvailableDefs.copy())); - OS->indent(Indent) - << "Proposed a lifetime where there is an Existing write into it\n"; - OS->indent(Indent) << "Conflicting writes: " << ConflictingWrites - << "\n"; + auto Conflicting = + ProposedWrittenDomain.subtract(ProposedWrittenSomeValDomain); + auto ExistingConflictingKnown = + ExistingValuesFixedDefs.intersect_domain(Conflicting); + auto ProposedConflictingWritten = + Proposed.Written.intersect_domain(Conflicting); + + OS->indent(Indent) << "Proposed writes into range used by Existing\n"; + OS->indent(Indent) << "Proposed conflicting writes: " + << ProposedConflictingWritten << "\n"; + if (!ExistingConflictingKnown.is_empty()) + OS->indent(Indent) + << "Existing conflicting known: " << ExistingConflictingKnown + << "\n"; } return true; } // Does Proposed write at the same time as Existing already does (order of - // writes is undefined)? - if (isl_union_set_is_disjoint(Existing.Written.keep(), - Proposed.Written.keep()) != isl_bool_true) { + // writes is undefined)? Writing the same value is permitted. + auto BothWritten = + Existing.Written.domain().intersect(Proposed.Written.domain()); + auto CommonWritten = filterKnownValInst(Existing.Written) + .intersect(filterKnownValInst(Proposed.Written)) + .domain(); + + if (!BothWritten.is_subset(CommonWritten)) { if (OS) { - auto ConflictingWrites = give(isl_union_set_intersect( - Existing.Written.copy(), Proposed.Written.copy())); + auto Conflicting = BothWritten.subtract(CommonWritten); + auto ExistingConflictingWritten = + Existing.Written.intersect_domain(Conflicting); + auto ProposedConflictingWritten = + Proposed.Written.intersect_domain(Conflicting); + OS->indent(Indent) << "Proposed writes at the same time as an already " "Existing write\n"; - OS->indent(Indent) << "Conflicting writes: " << ConflictingWrites - << "\n"; + OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n"; + if (!ExistingConflictingWritten.is_empty()) + OS->indent(Indent) + << "Exiting write: " << ExistingConflictingWritten << "\n"; + if (!ProposedConflictingWritten.is_empty()) + OS->indent(Indent) + << "Proposed write: " << ProposedConflictingWritten << "\n"; } return true; } @@ -686,6 +864,9 @@ /// The analyzed Scop. Scop *S; + /// LoopInfo analysis used to determine whether values are synthesizable. + LoopInfo *LI; + /// Parameter space that does not need realignment. isl::space ParamSpace; @@ -707,9 +888,21 @@ /// { DomainMustWrite[] -> Element[] } isl::union_map AllMustWrites; + /// The value instances written to array elements of all write accesses. + /// { [Element[] -> DomainWrite[]] -> ValInst[] } + isl::union_map AllWriteValInst; + + // { [Element[] -> DomainRead[]] -> ValInst[] } + isl::union_map AllReadValInst; + + /// All reaching definitions for MemoryKind::Array writes. + /// { [Element[] -> Zone[]] -> DomainWrite[] } + isl::union_map WriteReachDefZone; + /// Prepare the object before computing the zones of @p S. - ZoneAlgorithm(Scop *S) - : IslCtx(S->getSharedIslCtx()), S(S), Schedule(give(S->getSchedule())) { + ZoneAlgorithm(Scop *S, LoopInfo *LI) + : IslCtx(S->getSharedIslCtx()), S(S), LI(LI), + Schedule(give(S->getSchedule())) { auto Domains = give(S->getDomains()); @@ -802,15 +995,35 @@ void addArrayReadAccess(MemoryAccess *MA) { assert(MA->isLatestArrayKind()); assert(MA->isRead()); + auto *Stmt = MA->getStatement(); // { DomainRead[] -> Element[] } auto AccRel = getAccessRelationFor(MA); AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy())); + + if (auto *Load = dyn_cast_or_null(MA->getAccessValue())) { + // { DomainRead[] -> ValInst[] } + auto LoadValInst = + makeValInst(Load, Stmt, false, LI->getLoopFor(Load->getParent()), + Stmt->isBlockStmt()); + + // { DomainRead[] -> [Element[] -> DomainRead[]] } + auto IncludeElement = + give(isl_map_curry(isl_map_domain_map(AccRel.take()))); + + // { [Element[] -> DomainRead[]] -> ValInst[] } + auto EltLoadValInst = + give(isl_map_apply_domain(LoadValInst.take(), IncludeElement.take())); + + AllReadValInst = give( + isl_union_map_add_map(AllReadValInst.take(), EltLoadValInst.take())); + } } void addArrayWriteAccess(MemoryAccess *MA) { assert(MA->isLatestArrayKind()); assert(MA->isWrite()); + auto *Stmt = MA->getStatement(); // { Domain[] -> Element[] } auto AccRel = getAccessRelationFor(MA); @@ -822,6 +1035,23 @@ if (MA->isMayWrite()) AllMayWrites = give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy())); + + // { Domain[] -> ValInst[] } + auto WriteValInstance = + makeValInst(MA->getAccessValue(), Stmt, false, + LI->getLoopFor(MA->getAccessInstruction()->getParent()), + MA->isMustWrite()); + + // { Domain[] -> [Element[] -> Domain[]] } + auto IncludeElement = + give(isl_map_curry(isl_map_domain_map(AccRel.copy()))); + + // { [Element[] -> DomainWrite[]] -> ValInst[] } + auto EltWriteValInst = give( + isl_map_apply_domain(WriteValInstance.take(), IncludeElement.take())); + + AllWriteValInst = give( + isl_union_map_add_map(AllWriteValInst.take(), EltWriteValInst.take())); } protected: @@ -868,7 +1098,8 @@ auto UDomain = give(isl_union_set_from_set(Domain.copy())); auto UResult = getScatterFor(std::move(UDomain)); auto Result = singleton(std::move(UResult), std::move(ResultSpace)); - assert(isl_set_is_equal(give(isl_map_domain(Result.copy())).keep(), + assert(!Result || + isl_set_is_equal(give(isl_map_domain(Result.copy())).keep(), Domain.keep()) == isl_bool_true); return Result; } @@ -909,7 +1140,201 @@ Result = computeScalarReachingDefinition(Schedule, Domain, false, true); simplify(Result); - assert(Result); + return Result; + } + + /// Get the reaching definition of a scalar defined in @p DefDomain. + /// + /// @param DomainDef { DomainDef[] } + /// The write statements to get the reaching definition for. + /// + /// @return { Scatter[] -> DomainDef[] } + isl::map getScalarReachingDefinition(isl::set DomainDef) { + auto DomId = give(isl_set_get_tuple_id(DomainDef.keep())); + auto *Stmt = static_cast(isl_id_get_user(DomId.keep())); + + auto StmtResult = getScalarReachingDefinition(Stmt); + + return give(isl_map_intersect_range(StmtResult.take(), DomainDef.take())); + } + + /// Create a statement-to-unknown value mapping. + /// + /// @param Stmt The statement whose instances are mapped to unknown. + /// + /// @return { Domain[] -> ValInst[] } + isl::map makeUnknownForDomain(ScopStmt *Stmt) const { + return ::makeUnknownForDomain(getDomainFor(Stmt)); + } + + /// Create an isl_id that represents @p V. + isl::id makeValueId(Value *V) const { + if (!V) + return nullptr; + + auto Name = getIslCompatibleName("Val_", V, std::string()); + return give(isl_id_alloc(IslCtx.get(), Name.c_str(), V)); + } + + /// Create the space for an llvm::Value that is available everywhere. + isl::space makeValueSpace(Value *V) const { + auto Result = give(isl_space_set_from_params(ParamSpace.copy())); + return give(isl_space_set_tuple_id(Result.take(), isl_dim_set, + makeValueId(V).take())); + } + + /// Create a set with the llvm::Value @p V which is available everywhere. + isl::set makeValueSet(Value *V) const { + auto Space = makeValueSpace(V); + return give(isl_set_universe(Space.take())); + } + + /// Create a mapping from a statement instance to the instance of an + /// llvm::Value that can be used in there. + /// + /// Although LLVM IR used single static assignment, llvm::Values can have + /// different contents in loops, when they get redefined in the last + /// iteration. This function tries to get the statement instance of the + /// previous definition, relative to a user. + /// + /// Example: + /// for (int i = 0; i < N; i += 1) { + /// DEF: + /// int v = A[i]; + /// USE: + /// use(v); + /// } + /// + /// The value instance used by statement instance USE[i] is DEF[i]. Hence, + /// makeValInst returns: + /// + /// { USE[i] -> [DEF[i] -> v[]] : 0 <= i < N } + /// + /// @param Val The value to get the instance of. + /// @param UserStmt The statement that uses @p Val. Can be nullptr. + /// @param IsEntryPHIUser Whether the user is a PHINode in the statement's + /// entry block. + /// @param Scope Loop the using instruction resides in. + /// @param IsCertain Pass true if the definition of @p Val is a + /// MUST_WRITE or false if the write is conditional. + /// + /// @return { DomainUse[] -> ValInst[] } + isl::map makeValInst(Value *Val, ScopStmt *UserStmt, bool IsEntryPHIUser, + Loop *Scope, bool IsCertain = true) { + // When known knowledge is disabled, just return the unknown value. It will + // either get filtered out or conflict with itself (for Knowledge::Written). + if (!DelicmComputeKnown) + return makeUnknownForDomain(UserStmt); + + // If the definition/write is conditional, the previous write may "shine + // through" on conditions we cannot determine. Return the unknown value. + if (!IsCertain) + return makeUnknownForDomain(UserStmt); + + auto DomainUse = getDomainFor(UserStmt); + auto *SE = S->getSE(); + + auto VUse = VirtualUse::create(UserStmt, IsEntryPHIUser, Scope, Val); + switch (VUse.getType()) { + case VirtualUse::Constant: + case VirtualUse::ReadOnly: { + // The definition does not depend on the statement which uses it. + auto ValSet = makeValueSet(Val); + return give( + isl_map_from_domain_and_range(DomainUse.take(), ValSet.take())); + } + + case VirtualUse::Synthesizable: { + auto *ScevExpr = VUse.getScevExpr(); + auto UseDomainSpace = give(isl_set_get_space(DomainUse.keep())); + + // Construct the SCEV space. + // TODO: Add only the induction variables referenced in SCEVAddRecExpr + // expressions, not just all of them. + auto ScevId = give(isl_id_alloc(UseDomainSpace.get_ctx().get(), nullptr, + (void *)ScevExpr)); + auto ScevSpace = + give(isl_space_drop_dims(UseDomainSpace.copy(), isl_dim_set, 0, 0)); + ScevSpace = give( + isl_space_set_tuple_id(ScevSpace.take(), isl_dim_set, ScevId.copy())); + + // { DomainUse[] -> ScevExpr[] } + auto ValInst = give(isl_map_identity(isl_space_map_from_domain_and_range( + UseDomainSpace.copy(), ScevSpace.copy()))); + return ValInst; + } + + case VirtualUse::Affine: { + // With the induction variables fixed, we can evaluate this expression. + auto *SCEV = SE->getSCEVAtScope(Val, Scope); + + // { UserDomain[] -> [Expression] } + auto Aff = give(UserStmt->getPwAff(SCEV)); + auto Result = give(isl_map_from_pw_aff(Aff.take())); + Result = give(isl_map_set_tuple_id( + Result.take(), isl_dim_in, isl_set_get_tuple_id(DomainUse.keep()))); + + simplify(Result); + return Result; + } + + case VirtualUse::IntraValue: { + // Definition and use is in the same statement. We do not need to compute + // a reaching definition. + + // { llvm::Value } + auto ValSet = makeValueSet(Val); + + // { UserDomain[] -> llvm::Value } + auto ValInstSet = + give(isl_map_from_domain_and_range(DomainUse.take(), ValSet.take())); + + // { UserDomain[] -> [UserDomain[] - >llvm::Value] } + auto Result = + give(isl_map_reverse(isl_map_domain_map(ValInstSet.take()))); + simplify(Result); + return Result; + } + + case VirtualUse::InterValue: + // The value is defined in a different statement. + break; + } + + auto *Inst = cast(Val); + auto *ValStmt = S->getStmtFor(Inst); + + // If the llvm::Value is defined in a removed Stmt, we cannot derive its + // domain. We could use an arbitrary statement, but this could result in + // different ValInst[] for the same llvm::Value. + if (!ValStmt) + return ::makeUnknownForDomain(DomainUse); + + // { DomainDef[] } + auto DomainDef = getDomainFor(ValStmt); + + // { Scatter[] -> DomainDef[] } + auto ReachDef = getScalarReachingDefinition(DomainDef); + + // { DomainUse[] -> Scatter[] } + auto UserSched = getScatterFor(DomainUse); + + // { DomainUse[] -> DomainDef[] } + auto UsedInstance = + give(isl_map_apply_range(UserSched.take(), ReachDef.take())); + + // { llvm::Value } + auto ValSet = makeValueSet(Val); + + // { DomainUse[] -> llvm::Value[] } + auto ValInstSet = + give(isl_map_from_domain_and_range(DomainUse.take(), ValSet.take())); + + // { DomainUse[] -> [DomainDef[] -> llvm::Value] } + auto Result = + give(isl_map_range_product(UsedInstance.take(), ValInstSet.take())); + + simplify(Result); return Result; } @@ -918,6 +1343,8 @@ AllReads = makeEmptyUnionMap(); AllMayWrites = makeEmptyUnionMap(); AllMustWrites = makeEmptyUnionMap(); + AllWriteValInst = makeEmptyUnionMap(); + AllReadValInst = makeEmptyUnionMap(); for (auto &Stmt : *S) { for (auto *MA : Stmt) { @@ -935,6 +1362,11 @@ // { DomainWrite[] -> Element[] } auto AllWrites = give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); + + // { [Element[] -> Zone[]] -> DomainWrite[] } + WriteReachDefZone = + computeReachingDefinition(Schedule, AllWrites, false, true); + simplify(WriteReachDefZone); } /// Print the current state of all MemoryAccesses to @p. @@ -1157,6 +1589,8 @@ auto *DefMA = DefUse.getValueDef(SAI); assert(DefMA->isValueKind()); assert(DefMA->isMustWrite()); + auto *V = DefMA->getAccessValue(); + auto *DefInst = DefMA->getAccessInstruction(); // Stop if the scalar has already been mapped. if (!DefMA->getLatestScopArrayInfo()->isValueKind()) @@ -1194,12 +1628,30 @@ isl_map_wrap(isl_map_apply_domain(Lifetime.copy(), DefTarget.copy()))); simplify(EltZone); - // { [Element[] -> Scatter[]] } - auto DefEltSched = give(isl_map_wrap(isl_map_reverse( - isl_map_apply_domain(DefTarget.copy(), DefSched.copy())))); + // { DomainDef[] -> ValInst[] } + auto ValInst = makeValInst(V, DefMA->getStatement(), false, + LI->getLoopFor(DefInst->getParent()), true); + + // { DomainDef[] -> [Element[] -> Zone[]] } + auto EltKnownTranslator = + give(isl_map_range_product(DefTarget.copy(), Lifetime.copy())); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltKnown = + give(isl_map_apply_domain(ValInst.copy(), EltKnownTranslator.take())); + simplify(EltKnown); + + // { DomainDef[] -> [Element[] -> Scatter[]] } + auto WrittenTranslator = + give(isl_map_range_product(DefTarget.copy(), DefSched.take())); + + // { [Element[] -> Scatter[]] -> ValInst[] } + auto DefEltSched = + give(isl_map_apply_domain(ValInst.copy(), WrittenTranslator.take())); simplify(DefEltSched); - Knowledge Proposed(EltZone, nullptr, DefEltSched); + Knowledge Proposed(EltZone, nullptr, filterKnownValInst(EltKnown), + DefEltSched); if (isConflicting(Proposed)) return false; @@ -1257,6 +1709,43 @@ NumberOfMappedValueScalars += 1; } + /// Express the incoming values of a PHI for each incoming statement in an + /// isl::union_map. + /// + /// @param SAI The PHI scalar represented by a ScopArrayInfo. + /// + /// @return { PHIWriteDomain[] -> ValInst[] } + isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) { + auto Result = makeEmptyUnionMap(); + + // Collect the incoming values. + for (auto *MA : DefUse.getPHIIncomings(SAI)) { + // { DomainWrite[] -> ValInst[] } + isl::union_map ValInst; + auto *WriteStmt = MA->getStatement(); + + auto Incoming = MA->getIncoming(); + assert(!Incoming.empty()); + if (Incoming.size() == 1) { + ValInst = makeValInst(Incoming[0].second, WriteStmt, false, + LI->getLoopFor(Incoming[0].first), true); + } else { + // If the PHI is in a subregion's exit node it can have multiple + // incoming values (+ maybe another incoming edge from an unrelated + // block). We cannot directly represent it as a single llvm::Value. + // We currently model it as unknown value, but modeling as the PHIInst + // itself could be OK, too. + ValInst = makeUnknownForDomain(WriteStmt); + } + + Result = give(isl_union_map_union(Result.take(), ValInst.take())); + } + + assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true && + "Cannot have multiple incoming values for same incoming statement"); + return Result; + } + /// Try to map a MemoryKind::PHI scalar to a given array element. /// /// @param SAI Representation of the scalar's memory to map. @@ -1339,23 +1828,35 @@ auto WriteLifetime = give(isl_union_map_apply_domain( isl_union_map_from_map(Lifetime.copy()), PerPHIWrites.copy())); + // { DomainWrite[] -> ValInst[] } + auto WrittenValue = determinePHIWrittenValues(SAI); + // { DomainWrite[] -> [Element[] -> Scatter[]] } auto WrittenTranslator = give(isl_union_map_range_product(WritesTarget.copy(), Schedule.copy())); - // { [Element[] -> Scatter[]] } - auto Written = give(isl_union_map_range(WrittenTranslator.copy())); + // { [Element[] -> Scatter[]] -> ValInst[] } + auto Written = give(isl_union_map_apply_domain(WrittenValue.copy(), + WrittenTranslator.copy())); simplify(Written); // { DomainWrite[] -> [Element[] -> Zone[]] } auto LifetimeTranslator = give( - isl_union_map_range_product(WritesTarget.copy(), WriteLifetime.take())); + isl_union_map_range_product(WritesTarget.copy(), WriteLifetime.copy())); + + // { DomainWrite[] -> ValInst[] } + auto WrittenKnownValue = filterKnownValInst(WrittenValue); + + // { [Element[] -> Zone[]] -> ValInst[] } + auto EltLifetimeInst = give(isl_union_map_apply_domain( + WrittenKnownValue.copy(), LifetimeTranslator.copy())); + simplify(EltLifetimeInst); // { [Element[] -> Zone[] } auto Occupied = give(isl_union_map_range(LifetimeTranslator.copy())); simplify(Occupied); - Knowledge Proposed(Occupied, nullptr, Written); + Knowledge Proposed(Occupied, nullptr, EltLifetimeInst, Written); if (isConflicting(Proposed)) return false; @@ -1458,9 +1959,11 @@ // Add initial scalar. Either the value written by the store, or all inputs // of its statement. - auto WrittenVal = TargetStoreMA->getAccessValue(); - if (auto InputAcc = getInputAccessOf(WrittenVal, TargetStmt)) - Worklist.push_back(InputAcc); + auto WrittenValUse = VirtualUse::create( + TargetStmt, TargetStoreMA->getAccessInstruction()->getOperandUse(0), + LI); + if (WrittenValUse.isInter()) + Worklist.push_back(WrittenValUse.getMemAccess()); else ProcessAllIncoming(TargetStmt); @@ -1536,23 +2039,31 @@ return Result; } - /// Determine when an array element is written to. + /// Compute which value an array element stores at every instant. /// - /// @return { [Element[] -> Scatter[]] } - isl::union_set computeWritten() const { - // { WriteDomain[] -> Element[] } - auto AllWrites = - give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); + /// @return { [Element[] -> Zone[]] -> ValInst[] } + isl::union_map computeKnown() const { + // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] } + auto EltReachdDef = + distributeDomain(give(isl_union_map_curry(WriteReachDefZone.copy()))); - // { Scatter[] -> Element[] } - auto WriteTimepoints = - give(isl_union_map_apply_domain(AllWrites.copy(), Schedule.copy())); + // { [Element[] -> DomainWrite[]] -> ValInst[] } + auto AllKnownWriteValInst = filterKnownValInst(AllWriteValInst); - auto Result = - give(isl_union_map_wrap(isl_union_map_reverse(WriteTimepoints.copy()))); + // { [Element[] -> Zone[]] -> ValInst[] } + return EltReachdDef.apply_range(AllKnownWriteValInst); + } - simplify(Result); - return Result; + /// Determine when an array element is written to, and which value instance is + /// written. + /// + /// @return { [Element[] -> Scatter[]] -> ValInst[] } + isl::union_map computeWritten() const { + // { [Element[] -> Scatter[]] -> ValInst[] } + auto EltWritten = applyDomainRange(AllWriteValInst, Schedule); + + simplify(EltWritten); + return EltWritten; } /// Determine whether an access touches at most one element. @@ -1588,7 +2099,7 @@ bool isModified() const { return NumberOfTargetsMapped > 0; } public: - DeLICMImpl(Scop *S) : ZoneAlgorithm(S) {} + DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm(S, LI) {} /// Calculate the lifetime (definition to last use) of every array element. /// @@ -1601,7 +2112,8 @@ } DefUse.compute(S); - isl::union_set EltUnused, EltWritten; + isl::union_set EltUnused; + isl::union_map EltKnown, EltWritten; { IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); @@ -1609,11 +2121,12 @@ computeCommon(); EltUnused = computeLifetime(); + EltKnown = computeKnown(); EltWritten = computeWritten(); } DeLICMAnalyzed++; - if (!EltUnused || !EltWritten) { + if (!EltUnused || !EltKnown || !EltWritten) { assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota && "The only reason that these things have not been computed should " "be if the max-operations limit hit"); @@ -1628,7 +2141,7 @@ return false; } - Zone = OriginalZone = Knowledge(nullptr, EltUnused, EltWritten); + Zone = OriginalZone = Knowledge(nullptr, EltUnused, EltKnown, EltWritten); DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); assert(Zone.isUsable() && OriginalZone.isUsable()); @@ -1718,7 +2231,8 @@ std::unique_ptr Impl; void collapseToUnused(Scop &S) { - Impl = make_unique(&S); + Impl = make_unique( + &S, &getAnalysis().getLoopInfo()); if (!Impl->computeZone()) { DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); @@ -1738,6 +2252,7 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive(); + AU.addRequired(); AU.setPreservesAll(); } @@ -1770,20 +2285,20 @@ INITIALIZE_PASS_BEGIN(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, false) INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, false) -bool polly::isConflicting(isl::union_set ExistingOccupied, - isl::union_set ExistingUnused, - isl::union_set ExistingWrites, - isl::union_set ProposedOccupied, - isl::union_set ProposedUnused, - isl::union_set ProposedWrites, llvm::raw_ostream *OS, - unsigned Indent) { +bool polly::isConflicting( + isl::union_set ExistingOccupied, isl::union_set ExistingUnused, + isl::union_map ExistingKnown, isl::union_map ExistingWrites, + isl::union_set ProposedOccupied, isl::union_set ProposedUnused, + isl::union_map ProposedKnown, isl::union_map ProposedWrites, + llvm::raw_ostream *OS, unsigned Indent) { Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), - std::move(ExistingWrites)); + std::move(ExistingKnown), std::move(ExistingWrites)); Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), - std::move(ProposedWrites)); + std::move(ProposedKnown), std::move(ProposedWrites)); return Knowledge::isConflicting(Existing, Proposed, OS, Indent); } Index: test/DeLICM/reduction_constant_selfconflict.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_constant_selfconflict.ll @@ -0,0 +1,102 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) { /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + %A_idx = getelementptr inbounds double, double* %A, i32 %j + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + store double %add, double* %A_idx + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 4 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i1 >= 0 and -5i0 <= i1 <= 8 - 5i0 and i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : i1 >= 0 and -5i0 <= i1 <= 7 - 5i0 and i1 <= 3; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i1 >= 0 and -5i0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate.ll @@ -0,0 +1,68 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; for (int i = 0; i < 4; i += 1) { /* reduction */ +; double phi = A[j]; +; phi += 4.2; +; A[j] = phi; +; } +; } +; } +; +; There is nothing to in this case. All accesses are in body. +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + br label %body + + + + body: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + %val = load double, double* %A_idx + %add = fadd double %val, 4.2 + store double %add, double* %A_idx + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: No modification has been made Index: test/DeLICM/reduction_looprotate_affine.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_affine.ll @@ -0,0 +1,97 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(int *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; int phi = j; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 42; +; A[j] = phi; +; } +; } +; +define void @func(i32* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi i32 [%j, %reduction.preheader], [%mul, %reduction.inc] + br label %body + + + + body: + %mul = mul i32 %phi, 42 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + %A_idx = getelementptr inbounds i32, i32* %A, i32 %j + store i32 %mul, i32* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_mul[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_body[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_mul[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_mul[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate_gvnpre.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_gvnpre.ll @@ -0,0 +1,94 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) { /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + br label %body + + + + body: + %add = fadd double %phi, 4.2 + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %add, double* %A_idx + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 and 3i1 <= 22 - 13i0 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : i0 >= 0 and 0 <= i1 <= 3 and 3i1 <= 21 - 13i0; Stmt_body[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : i0 >= 0 and 0 <= i1 <= 3 and 3i1 <= 21 - 13i0; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate_licm.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_licm.ll @@ -0,0 +1,100 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = A[j]; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + %init = load double, double* %A_idx + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [%init, %reduction.preheader], [%add, %reduction.inc] + br label %body + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + store double %add, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_body[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate_load.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_load.ll @@ -0,0 +1,99 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(int *A, double* StartPtr) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; int phi = *StartPtr; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A, double* noalias nonnull %StartPtr) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + %Start = load double, double* %StartPtr + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [%Start, %reduction.preheader], [%add, %reduction.inc] + br label %body + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %add, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_StartPtr[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_body[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate_loadhoist.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_loadhoist.ll @@ -0,0 +1,111 @@ +; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(int *A, int* StartPtr) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; int Start = *Startptr; +; int phi = Start; +; for (int i = Start; i < 4; i += 1) /* reduction */ +; phi += 42; +; A[j] = phi; +; } +; } +; +define void @func(i32* noalias nonnull %A, i32* noalias nonnull %StartPtr) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + %Start = load i32, i32* %StartPtr + br label %reduction.for + + reduction.for: + %i = phi i32 [%Start, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi i32 [%Start, %reduction.preheader], [%mul, %reduction.inc] + br label %body + + + + body: + %mul = mul i32 %phi, 2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + %A_idx = getelementptr inbounds i32, i32* %A, i32 %j + store i32 %mul, i32* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + +; FIXME: The accesses for %i should not be there because, with +; load-hoisting %Start is an affine loop. To be fixed in ScopBuilder. + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_preheader[i0] -> MemRef_i__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_for[i0, i1] -> MemRef_i__phi[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and -i0 < i1 <= 3 - Start; Stmt_reduction_for[1, 0] -> MemRef_A[1] : Start >= 4; Stmt_reduction_for[0, 0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i0 <= i1 <= 3 - Start; Stmt_reduction_for[i0, 0] -> MemRef_A[i0] : 0 <= i0 <= 1 and i0 <= -4 + Start; Stmt_reduction_for[1, 0] -> MemRef_A[1] : Start <= 4 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_for[i0, i1] -> MemRef_i[] }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_body[i0, i1] -> MemRef_mul[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and i0 <= i1 <= 3 - Start; Stmt_body[i0, 0] -> MemRef_A[i0] : 0 <= i0 <= 1 and i0 <= -4 + Start; Stmt_body[1, 0] -> MemRef_A[1] : Start <= 4 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_body[i0, i1] -> MemRef_A[i0] : Start <= 3 and 0 <= i0 <= 1 and i0 <= i1 <= 3 - Start; Stmt_body[1, 0] -> MemRef_A[1]; Stmt_body[0, 0] -> MemRef_A[0] : Start >= 4 }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_inc[i0, i1] -> MemRef_i__phi[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_inc[i0, i1] -> MemRef_mul[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : Start <= 3 and 0 <= i0 <= 1 and i0 <= i1 <= 3 - Start; Stmt_reduction_inc[1, 0] -> MemRef_A[1]; Stmt_reduction_inc[0, 0] -> MemRef_A[0] : Start >= 4 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 < i1 <= 3 - Start; Stmt_reduction_inc[i0, 0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_inc[i0, i1] -> MemRef_i[] }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [Start] -> { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [Start] -> { Stmt_reduction_exit[i0] -> MemRef_mul[] }; +; CHECK-NEXT: new: [Start] -> { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate_readonly.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_readonly.ll @@ -0,0 +1,98 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(double *A, double Start) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = Start; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A, double %Start) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [%Start, %reduction.preheader], [%add, %reduction.inc] + br label %body + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %add, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_Start[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_body[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_looprotate_synthesizable.ll =================================================================== --- /dev/null +++ test/DeLICM/reduction_looprotate_synthesizable.ll @@ -0,0 +1,98 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm-compute-known=true -polly-delicm -analyze < %s | FileCheck %s +; +; void func(int *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; int phi; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 42; +; A[j] = phi; +; } +; } +; +define void @func(i32* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [0, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.preheader, label %outer.exit + + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi i32 [undef, %reduction.preheader], [%conv, %reduction.inc] + br label %body + + + + body: + %fval = sitofp i32 %phi to double + %add = fadd double %fval, 4.2 + %conv = fptosi double %add to i32 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 4 + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + %A_idx = getelementptr inbounds i32, i32* %A, i32 %j + store i32 %conv, i32* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_reduction_preheader +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_conv[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_body[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_conv[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i1 <= 3 and -14i0 <= 3i1 <= 22 - 14i0; Stmt_reduction_inc[1, 3] -> MemRef_A[1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] : 0 <= i0 <= 1 and 0 <= i1 <= 3 }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_conv[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: } Index: test/DeLICM/reduction_overapproximate.ll =================================================================== --- test/DeLICM/reduction_overapproximate.ll +++ test/DeLICM/reduction_overapproximate.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm -analyze < %s | FileCheck %s --check-prefix=APPROX -; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=false -polly-delicm -analyze < %s | FileCheck %s --check-prefix=EXACT +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-compute-known=true -polly-delicm-overapproximate-writes=true -polly-delicm -analyze < %s | FileCheck %s --check-prefix=APPROX +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-compute-known=true -polly-delicm-overapproximate-writes=false -polly-delicm -analyze < %s | FileCheck %s --check-prefix=EXACT ; ; void func(double *A { ; for (int j = -1; j < 3; j += 1) { /* outer */ @@ -69,9 +69,6 @@ ret void } -; TODO: MemRef_phi__phi[] should be mapped to A[j-1] as as well, but it is -; occupied by %add. It is actually the same value but DeLICM currently -; does not known without the "Known"-extension. ; APPROX: After accesses { ; APPROX-NEXT: Stmt_reduction_checkloop @@ -81,9 +78,11 @@ ; APPROX-NEXT: Stmt_reduction_preheader ; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; APPROX-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_preheader[i0] -> MemRef_A[-1 + i0] : 2 <= i0 <= 3 }; ; APPROX-NEXT: Stmt_reduction_for ; APPROX-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; APPROX-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and i1 >= 0 and 2 - 3i0 <= i1 <= 11 - 3i0 and i1 <= 2 and i1 <= -2 + i0 }; ; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; APPROX-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; ; APPROX-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and i1 >= 0 and 2 - 3i0 <= i1 <= 11 - 3i0 and i1 <= 2 and i1 <= -2 + i0 }; @@ -100,6 +99,7 @@ ; APPROX-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and 0 <= i1 <= -2 + i0 }; ; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; APPROX-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[2] : i0 <= 3 and 0 <= i1 <= -2 + i0 }; ; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; APPROX-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_val__phi[] }; ; APPROX-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[-1 + i0] : 2 <= i0 <= 3 and 0 <= i1 <= -2 + i0 }; Index: unittests/DeLICM/DeLICMTest.cpp =================================================================== --- unittests/DeLICM/DeLICMTest.cpp +++ unittests/DeLICM/DeLICMTest.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "polly/DeLICM.h" +#include "llvm/Support/Debug.h" #include "gtest/gtest.h" #include #include @@ -32,16 +33,36 @@ return Result; } -void completeLifetime(isl::union_set Universe, isl::union_set &Unknown, +void completeLifetime(isl::union_set Universe, isl::union_map OccupiedAndKnown, + isl::union_set &Occupied, isl::union_map &Known, isl::union_set &Undef) { - if (!Unknown) { + auto ParamSpace = give(isl_union_set_get_space(Universe.keep())); + + if (OccupiedAndKnown) { + assert(!Occupied); + assert(!Known); + + Known = isl::union_map::empty(ParamSpace); + Occupied = OccupiedAndKnown.domain(); + foreachElt(OccupiedAndKnown, [&Known](isl::map Map) { + if (isl_map_has_tuple_name(Map.keep(), isl_dim_out) != isl_bool_true) + return; + Known = give(isl_union_map_add_map(Known.take(), Map.take())); + }); + } + + if (!Occupied) { assert(Undef); - Unknown = give(isl_union_set_subtract(Universe.copy(), Undef.copy())); + Occupied = give(isl_union_set_subtract(Universe.copy(), Undef.copy())); } if (!Undef) { - assert(Unknown); - Undef = give(isl_union_set_subtract(Universe.copy(), Unknown.copy())); + assert(Occupied); + Undef = give(isl_union_set_subtract(Universe.copy(), Occupied.copy())); + } + + if (!Known) { // By default, nothin is known. + Known = isl::union_map::empty(ParamSpace); } } @@ -49,7 +70,7 @@ const char *OccupiedStr; const char *UndefStr; const char *WrittenStr; -} Knowledge; +} KnowledgeStr; isl::union_set parseSetOrNull(isl_ctx *Ctx, const char *Str) { if (!Str) @@ -57,40 +78,50 @@ return isl::union_set(Ctx, Str); } -bool checkIsConflictingNonsymmetric(Knowledge Existing, Knowledge Proposed) { +isl::union_map parseMapOrNull(isl_ctx *Ctx, const char *Str) { + if (!Str) + return nullptr; + return isl::union_map(Ctx, Str); +} + +bool checkIsConflictingNonsymmetric(KnowledgeStr Existing, + KnowledgeStr Proposed) { std::unique_ptr Ctx(isl_ctx_alloc(), &isl_ctx_free); // Parse knowledge. - auto ExistingOccupied = parseSetOrNull(Ctx.get(), Existing.OccupiedStr); + auto ExistingOccupiedAndKnown = + parseMapOrNull(Ctx.get(), Existing.OccupiedStr); auto ExistingUnused = parseSetOrNull(Ctx.get(), Existing.UndefStr); - auto ExistingWritten = parseSetOrNull(Ctx.get(), Existing.WrittenStr); + auto ExistingWritten = parseMapOrNull(Ctx.get(), Existing.WrittenStr); - auto ProposedOccupied = parseSetOrNull(Ctx.get(), Proposed.OccupiedStr); + auto ProposedOccupiedAndKnown = + parseMapOrNull(Ctx.get(), Proposed.OccupiedStr); auto ProposedUnused = parseSetOrNull(Ctx.get(), Proposed.UndefStr); - auto ProposedWritten = parseSetOrNull(Ctx.get(), Proposed.WrittenStr); + auto ProposedWritten = parseMapOrNull(Ctx.get(), Proposed.WrittenStr); // Determine universe (set of all possible domains). auto Universe = give(isl_union_set_empty(isl_space_params_alloc(Ctx.get(), 0))); - if (ExistingOccupied) - Universe = - give(isl_union_set_union(Universe.take(), ExistingOccupied.copy())); + if (ExistingOccupiedAndKnown) + Universe = give(isl_union_set_union( + Universe.take(), ExistingOccupiedAndKnown.domain().take())); if (ExistingUnused) Universe = give(isl_union_set_union(Universe.take(), ExistingUnused.copy())); if (ExistingWritten) - Universe = - give(isl_union_set_union(Universe.take(), ExistingWritten.copy())); - if (ProposedOccupied) - Universe = - give(isl_union_set_union(Universe.take(), ProposedOccupied.copy())); + Universe = give( + isl_union_set_union(Universe.take(), ExistingWritten.domain().take())); + if (ProposedOccupiedAndKnown) + Universe = give(isl_union_set_union( + Universe.take(), ProposedOccupiedAndKnown.domain().take())); if (ProposedUnused) Universe = give(isl_union_set_union(Universe.take(), ProposedUnused.copy())); if (ProposedWritten) - Universe = - give(isl_union_set_union(Universe.take(), ProposedWritten.copy())); + Universe = give( + isl_union_set_union(Universe.take(), ProposedWritten.domain().take())); + Universe = unionSpace(Universe); // Add a space the universe that does not occur anywhere else to ensure @@ -105,29 +136,39 @@ Universe = give(isl_union_set_add_set(Universe.take(), NewSet.take())); // Using the universe, fill missing data. - completeLifetime(Universe, ExistingOccupied, ExistingUnused); - completeLifetime(Universe, ProposedOccupied, ProposedUnused); + isl::union_set ExistingOccupied; + isl::union_map ExistingKnown; + completeLifetime(Universe, ExistingOccupiedAndKnown, ExistingOccupied, + ExistingKnown, ExistingUnused); + + isl::union_set ProposedOccupied; + isl::union_map ProposedKnown; + completeLifetime(Universe, ProposedOccupiedAndKnown, ProposedOccupied, + ProposedKnown, ProposedUnused); - auto Result = - isConflicting(ExistingOccupied, ExistingUnused, ExistingWritten, - ProposedOccupied, ProposedUnused, ProposedWritten); + auto Result = isConflicting(ExistingOccupied, ExistingUnused, ExistingKnown, + ExistingWritten, ProposedOccupied, ProposedUnused, + ProposedKnown, ProposedWritten, &llvm::dbgs()); // isConflicting does not require ExistingOccupied nor ProposedUnused and are // implicitly assumed to be the remainder elements. Test the implicitness as // well. EXPECT_EQ(Result, - isConflicting(ExistingOccupied, ExistingUnused, ExistingWritten, - ProposedOccupied, {}, ProposedWritten)); + isConflicting(ExistingOccupied, ExistingUnused, ExistingKnown, + ExistingWritten, ProposedOccupied, {}, ProposedKnown, + ProposedWritten)); EXPECT_EQ(Result, - isConflicting({}, ExistingUnused, ExistingWritten, ProposedOccupied, - ProposedUnused, ProposedWritten)); - EXPECT_EQ(Result, isConflicting({}, ExistingUnused, ExistingWritten, - ProposedOccupied, {}, ProposedWritten)); + isConflicting({}, ExistingUnused, ExistingKnown, ExistingWritten, + ProposedOccupied, ProposedUnused, ProposedKnown, + ProposedWritten)); + EXPECT_EQ(Result, isConflicting({}, ExistingUnused, ExistingKnown, + ExistingWritten, ProposedOccupied, {}, + ProposedKnown, ProposedWritten)); return Result; } -bool checkIsConflicting(Knowledge Existing, Knowledge Proposed) { +bool checkIsConflicting(KnowledgeStr Existing, KnowledgeStr Proposed) { auto Forward = checkIsConflictingNonsymmetric(Existing, Proposed); auto Backward = checkIsConflictingNonsymmetric(Proposed, Existing); @@ -140,40 +181,67 @@ TEST(DeLICM, isConflicting) { // Check occupied vs. occupied. - EXPECT_TRUE( - checkIsConflicting({"{ Dom[i] }", nullptr, "{}"}, {nullptr, "{}", "{}"})); - EXPECT_TRUE(checkIsConflicting({"{ Dom[i] }", nullptr, "{}"}, - {"{ Dom[i] }", nullptr, "{}"})); - EXPECT_FALSE(checkIsConflicting({"{ Dom[0] }", nullptr, "{}"}, + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> [] }", nullptr, "{}"}, + {nullptr, "{}", "{}"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> [] }", nullptr, "{}"}, + {"{ Dom[i] -> [] }", nullptr, "{}"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[0] -> [] }", nullptr, "{}"}, + {nullptr, "{ Dom[0] }", "{}"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[i] -> [] : i != 0 }", nullptr, "{}"}, + {"{ Dom[0] -> [] }", nullptr, "{}"})); + + // Check occupied vs. occupied with known values. + EXPECT_FALSE(checkIsConflicting({"{ Dom[i] -> Val[] }", nullptr, "{}"}, + {"{ Dom[i] -> Val[] }", nullptr, "{}"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> ValA[] }", nullptr, "{}"}, + {"{ Dom[i] -> ValB[] }", nullptr, "{}"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> Val[] }", nullptr, "{}"}, + {"{ Dom[i] -> [] }", nullptr, "{}"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[0] -> Val[] }", nullptr, "{}"}, {nullptr, "{ Dom[0] }", "{}"})); - EXPECT_FALSE(checkIsConflicting({"{ Dom[i] : i != 0 }", nullptr, "{}"}, - {"{ Dom[0] }", nullptr, "{}"})); // Check occupied vs. written. - EXPECT_TRUE( - checkIsConflicting({nullptr, "{}", "{}"}, {"{}", nullptr, "{ Dom[0] }"})); - EXPECT_FALSE( - checkIsConflicting({"{}", nullptr, "{}"}, {"{}", nullptr, "{ Dom[0] }"})); - - EXPECT_TRUE(checkIsConflicting({"{ Dom[i] }", nullptr, "{}"}, - {"{}", nullptr, "{ Dom[0] }"})); - EXPECT_FALSE(checkIsConflicting({"{ DomA[i] }", nullptr, "{}"}, - {"{}", nullptr, "{ DomB[0] }"})); + EXPECT_TRUE(checkIsConflicting({nullptr, "{}", "{}"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> [] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_FALSE(checkIsConflicting({"{ DomA[i] -> [] }", nullptr, "{}"}, + {"{}", nullptr, "{ DomB[0] -> [] }"})); // Dom[1] represents the time between 0 and 1. Now Proposed writes at timestep // 0 such that will have a different value between 0 and 1. Hence it is // conflicting with Existing. - EXPECT_TRUE(checkIsConflicting({"{ Dom[1] }", nullptr, "{}"}, - {"{}", nullptr, "{ Dom[0] }"})); - EXPECT_FALSE(checkIsConflicting({"{ Dom[i] : i != 1 }", nullptr, "{}"}, - {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[1] -> [] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_FALSE(checkIsConflicting({"{ Dom[i] -> [] : i != 1 }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + + // Check occupied vs. written with known values. + EXPECT_FALSE(checkIsConflicting({"{ Dom[i] -> Val[] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> Val[] }"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> ValA[] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> ValB[] }"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> Val[] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_TRUE(checkIsConflicting({"{ Dom[i] -> [] }", nullptr, "{}"}, + {"{}", nullptr, "{ Dom[0] -> Val[] }"})); // Check written vs. written. - EXPECT_TRUE(checkIsConflicting({"{}", nullptr, "{ Dom[0] }"}, - {"{}", nullptr, "{ Dom[0] }"})); - EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[-1] }"}, - {"{}", nullptr, "{ Dom[0] }"})); - EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[1] }"}, - {"{}", nullptr, "{ Dom[0] }"})); + EXPECT_TRUE(checkIsConflicting({"{}", nullptr, "{ Dom[0] -> [] }"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[-1] -> [] }"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[1] -> [] }"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); + + // Check written vs. written with known values. + EXPECT_FALSE(checkIsConflicting({"{}", nullptr, "{ Dom[0] -> Val[] }"}, + {"{}", nullptr, "{ Dom[0] -> Val[] }"})); + EXPECT_TRUE(checkIsConflicting({"{}", nullptr, "{ Dom[0] -> ValA[] }"}, + {"{}", nullptr, "{ Dom[0] -> ValB[] }"})); + EXPECT_TRUE(checkIsConflicting({"{}", nullptr, "{ Dom[0] -> Val[] }"}, + {"{}", nullptr, "{ Dom[0] -> [] }"})); } } // anonymous namespace