Index: include/polly/RegisterPasses.h =================================================================== --- include/polly/RegisterPasses.h +++ include/polly/RegisterPasses.h @@ -23,6 +23,14 @@ } // namespace llvm namespace polly { +enum PassPositionChoice { + POSITION_EARLY, + POSITION_AFTER_LOOPOPT, + POSITION_BEFORE_VECTORIZER +}; + +extern PassPositionChoice PassPosition; + void initializePollyPasses(llvm::PassRegistry &Registry); void registerPollyPasses(llvm::legacy::PassManagerBase &PM); } // namespace polly Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1528,7 +1528,11 @@ } /// Add @p Access to this statement's list of accesses. - void addAccess(MemoryAccess *Access); + /// + /// @param Access The access to add. + /// @param Prepend If true, will add @p Access before all other instructions + /// (instead of appending it). + void addAccess(MemoryAccess *Access, bool Preprend = false); /// Remove a MemoryAccess from this statement. /// Index: include/polly/Support/ISLTools.h =================================================================== --- include/polly/Support/ISLTools.h +++ include/polly/Support/ISLTools.h @@ -368,6 +368,11 @@ isl::union_map convertZoneToTimepoints(isl::union_map Zone, isl::dim Dim, bool InclStart, bool InclEnd); +/// Overload of convertZoneToTimepoints(isl::map,InclStart,InclEnd) to process +/// only a single map. +isl::map convertZoneToTimepoints(isl::map Zone, isl::dim Dim, bool InclStart, + bool InclEnd); + /// Distribute the domain to the tuples of a wrapped range map. /// /// @param Map { Domain[] -> [Range1[] -> Range2[]] } Index: include/polly/ZoneAlgo.h =================================================================== --- include/polly/ZoneAlgo.h +++ include/polly/ZoneAlgo.h @@ -67,6 +67,10 @@ /// { DomainRead[] -> Element[] } isl::union_map AllReads; + /// The loaded values (llvm::LoadInst) of all reads. + /// { [Element[] -> DomainRead[]] -> ValInst[] } + isl::union_map AllReadValInst; + /// Combined access relations of all MemoryKind::Array, MAY_WRITE accesses. /// { DomainMayWrite[] -> Element[] } isl::union_map AllMayWrites; @@ -75,6 +79,11 @@ /// { DomainMustWrite[] -> Element[] } isl::union_map AllMustWrites; + /// Combined access relations of all MK_Array write accesses (union of + /// AllMayWrites and AllMustWrites). + /// { DomainWrite[] -> Element[] } + isl::union_map AllWrites; + /// The value instances written to array elements of all write accesses. /// { [Element[] -> DomainWrite[]] -> ValInst[] } isl::union_map AllWriteValInst; @@ -220,6 +229,26 @@ public: /// Return the SCoP this object is analyzing. Scop *getScop() const { return S; } + + /// A reaching definition zone is known to have the definition's written value + /// if the definition is a MUST_WRITE. + /// + /// @return { [Element[] -> Zone[]] -> ValInst[] } + isl::union_map computeKnownFromMustWrites() const; + + /// A reaching definition zone is known to be the same value as any load that + /// reads from that array element in that period. + /// + /// @return { [Element[] -> Zone[]] -> ValInst[] } + isl::union_map computeKnownFromLoad() const; + + /// Compute which value an array element stores at every instant. + /// + /// @param FromWrite Use stores as source of information. + /// @param FromRead Use loads as source of information. + /// + /// @return { [Element[] -> Zone[]] -> ValInst[] } + isl::union_map computeKnown(bool FromWrite, bool FromRead) const; }; /// Create a domain-to-unknown value mapping. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -1276,7 +1276,7 @@ } } -void ScopStmt::addAccess(MemoryAccess *Access) { +void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) { Instruction *AccessInst = Access->getAccessInstruction(); if (Access->isArrayKind()) { @@ -1305,6 +1305,10 @@ PHIReads[PHI] = Access; } + if (Prepend) { + MemAccs.insert(MemAccs.begin(), Access); + return; + } MemAccs.push_back(Access); } Index: lib/Support/ISLTools.cpp =================================================================== --- lib/Support/ISLTools.cpp +++ lib/Support/ISLTools.cpp @@ -431,6 +431,21 @@ return give(isl_union_map_union(Zone.take(), ShiftedZone.take())); } +isl::map polly::convertZoneToTimepoints(isl::map Zone, isl::dim Dim, + bool InclStart, bool InclEnd) { + if (!InclStart && InclEnd) + return Zone; + + auto ShiftedZone = shiftDim(Zone, Dim, -1, -1); + if (InclStart && !InclEnd) + return ShiftedZone; + else if (!InclStart && !InclEnd) + return give(isl_map_intersect(Zone.take(), ShiftedZone.take())); + + assert(InclStart && InclEnd); + return give(isl_map_union(Zone.take(), ShiftedZone.take())); +} + isl::map polly::distributeDomain(isl::map Map) { // Note that we cannot take Map apart into { Domain[] -> Range1[] } and { // Domain[] -> Range2[] } and combine again. We would loose any relation Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -62,15 +62,10 @@ cl::desc("Only run scop detection, but no other optimizations"), cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); -enum PassPositionChoice { - POSITION_EARLY, - POSITION_AFTER_LOOPOPT, - POSITION_BEFORE_VECTORIZER -}; - enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL }; -static cl::opt PassPosition( +PassPositionChoice polly::PassPosition; +static cl::opt PassPositionX( "polly-position", cl::desc("Where to run polly in the pass pipeline"), cl::values( clEnumValN(POSITION_EARLY, "early", "Before everything"), @@ -78,8 +73,8 @@ "After the loop optimizer (but within the inline cycle)"), clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer", "Right before the vectorizer")), - cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore, - cl::cat(PollyCategory)); + cl::Hidden, cl::location(PassPosition), cl::init(POSITION_EARLY), + cl::ZeroOrMore, cl::cat(PollyCategory)); static cl::opt Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"), Index: lib/Transform/DeLICM.cpp =================================================================== --- lib/Transform/DeLICM.cpp +++ lib/Transform/DeLICM.cpp @@ -778,9 +778,15 @@ isl_map_wrap(isl_map_apply_domain(Lifetime.copy(), DefTarget.copy()))); simplify(EltZone); + // When known knowledge is disabled, just return the unknown value. It will + // either get filtered out or conflict with itself. // { DomainDef[] -> ValInst[] } - auto ValInst = makeValInst(V, DefMA->getStatement(), - LI->getLoopFor(DefInst->getParent())); + isl::map ValInst; + if (DelicmComputeKnown) + ValInst = makeValInst(V, DefMA->getStatement(), + LI->getLoopFor(DefInst->getParent())); + else + ValInst = makeUnknownForDomain(DefMA->getStatement()); // { DomainDef[] -> [Element[] -> Zone[]] } auto EltKnownTranslator = @@ -1209,21 +1215,6 @@ return Result; } - /// Compute which value an array element stores at every instant. - /// - /// @return { [Element[] -> Zone[]] -> ValInst[] } - isl::union_map computeKnown() const { - // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] } - auto EltReachdDef = - distributeDomain(give(isl_union_map_curry(WriteReachDefZone.copy()))); - - // { [Element[] -> DomainWrite[]] -> ValInst[] } - auto AllKnownWriteValInst = filterKnownValInst(AllWriteValInst); - - // { [Element[] -> Zone[]] -> ValInst[] } - return EltReachdDef.apply_range(AllKnownWriteValInst); - } - /// Determine when an array element is written to, and which value instance is /// written. /// @@ -1290,7 +1281,7 @@ computeCommon(); EltUnused = computeLifetime(); - EltKnown = computeKnown(); + EltKnown = computeKnown(true, false); EltWritten = computeWritten(); } DeLICMAnalyzed++; Index: lib/Transform/ForwardOpTree.cpp =================================================================== --- lib/Transform/ForwardOpTree.cpp +++ lib/Transform/ForwardOpTree.cpp @@ -13,11 +13,16 @@ #include "polly/ForwardOpTree.h" +#include "polly/Options.h" +#include "polly/RegisterPasses.h" #include "polly/ScopBuilder.h" #include "polly/ScopInfo.h" #include "polly/ScopPass.h" #include "polly/Support/GICHelper.h" +#include "polly/Support/ISLOStream.h" +#include "polly/Support/ISLTools.h" #include "polly/Support/VirtualInstruction.h" +#include "polly/ZoneAlgo.h" #include "llvm/Analysis/ValueTracking.h" #define DEBUG_TYPE "polly-delicm" @@ -25,7 +30,26 @@ using namespace polly; using namespace llvm; +static cl::opt AnalyzeKnown( + "polly-optree-analyze-known", + cl::desc("Analyze array contents for load forwarding (default: on if " + "-polly-position=before-vectorizer, off otherwise)"), + cl::cat(PollyCategory), cl::Optional); + +static cl::opt + MaxOps("polly-optree-max-ops", + cl::desc("Maximum number of ISL operations to invest for known " + "analysis; 0=no limit"), + cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); + +STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); +STATISTIC(KnownOutOfQuota, + "Analyses aborted because max_operations was reached"); +STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis"); + STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); +STATISTIC(TotalKnownLoadsForwarded, + "Number of forwarded loads because their value was known"); STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); STATISTIC(TotalModifiedStmts, @@ -81,17 +105,14 @@ /// the MemoryAccess is removed and the all the operand tree instructions are /// moved into the statement. All original instructions are left in the source /// statements. The simplification pass can clean these up. -class ForwardOpTreeImpl { +class ForwardOpTreeImpl : ZoneAlgorithm { private: - /// The SCoP we are currently processing. - Scop *S; - - /// LoopInfo is required for VirtualUse. - LoopInfo *LI; - /// How many instructions have been copied to other statements. int NumInstructionsCopied = 0; + /// Number of loads forwarded because their value was known. + int NumKnownLoadsForwarded = 0; + /// How many read-only accesses have been copied. int NumReadOnlyCopied = 0; @@ -104,10 +125,140 @@ /// Whether we carried out at least one change to the SCoP. bool Modified = false; + /// Contains the zones where array elements are known to contain a specific + /// value. + /// { [Element[] -> Zone[]] -> ValInst[] } + /// @see computeKnown() + isl::union_map Known; + + /// Get list of array elements that do contain the same ValInst[] at Domain[]. + /// + /// @param ValInst { Domain[] -> ValInst[] } + /// The values for which we search for alternative locations, + /// per statement instance. + /// + /// @return { Domain[] -> Element[] } + /// For each statement instance, the array elements that contain the + /// same ValInst. + isl::union_map findSameContentElements(isl::map ValInst) { + assert(ValInst.is_single_valued().is_true()); + + // { Domain[] } + isl::set Domain = ValInst.domain(); + + // { Domain[] -> Scatter[] } + isl::map Schedule = getScatterFor(Domain); + + // { Element[] -> [Scatter[] -> ValInst[]] } + isl::union_map MustKnownCurried = + convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); + + // { [Domain[] -> ValInst[]] -> Scatter[] } + isl::map DomValSched = ValInst.domain_map().apply_range(Schedule); + + // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } + isl::map SchedValDomVal = + DomValSched.range_product(ValInst.range_map()).reverse(); + + // { Element[] -> [Domain[] -> ValInst[]] } + isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); + + // { Domain[] -> Element[] } + isl::union_map MustKnownMap = + MustKnownInst.uncurry().domain().unwrap().reverse(); + simplify(MustKnownMap); + + return MustKnownMap; + } + + /// Find a single array element for each statement instance, within a single + /// array. + /// + /// @param MustKnown { Domain[] -> Element[] } + /// Set of candidate array elements. + /// @param Domain { Domain[] } + /// The statement instance for which we need elements for. + /// + /// @return { Domain[] -> Element[] } + /// For each statement instance, an array element out of @p MustKnown. + /// All array elements must be in the same array (Polly does not yet + /// support reading from different accesses using the same + /// MemoryAccess). If no mapping for all of @p Domain exists, returns + /// null. + isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { + // { Domain[] -> Element[] } + isl::map Result; + + // MemoryAccesses can read only elements from a single array + // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). + // Look through all spaces until we find one that contains at least the + // wanted statement instance.s + MustKnown.foreach_map([&, this](isl::map Map) -> isl::stat { + // Get the array this is accessing. + isl::id ArrayId = Map.get_tuple_id(isl::dim::out); + ScopArrayInfo *SAI = static_cast(ArrayId.get_user()); + + // No support for generation of indirect array accesses. + if (SAI->getBasePtrOriginSAI()) + return isl::stat::ok; // continue + + // Determine whether this map contains all wanted values. + isl::set MapDom = Map.domain(); + if (!Domain.is_subset(MapDom).is_true()) + return isl::stat::ok; // continue + + // There might be multiple array elements the contain the same value, but + // choose only one of them. lexmin is used because it returns a one-value + // mapping, we currently do not care about which one. + // TODO: Get the simplest access function. + Result = Map.lexmin(); + return isl::stat::error; // break + }); + + return Result; + } + +public: + /// Compute the zones of known array element contents. + /// + /// @return True if the computed #Known is usable. + bool computeKnownValues() { + isl::union_map MustKnown, KnownFromLoad, KnownFromInit; + + // Check that nothing strange occurs. + if (!isCompatibleScop()) { + KnownIncompatible++; + return false; + } + + isl_ctx_reset_error(IslCtx.get()); + { + IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps); + + computeCommon(); + Known = computeKnown(true, true); + simplify(Known); + } + + if (!Known) { + assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); + KnownOutOfQuota++; + DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); + return false; + } + + KnownAnalyzed++; + DEBUG(dbgs() << "All known: " << Known << "\n"); + + return true; + } + void printStatistics(raw_ostream &OS, int Indent = 0) { OS.indent(Indent) << "Statistics {\n"; OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied << '\n'; + OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded + << '\n'; OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied << '\n'; OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees @@ -130,17 +281,63 @@ OS.indent(Indent) << "}\n"; } + /// Create a new MemoryAccess of type read and MemoryKind::Array. + /// + /// @param Stmt The statement in which the access occurs. + /// @param LI The instruction that does the access. + /// @param AccessRelation The array element that each statement instance + /// accesses. + /// + /// @param The newly created access. + MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, + isl::map AccessRelation) { + isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); + ScopArrayInfo *SAI = reinterpret_cast(ArrayId.get_user()); + + // Create a dummy SCEV access, to be replaced anyway. + SmallVector Sizes; + Sizes.reserve(SAI->getNumberOfDimensions()); + SmallVector Subscripts; + Subscripts.reserve(SAI->getNumberOfDimensions()); + for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { + Sizes.push_back(SAI->getDimensionSize(i)); + Subscripts.push_back(nullptr); + } + + MemoryAccess *Access = + new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), + LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); + S->addAccessFunction(Access); + Stmt->addAccess(Access, true); + + Access->setNewAccessRelation(AccessRelation); + + return Access; + } + /// Forwards a speculatively executable instruction. /// - /// If the instruction itself cannot be executed speculatively, returns - /// FD_NotApplicable. + /// @param TargetStmt The statement the operand tree will be copied to. + /// @param UseInst The (possibly speculatable) instruction to forward. + /// @param DefStmt The statement @p UseInst is defined in. + /// @param DefLoop The loop which contains @p UseInst. + /// @param DefToTarget { DomainDef[] -> DomainTarget[] } + /// A mapping from the statement instance @p UseInst is + /// defined to the statement instance it is forwarded to. + /// @param DoIt If false, only determine whether an operand tree can be + /// forwarded. If true, carry out the forwarding. Do not + /// use DoIt==true if an operand tree is not known to be + /// forwardable. /// - /// The parameters the same as for - /// @see forwardTree() + /// @return FD_NotApplicable if @p UseInst is not speculatable. + /// FD_CannotForward if one of @p UseInst's operands is not + /// forwardable. + /// FD_CanForwardTree if @p UseInst is forwardable. + /// FD_DidForward if @p DoIt was true. ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, Instruction *UseInst, - ScopStmt *UseStmt, Loop *UseLoop, - bool DoIt) { + ScopStmt *DefStmt, Loop *DefLoop, + isl::map DefToTarget, bool DoIt) { // PHIs, unless synthesizable, are not yet supported. if (isa(UseInst)) return FD_NotApplicable; @@ -160,10 +357,6 @@ if (mayBeMemoryDependent(*UseInst)) return FD_NotApplicable; - Loop *DefLoop = LI->getLoopFor(UseInst->getParent()); - ScopStmt *DefStmt = S->getStmtFor(UseInst); - assert(DefStmt && "Value must be defined somewhere"); - if (DoIt) { // To ensure the right order, prepend this instruction before its // operands. This ensures that its operands are inserted before the @@ -177,7 +370,7 @@ for (Value *OpVal : UseInst->operand_values()) { ForwardingDecision OpDecision = - forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt); + forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt); switch (OpDecision) { case FD_CannotForward: assert(!DoIt); @@ -202,23 +395,150 @@ return FD_CanForwardTree; } + /// Forward a load by reading from an array element that contains the same + /// value. Typically the location it was loaded from. + /// + /// @param TargetStmt The statement the operand tree will be copied to. + /// @param Inst The (possibly speculatable) instruction to forward. + /// @param UseStmt The statement that uses @p Inst. + /// @param UseLoop The loop @p Inst is used in. + /// @param UseToTarget { DomainUse[] -> DomainTarget[] } + /// A mapping from the statement instance @p Inst is used + /// to the statement instance it is forwarded to. + /// @param DefStmt The statement @p Inst is defined in. + /// @param DefLoop The loop which contains @p Inst. + /// @param DefToTarget { DomainDef[] -> DomainTarget[] } + /// A mapping from the statement instance @p Inst is + /// defined to the statement instance it is forwarded to. + /// @param DoIt If false, only determine whether an operand tree can be + /// forwarded. If true, carry out the forwarding. Do not + /// use DoIt==true if an operand tree is not known to be + /// forwardable. + /// + /// @return FD_NotApplicable if @p Inst is not a LoadInst. + /// FD_CannotForward if no array element to load from was found. + /// FD_CanForwardLeaf if the load is already in the target statement + /// instance. + /// FD_CanForwardTree if @p Inst is forwardable. + /// FD_DidForward if @p DoIt was true. + ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, + ScopStmt *UseStmt, Loop *UseLoop, + isl::map UseToTarget, ScopStmt *DefStmt, + Loop *DefLoop, isl::map DefToTarget, + bool DoIt) { + // Cannot do anything without successful known analysis. + if (Known.is_null()) + return FD_NotApplicable; + + LoadInst *LI = dyn_cast(Inst); + if (!LI) + return FD_NotApplicable; + + MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); + if (Access) { + if (DoIt) + return FD_DidForward; + + // Forwarding is trivial: The load is already in target statement. + return FD_CanForwardLeaf; + } + + if (DoIt) + TargetStmt->prependInstruction(LI); + + ForwardingDecision OpDecision = + forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, + DefToTarget, DoIt); + switch (OpDecision) { + case FD_CannotForward: + return OpDecision; + + case FD_CanForwardLeaf: + case FD_CanForwardTree: + case FD_DidForward: + break; + + default: + llvm_unreachable("Shouldn't return this"); + } + + // { DomainUse[] -> ValInst[] } + isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); + + // { DomainTarget[] -> ValInst[] } + isl::map ToExpectedVal = ExpectedVal.apply_domain(UseToTarget); + + // { DomainTo[] -> Element[] } + isl::union_map Candidates = findSameContentElements(ToExpectedVal); + isl::map SameVal = singleLocation(Candidates, getDomainFor(UseStmt)); + if (!SameVal) + return FD_CannotForward; + + if (!DoIt) + return FD_CanForwardTree; + + // Necessary so matmul pattern detection recognizes this access. It + // expects the map to have exactly 2 constrains (i0=o0 and i1=o1, for the + // two surrounding loops) + SameVal = SameVal.gist_domain( + TargetStmt->getDomain().intersect_params(give(S->getContext()))); + + Access = makeReadArrayAccess(TargetStmt, LI, SameVal); + assert(Access); + (void)Access; + + NumKnownLoadsForwarded++; + TotalKnownLoadsForwarded++; + return FD_DidForward; + } + + /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a + /// use in every instance of @p UseStmt. + /// + /// @param UseStmt Statement a scalar is used in. + /// @param DefStmt Statement a scalar is defined in. + /// + /// @return { DomainUse[] -> DomainDef[] } + isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { + // { DomainUse[] -> Scatter[] } + isl::map UseScatter = getScatterFor(UseStmt); + + // { Zone[] -> DomainDef[] } + isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); + + // { Scatter[] -> DomainDef[] } + isl::map ReachDefTimepoints = + convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); + + // { DomainUse[] -> DomainDef[] } + return UseScatter.apply_range(ReachDefTimepoints); + } + /// Determines whether an operand tree can be forwarded or carries out a /// forwarding, depending on the @p DoIt flag. /// - /// @param TargetStmt The statement the operand tree will be copied to. - /// @param UseVal The value (usually an instruction) which is root of an - /// operand tree. - /// @param UseStmt The statement that uses @p UseVal. - /// @param UseLoop The loop @p UseVal is used in. - /// @param DoIt If false, only determine whether an operand tree can be - /// forwarded. If true, carry out the forwarding. Do not use - /// DoIt==true if an operand tree is not known to be - /// forwardable. + /// @param TargetStmt The statement the operand tree will be copied to. + /// @param UseVal The value (usually an instruction) which is root of an + /// operand tree. + /// @param UseStmt The statement that uses @p UseVal. + /// @param UseLoop The loop @p UseVal is used in. + /// @param UseToTarget { DomainUse[] -> DomainTarget[] } + /// A mapping from the statement instance @p UseVal is used + /// to the statement instance it is forwarded to. + /// @param DoIt If false, only determine whether an operand tree can be + /// forwarded. If true, carry out the forwarding. Do not + /// use DoIt==true if an operand tree is not known to be + /// forwardable. /// /// @return If DoIt==false, return whether the operand tree can be forwarded. /// If DoIt==true, return FD_DidForward. - ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, - ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) { + ForwardingDecision forwardTree(ScopStmt *TargetStmt, llvm::Value *UseVal, + ScopStmt *UseStmt, llvm::Loop *UseLoop, + isl::map UseToTarget, bool DoIt) { + ScopStmt *DefStmt = nullptr; + Loop *DefLoop = nullptr; + isl::map DefToTarget; + VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); switch (VUse.getKind()) { case VirtualUse::Constant: @@ -275,14 +595,41 @@ return FD_DidForward; case VirtualUse::Intra: + // Knowing that UseStmt and DefStmt are the same statement instance, just + // reuse the information about UseStmt for DefStmt + DefStmt = UseStmt; + DefToTarget = UseToTarget; + + LLVM_FALLTHROUGH; case VirtualUse::Inter: - auto Inst = cast(UseVal); + Instruction *Inst = cast(UseVal); + + if (!DefStmt) + DefStmt = S->getStmtFor(Inst); + assert(DefStmt && "Value must be defined somewhere"); + DefLoop = LI->getLoopFor(Inst->getParent()); + + if (DefToTarget.is_null() && !Known.is_null()) { + // { UseDomain[] -> DefDomain[] } + isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt); + + // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to + // { DefDomain[] -> TargetDomain[] } + DefToTarget = UseToTarget.apply_domain(UseToDef); + simplify(DefToTarget); + } - ForwardingDecision SpeculativeResult = - forwardSpeculatable(TargetStmt, Inst, UseStmt, UseLoop, DoIt); + ForwardingDecision SpeculativeResult = forwardSpeculatable( + TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt); if (SpeculativeResult != FD_NotApplicable) return SpeculativeResult; + ForwardingDecision KnownResult = + forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, + DefStmt, DefLoop, DefToTarget, DoIt); + if (KnownResult != FD_NotApplicable) + return KnownResult; + // When no method is found to forward the operand tree, we effectively // cannot handle it. DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); @@ -300,14 +647,21 @@ ScopStmt *Stmt = RA->getStatement(); Loop *InLoop = Stmt->getSurroundingLoop(); - ForwardingDecision Assessment = - forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false); + isl::map TargetToUse; + if (!Known.is_null()) { + isl::space DomSpace = Stmt->getDomainSpace(); + TargetToUse = + isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); + } + + ForwardingDecision Assessment = forwardTree( + Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false); assert(Assessment != FD_DidForward); if (Assessment != FD_CanForwardTree) return false; - ForwardingDecision Execution = - forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true); + ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, + InLoop, TargetToUse, true); assert(Execution == FD_DidForward && "A previous positive assessment must also be executable"); (void)Execution; @@ -317,7 +671,8 @@ } public: - ForwardOpTreeImpl(Scop *S, LoopInfo *LI) : S(S), LI(LI) {} + ForwardOpTreeImpl(Scop *S, LoopInfo *LI) + : ZoneAlgorithm("polly-optree", S, LI) {} /// Return which SCoP this instance is processing. Scop *getScop() const { return S; } @@ -413,6 +768,13 @@ LoopInfo &LI = getAnalysis().getLoopInfo(); Impl = make_unique(&S, &LI); + if ((AnalyzeKnown.getNumOccurrences() && AnalyzeKnown) || + (!AnalyzeKnown.getNumOccurrences() && + PassPosition == POSITION_BEFORE_VECTORIZER)) { + DEBUG(dbgs() << "Prepare forwarders...\n"); + Impl->computeKnownValues(); + } + DEBUG(dbgs() << "Forwarding operand trees...\n"); Impl->forwardOperandTrees(); Index: lib/Transform/ZoneAlgo.cpp =================================================================== --- lib/Transform/ZoneAlgo.cpp +++ lib/Transform/ZoneAlgo.cpp @@ -237,6 +237,32 @@ return give(isl_map_from_domain(Domain.take())); } +/// Return whether @p Map maps to an unknown value. +/// +/// @param { [] -> ValInst[] } +static bool isMapToUnknown(const isl::map &Map) { + isl::space Space = give(isl_space_range(isl_map_get_space(Map.keep()))); + return !isl_map_has_tuple_id(Map.keep(), isl_dim_set) && + !isl_space_is_wrapping(Space.keep()) && + isl_map_dim(Map.keep(), isl_dim_out) == 0; +} + +/// Return only the mappings that map to known values. +/// +/// @param UMap { [] -> ValInst[] } +/// +/// @return { [] -> ValInst[] } +static isl::union_map filterKnownValInst(const isl::union_map &UMap) { + isl::union_map Result = + give(isl_union_map_empty(isl_union_map_get_space(UMap.keep()))); + UMap.foreach_map([=, &Result](isl::map Map) -> isl::stat { + if (!isMapToUnknown(Map)) + Result = give(isl_union_map_add_map(Result.take(), Map.take())); + return isl::stat::ok; + }); + return Result; +} + static std::string printInstruction(Instruction *Instr, bool IsForDebug = false) { std::string Result; @@ -330,10 +356,28 @@ void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) { assert(MA->isLatestArrayKind()); assert(MA->isRead()); + ScopStmt *Stmt = MA->getStatement(); // { DomainRead[] -> Element[] } auto AccRel = getAccessRelationFor(MA); AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy())); + + if (LoadInst *Load = dyn_cast_or_null(MA->getAccessInstruction())) { + // { DomainRead[] -> ValInst[] } + isl::map LoadValInst = makeValInst( + Load, Stmt, LI->getLoopFor(Load->getParent()), Stmt->isBlockStmt()); + + // { DomainRead[] -> [Element[] -> DomainRead[]] } + isl::map IncludeElement = + give(isl_map_curry(isl_map_domain_map(AccRel.take()))); + + // { [Element[] -> DomainRead[]] -> ValInst[] } + isl::map EltLoadValInst = + give(isl_map_apply_domain(LoadValInst.take(), IncludeElement.take())); + + AllReadValInst = give( + isl_union_map_add_map(AllReadValInst.take(), EltLoadValInst.take())); + } } void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) { @@ -578,6 +622,7 @@ AllMayWrites = makeEmptyUnionMap(); AllMustWrites = makeEmptyUnionMap(); AllWriteValInst = makeEmptyUnionMap(); + AllReadValInst = makeEmptyUnionMap(); for (auto &Stmt : *S) { for (auto *MA : Stmt) { @@ -593,7 +638,7 @@ } // { DomainWrite[] -> Element[] } - auto AllWrites = + AllWrites = give(isl_union_map_union(AllMustWrites.copy(), AllMayWrites.copy())); // { [Element[] -> Zone[]] -> DomainWrite[] } @@ -611,3 +656,76 @@ } OS.indent(Indent) << "}\n"; } + +isl::union_map ZoneAlgorithm::computeKnownFromMustWrites() const { + // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] } + isl::union_map EltReachdDef = distributeDomain(WriteReachDefZone.curry()); + + // { [Element[] -> DomainWrite[]] -> ValInst[] } + isl::union_map AllKnownWriteValInst = filterKnownValInst(AllWriteValInst); + + // { [Element[] -> Zone[]] -> ValInst[] } + return EltReachdDef.apply_range(AllKnownWriteValInst); +} + +isl::union_map ZoneAlgorithm::computeKnownFromLoad() const { + // { Element[] } + isl::union_set AllAccessedElts = AllReads.range().unite(AllWrites.range()); + + // { Element[] -> Scatter[] } + isl::union_map EltZoneUniverse = isl::union_map::from_domain_and_range( + AllAccessedElts, isl::set::universe(ScatterSpace)); + + // This assumes there are no "holes" in + // isl_union_map_domain(WriteReachDefZone); alternatively, compute the zone + // before the first write or that are not written at all. + // { Element[] -> Scatter[] } + isl::union_set NonReachDef = + EltZoneUniverse.wrap().subtract(WriteReachDefZone.domain()); + + // { [Element[] -> Zone[]] -> ReachDefId[] } + isl::union_map DefZone = + WriteReachDefZone.unite(isl::union_map::from_domain(NonReachDef)); + + // { [Element[] -> Scatter[]] -> Element[] } + isl::union_map EltZoneElt = EltZoneUniverse.domain_map(); + + // { [Element[] -> Zone[]] -> [Element[] -> ReachDefId[]] } + isl::union_map DefZoneEltDefId = EltZoneElt.range_product(DefZone); + + // { Element[] -> [Zone[] -> ReachDefId[]] } + isl::union_map EltDefZone = DefZone.curry(); + + // { [Element[] -> Zone[] -> [Element[] -> ReachDefId[]] } + isl::union_map EltZoneEltDefid = distributeDomain(EltDefZone); + + // { [Element[] -> Scatter[]] -> DomainRead[] } + isl::union_map Reads = AllReads.range_product(Schedule).reverse(); + + // { [Element[] -> Scatter[]] -> [Element[] -> DomainRead[]] } + isl::union_map ReadsElt = EltZoneElt.range_product(Reads); + + // { [Element[] -> Scatter[]] -> ValInst[] } + isl::union_map ScatterKnown = ReadsElt.apply_range(AllReadValInst); + + // { [Element[] -> ReachDefId[]] -> ValInst[] } + isl::union_map DefidKnown = + DefZoneEltDefId.apply_domain(ScatterKnown).reverse(); + + // { [Element[] -> Zone[]] -> ValInst[] } + return DefZoneEltDefId.apply_range(DefidKnown); +} + +isl::union_map ZoneAlgorithm::computeKnown(bool FromWrite, + bool FromRead) const { + isl::union_map Result = makeEmptyUnionMap(); + + if (FromWrite) + Result = Result.unite(computeKnownFromMustWrites()); + + if (FromRead) + Result = Result.unite(computeKnownFromLoad()); + + simplify(Result); + return Result; +} Index: test/ForwardOpTree/forward_load.ll =================================================================== --- /dev/null +++ test/ForwardOpTree/forward_load.ll @@ -0,0 +1,51 @@ +; RUN: opt %loadPolly -polly-optree-analyze-known -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Rematerialize a load. +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + br label %bodyB + + bodyB: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Known loads forwarded: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK: Stmt_bodyB +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: null; +; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: store double %val, double* %A_idx +; CHECK-NEXT: } Index: test/ForwardOpTree/forward_load_differentarray.ll =================================================================== --- /dev/null +++ test/ForwardOpTree/forward_load_differentarray.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-optree-analyze-known -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double* noalias nonnull %C) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + br label %bodyB + + bodyB: + store double 0.0, double* %B_idx + %C_idx = getelementptr inbounds double, double* %C, i32 %j + store double %val, double* %C_idx + br label %bodyC + + bodyC: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Known loads forwarded: 2 +; CHECK: Operand trees forwarded: 2 +; CHECK: Statements with forwarded operand trees: 2 +; CHECK: } + +; CHECK-NEXT: After statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyB +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: null; +; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_C[i0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: store double 0.000000e+00, double* %B_idx +; CHECK-NEXT: store double %val, double* %C_idx +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyC +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: null; +; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_C[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: store double %val, double* %A_idx +; CHECK-NEXT: } +; CHECK-NEXT: } Index: test/ForwardOpTree/forward_load_fromloop.ll =================================================================== --- /dev/null +++ test/ForwardOpTree/forward_load_fromloop.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-optree-analyze-known -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp sle i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %i = phi i32 [0, %for], [%i.inc, %bodyA] + %B_idx = getelementptr inbounds double, double* %B, i32 %i + %val = load double, double* %B_idx + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i, %n + br i1 %i.cmp, label %bodyA, label %bodyB + + bodyB: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Known loads forwarded: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK: After statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0, i1] -> MemRef_B[i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0, i1] -> MemRef_val[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: %i.cmp = icmp slt i32 %i, %n +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyB +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: null; +; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[n] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: store double %val, double* %A_idx +; CHECK-NEXT: } +; CHECK-NEXT: } Index: test/ForwardOpTree/noforward_load_conditional.ll =================================================================== --- /dev/null +++ test/ForwardOpTree/noforward_load_conditional.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly -polly-optree-analyze-known -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + %cond = icmp slt i32 %j, 1 + br i1 %cond, label %bodyA_true, label %bodyB + + bodyA_true: + store double 0.0, double* %B_idx + br label %bodyB + + bodyB: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: ForwardOpTree executed, but did not modify anything Index: test/ForwardOpTree/noforward_load_writebetween.ll =================================================================== --- /dev/null +++ test/ForwardOpTree/noforward_load_writebetween.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -polly-optree-analyze-known -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Cannot rematerialize %val from B[0] at bodyC because B[0] has been +; overwritten in bodyB. +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + br label %bodyB + + bodyB: + store double 0.0, double* %B_idx + br label %bodyC + + bodyC: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: ForwardOpTree executed, but did not modify anything