Index: include/polly/ZoneAlgo.h
===================================================================
--- include/polly/ZoneAlgo.h
+++ include/polly/ZoneAlgo.h
@@ -15,6 +15,8 @@
 #define POLLY_ZONEALGO_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "isl/isl-noexceptions.h"
 #include <memory>
@@ -22,12 +24,14 @@
 class Value;
 class LoopInfo;
 class Loop;
+class PHINode;
 } // namespace llvm
 
 namespace polly {
 class Scop;
 class ScopStmt;
 class MemoryAccess;
+class ScopArrayInfo;
 
 /// Return only the mappings that map to known values.
 ///
@@ -108,6 +112,47 @@
   /// { Element[] }
   isl::union_set CompatibleElts;
 
+  /// List of PHIs that may transitively refer to themselves.
+  ///
+  /// Computing them would require a polyhedral transitive closure operation,
+  /// for which isl may only return an approximation. For correctness, we
+  /// always require an exact result. Hence, we exclude such PHIs.
+  llvm::SmallPtrSet<llvm::PHINode *, 4> RecursivePHIs;
+
+  /// PHIs that have been computed.
+  ///
+  /// Computed PHIs are replaced by their incoming values using #NormalizeMap.
+  llvm::DenseSet<llvm::PHINode *> ComputedPHIs;
+
+  /// For computed PHIs, contains the ValInst they stand for.
+  ///
+  /// To show an example, assume the following PHINode:
+  ///
+  ///   Stmt:
+  ///     %phi = phi double [%val1, %bb1], [%val2, %bb2]
+  ///
+  /// Its ValInst is:
+  ///
+  ///   { [Stmt[i] -> phi[]] }
+  ///
+  /// The value of %phi is either %val1 or %val2, depending on whether %bb1 or
+  /// %bb2 is the incoming block executed before iteration i. In SCoPs, this
+  /// can be determined at compile-time, and the result stored in
+  /// #NormalizeMap. For the previous example, it could be:
+  ///
+  ///   { [Stmt[i] -> phi[]] -> [Stmt[0] -> val1[]] : i = 0;
+  ///     [Stmt[i] -> phi[]] -> [Stmt[i] -> val2[]] : i > 0 }
+  ///
+  /// Only the ValInsts of PHIs in #ComputedPHIs are present in this map.
+  /// Other values are assumed to represent themselves; this avoids adding
+  /// lots of identity entries to this map.
+  ///
+  /// { PHIValInst[] -> IncomingValInst[] }
+  isl::union_map NormalizeMap;
+
+  /// Cache for computePerPHI(const ScopArrayInfo *).
+  llvm::SmallDenseMap<llvm::PHINode *, isl::union_map> PerPHIMaps;
+
   /// Prepare the object before computing the zones of @p S.
   ///
   /// @param PassName Name of the pass using this analysis.
@@ -142,7 +187,7 @@
   /// have.
   ///
   /// @return { ValInst[] }
-  isl::map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
+  isl::union_map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
 
   void addArrayWriteAccess(MemoryAccess *MA);
 
@@ -151,6 +196,17 @@
 
   isl::union_map makeEmptyUnionMap() const;
 
+  /// For each 'execution' of a PHINode, get the incoming block that was
+  /// executed before.
+  ///
+  /// For each PHI instance we can directly determine which was the incoming
+  /// block, and hence derive which value the PHI has.
+  ///
+  /// @param SAI The ScopArrayInfo representing the PHI's storage.
+  ///
+  /// @return { DomainPHIRead[] -> DomainPHIWrite[] }
+  isl::union_map computePerPHI(const polly::ScopArrayInfo *SAI);
+
   /// Find the array elements that can be used for zone analysis.
   void collectCompatibleElts();
 
@@ -244,6 +300,15 @@
   isl::map makeValInst(llvm::Value *Val, ScopStmt *UserStmt, llvm::Loop *Scope,
                        bool IsCertain = true);
 
+  /// Create and normalize a ValInst.
+  ///
+  /// @see makeValInst
+  /// @see normalizeValInst
+  /// @see #NormalizeMap
+  isl::union_map makeNormalizedValInst(llvm::Value *Val, ScopStmt *UserStmt,
+                                       llvm::Loop *Scope,
+                                       bool IsCertain = true);
+
   /// Return whether @p MA can be used for transformations (e.g. OpTree load
   /// forwarding, DeLICM mapping).
   bool isCompatibleAccess(MemoryAccess *MA);
 
@@ -251,9 +316,29 @@
   /// Compute the different zones.
   void computeCommon();
 
+  /// Compute the normalization map that replaces PHIs by their incoming
+  /// values.
+  ///
+  /// @see #NormalizeMap
+  void computeNormalizedPHIs();
+
   /// Print the current state of all MemoryAccesses to @p.
   void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const;
 
+  /// Is @p MA a PHI READ access that can be normalized?
+  ///
+  /// @see #NormalizeMap
+  bool isNormalizable(MemoryAccess *MA);
+
+  /// @{
+  /// Determine whether the argument does not map to any computed PHI. Those
+  /// should have been replaced by their incoming values.
+  ///
+  /// @see #NormalizeMap
+  bool isNormalized(isl::map Map);
+  bool isNormalized(isl::union_map Map);
+  /// @}
+
 public:
   /// Return the SCoP this object is analyzing.
   Scop *getScop() const { return S; }
Index: lib/Transform/DeLICM.cpp
===================================================================
--- lib/Transform/DeLICM.cpp
+++ lib/Transform/DeLICM.cpp
@@ -660,52 +660,6 @@
     return std::make_pair(DefUses, Lifetime);
   }
 
-  /// For each 'execution' of a PHINode, get the incoming block that was
-  /// executed before.
-  ///
-  /// For each PHI instance we can directly determine which was the incoming
-  /// block, and hence derive which value the PHI has.
-  ///
-  /// @param SAI The ScopArrayInfo representing the PHI's storage.
-  ///
-  /// @return { DomainPHIRead[] -> DomainPHIWrite[] }
-  isl::union_map computePerPHI(const ScopArrayInfo *SAI) {
-    assert(SAI->isPHIKind());
-
-    // { DomainPHIWrite[] -> Scatter[] }
-    auto PHIWriteScatter = makeEmptyUnionMap();
-
-    // Collect all incoming block timepoint.
-    for (auto *MA : S->getPHIIncomings(SAI)) {
-      auto Scatter = getScatterFor(MA);
-      PHIWriteScatter =
-          give(isl_union_map_add_map(PHIWriteScatter.take(), Scatter.take()));
-    }
-
-    // { DomainPHIRead[] -> Scatter[] }
-    auto PHIReadScatter = getScatterFor(S->getPHIRead(SAI));
-
-    // { DomainPHIRead[] -> Scatter[] }
-    auto BeforeRead = beforeScatter(PHIReadScatter, true);
-
-    // { Scatter[] }
-    auto WriteTimes = singleton(
-        give(isl_union_map_range(PHIWriteScatter.copy())), ScatterSpace);
-
-    // { DomainPHIRead[] -> Scatter[] }
-    auto PHIWriteTimes =
-        give(isl_map_intersect_range(BeforeRead.take(), WriteTimes.take()));
-    auto LastPerPHIWrites = give(isl_map_lexmax(PHIWriteTimes.take()));
-
-    // { DomainPHIRead[] -> DomainPHIWrite[] }
-    auto Result = give(isl_union_map_apply_range(
-        isl_union_map_from_map(LastPerPHIWrites.take()),
-        isl_union_map_reverse(PHIWriteScatter.take())));
-    assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true);
-    assert(isl_union_map_is_injective(Result.keep()) == isl_bool_true);
-    return Result;
-  }
-
   /// Try to map a MemoryKind::Value to a given array element.
   ///
   /// @param SAI Representation of the scalar's memory to map.
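A minimal standalone sketch, not part of the patch, of what applying #NormalizeMap does, using the map from the NormalizeMap comment in ZoneAlgo.h above. It assumes the isl-noexceptions C++ bindings that the patch itself uses; the tuple names (Stmt, Use, phi, val1, val2) and the hypothetical Use statement are illustrative placeholders:

    #include "isl/isl-noexceptions.h"

    int main() {
      isl_ctx *RawCtx = isl_ctx_alloc();
      {
        isl::ctx Ctx(RawCtx);

        // { PHIValInst[] -> IncomingValInst[] }, as in the comment above.
        isl::union_map NormalizeMap(
            Ctx, "{ [Stmt[i] -> phi[]] -> [Stmt[0] -> val1[]] : i = 0; "
                 "[Stmt[i] -> phi[]] -> [Stmt[i] -> val2[]] : i > 0 }");

        // A use of %phi in some statement Use, written as a ValInst.
        isl::union_map UseValInst(Ctx, "{ Use[i] -> [Stmt[i] -> phi[]] }");

        // Normalization replaces the PHI by its incoming values:
        // { Use[0] -> [Stmt[0] -> val1[]];
        //   Use[i] -> [Stmt[i] -> val2[]] : i > 0 }
        isl::union_map Normalized = UseValInst.apply_range(NormalizeMap);
        (void)Normalized;
      }
      isl_ctx_free(RawCtx);
      return 0;
    }

Because NormalizeMap is single-valued on the PHI instances it covers, each instance of Use still maps to exactly one incoming value.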
Index: lib/Transform/ForwardOpTree.cpp
===================================================================
--- lib/Transform/ForwardOpTree.cpp
+++ lib/Transform/ForwardOpTree.cpp
@@ -51,6 +51,11 @@
                  cl::desc("Analyze array contents for load forwarding"),
                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+    NormalizePHIs("polly-optree-normalize-phi",
+                  cl::desc("Replace PHIs by their incoming values"),
+                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);
+
 static cl::opt<unsigned long>
     MaxOps("polly-optree-max-ops",
            cl::desc("Maximum number of ISL operations to invest for known "
@@ -280,16 +285,19 @@
     IslQuotaScope QuotaScope = MaxOpGuard.enter();
 
     computeCommon();
+    if (NormalizePHIs)
+      computeNormalizedPHIs();
     Known = computeKnown(true, true);
 
     // Preexisting ValInsts use the known content analysis of themselves.
     Translator = makeIdentityMap(Known.range(), false);
   }
 
-  if (!Known || !Translator) {
+  if (!Known || !Translator || !NormalizeMap) {
     assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
     Known = nullptr;
     Translator = nullptr;
+    NormalizeMap = nullptr;
    DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
    return false;
  }
@@ -462,6 +470,7 @@
 
   // { DomainDef[] -> ValInst[] }
   isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
+  assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized");
 
   // { DomainTarget[] -> ValInst[] }
   isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
@@ -578,7 +587,7 @@
   }
 
   // { DomainDef[] -> ValInst[] }
-  isl::union_map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
+  isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
 
   // { DomainTarget[] -> ValInst[] }
   isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
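A minimal standalone sketch, not part of the patch, of the isl operations-quota behavior that the rollback above relies on, using only the plain isl C API. The tiny budget of 1 and the map string are arbitrary illustrations, not values from the patch:

    #include <isl/ctx.h>
    #include <isl/options.h>
    #include <isl/union_map.h>

    int main() {
      isl_ctx *Ctx = isl_ctx_alloc();
      // Continue instead of aborting when an error (here: the quota) is hit.
      isl_options_set_on_error(Ctx, ISL_ON_ERROR_CONTINUE);
      isl_ctx_set_max_operations(Ctx, 1); // Deliberately tiny budget.

      isl_union_map *UMap = isl_union_map_read_from_str(Ctx, "{ A[i] -> B[i] }");
      isl_union_map *Result = isl_union_map_coalesce(UMap);

      // Once the budget is exhausted, results are null and the last error is
      // isl_error_quota; everything computed under the quota must then be
      // discarded, which is why Known, Translator, and NormalizeMap are all
      // reset together above.
      if (!Result && isl_ctx_last_error(Ctx) == isl_error_quota) {
        // Roll back here.
      }

      isl_union_map_free(Result);
      isl_ctx_set_max_operations(Ctx, 0); // Remove the limit again.
      isl_ctx_free(Ctx);
      return 0;
    }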
Index: lib/Transform/ZoneAlgo.cpp
===================================================================
--- lib/Transform/ZoneAlgo.cpp
+++ lib/Transform/ZoneAlgo.cpp
@@ -160,6 +160,9 @@
 STATISTIC(NumIncompatibleArrays, "Number of not zone-analyzable arrays");
 STATISTIC(NumCompatibleArrays, "Number of zone-analyzable arrays");
+STATISTIC(NumRecursivePHIs, "Number of recursive PHIs");
+STATISTIC(NumNormalizablePHIs, "Number of normalizable PHIs");
+STATISTIC(NumPHINormalization, "Number of executed PHI normalizations");
 
 using namespace polly;
 using namespace llvm;
@@ -409,7 +412,8 @@
   }
 }
 
-isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
+isl::union_map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA,
+                                              isl::map AccRel) {
   if (!MA->isMustWrite())
     return {};
 
@@ -423,7 +427,7 @@
   if (AccVal &&
       AccVal->getType() == MA->getLatestScopArrayInfo()->getElementType() &&
       AccRel.is_single_valued().is_true())
-    return makeValInst(AccVal, Stmt, L);
+    return makeNormalizedValInst(AccVal, Stmt, L);
 
   // memset(_, '0', ) is equivalent to writing the null value to all touched
   // elements. isMustWrite() ensures that all of an element's bytes are
@@ -433,7 +437,7 @@
   Type *Ty = MA->getLatestScopArrayInfo()->getElementType();
   if (WrittenConstant && WrittenConstant->isZeroValue()) {
     Constant *Zero = Constant::getNullValue(Ty);
-    return makeValInst(Zero, Stmt, L);
+    return makeNormalizedValInst(Zero, Stmt, L);
   }
 }
@@ -455,7 +459,7 @@
   AllMayWrites = AllMayWrites.add_map(AccRel);
 
   // { Domain[] -> ValInst[] }
-  isl::map WriteValInstance = getWrittenValue(MA, AccRel);
+  isl::union_map WriteValInstance = getWrittenValue(MA, AccRel);
   if (!WriteValInstance)
     WriteValInstance = makeUnknownForDomain(Stmt);
 
@@ -463,9 +467,90 @@
   isl::map IncludeElement = AccRel.domain_map().curry();
 
   // { [Element[] -> DomainWrite[]] -> ValInst[] }
-  isl::map EltWriteValInst = WriteValInstance.apply_domain(IncludeElement);
+  isl::union_map EltWriteValInst =
+      WriteValInstance.apply_domain(IncludeElement);
 
-  AllWriteValInst = AllWriteValInst.add_map(EltWriteValInst);
+  AllWriteValInst = AllWriteValInst.unite(EltWriteValInst);
+}
+
+/// Return whether @p PHI refers (also transitively through other PHIs) to
+/// itself.
+///
+///   loop:
+///     %phi1 = phi [0, %preheader], [%phi1, %loop]
+///     br i1 %c, label %loop, label %exit
+///
+///   exit:
+///     %phi2 = phi [%phi1, %bb]
+///
+/// In this example, %phi1 is recursive, but %phi2 is not.
+static bool isRecursivePHI(const PHINode *PHI) {
+  SmallVector<const PHINode *, 8> Worklist;
+  SmallPtrSet<const PHINode *, 8> Visited;
+  Worklist.push_back(PHI);
+
+  while (!Worklist.empty()) {
+    const PHINode *Cur = Worklist.pop_back_val();
+
+    if (Visited.count(Cur))
+      continue;
+    Visited.insert(Cur);
+
+    for (const Use &Incoming : Cur->incoming_values()) {
+      Value *IncomingVal = Incoming.get();
+      auto *IncomingPHI = dyn_cast<PHINode>(IncomingVal);
+      if (!IncomingPHI)
+        continue;
+
+      if (IncomingPHI == PHI)
+        return true;
+      Worklist.push_back(IncomingPHI);
+    }
+  }
+  return false;
+}
+
+isl::union_map ZoneAlgorithm::computePerPHI(const ScopArrayInfo *SAI) {
+  // TODO: If the PHI has an incoming block from before the SCoP, it is not
+  // represented in any ScopStmt.
+
+  auto *PHI = cast<PHINode>(SAI->getBasePtr());
+  auto It = PerPHIMaps.find(PHI);
+  if (It != PerPHIMaps.end())
+    return It->second;
+
+  assert(SAI->isPHIKind());
+
+  // { DomainPHIWrite[] -> Scatter[] }
+  isl::union_map PHIWriteScatter = makeEmptyUnionMap();
+
+  // Collect all incoming block timepoints.
+  for (MemoryAccess *MA : S->getPHIIncomings(SAI)) {
+    isl::map Scatter = getScatterFor(MA);
+    PHIWriteScatter = PHIWriteScatter.add_map(Scatter);
+  }
+
+  // { DomainPHIRead[] -> Scatter[] }
+  isl::map PHIReadScatter = getScatterFor(S->getPHIRead(SAI));
+
+  // { DomainPHIRead[] -> Scatter[] }
+  isl::map BeforeRead = beforeScatter(PHIReadScatter, true);
+
+  // { Scatter[] }
+  isl::set WriteTimes = singleton(PHIWriteScatter.range(), ScatterSpace);
+
+  // { DomainPHIRead[] -> Scatter[] }
+  isl::map PHIWriteTimes = BeforeRead.intersect_range(WriteTimes);
+  isl::map LastPerPHIWrites = PHIWriteTimes.lexmax();
+
+  // { DomainPHIRead[] -> DomainPHIWrite[] }
+  isl::union_map Result =
+      isl::union_map(LastPerPHIWrites).apply_range(PHIWriteScatter.reverse());
+  assert(!Result.is_single_valued().is_false());
+  assert(!Result.is_injective().is_false());
+
+  PerPHIMaps.insert({PHI, Result});
+  return Result;
 }
 
 isl::union_set ZoneAlgorithm::makeEmptyUnionSet() const {
@@ -681,6 +766,57 @@
   llvm_unreachable("Unhandled use type");
 }
 
+/// Remove all computed PHIs from @p Input and replace them by their
+/// incoming values.
+///
+/// @param Input        { [] -> ValInst[] }
+/// @param ComputedPHIs Set of PHIs that are replaced. Their ValInsts must
+///                     appear on the LHS of @p NormalizeMap.
+/// @param NormalizeMap { ValInst[] -> ValInst[] }
+static isl::union_map normalizeValInst(isl::union_map Input,
+                                       const DenseSet<PHINode *> &ComputedPHIs,
+                                       isl::union_map NormalizeMap) {
+  isl::union_map Result = isl::union_map::empty(Input.get_space());
+  Input.foreach_map(
+      [&Result, &ComputedPHIs, &NormalizeMap](isl::map Map) -> isl::stat {
+        isl::space Space = Map.get_space();
+        isl::space RangeSpace = Space.range();
+
+        // Instructions within the SCoP are always wrapped. Non-wrapped tuples
+        // are therefore invariant in the SCoP and don't need normalization.
+        if (!RangeSpace.is_wrapping()) {
+          Result = Result.add_map(Map);
+          return isl::stat::ok;
+        }
+
+        auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(
+            RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user()));
+
+        // If no normalization is necessary, then the ValInst stands for
+        // itself.
+        if (!ComputedPHIs.count(PHI)) {
+          Result = Result.add_map(Map);
+          return isl::stat::ok;
+        }
+
+        // Otherwise, apply the normalization.
+        isl::union_map Mapped = isl::union_map(Map).apply_range(NormalizeMap);
+        Result = Result.unite(Mapped);
+        NumPHINormalization++;
+        return isl::stat::ok;
+      });
+  return Result;
+}
+
+isl::union_map ZoneAlgorithm::makeNormalizedValInst(llvm::Value *Val,
+                                                    ScopStmt *UserStmt,
+                                                    llvm::Loop *Scope,
+                                                    bool IsCertain) {
+  isl::map ValInst = makeValInst(Val, UserStmt, Scope, IsCertain);
+  isl::union_map Normalized =
+      normalizeValInst(ValInst, ComputedPHIs, NormalizeMap);
+  return Normalized;
+}
+
 bool ZoneAlgorithm::isCompatibleAccess(MemoryAccess *MA) {
   if (!MA)
     return false;
@@ -690,6 +826,64 @@
   return isa<StoreInst>(AccInst) || isa<LoadInst>(AccInst);
 }
 
+bool ZoneAlgorithm::isNormalizable(MemoryAccess *MA) {
+  assert(MA->isRead());
+
+  // Exclude ExitPHIs; we assume that a normalizable PHI has a READ
+  // MemoryAccess.
+  if (!MA->isOriginalPHIKind())
+    return false;
+
+  // Exclude recursive PHIs; normalizing them would require a transitive
+  // closure.
+  auto *PHI = cast<PHINode>(MA->getAccessInstruction());
+  if (RecursivePHIs.count(PHI))
+    return false;
+
+  // Ensure that each incoming value can be represented by a ValInst[]. An
+  // incoming value written by a statement that is associated with multiple
+  // incoming values is represented by the PHI itself, a case that
+  // normalization (in particular isNormalized()) does not handle yet.
+  const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo();
+  auto Incomings = S->getPHIIncomings(SAI);
+  for (MemoryAccess *Incoming : Incomings) {
+    if (Incoming->getIncoming().size() != 1)
+      return false;
+  }
+
+  return true;
+}
+
+bool ZoneAlgorithm::isNormalized(isl::map Map) {
+  isl::space Space = Map.get_space();
+  isl::space RangeSpace = Space.range();
+
+  if (!RangeSpace.is_wrapping())
+    return true;
+
+  auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(
+      RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user()));
+  if (!PHI)
+    return true;
+
+  auto *IncomingStmt = static_cast<ScopStmt *>(
+      RangeSpace.unwrap().get_tuple_id(isl::dim::in).get_user());
+  MemoryAccess *PHIRead = IncomingStmt->lookupPHIReadOf(PHI);
+  if (!isNormalizable(PHIRead))
+    return true;
+
+  return false;
+}
+
+bool ZoneAlgorithm::isNormalized(isl::union_map UMap) {
+  auto Result = UMap.foreach_map([this](isl::map Map) -> isl::stat {
+    if (isNormalized(Map))
+      return isl::stat::ok;
+    return isl::stat::error;
+  });
+  return Result == isl::stat::ok;
+}
+
 void ZoneAlgorithm::computeCommon() {
   AllReads = makeEmptyUnionMap();
   AllMayWrites = makeEmptyUnionMap();
@@ -697,8 +891,13 @@
   AllWriteValInst = makeEmptyUnionMap();
   AllReadValInst = makeEmptyUnionMap();
 
-  for (auto &Stmt : *S) {
-    for (auto *MA : Stmt) {
+  // Default to empty, i.e. no normalization/replacement is taking place.
+  // Call computeNormalizedPHIs() to initialize.
+  NormalizeMap = makeEmptyUnionMap();
+  ComputedPHIs.clear();
+
+  for (ScopStmt &Stmt : *S) {
+    for (MemoryAccess *MA : Stmt) {
       if (!MA->isLatestArrayKind())
         continue;
 
@@ -720,6 +919,97 @@
   simplify(WriteReachDefZone);
 }
 
+void ZoneAlgorithm::computeNormalizedPHIs() {
+  // Determine which PHIs can reference themselves. They are excluded from
+  // normalization to avoid problems with transitive closures.
+  for (ScopStmt &Stmt : *S) {
+    for (MemoryAccess *MA : Stmt) {
+      if (!MA->isPHIKind())
+        continue;
+      if (!MA->isRead())
+        continue;
+
+      // TODO: Can be more efficient since isRecursivePHI can theoretically
+      // determine recursiveness for multiple values and/or cache results.
+      auto *PHI = cast<PHINode>(MA->getAccessInstruction());
+      if (isRecursivePHI(PHI)) {
+        NumRecursivePHIs++;
+        RecursivePHIs.insert(PHI);
+      }
+    }
+  }
+
+  // { PHIValInst[] -> IncomingValInst[] }
+  isl::union_map AllPHIMaps = makeEmptyUnionMap();
+
+  // Discover new PHIs and try to normalize them.
+  DenseSet<PHINode *> AllPHIs;
+  for (ScopStmt &Stmt : *S) {
+    for (MemoryAccess *MA : Stmt) {
+      if (!MA->isOriginalPHIKind())
+        continue;
+      if (!MA->isRead())
+        continue;
+      if (!isNormalizable(MA))
+        continue;
+
+      auto *PHI = cast<PHINode>(MA->getAccessInstruction());
+      const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo();
+
+      // { PHIDomain[] -> PHIValInst[] }
+      isl::map PHIValInst = makeValInst(PHI, &Stmt, Stmt.getSurroundingLoop());
+
+      // { IncomingDomain[] -> IncomingValInst[] }
+      isl::union_map IncomingValInsts = makeEmptyUnionMap();
+
+      // Get all incoming values.
+      for (MemoryAccess *MA : S->getPHIIncomings(SAI)) {
+        ScopStmt *IncomingStmt = MA->getStatement();
+
+        auto Incoming = MA->getIncoming();
+        assert(Incoming.size() == 1 && "The incoming value must be "
+                                       "representable by something other "
+                                       "than the PHI itself");
+        Value *IncomingVal = Incoming[0].second;
+
+        // { IncomingDomain[] -> IncomingValInst[] }
+        isl::map IncomingValInst = makeValInst(
+            IncomingVal, IncomingStmt, IncomingStmt->getSurroundingLoop());
+
+        IncomingValInsts = IncomingValInsts.add_map(IncomingValInst);
+      }
+
+      // Determine which instance of the PHI statement corresponds to which
+      // incoming value.
+      // { PHIDomain[] -> IncomingDomain[] }
+      isl::union_map PerPHI = computePerPHI(SAI);
+
+      // { PHIValInst[] -> IncomingValInst[] }
+      isl::union_map PHIMap =
+          PerPHI.apply_domain(PHIValInst).apply_range(IncomingValInsts);
+      assert(!PHIMap.is_single_valued().is_false());
+
+      // Resolve transitiveness: The incoming value of the newly discovered
+      // PHI may reference a previously normalized PHI. At the same time,
+      // already normalized PHIs might be normalized to the new PHI. In the
+      // end, no computed PHI may appear on the right-hand side of the
+      // normalization map.
+      PHIMap = normalizeValInst(PHIMap, AllPHIs, AllPHIMaps);
+      AllPHIs.insert(PHI);
+      AllPHIMaps = normalizeValInst(AllPHIMaps, AllPHIs, PHIMap);
+
+      AllPHIMaps = AllPHIMaps.unite(PHIMap);
+      NumNormalizablePHIs++;
+    }
+  }
+  simplify(AllPHIMaps);
+
+  // Apply the normalization.
+  ComputedPHIs = AllPHIs;
+  NormalizeMap = AllPHIMaps;
+
+  assert(!NormalizeMap || isNormalized(NormalizeMap));
+}
+
 void ZoneAlgorithm::printAccesses(llvm::raw_ostream &OS, int Indent) const {
   OS.indent(Indent) << "After accesses {\n";
   for (auto &Stmt : *S) {
@@ -742,6 +1032,7 @@
 }
 
 isl::union_map ZoneAlgorithm::computeKnownFromLoad() const {
+  // { Element[] }
   isl::union_set AllAccessedElts = AllReads.range().unite(AllWrites.range());
@@ -755,6 +1046,7 @@
   // { Element[] -> Scatter[] }
   isl::union_set NonReachDef =
       EltZoneUniverse.wrap().subtract(WriteReachDefZone.domain());
+  simplify(NonReachDef);
 
   // { [Element[] -> Zone[]] -> ReachDefId[] }
   isl::union_map DefZone =
@@ -786,9 +1078,14 @@
       DefZoneEltDefId.apply_domain(ScatterKnown).reverse();
 
   // { [Element[] -> Zone[]] -> ValInst[] }
-  return DefZoneEltDefId.apply_range(DefidKnown);
+  auto Result = DefZoneEltDefId.apply_range(DefidKnown);
+
+  // TODO: Loads can be normalized to other ValInsts.
+
+  return Result;
 }
 
+// { [Element[] -> Zone[]] -> ValInst[] }
 isl::union_map ZoneAlgorithm::computeKnown(bool FromWrite,
                                            bool FromRead) const {
   isl::union_map Result = makeEmptyUnionMap();
@@ -799,6 +1096,6 @@
   if (FromRead)
     Result = Result.unite(computeKnownFromLoad());
 
-  simplify(Result);
+  // simplify(Result);
   return Result;
 }
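A minimal standalone sketch, not part of the patch, of the transitiveness resolution performed in computeNormalizedPHIs() above: when a newly discovered PHI's incoming value is itself an already-computed PHI, applying the existing normalization removes it from the right-hand side. The statement and value names (S0, S1, S2, phi1, phi2, val) are illustrative; the real code uses normalizeValInst(), which additionally preserves maps whose range is not a computed PHI:

    #include "isl/isl-noexceptions.h"

    int main() {
      isl_ctx *RawCtx = isl_ctx_alloc();
      {
        isl::ctx Ctx(RawCtx);

        // Already computed: %phi1 normalizes to %val.
        isl::union_map Phi1Map(Ctx, "{ [S1[i] -> phi1[]] -> [S0[i] -> val[]] }");

        // Newly discovered: %phi2's incoming value is %phi1.
        isl::union_map Phi2Map(Ctx, "{ [S2[i] -> phi2[]] -> [S1[i] -> phi1[]] }");

        // Composing removes phi1 from the right-hand side:
        // { [S2[i] -> phi2[]] -> [S0[i] -> val[]] }
        Phi2Map = Phi2Map.apply_range(Phi1Map);

        // The union of both maps is then a valid NormalizeMap: no computed
        // PHI appears on any right-hand side. The real code also
        // re-normalizes the existing maps with the new PHI's map, in case
        // an earlier PHI's incoming value referenced the new one.
        isl::union_map NormalizeMap = Phi1Map.unite(Phi2Map);
        (void)NormalizeMap;
      }
      isl_ctx_free(RawCtx);
      return 0;
    }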
Index: test/ForwardOpTree/atax.ll
===================================================================
--- /dev/null
+++ test/ForwardOpTree/atax.ll
@@ -0,0 +1,154 @@
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define internal fastcc void @kernel_atax([2100 x double]* nocapture readonly %A, double* nocapture readonly %x, double* nocapture %y, double* nocapture %tmp) unnamed_addr #0 {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %y15 = bitcast double* %y to i8*
+  call void @llvm.memset.p0i8.i64(i8* %y15, i8 0, i64 16800, i32 8, i1 false)
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc40, %entry.split
+  %indvars.iv8 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next9, %for.inc40 ]
+  %arrayidx5 = getelementptr inbounds double, double* %tmp, i64 %indvars.iv8
+  store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !6
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.body8, %for.body3
+  %0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
+  %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
+  %arrayidx14 = getelementptr inbounds [2100 x double], [2100 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv
+  %1 = load double, double* %arrayidx14, align 8, !tbaa !6
+  %arrayidx16 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+  %2 = load double, double* %arrayidx16, align 8, !tbaa !6
+  %mul = fmul double %1, %2
+  %add = fadd double %0, %mul
+  store double %add, double* %arrayidx5, align 8, !tbaa !6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 2
+  br i1 %exitcond, label %for.end21, label %for.body8
+
+for.end21:                                        ; preds = %for.body8
+  br label %for.body24
+
+for.body24:                                       ; preds = %for.body24.for.body24_crit_edge, %for.end21
+  %3 = phi double [ %add, %for.end21 ], [ %.pre, %for.body24.for.body24_crit_edge ]
+  %indvars.iv5 = phi i64 [ 0, %for.end21 ], [ %indvars.iv.next6, %for.body24.for.body24_crit_edge ]
+  %arrayidx26 = getelementptr inbounds double, double* %y, i64 %indvars.iv5
+  %4 = load double, double* %arrayidx26, align 8, !tbaa !6
+  %arrayidx30 = getelementptr inbounds [2100 x double], [2100 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv5
+  %5 = load double, double* %arrayidx30, align 8, !tbaa !6
+  %mul33 = fmul double %5, %3
+  %add34 = fadd double %4, %mul33
+  store double %add34, double* %arrayidx26, align 8, !tbaa !6
+  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+  %exitcond7 = icmp eq i64 %indvars.iv.next6, 2
+  br i1 %exitcond7, label %for.inc40, label %for.body24.for.body24_crit_edge
+
+for.body24.for.body24_crit_edge:                  ; preds = %for.body24
+  %.pre = load double, double* %arrayidx5, align 8, !tbaa !6
+  br label %for.body24
+
+for.inc40:                                        ; preds = %for.body24
+  %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+  %exitcond10 = icmp eq i64 %indvars.iv.next9, 2
+  br i1 %exitcond10, label %for.end42, label %for.body3
+
+for.end42:                                        ; preds = %for.inc40
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
+
+attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 6.0.0 (trunk 312565) (llvm/trunk 312564)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
+!7 = !{!"double", !4, i64 0}
+
+
+; CHECK: Statistics {
+; CHECK:     Operand trees forwarded: 2
+; CHECK:     Statements with forwarded operand trees: 2
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT:     Stmt_for_body3
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body3[i0] -> MemRef_tmp[i0] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body3[i0] -> MemRef1__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !2
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_for_body8
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body8[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body8[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT:            new: { Stmt_for_body8[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body8[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body8[i0, i1] -> MemRef_x[i1] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body8[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body8[i0, i1] -> MemRef_add[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
+; CHECK-NEXT:               %1 = load double, double* %arrayidx14, align 8, !tbaa !2
+; CHECK-NEXT:               %2 = load double, double* %arrayidx16, align 8, !tbaa !2
+; CHECK-NEXT:               %mul = fmul double %1, %2
+; CHECK-NEXT:               %add = fadd double %0, %mul
+; CHECK-NEXT:               store double %add, double* %arrayidx5, align 8, !tbaa !2
+; CHECK-NEXT:               %exitcond = icmp eq i64 %indvars.iv.next, 2
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_for_end21
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_end21[i0] -> MemRef_add[] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_end21[i0] -> MemRef5__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_for_body24
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body24[i0, i1] -> MemRef5__phi[] };
+; CHECK-NEXT:            new: { Stmt_for_body24[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body24[i0, i1] -> MemRef_y[i1] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body24[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body24[i0, i1] -> MemRef_y[i1] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %3 = phi double [ %add, %for.end21 ], [ %.pre, %for.body24.for.body24_crit_edge ]
+; CHECK-NEXT:               %4 = load double, double* %arrayidx26, align 8, !tbaa !2
+; CHECK-NEXT:               %5 = load double, double* %arrayidx30, align 8, !tbaa !2
+; CHECK-NEXT:               %mul33 = fmul double %5, %3
+; CHECK-NEXT:               %add34 = fadd double %4, %mul33
+; CHECK-NEXT:               store double %add34, double* %arrayidx26, align 8, !tbaa !2
+; CHECK-NEXT:               %exitcond7 = icmp eq i64 %indvars.iv.next6, 2
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_for_body24_for_body24_crit_edge
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body24_for_body24_crit_edge[i0, i1] -> MemRef5__phi[] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body24_for_body24_crit_edge[i0, i1] -> MemRef_tmp[i0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %.pre = load double, double* %arrayidx5, align 8, !tbaa !2
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: test/ForwardOpTree/forward_phi_load.ll
===================================================================
--- /dev/null
+++ test/ForwardOpTree/forward_phi_load.ll
@@ -0,0 +1,82 @@
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Rematerialize a load.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+;   double val = B[j];
+;
+; bodyB:
+;   double phi = val;
+;
+; bodyC:
+;   A[j] = phi;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+bodyA:
+  %B_idx = getelementptr inbounds double, double* %B, i32 %j
+  %val = load double, double* %B_idx
+  br label %bodyB
+
+bodyB:
+  %phi = phi double [%val, %bodyA]
+  br label %bodyC
+
+bodyC:
+  %A_idx = getelementptr inbounds double, double* %A, i32 %j
+  store double %phi, double* %A_idx
+  br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Reloads: 2
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %val = load double, double* %B_idx
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_bodyB
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyB[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyB[i0] -> MemRef_phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %phi = phi double [ %val, %bodyA ]
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_bodyC
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyC[i0] -> MemRef_phi[] };
+; CHECK-NEXT:            new: [n] -> { Stmt_bodyC[i0] -> MemRef_B[i0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               store double %phi, double* %A_idx
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: test/ForwardOpTree/jacobi-1d.ll
===================================================================
--- /dev/null
+++ test/ForwardOpTree/jacobi-1d.ll
@@ -0,0 +1,108 @@
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define internal fastcc void @kernel_jacobi_1d(double* noalias nocapture %A, double* noalias nocapture %B) unnamed_addr #0 {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %arrayidx6.phi.trans.insert = getelementptr inbounds double, double* %A, i64 1
+  %arrayidx21.phi.trans.insert = getelementptr inbounds double, double* %B, i64 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc33, %entry.split
+  %t.03 = phi i32 [ 0, %entry.split ], [ %inc34, %for.inc33 ]
+  %.pre = load double, double* %A, align 8, !tbaa !6
+  %.pre10 = load double, double* %arrayidx6.phi.trans.insert, align 8, !tbaa !6
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %0 = phi double [ %.pre10, %for.body ], [ %2, %for.body3 ]
+  %1 = phi double [ %.pre, %for.body ], [ %0, %for.body3 ]
+  %indvars.iv = phi i64 [ 1, %for.body ], [ %indvars.iv.next, %for.body3 ]
+  %add = fadd double %1, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %arrayidx9 = getelementptr inbounds double, double* %A, i64 %indvars.iv.next
+  %2 = load double, double* %arrayidx9, align 8, !tbaa !6
+  %add10 = fadd double %add, %2
+  %mul = fmul double %add10, 3.333300e-01
+  %arrayidx12 = getelementptr inbounds double, double* %B, i64 %indvars.iv
+  store double %mul, double* %arrayidx12, align 8, !tbaa !6
+  %exitcond = icmp eq i64 %indvars.iv.next, 3
+  br i1 %exitcond, label %for.end, label %for.body3
+
+for.end:                                          ; preds = %for.body3
+  %.pre11 = load double, double* %B, align 8, !tbaa !6
+  %.pre12 = load double, double* %arrayidx21.phi.trans.insert, align 8, !tbaa !6
+  br label %for.inc33
+
+for.inc33:                                        ; preds = %for.body16
+  %inc34 = add nuw nsw i32 %t.03, 1
+  %exitcond9 = icmp eq i32 %inc34, 2
+  br i1 %exitcond9, label %for.end35, label %for.body
+
+for.end35:                                        ; preds = %for.inc33
+  ret void
+}
+
+attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 6.0.0 (llvm/trunk 312874)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"double", !4, i64 0}
+
+
+; CHECK: Statistics {
+; CHECK:     Operand trees forwarded: 2
+; CHECK:     Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK-NEXT: After statements {
+; CHECK-NEXT:     Stmt_for_body
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body[i0] -> MemRef_A[1] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body[i0] -> MemRef1__phi[] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body[i0] -> MemRef2__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %.pre = load double, double* %A, align 8, !tbaa !2
+; CHECK-NEXT:               %.pre10 = load double, double* %arrayidx6.phi.trans.insert, align 8, !tbaa !2
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_for_body3
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body3[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body3[i0, i1] -> MemRef1__phi[] };
+; CHECK-NEXT:            new: { Stmt_for_body3[i0, i1] -> MemRef_A[1 + i1] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body3[i0, i1] -> MemRef2__phi[] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             { Stmt_for_body3[i0, i1] -> MemRef2__phi[] };
+; CHECK-NEXT:            new: { Stmt_for_body3[i0, i1] -> MemRef_A[i1] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body3[i0, i1] -> MemRef_A[2 + i1] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             { Stmt_for_body3[i0, i1] -> MemRef_B[1 + i1] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %0 = phi double [ %.pre10, %for.body ], [ %2, %for.body3 ]
+; CHECK-NEXT:               %1 = phi double [ %.pre, %for.body ], [ %0, %for.body3 ]
+; CHECK-NEXT:               %add = fadd double %1, %0
+; CHECK-NEXT:               %2 = load double, double* %arrayidx9, align 8, !tbaa !2
+; CHECK-NEXT:               %add10 = fadd double %add, %2
+; CHECK-NEXT:               %mul = fmul double %add10, 3.333300e-01
+; CHECK-NEXT:               store double %mul, double* %arrayidx12, align 8, !tbaa !2
+; CHECK-NEXT:               %exitcond = icmp eq i64 %indvars.iv.next, 3
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: test/ForwardOpTree/noforward_selfrefphi.ll
===================================================================
--- /dev/null
+++ test/ForwardOpTree/noforward_selfrefphi.ll
@@ -0,0 +1,49 @@
+; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Contains a self-referencing PHINode that would require a
+; transitive closure to handle.
+;
+; for (int j = 0; j < n; j += 1) {
+;   double phi = 0.0;
+;   for (int i = 0; i < m; i += 1)
+;     phi = phi;
+;   A[j] = phi;
+; }
+;
+define void @func(i32 %n, i32 %m, double* noalias nonnull %A) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %for.preheader, label %exit
+
+for.preheader:
+  br label %for.inner
+
+for.inner:
+  %i = phi i32 [0, %for.preheader], [%i.inc, %for.inner]
+  %phi = phi double [0.0, %for.preheader], [%phi, %for.inner]
+  %i.inc = add nuw nsw i32 %i, 1
+  %i.cmp = icmp slt i32 %i.inc, %m
+  br i1 %i.cmp, label %for.inner, label %for.exit
+
+for.exit:
+  %A_idx = getelementptr inbounds double, double* %A, i32 %j
+  store double %phi, double* %A_idx
+  br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything
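A minimal standalone sketch, not part of the patch, of why RecursivePHIs are excluded, as exercised by the test above: normalizing a self-referencing PHI would need a transitive closure, and isl only promises an overapproximation, reporting through an out-parameter whether the result happens to be exact. The step relation below is an illustrative stand-in for the %phi self-reference, and the flag type may be plain int in older isl releases:

    #include <isl/ctx.h>
    #include <isl/union_map.h>

    int main() {
      isl_ctx *Ctx = isl_ctx_alloc();

      // Stand-in for %phi's self-reference: each PHI instance takes its
      // value from the previous iteration's instance.
      isl_union_map *Step = isl_union_map_read_from_str(
          Ctx, "{ [S[i] -> phi[]] -> [S[i - 1] -> phi[]] : i > 0 }");

      isl_bool Exact = isl_bool_false;
      isl_union_map *Closure = isl_union_map_transitive_closure(Step, &Exact);
      // ZoneAlgorithm would have to insist on Exact being true; since isl
      // cannot guarantee that in general, recursive PHIs are simply not
      // normalized, and ForwardOpTree leaves them alone.

      isl_union_map_free(Closure);
      isl_ctx_free(Ctx);
      return 0;
    }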